From 1ca4677b2876f0b68bde843b961b14e69ffce342 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 7 Nov 2025 21:36:28 +0000 Subject: [PATCH 01/91] smp server: messaging services (#1565) * smp server: refactor message delivery to always respond SOK to subscriptions * refactor ntf subscribe * cancel subscription thread and reduce service subscription count when queue is deleted * subscribe rcv service, deliver sent messages to subscribed service * subscribe rcv service to messages (TODO delivery on subscription) * WIP * efficient initial delivery of messages to subscribed service * test: delivery to client with service certificate * test: upgrade/downgrade to/from service subscriptions * remove service association from agent API, add per-user flag to use the service * agent client (WIP) * service certificates in the client * rfc about drift detection, and SALL to mark end of message delivery * fix test * fix test * add function for postgresql message storage * update migration --- rfcs/2025-08-20-service-subs-drift.md | 101 ++++++++ simplexmq.cabal | 2 + src/Simplex/Messaging/Agent.hs | 150 +++++++----- src/Simplex/Messaging/Agent/Client.hs | 66 +++++- src/Simplex/Messaging/Agent/Env/SQLite.hs | 1 + src/Simplex/Messaging/Agent/Protocol.hs | 18 +- src/Simplex/Messaging/Agent/Store.hs | 6 +- .../Messaging/Agent/Store/AgentStore.hs | 105 ++++++++- .../Agent/Store/SQLite/Migrations/App.hs | 4 +- .../Migrations/M20250517_service_certs.hs | 40 ---- .../Migrations/M20251020_service_certs.hs | 40 ++++ .../Store/SQLite/Migrations/agent_schema.sql | 17 ++ src/Simplex/Messaging/Client.hs | 4 +- src/Simplex/Messaging/Client/Agent.hs | 6 +- src/Simplex/Messaging/Crypto.hs | 18 +- src/Simplex/Messaging/Protocol.hs | 21 +- src/Simplex/Messaging/Server.hs | 218 ++++++++++++------ .../Messaging/Server/MsgStore/Journal.hs | 20 ++ .../Messaging/Server/MsgStore/Postgres.hs | 33 +++ src/Simplex/Messaging/Server/MsgStore/STM.hs | 5 + .../Messaging/Server/MsgStore/Types.hs | 1 + 
.../Messaging/Server/QueueStore/Postgres.hs | 17 +- .../Messaging/Server/QueueStore/STM.hs | 13 +- src/Simplex/Messaging/Transport.hs | 23 +- tests/AgentTests/FunctionalAPITests.hs | 70 +++--- tests/AgentTests/SQLiteTests.hs | 4 +- tests/AgentTests/ServerChoice.hs | 1 + tests/SMPAgentClient.hs | 1 + tests/SMPClient.hs | 34 ++- tests/SMPProxyTests.hs | 18 +- tests/ServerTests.hs | 217 ++++++++++++++++- 31 files changed, 969 insertions(+), 305 deletions(-) create mode 100644 rfcs/2025-08-20-service-subs-drift.md delete mode 100644 src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20250517_service_certs.hs create mode 100644 src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs diff --git a/rfcs/2025-08-20-service-subs-drift.md b/rfcs/2025-08-20-service-subs-drift.md new file mode 100644 index 0000000000..1ca9e6018a --- /dev/null +++ b/rfcs/2025-08-20-service-subs-drift.md @@ -0,0 +1,101 @@ +# Detecting and fixing state with service subscriptions + +## Problem + +While service certificates and subscriptions hugely decrease startup time and delivery delays on server restarts, they introduce the risk of losing subscriptions in case of state drifts. They also do not provide efficient mechanism for validating that the list of subscribed queues is in sync. + +How can the state drift happen? + +There are several possibilities: +- lost broker response would make the broker consider that the queue is associated, but the client won't know it, and will have to re-associate. While in itself it is not a problem, as it'll be resolved, it would make drift detected more frequently (regardless of the detection logic used). That service certificates are used on clients with good connection would make it less likely though. +- server state restored from the backup, in case of some failure. Nothing can be done to recover lost queues, but we may restore lost service associations. +- queue blocking or removal by server operator because of policy violation. 
+- server downgrade (when it loses all service associations) with subsequent upgrade - the client would think queues are associated, while they are not, and won't receive any messages at all in this scenario. +- any other server-side error or logic error. + +In addition to the possibility of the drift, we simply need to have confidence that service subscriptions work as intended, without skipping queues. We ignored this consideration for notifications, as the tolerance to lost notifications is higher, but we can't ignore it for messages. + +## Solution + +Previously considered approach of sending NIL to all queues without messages is very expensive for traffic (most queues don't have messages), and it is also very expensive to detect and validate drift in the client because of asynchronous / concurrent events. + +We cannot read all queues into memory, and we cannot aggregate all responses in memory, and we cannot create database writes on every single service subscription to say 1m queues (a realistic number), as it simply won't work well even at the current scale. + +An approach of having an efficient way to detect drift, but load the full list of IDs when drift is detected, also won't work well, as drifts may be common, so we need both efficient way to detect there is diff and also to reconcile it. + +### Drift detection + +Both client and server would maintain the number of associated queues and the "symmetric" hash over the set of queue IDs. The requirements for this hash algorithm are: +- not cryptographically strong, to be fast. +- 128 bits to minimize collisions over the large set of millions of queues. +- symmetric - the result should not depend on ID order. +- allows fast additions and removals. + +In this way, every time association is added or removed (including queue marked as deleted), both peers would recompute this hash in the same transaction. 
+ +The client would suspend sending and processing any other commands on the server and the queues of this server until SOKS response is received from this server, to prevent drift. It can be achieved with per-server semaphores/locks in memory. UI clients need to become responsive sooner than these responses are received, but we do not use service certificates on UI clients, and chat relays may prevent operations on server queues until SOKS response is received. + +SOKS response would include both the count of associated queues (as now) and the hash over all associated queue IDs (to be added). If both count and hash match, the client will not do anything. If either does not match the client would perform full sync (see below). + +There is value in doing the same in notification server as well to detect and "fix" drifts. + +The algorithm to compute hashes can be the following. + +1. Compute hash of each queue ID using xxHash3_128 ([xxhash-ffi](https://hackage.haskell.org/package/xxhash-ffi) library). They don't need to be stored or loaded at once, initially, it can be done with streaming if it is detected on start that there is no pre-computed hash. +2. Combine hashes using XOR. XOR is both commutative and associative, so it would produce the same aggregate hash irrespective of the ID order. +3. Adding queue ID to pre-computed hash requires a single XOR with ID hash: `new_aggregate = aggregate XOR hash(queue_id)`. +4. Removing queue ID from pre-computed hash also requires the same XOR (XOR is involutory, it undoes itself): `new_aggregate = aggregate XOR hash(queue_id)`. + +These hashes need to be computed per user/server in the client and per service certificate in the server - on startup both have to validate and compute them once if necessary. + +There can be also a start-up option to recompute hash(es) to detect and fix any errors. + +This is all rather simple and would help detect drifts. 
+ +### Synchronization when drift is detected + +The assumption here is that in most cases drifts are rare, and isolated to a few IDs (e.g., this is the case with notification server). + +But the algorithm should be resilient to losing all associations, and it should not be substantially worse than simply restoring all associations or loading all IDs. + +We have `c_n` and `c_hash` for client-side count and hash of queue IDs and `s_n` and `s_hash` for server-side, which are returned in SOKS response to SUBS command. + +1. If `c_n /= s_n || c_hash /= s_hash`, the client must perform sync. + +2. If `abs(c_n - s_n) / max(c_n, s_n) > 0.5`, the client will request the full list of queues (more than half of the queues are different), and will perform diff with the queues it has. While performing the diff the client will continue to block operations with this user/server. + +3. Otherwise the client would perform some algorithm for determining the difference between queue IDs between client and server. This algorithm can be made efficient (`O(log N)`) by relying on efficient sorting of IDs and database loading of ranges, via computing and communicating hashes of ranges, and performing a binary search on ranges, with batching to optimize network traffic. + +This algorithm is similar to Merkle tree reconciliation, but it is optimized for database reading of ordered ranges, and for our 16kb block size to minimize network requests. + +The algorithm: +1. The client would request all ranges from the server. +2. The server would compute hashes for N ranges of IDs and send them to the client. Each range would include start_id, optional end_id (omitted for single-ID ranges) and XOR-hash of the range. N is determined based on the block size and the range size. +3. The client would perform the same computation for the same ranges, and compare them with the returned ranges from the server, while detecting any gaps between ranges and missing range boundaries. +4. 
If more than half of the ranges don't match, the client would request the full list. Otherwise it would repeat the same algorithm for each mismatched range and for gaps. + +It can be further optimized by merging adjacent ranges and by batching all range requests; it is quite simple. + +Once the client determines the list of missing and extra queues it can: +- create associations (via SUB) for missing queues, +- request removal of association (a new command, e.g. BUS) for extra queues on the server. + +The pseudocode for the algorithm: + +For the server to return all ranges or subranges of requested range: + +```haskell +getSubRanges :: Maybe (RecipientId, RecipientId) -> [(RecipientId, Maybe RecipientId, Hash)] +getSubRanges range_ = do + ((min_id, max_id), s_n) <- case range_ of + Nothing -> getAssociatedQueueRange -- with the certificate in the client session. + Just range -> (range,) <$> getAssociatedQueueCount range + if + | s_n <= max_N -> reply_with_single_queue_ranges + | otherwise -> do + let range_size = s_n `div` max_N + read_all_ranges -- in a recursive loop, with max_id, range_hash and next_min_id in each step + reply_ranges +``` + +We don't need to implement this synchronization logic right now, so the client logic is not included here. It's sufficient to implement drift detection, and the action to fix the drift would be to disable and to re-enable certificates via some command-line parameter of CLI. 
diff --git a/simplexmq.cabal b/simplexmq.cabal index 7fd1396e19..081c05bca8 100644 --- a/simplexmq.cabal +++ b/simplexmq.cabal @@ -216,6 +216,7 @@ library Simplex.Messaging.Agent.Store.SQLite.Migrations.M20250702_conn_invitations_remove_cascade_delete Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251009_queue_to_subscribe Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251010_client_notices + Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251020_service_certs if flag(client_postgres) || flag(server_postgres) exposed-modules: Simplex.Messaging.Agent.Store.Postgres @@ -553,6 +554,7 @@ test-suite simplexmq-test , text , time , timeit ==2.0.* + , tls >=1.9.0 && <1.10 , transformers , unliftio , unliftio-core diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index c19d4aeea1..f9f1dc0894 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -47,6 +47,7 @@ module Simplex.Messaging.Agent withInvLock, createUser, deleteUser, + setUserService, connRequestPQSupport, createConnectionAsync, joinConnectionAsync, @@ -78,7 +79,7 @@ module Simplex.Messaging.Agent getNotificationConns, resubscribeConnection, resubscribeConnections, - subscribeClientService, + subscribeClientServices, sendMessage, sendMessages, sendMessagesB, @@ -210,6 +211,7 @@ import Simplex.Messaging.Protocol ErrorType (AUTH), MsgBody, MsgFlags (..), + IdsHash, NtfServer, ProtoServerWithAuth (..), ProtocolServer (..), @@ -340,6 +342,11 @@ deleteUser :: AgentClient -> UserId -> Bool -> AE () deleteUser c = withAgentEnv c .: deleteUser' c {-# INLINE deleteUser #-} +-- | Enable using service certificate for this user +setUserService :: AgentClient -> UserId -> Bool -> AE () +setUserService c = withAgentEnv c .: setUserService' c +{-# INLINE setUserService #-} + -- | Create SMP agent connection (NEW command) asynchronously, synchronous response is new connection id createConnectionAsync :: ConnectionModeI c => AgentClient -> UserId -> ACorrId -> Bool 
-> SConnectionMode c -> CR.InitialKeys -> SubscriptionMode -> AE ConnId createConnectionAsync c userId aCorrId enableNtfs = withAgentEnv c .:. newConnAsync c userId aCorrId enableNtfs @@ -381,7 +388,7 @@ deleteConnectionsAsync c waitDelivery = withAgentEnv c . deleteConnectionsAsync' {-# INLINE deleteConnectionsAsync #-} -- | Create SMP agent connection (NEW command) -createConnection :: ConnectionModeI c => AgentClient -> NetworkRequestMode -> UserId -> Bool -> Bool -> SConnectionMode c -> Maybe (UserConnLinkData c) -> Maybe CRClientData -> CR.InitialKeys -> SubscriptionMode -> AE (ConnId, (CreatedConnLink c, Maybe ClientServiceId)) +createConnection :: ConnectionModeI c => AgentClient -> NetworkRequestMode -> UserId -> Bool -> Bool -> SConnectionMode c -> Maybe (UserConnLinkData c) -> Maybe CRClientData -> CR.InitialKeys -> SubscriptionMode -> AE (ConnId, CreatedConnLink c) createConnection c nm userId enableNtfs checkNotices = withAgentEnv c .::. newConn c nm userId enableNtfs checkNotices {-# INLINE createConnection #-} @@ -424,7 +431,7 @@ prepareConnectionToAccept c userId enableNtfs = withAgentEnv c .: newConnToAccep {-# INLINE prepareConnectionToAccept #-} -- | Join SMP agent connection (JOIN command). -joinConnection :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> AE (SndQueueSecured, Maybe ClientServiceId) +joinConnection :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> AE SndQueueSecured joinConnection c nm userId connId enableNtfs = withAgentEnv c .:: joinConn c nm userId connId enableNtfs {-# INLINE joinConnection #-} @@ -434,7 +441,7 @@ allowConnection c = withAgentEnv c .:. 
allowConnection' c {-# INLINE allowConnection #-} -- | Accept contact after REQ notification (ACPT command) -acceptContact :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConfirmationId -> ConnInfo -> PQSupport -> SubscriptionMode -> AE (SndQueueSecured, Maybe ClientServiceId) +acceptContact :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConfirmationId -> ConnInfo -> PQSupport -> SubscriptionMode -> AE SndQueueSecured acceptContact c userId connId enableNtfs = withAgentEnv c .::. acceptContact' c userId connId enableNtfs {-# INLINE acceptContact #-} @@ -462,12 +469,12 @@ syncConnections c = withAgentEnv c .: syncConnections' c {-# INLINE syncConnections #-} -- | Subscribe to receive connection messages (SUB command) -subscribeConnection :: AgentClient -> ConnId -> AE (Maybe ClientServiceId) +subscribeConnection :: AgentClient -> ConnId -> AE () subscribeConnection c = withAgentEnv c . subscribeConnection' c {-# INLINE subscribeConnection #-} -- | Subscribe to receive connection messages from multiple connections, batching commands when possible -subscribeConnections :: AgentClient -> [ConnId] -> AE (Map ConnId (Either AgentErrorType (Maybe ClientServiceId))) +subscribeConnections :: AgentClient -> [ConnId] -> AE (Map ConnId (Either AgentErrorType ())) subscribeConnections c = withAgentEnv c . subscribeConnections' c {-# INLINE subscribeConnections #-} @@ -485,18 +492,17 @@ getNotificationConns :: AgentClient -> C.CbNonce -> ByteString -> AE (NonEmpty N getNotificationConns c = withAgentEnv c .: getNotificationConns' c {-# INLINE getNotificationConns #-} -resubscribeConnection :: AgentClient -> ConnId -> AE (Maybe ClientServiceId) +resubscribeConnection :: AgentClient -> ConnId -> AE () resubscribeConnection c = withAgentEnv c . 
resubscribeConnection' c {-# INLINE resubscribeConnection #-} -resubscribeConnections :: AgentClient -> [ConnId] -> AE (Map ConnId (Either AgentErrorType (Maybe ClientServiceId))) +resubscribeConnections :: AgentClient -> [ConnId] -> AE (Map ConnId (Either AgentErrorType ())) resubscribeConnections c = withAgentEnv c . resubscribeConnections' c {-# INLINE resubscribeConnections #-} --- TODO [certs rcv] how to communicate that service ID changed - as error or as result? -subscribeClientService :: AgentClient -> ClientServiceId -> AE Int -subscribeClientService c = withAgentEnv c . subscribeClientService' c -{-# INLINE subscribeClientService #-} +subscribeClientServices :: AgentClient -> UserId -> AE (Map SMPServer (Either AgentErrorType (Int64, IdsHash))) +subscribeClientServices c = withAgentEnv c . subscribeClientServices' c +{-# INLINE subscribeClientServices #-} -- | Send message to the connection (SEND command) sendMessage :: AgentClient -> ConnId -> PQEncryption -> MsgFlags -> MsgBody -> AE (AgentMsgId, PQEncryption) @@ -746,6 +752,7 @@ createUser' c smp xftp = do userId <- withStore' c createUserRecord atomically $ TM.insert userId (mkUserServers smp) $ smpServers c atomically $ TM.insert userId (mkUserServers xftp) $ xftpServers c + atomically $ TM.insert userId False $ useClientServices c pure userId deleteUser' :: AgentClient -> UserId -> Bool -> AM () @@ -755,6 +762,7 @@ deleteUser' c@AgentClient {smpServersStats, xftpServersStats} userId delSMPQueue else withStore c (`deleteUserRecord` userId) atomically $ TM.delete userId $ smpServers c atomically $ TM.delete userId $ xftpServers c + atomically $ TM.delete userId $ useClientServices c atomically $ modifyTVar' smpServersStats $ M.filterWithKey (\(userId', _) _ -> userId' /= userId) atomically $ modifyTVar' xftpServersStats $ M.filterWithKey (\(userId', _) _ -> userId' /= userId) lift $ saveServersStats c @@ -763,6 +771,13 @@ deleteUser' c@AgentClient {smpServersStats, xftpServersStats} userId delSMPQueue 
whenM (withStore' c (`deleteUserWithoutConns` userId)) . atomically $ writeTBQueue (subQ c) ("", "", AEvt SAENone $ DEL_USER userId) +setUserService' :: AgentClient -> UserId -> Bool -> AM () +setUserService' c userId enable = do + wasEnabled <- liftIO $ fromMaybe False <$> TM.lookupIO userId (useClientServices c) + when (enable /= wasEnabled) $ do + atomically $ TM.insert userId enable $ useClientServices c + unless enable $ withStore' c (`deleteClientServices` userId) + newConnAsync :: ConnectionModeI c => AgentClient -> UserId -> ACorrId -> Bool -> SConnectionMode c -> CR.InitialKeys -> SubscriptionMode -> AM ConnId newConnAsync c userId corrId enableNtfs cMode pqInitKeys subMode = do connId <- newConnNoQueues c userId enableNtfs cMode (CR.connPQEncryption pqInitKeys) @@ -865,7 +880,7 @@ switchConnectionAsync' c corrId connId = connectionStats c $ DuplexConnection cData rqs' sqs _ -> throwE $ CMD PROHIBITED "switchConnectionAsync: not duplex" -newConn :: ConnectionModeI c => AgentClient -> NetworkRequestMode -> UserId -> Bool -> Bool -> SConnectionMode c -> Maybe (UserConnLinkData c) -> Maybe CRClientData -> CR.InitialKeys -> SubscriptionMode -> AM (ConnId, (CreatedConnLink c, Maybe ClientServiceId)) +newConn :: ConnectionModeI c => AgentClient -> NetworkRequestMode -> UserId -> Bool -> Bool -> SConnectionMode c -> Maybe (UserConnLinkData c) -> Maybe CRClientData -> CR.InitialKeys -> SubscriptionMode -> AM (ConnId, CreatedConnLink c) newConn c nm userId enableNtfs checkNotices cMode linkData_ clientData pqInitKeys subMode = do srv <- getSMPServer c userId when (checkNotices && connMode cMode == CMContact) $ checkClientNotices c srv @@ -989,7 +1004,7 @@ changeConnectionUser' c oldUserId connId newUserId = do where updateConn = withStore' c $ \db -> setConnUserId db oldUserId connId newUserId -newRcvConnSrv :: forall c. 
ConnectionModeI c => AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> SConnectionMode c -> Maybe (UserConnLinkData c) -> Maybe CRClientData -> CR.InitialKeys -> SubscriptionMode -> SMPServerWithAuth -> AM (CreatedConnLink c, Maybe ClientServiceId) +newRcvConnSrv :: forall c. ConnectionModeI c => AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> SConnectionMode c -> Maybe (UserConnLinkData c) -> Maybe CRClientData -> CR.InitialKeys -> SubscriptionMode -> SMPServerWithAuth -> AM (CreatedConnLink c) newRcvConnSrv c nm userId connId enableNtfs cMode userLinkData_ clientData pqInitKeys subMode srvWithAuth@(ProtoServerWithAuth srv _) = do case (cMode, pqInitKeys) of (SCMContact, CR.IKUsePQ) -> throwE $ CMD PROHIBITED "newRcvConnSrv" @@ -1000,12 +1015,12 @@ newRcvConnSrv c nm userId connId enableNtfs cMode userLinkData_ clientData pqIni (nonce, qUri, cReq, qd) <- prepareLinkData d $ fst e2eKeys (rq, qUri') <- createRcvQueue (Just nonce) qd e2eKeys ccLink <- connReqWithShortLink qUri cReq qUri' (shortLink rq) - pure (ccLink, clientServiceId rq) + pure ccLink Nothing -> do let qd = case cMode of SCMContact -> CQRContact Nothing; SCMInvitation -> CQRMessaging Nothing - (rq, qUri) <- createRcvQueue Nothing qd e2eKeys + (_rq, qUri) <- createRcvQueue Nothing qd e2eKeys cReq <- createConnReq qUri - pure (CCLink cReq Nothing, clientServiceId rq) + pure $ CCLink cReq Nothing where createRcvQueue :: Maybe C.CbNonce -> ClntQueueReqData -> C.KeyPairX25519 -> AM (RcvQueue, SMPQueueUri) createRcvQueue nonce_ qd e2eKeys = do @@ -1107,7 +1122,7 @@ newConnToAccept c userId connId enableNtfs invId pqSup = do Invitation {connReq} <- withStore c $ \db -> getInvitation db "newConnToAccept" invId newConnToJoin c userId connId enableNtfs connReq pqSup -joinConn :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> AM (SndQueueSecured, Maybe ClientServiceId) +joinConn :: 
AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> AM SndQueueSecured joinConn c nm userId connId enableNtfs cReq cInfo pqSupport subMode = do srv <- getNextSMPServer c userId [qServer $ connReqQueue cReq] joinConnSrv c nm userId connId enableNtfs cReq cInfo pqSupport subMode srv @@ -1187,7 +1202,7 @@ versionPQSupport_ :: VersionSMPA -> Maybe CR.VersionE2E -> PQSupport versionPQSupport_ agentV e2eV_ = PQSupport $ agentV >= pqdrSMPAgentVersion && maybe True (>= CR.pqRatchetE2EEncryptVersion) e2eV_ {-# INLINE versionPQSupport_ #-} -joinConnSrv :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> SMPServerWithAuth -> AM (SndQueueSecured, Maybe ClientServiceId) +joinConnSrv :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> SMPServerWithAuth -> AM SndQueueSecured joinConnSrv c nm userId connId enableNtfs inv@CRInvitationUri {} cInfo pqSup subMode srv = withInvLock c (strEncode inv) "joinConnSrv" $ do SomeConn cType conn <- withStore c (`getConn` connId) @@ -1198,7 +1213,7 @@ joinConnSrv c nm userId connId enableNtfs inv@CRInvitationUri {} cInfo pqSup sub | sqStatus == New || sqStatus == Secured -> doJoin (Just rq) (Just sq) _ -> throwE $ CMD PROHIBITED $ "joinConnSrv: bad connection " <> show cType where - doJoin :: Maybe RcvQueue -> Maybe SndQueue -> AM (SndQueueSecured, Maybe ClientServiceId) + doJoin :: Maybe RcvQueue -> Maybe SndQueue -> AM SndQueueSecured doJoin rq_ sq_ = do (cData, sq, e2eSndParams, lnkId_) <- startJoinInvitation c userId connId sq_ enableNtfs inv pqSup secureConfirmQueue c nm cData rq_ sq srv cInfo (Just e2eSndParams) subMode @@ -1209,14 +1224,14 @@ joinConnSrv c nm userId connId enableNtfs cReqUri@CRContactUri {} cInfo pqSup su withInvLock c (strEncode cReqUri) "joinConnSrv" $ do SomeConn 
cType conn <- withStore c (`getConn` connId) let pqInitKeys = CR.joinContactInitialKeys (v >= pqdrSMPAgentVersion) pqSup - (CCLink cReq _, service) <- case conn of + CCLink cReq _ <- case conn of NewConnection _ -> newRcvConnSrv c NRMBackground userId connId enableNtfs SCMInvitation Nothing Nothing pqInitKeys subMode srv RcvConnection _ rq -> mkJoinInvitation rq pqInitKeys _ -> throwE $ CMD PROHIBITED $ "joinConnSrv: bad connection " <> show cType void $ sendInvitation c nm userId connId qInfo vrsn cReq cInfo - pure (False, service) + pure False where - mkJoinInvitation rq@RcvQueue {clientService} pqInitKeys = do + mkJoinInvitation rq pqInitKeys = do g <- asks random AgentConfig {smpClientVRange = vr, smpAgentVRange, e2eEncryptVRange = e2eVR} <- asks config let qUri = SMPQueueUri vr $ (rcvSMPQueueAddress rq) {queueMode = Just QMMessaging} @@ -1231,7 +1246,7 @@ joinConnSrv c nm userId connId enableNtfs cReqUri@CRContactUri {} cInfo pqSup su createRatchetX3dhKeys db connId pk1 pk2 pKem pure e2eRcvParams let cReq = CRInvitationUri crData $ toVersionRangeT e2eRcvParams e2eVR - pure (CCLink cReq Nothing, dbServiceId <$> clientService) + pure $ CCLink cReq Nothing Nothing -> throwE $ AGENT A_VERSION delInvSL :: AgentClient -> ConnId -> SMPServerWithAuth -> SMP.LinkId -> AM () @@ -1239,7 +1254,7 @@ delInvSL c connId srv lnkId = withStore' c (\db -> deleteInvShortLink db (protoServer srv) lnkId) `catchE` \e -> liftIO $ nonBlockingWriteTBQueue (subQ c) ("", connId, AEvt SAEConn (ERR $ INTERNAL $ "error deleting short link " <> show e)) -joinConnSrvAsync :: AgentClient -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> SMPServerWithAuth -> AM (SndQueueSecured, Maybe ClientServiceId) +joinConnSrvAsync :: AgentClient -> UserId -> ConnId -> Bool -> ConnectionRequestUri c -> ConnInfo -> PQSupport -> SubscriptionMode -> SMPServerWithAuth -> AM SndQueueSecured joinConnSrvAsync c userId connId enableNtfs inv@CRInvitationUri {} 
cInfo pqSupport subMode srv = do SomeConn cType conn <- withStore c (`getConn` connId) case conn of @@ -1251,7 +1266,7 @@ joinConnSrvAsync c userId connId enableNtfs inv@CRInvitationUri {} cInfo pqSuppo | sqStatus == New || sqStatus == Secured -> doJoin (Just rq) (Just sq) _ -> throwE $ CMD PROHIBITED $ "joinConnSrvAsync: bad connection " <> show cType where - doJoin :: Maybe RcvQueue -> Maybe SndQueue -> AM (SndQueueSecured, Maybe ClientServiceId) + doJoin :: Maybe RcvQueue -> Maybe SndQueue -> AM SndQueueSecured doJoin rq_ sq_ = do (cData, sq, e2eSndParams, lnkId_) <- startJoinInvitation c userId connId sq_ enableNtfs inv pqSupport secureConfirmQueueAsync c cData rq_ sq srv cInfo (Just e2eSndParams) subMode @@ -1259,7 +1274,7 @@ joinConnSrvAsync c userId connId enableNtfs inv@CRInvitationUri {} cInfo pqSuppo joinConnSrvAsync _c _userId _connId _enableNtfs (CRContactUri _) _cInfo _subMode _pqSupport _srv = do throwE $ CMD PROHIBITED "joinConnSrvAsync" -createReplyQueue :: AgentClient -> NetworkRequestMode -> ConnData -> SndQueue -> SubscriptionMode -> SMPServerWithAuth -> AM (SMPQueueInfo, Maybe ClientServiceId) +createReplyQueue :: AgentClient -> NetworkRequestMode -> ConnData -> SndQueue -> SubscriptionMode -> SMPServerWithAuth -> AM SMPQueueInfo createReplyQueue c nm ConnData {userId, connId, enableNtfs} SndQueue {smpClientVersion} subMode srv = do ntfServer_ <- if enableNtfs then newQueueNtfServer else pure Nothing (rq, qUri, tSess, sessId) <- newRcvQueue c nm userId connId srv (versionToRange smpClientVersion) SCMInvitation (isJust ntfServer_) subMode @@ -1268,7 +1283,7 @@ createReplyQueue c nm ConnData {userId, connId, enableNtfs} SndQueue {smpClientV rq' <- withStore c $ \db -> upgradeSndConnToDuplex db connId rq subMode lift . 
when (subMode == SMSubscribe) $ addNewQueueSubscription c rq' tSess sessId mapM_ (newQueueNtfSubscription c rq') ntfServer_ - pure (qInfo, clientServiceId rq') + pure qInfo -- | Approve confirmation (LET command) in Reader monad allowConnection' :: AgentClient -> ConnId -> ConfirmationId -> ConnInfo -> AM () @@ -1281,7 +1296,7 @@ allowConnection' c connId confId ownConnInfo = withConnLock c connId "allowConne _ -> throwE $ CMD PROHIBITED "allowConnection" -- | Accept contact (ACPT command) in Reader monad -acceptContact' :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> InvitationId -> ConnInfo -> PQSupport -> SubscriptionMode -> AM (SndQueueSecured, Maybe ClientServiceId) +acceptContact' :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> Bool -> InvitationId -> ConnInfo -> PQSupport -> SubscriptionMode -> AM SndQueueSecured acceptContact' c nm userId connId enableNtfs invId ownConnInfo pqSupport subMode = withConnLock c connId "acceptContact" $ do Invitation {connReq} <- withStore c $ \db -> getInvitation db "acceptContact'" invId r <- joinConn c nm userId connId enableNtfs connReq ownConnInfo pqSupport subMode @@ -1316,7 +1331,7 @@ databaseDiff passed known = in DatabaseDiff {missingIds, extraIds} -- | Subscribe to receive connection messages (SUB command) in Reader monad -subscribeConnection' :: AgentClient -> ConnId -> AM (Maybe ClientServiceId) +subscribeConnection' :: AgentClient -> ConnId -> AM () subscribeConnection' c connId = toConnResult connId =<< subscribeConnections' c [connId] {-# INLINE subscribeConnection' #-} @@ -1332,12 +1347,13 @@ type QDelResult = QCmdResult () type QSubResult = QCmdResult (Maybe SMP.ServiceId) -subscribeConnections' :: AgentClient -> [ConnId] -> AM (Map ConnId (Either AgentErrorType (Maybe ClientServiceId))) +subscribeConnections' :: AgentClient -> [ConnId] -> AM (Map ConnId (Either AgentErrorType ())) subscribeConnections' _ [] = pure M.empty subscribeConnections' c connIds = subscribeConnections_ c 
. zip connIds =<< withStore' c (`getConnSubs` connIds) -subscribeConnections_ :: AgentClient -> [(ConnId, Either StoreError SomeConnSub)] -> AM (Map ConnId (Either AgentErrorType (Maybe ClientServiceId))) +subscribeConnections_ :: AgentClient -> [(ConnId, Either StoreError SomeConnSub)] -> AM (Map ConnId (Either AgentErrorType ())) subscribeConnections_ c conns = do + -- TODO [certs rcv] - it should exclude connections already associated, and then if some don't deliver any response they may be unassociated let (subRs, cs) = foldr partitionResultsConns ([], []) conns resumeDelivery cs resumeConnCmds c $ map fst cs @@ -1351,8 +1367,8 @@ subscribeConnections_ c conns = do pure rs where partitionResultsConns :: (ConnId, Either StoreError SomeConnSub) -> - (Map ConnId (Either AgentErrorType (Maybe ClientServiceId)), [(ConnId, SomeConnSub)]) -> - (Map ConnId (Either AgentErrorType (Maybe ClientServiceId)), [(ConnId, SomeConnSub)]) + (Map ConnId (Either AgentErrorType ()), [(ConnId, SomeConnSub)]) -> + (Map ConnId (Either AgentErrorType ()), [(ConnId, SomeConnSub)]) partitionResultsConns (connId, conn_) (rs, cs) = case conn_ of Left e -> (M.insert connId (Left $ storeError e) rs, cs) Right c'@(SomeConn _ conn) -> case conn of @@ -1360,12 +1376,12 @@ subscribeConnections_ c conns = do SndConnection _ sq -> (M.insert connId (sndSubResult sq) rs, cs') RcvConnection _ _ -> (rs, cs') ContactConnection _ _ -> (rs, cs') - NewConnection _ -> (M.insert connId (Right Nothing) rs, cs') + NewConnection _ -> (M.insert connId (Right ()) rs, cs') where cs' = (connId, c') : cs - sndSubResult :: SndQueue -> Either AgentErrorType (Maybe ClientServiceId) + sndSubResult :: SndQueue -> Either AgentErrorType () sndSubResult SndQueue {status} = case status of - Confirmed -> Right Nothing + Confirmed -> Right () Active -> Left $ CONN SIMPLEX "subscribeConnections" _ -> Left $ INTERNAL "unexpected queue status" rcvQueues :: (ConnId, SomeConnSub) -> [RcvQueueSub] @@ -1386,9 +1402,9 @@ 
subscribeConnections_ c conns = do order (_, Right _) = 3 order _ = 4 -- TODO [certs rcv] store associations of queues with client service ID - storeClientServiceAssocs :: Map ConnId (Either AgentErrorType (Maybe SMP.ServiceId)) -> AM (Map ConnId (Either AgentErrorType (Maybe ClientServiceId))) - storeClientServiceAssocs = pure . M.map (Nothing <$) - sendNtfCreate :: NtfSupervisor -> Map ConnId (Either AgentErrorType (Maybe ClientServiceId)) -> [(ConnId, SomeConnSub)] -> AM' () + storeClientServiceAssocs :: Map ConnId (Either AgentErrorType (Maybe SMP.ServiceId)) -> AM (Map ConnId (Either AgentErrorType ())) + storeClientServiceAssocs = pure . M.map (() <$) + sendNtfCreate :: NtfSupervisor -> Map ConnId (Either AgentErrorType ()) -> [(ConnId, SomeConnSub)] -> AM' () sendNtfCreate ns rcvRs cs = do let oks = M.keysSet $ M.filter (either temporaryAgentError $ const True) rcvRs (csCreate, csDelete) = foldr (groupConnIds oks) ([], []) cs @@ -1412,7 +1428,7 @@ subscribeConnections_ c conns = do DuplexConnection _ _ sqs -> L.toList sqs SndConnection _ sq -> [sq] _ -> [] - notifyResultError :: Map ConnId (Either AgentErrorType (Maybe ClientServiceId)) -> AM () + notifyResultError :: Map ConnId (Either AgentErrorType ()) -> AM () notifyResultError rs = do let actual = M.size rs expected = length conns @@ -1472,15 +1488,15 @@ subscribeAllConnections' c onlyNeeded activeUserId_ = handleErr $ do sqs <- withStore' c getAllSndQueuesForDelivery lift $ mapM_ (resumeMsgDelivery c) sqs -resubscribeConnection' :: AgentClient -> ConnId -> AM (Maybe ClientServiceId) +resubscribeConnection' :: AgentClient -> ConnId -> AM () resubscribeConnection' c connId = toConnResult connId =<< resubscribeConnections' c [connId] {-# INLINE resubscribeConnection' #-} -resubscribeConnections' :: AgentClient -> [ConnId] -> AM (Map ConnId (Either AgentErrorType (Maybe ClientServiceId))) +resubscribeConnections' :: AgentClient -> [ConnId] -> AM (Map ConnId (Either AgentErrorType ())) 
resubscribeConnections' _ [] = pure M.empty resubscribeConnections' c connIds = do conns <- zip connIds <$> withStore' c (`getConnSubs` connIds) - let r = M.fromList $ map (,Right Nothing) connIds -- TODO [certs rcv] + let r = M.fromList $ map (,Right ()) connIds conns' <- filterM (fmap not . isActiveConn . snd) conns -- union is left-biased, so results returned by subscribeConnections' take precedence (`M.union` r) <$> subscribeConnections_ c conns' @@ -1491,9 +1507,15 @@ resubscribeConnections' c connIds = do [] -> pure True rqs' -> anyM $ map (atomically . hasActiveSubscription c) rqs' --- TODO [certs rcv] -subscribeClientService' :: AgentClient -> ClientServiceId -> AM Int -subscribeClientService' = undefined +-- TODO [certs rcv] compare hash with lock +subscribeClientServices' :: AgentClient -> UserId -> AM (Map SMPServer (Either AgentErrorType (Int64, IdsHash))) +subscribeClientServices' c userId = + ifM useService subscribe $ throwError $ CMD PROHIBITED "no user service allowed" + where + useService = liftIO $ (Just True ==) <$> TM.lookupIO userId (useClientServices c) + subscribe = do + srvs <- withStore' c (`getClientServiceServers` userId) + lift $ M.fromList . zip srvs <$> mapConcurrently (tryAllErrors' . subscribeClientService c userId) srvs -- requesting messages sequentially, to reduce memory usage getConnectionMessages' :: AgentClient -> NonEmpty ConnMsgReq -> AM' (NonEmpty (Either AgentErrorType (Maybe SMPMsgMeta))) @@ -1655,13 +1677,13 @@ runCommandProcessing c@AgentClient {subQ} connId server_ Worker {doWork} = do NEW enableNtfs (ACM cMode) pqEnc subMode -> noServer $ do triedHosts <- newTVarIO S.empty tryCommand . 
withNextSrv c userId storageSrvs triedHosts [] $ \srv -> do - (CCLink cReq _, service) <- newRcvConnSrv c NRMBackground userId connId enableNtfs cMode Nothing Nothing pqEnc subMode srv - notify $ INV (ACR cMode cReq) service + CCLink cReq _ <- newRcvConnSrv c NRMBackground userId connId enableNtfs cMode Nothing Nothing pqEnc subMode srv + notify $ INV (ACR cMode cReq) JOIN enableNtfs (ACR _ cReq@(CRInvitationUri ConnReqUriData {crSmpQueues = q :| _} _)) pqEnc subMode connInfo -> noServer $ do triedHosts <- newTVarIO S.empty tryCommand . withNextSrv c userId storageSrvs triedHosts [qServer q] $ \srv -> do - (sqSecured, service) <- joinConnSrvAsync c userId connId enableNtfs cReq connInfo pqEnc subMode srv - notify $ JOINED sqSecured service + sqSecured <- joinConnSrvAsync c userId connId enableNtfs cReq connInfo pqEnc subMode srv + notify $ JOINED sqSecured LET confId ownCInfo -> withServer' . tryCommand $ allowConnection' c connId confId ownCInfo >> notify OK ACK msgId rcptInfo_ -> withServer' . 
tryCommand $ ackMessage' c connId msgId rcptInfo_ >> notify OK SWCH -> @@ -2818,7 +2840,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId SMP.SUB -> case respOrErr of Right SMP.OK -> liftIO $ processSubOk rq upConnIds -- TODO [certs rcv] associate queue with the service - Right (SMP.SOK serviceId_) -> liftIO $ processSubOk rq upConnIds + Right (SMP.SOK _serviceId_) -> liftIO $ processSubOk rq upConnIds Right msg@SMP.MSG {} -> do liftIO $ processSubOk rq upConnIds -- the connection is UP even when processing this particular message fails runProcessSMP rq conn (toConnData conn) msg @@ -3053,7 +3075,9 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId notifyEnd removed | removed = notify END >> logServer "<--" c srv rId "END" | otherwise = logServer "<--" c srv rId "END from disconnected client - ignored" - -- Possibly, we need to add some flag to connection that it was deleted + -- TODO [certs rcv] + r@(SMP.ENDS _) -> unexpected r + -- TODO [certs rcv] Possibly, we need to add some flag to connection that it was deleted SMP.DELD -> atomically (removeSubscription c tSess connId rq) >> notify DELD SMP.ERR e -> notify $ ERR $ SMP (B.unpack $ strEncode srv) e r -> unexpected r @@ -3439,22 +3463,22 @@ connectReplyQueues c cData@ConnData {userId, connId} ownConnInfo sq_ (qInfo :| _ (sq, _) <- lift $ newSndQueue userId connId qInfo' Nothing withStore c $ \db -> upgradeRcvConnToDuplex db connId sq -secureConfirmQueueAsync :: AgentClient -> ConnData -> Maybe RcvQueue -> SndQueue -> SMPServerWithAuth -> ConnInfo -> Maybe (CR.SndE2ERatchetParams 'C.X448) -> SubscriptionMode -> AM (SndQueueSecured, Maybe ClientServiceId) +secureConfirmQueueAsync :: AgentClient -> ConnData -> Maybe RcvQueue -> SndQueue -> SMPServerWithAuth -> ConnInfo -> Maybe (CR.SndE2ERatchetParams 'C.X448) -> SubscriptionMode -> AM SndQueueSecured secureConfirmQueueAsync c cData rq_ sq srv connInfo e2eEncryption_ subMode = do sqSecured <- 
agentSecureSndQueue c NRMBackground cData sq - (qInfo, service) <- mkAgentConfirmation c NRMBackground cData rq_ sq srv connInfo subMode + qInfo <- mkAgentConfirmation c NRMBackground cData rq_ sq srv connInfo subMode storeConfirmation c cData sq e2eEncryption_ qInfo lift $ submitPendingMsg c sq - pure (sqSecured, service) + pure sqSecured -secureConfirmQueue :: AgentClient -> NetworkRequestMode -> ConnData -> Maybe RcvQueue -> SndQueue -> SMPServerWithAuth -> ConnInfo -> Maybe (CR.SndE2ERatchetParams 'C.X448) -> SubscriptionMode -> AM (SndQueueSecured, Maybe ClientServiceId) +secureConfirmQueue :: AgentClient -> NetworkRequestMode -> ConnData -> Maybe RcvQueue -> SndQueue -> SMPServerWithAuth -> ConnInfo -> Maybe (CR.SndE2ERatchetParams 'C.X448) -> SubscriptionMode -> AM SndQueueSecured secureConfirmQueue c nm cData@ConnData {connId, connAgentVersion, pqSupport} rq_ sq srv connInfo e2eEncryption_ subMode = do sqSecured <- agentSecureSndQueue c nm cData sq - (qInfo, service) <- mkAgentConfirmation c nm cData rq_ sq srv connInfo subMode + qInfo <- mkAgentConfirmation c nm cData rq_ sq srv connInfo subMode msg <- mkConfirmation qInfo void $ sendConfirmation c nm sq msg withStore' c $ \db -> setSndQueueStatus db sq Confirmed - pure (sqSecured, service) + pure sqSecured where mkConfirmation :: AgentMessage -> AM MsgBody mkConfirmation aMessage = do @@ -3480,12 +3504,12 @@ agentSecureSndQueue c nm ConnData {connAgentVersion} sq@SndQueue {queueMode, sta sndSecure = senderCanSecure queueMode initiatorRatchetOnConf = connAgentVersion >= ratchetOnConfSMPAgentVersion -mkAgentConfirmation :: AgentClient -> NetworkRequestMode -> ConnData -> Maybe RcvQueue -> SndQueue -> SMPServerWithAuth -> ConnInfo -> SubscriptionMode -> AM (AgentMessage, Maybe ClientServiceId) +mkAgentConfirmation :: AgentClient -> NetworkRequestMode -> ConnData -> Maybe RcvQueue -> SndQueue -> SMPServerWithAuth -> ConnInfo -> SubscriptionMode -> AM AgentMessage mkAgentConfirmation c nm cData rq_ sq srv 
connInfo subMode = do - (qInfo, service) <- case rq_ of + qInfo <- case rq_ of Nothing -> createReplyQueue c nm cData sq subMode srv - Just rq@RcvQueue {smpClientVersion = v, clientService} -> pure (SMPQueueInfo v $ rcvSMPQueueAddress rq, dbServiceId <$> clientService) - pure (AgentConnInfoReply (qInfo :| []) connInfo, service) + Just rq@RcvQueue {smpClientVersion = v} -> pure $ SMPQueueInfo v $ rcvSMPQueueAddress rq + pure $ AgentConnInfoReply (qInfo :| []) connInfo enqueueConfirmation :: AgentClient -> ConnData -> SndQueue -> ConnInfo -> Maybe (CR.SndE2ERatchetParams 'C.X448) -> AM () enqueueConfirmation c cData sq connInfo e2eEncryption_ = do diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 217a1682a0..4a10d07efb 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -49,6 +49,7 @@ module Simplex.Messaging.Agent.Client newRcvQueue_, subscribeQueues, subscribeUserServerQueues, + subscribeClientService, processClientNotices, getQueueMessage, decryptSMPMessage, @@ -223,6 +224,7 @@ import Data.Text.Encoding import Data.Time (UTCTime, addUTCTime, defaultTimeLocale, formatTime, getCurrentTime) import Data.Time.Clock.System (getSystemTime) import Data.Word (Word16) +import qualified Data.X509.Validation as XV import Network.Socket (HostName) import Simplex.FileTransfer.Client (XFTPChunkSpec (..), XFTPClient, XFTPClientConfig (..), XFTPClientError) import qualified Simplex.FileTransfer.Client as X @@ -238,7 +240,7 @@ import Simplex.Messaging.Agent.Protocol import Simplex.Messaging.Agent.RetryInterval import Simplex.Messaging.Agent.Stats import Simplex.Messaging.Agent.Store -import Simplex.Messaging.Agent.Store.AgentStore (getClientNotices, updateClientNotices) +import Simplex.Messaging.Agent.Store.AgentStore import Simplex.Messaging.Agent.Store.Common (DBStore, withTransaction) import qualified Simplex.Messaging.Agent.Store.DB as DB import Simplex.Messaging.Agent.Store.Entity @@ 
-262,6 +264,7 @@ import Simplex.Messaging.Protocol NetworkError (..), MsgFlags (..), MsgId, + IdsHash, NtfServer, NtfServerWithAuth, ProtoServer, @@ -296,8 +299,9 @@ import Simplex.Messaging.Session import Simplex.Messaging.SystemTime import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM -import Simplex.Messaging.Transport (SMPVersion, SessionId, THandleParams (sessionId, thVersion), TransportError (..), TransportPeer (..), sndAuthKeySMPVersion, shortLinksSMPVersion, newNtfCredsSMPVersion) +import Simplex.Messaging.Transport (SMPServiceRole (..), SMPVersion, ServiceCredentials (..), SessionId, THClientService' (..), THandleParams (sessionId, thVersion), TransportError (..), TransportPeer (..), sndAuthKeySMPVersion, shortLinksSMPVersion, newNtfCredsSMPVersion) import Simplex.Messaging.Transport.Client (TransportHost (..)) +import Simplex.Messaging.Transport.Credentials import Simplex.Messaging.Util import Simplex.Messaging.Version import System.Mem.Weak (Weak, deRefWeak) @@ -331,6 +335,7 @@ data AgentClient = AgentClient msgQ :: TBQueue (ServerTransmissionBatch SMPVersion ErrorType BrokerMsg), smpServers :: TMap UserId (UserServers 'PSMP), smpClients :: TMap SMPTransportSession SMPClientVar, + useClientServices :: TMap UserId Bool, -- smpProxiedRelays: -- SMPTransportSession defines connection from proxy to relay, -- SMPServerWithAuth defines client connected to SMP proxy (with the same userId and entityId in TransportSession) @@ -495,7 +500,7 @@ data UserNetworkType = UNNone | UNCellular | UNWifi | UNEthernet | UNOther -- | Creates an SMP agent client instance that receives commands and sends responses via 'TBQueue's. 
newAgentClient :: Int -> InitialAgentServers -> UTCTime -> Map (Maybe SMPServer) (Maybe SystemSeconds) -> Env -> IO AgentClient -newAgentClient clientId InitialAgentServers {smp, ntf, xftp, netCfg, presetDomains, presetServers} currentTs notices agentEnv = do +newAgentClient clientId InitialAgentServers {smp, ntf, xftp, netCfg, useServices, presetDomains, presetServers} currentTs notices agentEnv = do let cfg = config agentEnv qSize = tbqSize cfg proxySessTs <- newTVarIO =<< getCurrentTime @@ -505,6 +510,7 @@ newAgentClient clientId InitialAgentServers {smp, ntf, xftp, netCfg, presetDomai msgQ <- newTBQueueIO qSize smpServers <- newTVarIO $ M.map mkUserServers smp smpClients <- TM.emptyIO + useClientServices <- newTVarIO useServices smpProxiedRelays <- TM.emptyIO ntfServers <- newTVarIO ntf ntfClients <- TM.emptyIO @@ -544,6 +550,7 @@ newAgentClient clientId InitialAgentServers {smp, ntf, xftp, netCfg, presetDomai msgQ, smpServers, smpClients, + useClientServices, smpProxiedRelays, ntfServers, ntfClients, @@ -598,6 +605,28 @@ agentDRG :: AgentClient -> TVar ChaChaDRG agentDRG AgentClient {agentEnv = Env {random}} = random {-# INLINE agentDRG #-} +getServiceCredentials :: AgentClient -> UserId -> SMPServer -> AM (Maybe (ServiceCredentials, Maybe ServiceId)) +getServiceCredentials c userId srv = + liftIO (TM.lookupIO userId $ useClientServices c) + $>>= \useService -> if useService then Just <$> getService else pure Nothing + where + getService :: AM (ServiceCredentials, Maybe ServiceId) + getService = do + let g = agentDRG c + ((C.KeyHash kh, serviceCreds), serviceId_) <- + withStore' c $ \db -> + getClientService db userId srv >>= \case + Just service -> pure service + Nothing -> do + cred <- genCredentials g Nothing (25, 24 * 999999) "simplex" + let tlsCreds = tlsCredentials [cred] + createClientService db userId srv tlsCreds + pure (tlsCreds, Nothing) + (_, pk) <- atomically $ C.generateKeyPair g + let serviceSignKey = C.APrivateSignKey C.SEd25519 pk + creds = 
ServiceCredentials {serviceRole = SRMessaging, serviceCreds, serviceCertHash = XV.Fingerprint kh, serviceSignKey} + pure (creds, serviceId_) + class (Encoding err, Show err) => ProtocolServerClient v err msg | msg -> v, msg -> err where type Client msg = c | c -> msg getProtocolServerClient :: AgentClient -> NetworkRequestMode -> TransportSession msg -> AM (Client msg) @@ -701,7 +730,7 @@ getSMPProxyClient c@AgentClient {active, smpClients, smpProxiedRelays, workerSeq Nothing -> Left $ BROKER (B.unpack $ strEncode srv) TIMEOUT smpConnectClient :: AgentClient -> NetworkRequestMode -> SMPTransportSession -> TMap SMPServer ProxiedRelayVar -> SMPClientVar -> AM SMPConnectedClient -smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm tSess@(_, srv, _) prs v = +smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm tSess@(userId, srv, _) prs v = newProtocolClient c tSess smpClients connectClient v `catchAllErrors` \e -> lift (resubscribeSMPSession c tSess) >> throwE e where @@ -709,12 +738,22 @@ smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm connectClient v' = do cfg <- lift $ getClientConfig c smpCfg g <- asks random + service <- getServiceCredentials c userId srv + let cfg' = cfg {serviceCredentials = fst <$> service} env <- ask - liftError (protocolClientError SMP $ B.unpack $ strEncode srv) $ do + smp <- liftError (protocolClientError SMP $ B.unpack $ strEncode srv) $ do ts <- readTVarIO proxySessTs - smp <- ExceptT $ getProtocolClient g nm tSess cfg presetDomains (Just msgQ) ts $ smpClientDisconnected c tSess env v' prs - atomically $ SS.setSessionId tSess (sessionId $ thParams smp) $ currentSubs c - pure SMPConnectedClient {connectedClient = smp, proxiedRelays = prs} + ExceptT $ getProtocolClient g nm tSess cfg' presetDomains (Just msgQ) ts $ smpClientDisconnected c tSess env v' prs + atomically $ SS.setSessionId tSess (sessionId $ thParams smp) $ currentSubs c + 
updateClientService service smp + pure SMPConnectedClient {connectedClient = smp, proxiedRelays = prs} + updateClientService service smp = case (service, smpClientService smp) of + (Just (_, serviceId_), Just THClientService {serviceId}) + | serviceId_ /= Just serviceId -> withStore' c $ \db -> setClientServiceId db userId srv serviceId + | otherwise -> pure () + (Just _, Nothing) -> withStore' c $ \db -> deleteClientService db userId srv -- e.g., server version downgrade + (Nothing, Just _) -> logError "server returned serviceId without service credentials in request" + (Nothing, Nothing) -> pure () smpClientDisconnected :: AgentClient -> SMPTransportSession -> Env -> SMPClientVar -> TMap SMPServer ProxiedRelayVar -> SMPClient -> IO () smpClientDisconnected c@AgentClient {active, smpClients, smpProxiedRelays} tSess@(userId, srv, cId) env v prs client = do @@ -862,7 +901,6 @@ waitForProtocolClient c nm tSess@(_, srv, _) clients v = do (throwE e) Nothing -> throwE $ BROKER (B.unpack $ strEncode srv) TIMEOUT --- clientConnected arg is only passed for SMP server newProtocolClient :: forall v err msg. (ProtocolTypeI (ProtoType msg), ProtocolServerClient v err msg) => @@ -1399,7 +1437,8 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl withClient c nm tSess $ \(SMPConnectedClient smp _) -> do (ntfKeys, ntfCreds) <- liftIO $ mkNtfCreds a g smp (thParams smp,ntfKeys,) <$> createSMPQueue smp nm nonce_ rKeys dhKey auth subMode (queueReqData cqrd) ntfCreds - -- TODO [certs rcv] validate that serviceId is the same as in the client session + -- TODO [certs rcv] validate that serviceId is the same as in the client session, fail otherwise + -- possibly, it should allow returning Nothing - it would indicate incorrect old version liftIO . 
logServer "<--" c srv NoEntity $ B.unwords ["IDS", logSecret rcvId, logSecret sndId] shortLink <- mkShortLinkCreds thParams' qik let rq = @@ -1415,7 +1454,7 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl sndId, queueMode, shortLink, - clientService = ClientService DBNewEntity <$> serviceId, + rcvServiceAssoc = isJust serviceId, status = New, enableNtfs, clientNoticeId = Nothing, @@ -1650,6 +1689,11 @@ processClientNotices c@AgentClient {presetServers} tSess notices = do logError $ "processClientNotices error: " <> tshow e notifySub' c "" $ ERR e +subscribeClientService :: AgentClient -> UserId -> SMPServer -> AM (Int64, IdsHash) +subscribeClientService c userId srv = + withLogClient c NRMBackground (userId, srv, Nothing) B.empty "SUBS" $ + (`subscribeService` SMP.SRecipientService) . connectedClient + activeClientSession :: AgentClient -> SMPTransportSession -> SessionId -> STM Bool activeClientSession c tSess sessId = sameSess <$> tryReadSessVar tSess (smpClients c) where diff --git a/src/Simplex/Messaging/Agent/Env/SQLite.hs b/src/Simplex/Messaging/Agent/Env/SQLite.hs index 57bc11e3c1..129a582393 100644 --- a/src/Simplex/Messaging/Agent/Env/SQLite.hs +++ b/src/Simplex/Messaging/Agent/Env/SQLite.hs @@ -90,6 +90,7 @@ data InitialAgentServers = InitialAgentServers ntf :: [NtfServer], xftp :: Map UserId (NonEmpty (ServerCfg 'PXFTP)), netCfg :: NetworkConfig, + useServices :: Map UserId Bool, presetDomains :: [HostName], presetServers :: [SMPServer] } diff --git a/src/Simplex/Messaging/Agent/Protocol.hs b/src/Simplex/Messaging/Agent/Protocol.hs index 05ebc1b270..15d51aed91 100644 --- a/src/Simplex/Messaging/Agent/Protocol.hs +++ b/src/Simplex/Messaging/Agent/Protocol.hs @@ -126,9 +126,6 @@ module Simplex.Messaging.Agent.Protocol ContactConnType (..), ShortLinkScheme (..), LinkKey (..), - StoredClientService (..), - ClientService, - ClientServiceId, sameConnReqContact, sameShortLinkContact, simplexChat, @@ -212,7 +209,6 @@ import 
Simplex.FileTransfer.Transport (XFTPErrorType) import Simplex.FileTransfer.Types (FileErrorType) import Simplex.Messaging.Agent.QueryString import Simplex.Messaging.Agent.Store.DB (Binary (..), FromField (..), ToField (..), blobFieldDecoder, fromTextField_) -import Simplex.Messaging.Agent.Store.Entity import Simplex.Messaging.Client (ProxyClientError) import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Crypto.Ratchet @@ -381,7 +377,7 @@ type SndQueueSecured = Bool -- | Parameterized type for SMP agent events data AEvent (e :: AEntity) where - INV :: AConnectionRequestUri -> Maybe ClientServiceId -> AEvent AEConn + INV :: AConnectionRequestUri -> AEvent AEConn CONF :: ConfirmationId -> PQSupport -> [SMPServer] -> ConnInfo -> AEvent AEConn -- ConnInfo is from sender, [SMPServer] will be empty only in v1 handshake REQ :: InvitationId -> PQSupport -> NonEmpty SMPServer -> ConnInfo -> AEvent AEConn -- ConnInfo is from sender INFO :: PQSupport -> ConnInfo -> AEvent AEConn @@ -407,7 +403,7 @@ data AEvent (e :: AEntity) where DEL_USER :: Int64 -> AEvent AENone STAT :: ConnectionStats -> AEvent AEConn OK :: AEvent AEConn - JOINED :: SndQueueSecured -> Maybe ClientServiceId -> AEvent AEConn + JOINED :: SndQueueSecured -> AEvent AEConn ERR :: AgentErrorType -> AEvent AEConn ERRS :: NonEmpty (ConnId, AgentErrorType) -> AEvent AENone SUSPENDED :: AEvent AENone @@ -1783,16 +1779,6 @@ instance Encoding UserLinkData where smpP = UserLinkData <$> ((A.char '\255' *> (unLarge <$> smpP)) <|> smpP) {-# INLINE smpP #-} -data StoredClientService (s :: DBStored) = ClientService - { dbServiceId :: DBEntityId' s, - serviceId :: SMP.ServiceId - } - deriving (Eq, Show) - -type ClientService = StoredClientService 'DBStored - -type ClientServiceId = DBEntityId - -- | SMP queue status. 
data QueueStatus = -- | queue is created diff --git a/src/Simplex/Messaging/Agent/Store.hs b/src/Simplex/Messaging/Agent/Store.hs index c054cb2673..ab831ad385 100644 --- a/src/Simplex/Messaging/Agent/Store.hs +++ b/src/Simplex/Messaging/Agent/Store.hs @@ -85,7 +85,7 @@ data StoredRcvQueue (q :: DBStored) = RcvQueue -- | short link ID and credentials shortLink :: Maybe ShortLinkCreds, -- | associated client service - clientService :: Maybe (StoredClientService q), + rcvServiceAssoc :: ServiceAssoc, -- | queue status status :: QueueStatus, -- | to enable notifications for this queue - this field is duplicated from ConnData @@ -134,9 +134,7 @@ data ShortLinkCreds = ShortLinkCreds } deriving (Show) -clientServiceId :: RcvQueue -> Maybe ClientServiceId -clientServiceId = fmap dbServiceId . clientService -{-# INLINE clientServiceId #-} +type ServiceAssoc = Bool rcvSMPQueueAddress :: RcvQueue -> SMPQueueAddress rcvSMPQueueAddress RcvQueue {server, sndId, e2ePrivKey, queueMode} = diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index ef66eca38b..0b2c632fa4 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -35,6 +35,14 @@ module Simplex.Messaging.Agent.Store.AgentStore deleteUsersWithoutConns, checkUser, + -- * Client services + createClientService, + getClientService, + getClientServiceServers, + setClientServiceId, + deleteClientService, + deleteClientServices, + -- * Queues and connections createNewConn, updateNewConnRcv, @@ -274,7 +282,9 @@ import qualified Data.Set as S import Data.Text.Encoding (decodeLatin1, encodeUtf8) import Data.Time.Clock (NominalDiffTime, UTCTime, addUTCTime, getCurrentTime) import Data.Word (Word32) +import qualified Data.X509 as X import Network.Socket (ServiceName) +import qualified Network.TLS as TLS import Simplex.FileTransfer.Client (XFTPChunkSpec (..)) import Simplex.FileTransfer.Description import 
Simplex.FileTransfer.Protocol (FileParty (..), SFileParty (..)) @@ -390,6 +400,77 @@ deleteUsersWithoutConns db = do forM_ userIds $ DB.execute db "DELETE FROM users WHERE user_id = ?" . Only pure userIds +createClientService :: DB.Connection -> UserId -> SMPServer -> (C.KeyHash, TLS.Credential) -> IO () +createClientService db userId srv (kh, (cert, pk)) = + DB.execute + db + [sql| + INSERT INTO client_services + (user_id, host, port, service_cert_hash, service_cert, service_priv_key) + VALUES (?,?,?,?,?,?) + ON CONFLICT (user_id, host, port) + DO UPDATE SET + service_cert_hash = EXCLUDED.service_cert_hash, + service_cert = EXCLUDED.service_cert, + service_priv_key = EXCLUDED.service_priv_key, + rcv_service_id = NULL + |] + (userId, host srv, port srv, kh, cert, pk) + +getClientService :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ((C.KeyHash, TLS.Credential), Maybe ServiceId)) +getClientService db userId srv = + maybeFirstRow toService $ + DB.query + db + [sql| + SELECT service_cert_hash, service_cert, service_priv_key, rcv_service_id + FROM client_services + WHERE user_id = ? AND host = ? AND port = ? + |] + (userId, host srv, port srv) + where + toService (kh, cert, pk, serviceId_) = ((kh, (cert, pk)), serviceId_) + +-- | SMP servers for which the given user has a stored client service record. +getClientServiceServers :: DB.Connection -> UserId -> IO [SMPServer] +getClientServiceServers db userId = + map toServer + <$> DB.query + db + [sql| + SELECT c.host, c.port, s.key_hash + FROM client_services c + JOIN servers s ON s.host = c.host AND s.port = c.port + WHERE c.user_id = ? + |] + (Only userId) + where + toServer (host, port, kh) = SMPServer host port kh + +setClientServiceId :: DB.Connection -> UserId -> SMPServer -> ServiceId -> IO () +setClientServiceId db userId srv serviceId = + DB.execute + db + [sql| + UPDATE client_services + SET rcv_service_id = ? + WHERE user_id = ? AND host = ? AND port = ?
+ |] + (serviceId, userId, host srv, port srv) + +deleteClientService :: DB.Connection -> UserId -> SMPServer -> IO () +deleteClientService db userId srv = + DB.execute + db + [sql| + DELETE FROM client_services + WHERE user_id = ? AND host = ? AND port = ? + |] + (userId, host srv, port srv) + +deleteClientServices :: DB.Connection -> UserId -> IO () +deleteClientServices db userId = DB.execute db "DELETE FROM client_services WHERE user_id = ?" (Only userId) + createConn_ :: TVar ChaChaDRG -> ConnData -> @@ -1926,6 +2005,15 @@ deriving newtype instance ToField ChunkReplicaId deriving newtype instance FromField ChunkReplicaId +instance ToField X.CertificateChain where toField = toField . Binary . smpEncode . C.encodeCertChain + +instance FromField X.CertificateChain where fromField = blobFieldDecoder (parseAll C.certChainP) + +instance ToField X.PrivKey where toField = toField . Binary . C.encodeASNObj + +instance FromField X.PrivKey where + fromField = blobFieldDecoder $ C.decodeASNKey >=> \case (pk, []) -> Right pk; r -> C.asnKeyError r + fromOnlyBI :: Only BoolInt -> Bool fromOnlyBI (Only (BI b)) = b {-# INLINE fromOnlyBI #-} @@ -2005,19 +2093,18 @@ insertRcvQueue_ db connId' rq@RcvQueue {..} subMode serverKeyHash_ = do db [sql| INSERT INTO rcv_queues - ( host, port, rcv_id, conn_id, rcv_private_key, rcv_dh_secret, e2e_priv_key, e2e_dh_secret, + ( host, port, rcv_id, rcv_service_assoc, conn_id, rcv_private_key, rcv_dh_secret, e2e_priv_key, e2e_dh_secret, snd_id, queue_mode, status, to_subscribe, rcv_queue_id, rcv_primary, replace_rcv_queue_id, smp_client_version, server_key_hash, link_id, link_key, link_priv_sig_key, link_enc_fixed_data, ntf_public_key, ntf_private_key, ntf_id, rcv_ntf_dh_secret - ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?); + ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?); |] - ( (host server, port server, rcvId, connId', rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret) + ( (host server, port server, rcvId, 
rcvServiceAssoc, connId', rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret) :. (sndId, queueMode, status, BI toSubscribe, qId, BI primary, dbReplaceQueueId, smpClientVersion, serverKeyHash_) :. (shortLinkId <$> shortLink, shortLinkKey <$> shortLink, linkPrivSigKey <$> shortLink, linkEncFixedData <$> shortLink) :. ntfCredsFields ) - -- TODO [certs rcv] save client service - pure (rq :: NewRcvQueue) {connId = connId', dbQueueId = qId, clientService = Nothing} + pure (rq :: NewRcvQueue) {connId = connId', dbQueueId = qId} where toSubscribe = subMode == SMOnlyCreate ntfCredsFields = case clientNtfCreds of @@ -2371,7 +2458,7 @@ rcvQueueQuery = [sql| SELECT c.user_id, COALESCE(q.server_key_hash, s.key_hash), q.conn_id, q.host, q.port, q.rcv_id, q.rcv_private_key, q.rcv_dh_secret, q.e2e_priv_key, q.e2e_dh_secret, q.snd_id, q.queue_mode, q.status, c.enable_ntfs, q.client_notice_id, - q.rcv_queue_id, q.rcv_primary, q.replace_rcv_queue_id, q.switch_status, q.smp_client_version, q.delete_errors, + q.rcv_queue_id, q.rcv_primary, q.replace_rcv_queue_id, q.switch_status, q.smp_client_version, q.delete_errors, q.rcv_service_assoc, q.ntf_public_key, q.ntf_private_key, q.ntf_id, q.rcv_ntf_dh_secret, q.link_id, q.link_key, q.link_priv_sig_key, q.link_enc_fixed_data FROM rcv_queues q @@ -2381,13 +2468,13 @@ rcvQueueQuery = toRcvQueue :: (UserId, C.KeyHash, ConnId, NonEmpty TransportHost, ServiceName, SMP.RecipientId, SMP.RcvPrivateAuthKey, SMP.RcvDhSecret, C.PrivateKeyX25519, Maybe C.DhSecretX25519, SMP.SenderId, Maybe QueueMode) - :. (QueueStatus, Maybe BoolInt, Maybe NoticeId, DBEntityId, BoolInt, Maybe Int64, Maybe RcvSwitchStatus, Maybe VersionSMPC, Int) + :. (QueueStatus, Maybe BoolInt, Maybe NoticeId, DBEntityId, BoolInt, Maybe Int64, Maybe RcvSwitchStatus, Maybe VersionSMPC, Int, ServiceAssoc) :. (Maybe SMP.NtfPublicAuthKey, Maybe SMP.NtfPrivateAuthKey, Maybe SMP.NotifierId, Maybe RcvNtfDhSecret) :. 
(Maybe SMP.LinkId, Maybe LinkKey, Maybe C.PrivateKeyEd25519, Maybe EncDataBytes) -> RcvQueue toRcvQueue ( (userId, keyHash, connId, host, port, rcvId, rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret, sndId, queueMode) - :. (status, enableNtfs_, clientNoticeId, dbQueueId, BI primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion_, deleteErrors) + :. (status, enableNtfs_, clientNoticeId, dbQueueId, BI primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion_, deleteErrors, rcvServiceAssoc) :. (ntfPublicKey_, ntfPrivateKey_, notifierId_, rcvNtfDhSecret_) :. (shortLinkId_, shortLinkKey_, linkPrivSigKey_, linkEncFixedData_) ) = @@ -2401,7 +2488,7 @@ toRcvQueue _ -> Nothing enableNtfs = maybe True unBI enableNtfs_ -- TODO [certs rcv] read client service - in RcvQueue {userId, connId, server, rcvId, rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret, sndId, queueMode, shortLink, clientService = Nothing, status, enableNtfs, clientNoticeId, dbQueueId, primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion, clientNtfCreds, deleteErrors} + in RcvQueue {userId, connId, server, rcvId, rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret, sndId, queueMode, shortLink, rcvServiceAssoc, status, enableNtfs, clientNoticeId, dbQueueId, primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion, clientNtfCreds, deleteErrors} -- | returns all connection queue credentials, the first queue is the primary one getRcvQueueSubsByConnId_ :: DB.Connection -> ConnId -> IO (Maybe (NonEmpty RcvQueueSub)) diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/App.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/App.hs index 7371d95842..ae9b3d80e3 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/App.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/App.hs @@ -46,6 +46,7 @@ import Simplex.Messaging.Agent.Store.SQLite.Migrations.M20250322_short_links import 
Simplex.Messaging.Agent.Store.SQLite.Migrations.M20250702_conn_invitations_remove_cascade_delete import Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251009_queue_to_subscribe import Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251010_client_notices +import Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251020_service_certs import Simplex.Messaging.Agent.Store.Shared (Migration (..)) schemaMigrations :: [(String, Query, Maybe Query)] @@ -91,7 +92,8 @@ schemaMigrations = ("m20250322_short_links", m20250322_short_links, Just down_m20250322_short_links), ("m20250702_conn_invitations_remove_cascade_delete", m20250702_conn_invitations_remove_cascade_delete, Just down_m20250702_conn_invitations_remove_cascade_delete), ("m20251009_queue_to_subscribe", m20251009_queue_to_subscribe, Just down_m20251009_queue_to_subscribe), - ("m20251010_client_notices", m20251010_client_notices, Just down_m20251010_client_notices) + ("m20251010_client_notices", m20251010_client_notices, Just down_m20251010_client_notices), + ("m20251020_service_certs", m20251020_service_certs, Just down_m20251020_service_certs) ] -- | The list of migrations in ascending order by date diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20250517_service_certs.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20250517_service_certs.hs deleted file mode 100644 index 7708fd6d26..0000000000 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20250517_service_certs.hs +++ /dev/null @@ -1,40 +0,0 @@ -{-# LANGUAGE QuasiQuotes #-} - -module Simplex.Messaging.Agent.Store.SQLite.Migrations.M20250517_service_certs where - -import Database.SQLite.Simple (Query) -import Database.SQLite.Simple.QQ (sql) - --- TODO move date forward, create migration for postgres -m20250517_service_certs :: Query -m20250517_service_certs = - [sql| -CREATE TABLE server_certs( - server_cert_id INTEGER PRIMARY KEY AUTOINCREMENT, - user_id INTEGER NOT NULL REFERENCES users ON UPDATE RESTRICT ON DELETE 
CASCADE, - host TEXT NOT NULL, - port TEXT NOT NULL, - certificate BLOB NOT NULL, - priv_key BLOB NOT NULL, - service_id BLOB, - FOREIGN KEY(host, port) REFERENCES servers ON UPDATE CASCADE ON DELETE RESTRICT, -); - -CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON server_certs(user_id, host, port); - -CREATE INDEX idx_server_certs_host_port ON server_certs(host, port); - -ALTER TABLE rcv_queues ADD COLUMN rcv_service_id BLOB; - |] - -down_m20250517_service_certs :: Query -down_m20250517_service_certs = - [sql| -ALTER TABLE rcv_queues DROP COLUMN rcv_service_id; - -DROP INDEX idx_server_certs_host_port; - -DROP INDEX idx_server_certs_user_id_host_port; - -DROP TABLE server_certs; - |] diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs new file mode 100644 index 0000000000..780ced1d4a --- /dev/null +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE QuasiQuotes #-} + +module Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251020_service_certs where + +import Database.SQLite.Simple (Query) +import Database.SQLite.Simple.QQ (sql) + +-- TODO move date forward, create migration for postgres +m20251020_service_certs :: Query +m20251020_service_certs = + [sql| +CREATE TABLE client_services( + user_id INTEGER NOT NULL REFERENCES users ON DELETE CASCADE, + host TEXT NOT NULL, + port TEXT NOT NULL, + service_cert BLOB NOT NULL, + service_cert_hash BLOB NOT NULL, + service_priv_key BLOB NOT NULL, + rcv_service_id BLOB, + FOREIGN KEY(host, port) REFERENCES servers ON UPDATE CASCADE ON DELETE RESTRICT +); + +CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON client_services(user_id, host, port); + +CREATE INDEX idx_server_certs_host_port ON client_services(host, port); + +ALTER TABLE rcv_queues ADD COLUMN rcv_service_assoc INTEGER NOT NULL DEFAULT 0; + |] + 
+down_m20251020_service_certs :: Query +down_m20251020_service_certs = + [sql| +ALTER TABLE rcv_queues DROP COLUMN rcv_service_assoc; + +DROP INDEX idx_server_certs_host_port; + +DROP INDEX idx_server_certs_user_id_host_port; + +DROP TABLE client_services; + |] diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql index d2838a7b0e..8013313ac3 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql @@ -63,6 +63,7 @@ CREATE TABLE rcv_queues( to_subscribe INTEGER NOT NULL DEFAULT 0, client_notice_id INTEGER REFERENCES client_notices ON UPDATE RESTRICT ON DELETE SET NULL, + rcv_service_assoc INTEGER NOT NULL DEFAULT 0, PRIMARY KEY(host, port, rcv_id), FOREIGN KEY(host, port) REFERENCES servers ON DELETE RESTRICT ON UPDATE CASCADE, @@ -450,6 +451,16 @@ CREATE TABLE client_notices( created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL ); +CREATE TABLE client_services( + user_id INTEGER NOT NULL REFERENCES users ON DELETE CASCADE, + host TEXT NOT NULL, + port TEXT NOT NULL, + service_cert BLOB NOT NULL, + service_cert_hash BLOB NOT NULL, + service_priv_key BLOB NOT NULL, + rcv_service_id BLOB, + FOREIGN KEY(host, port) REFERENCES servers ON UPDATE CASCADE ON DELETE RESTRICT +); CREATE UNIQUE INDEX idx_rcv_queues_ntf ON rcv_queues(host, port, ntf_id); CREATE UNIQUE INDEX idx_rcv_queue_id ON rcv_queues(conn_id, rcv_queue_id); CREATE UNIQUE INDEX idx_snd_queue_id ON snd_queues(conn_id, snd_queue_id); @@ -593,3 +604,9 @@ CREATE UNIQUE INDEX idx_client_notices_entity ON client_notices( entity_id ); CREATE INDEX idx_rcv_queues_client_notice_id ON rcv_queues(client_notice_id); +CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON client_services( + user_id, + host, + port +); +CREATE INDEX idx_server_certs_host_port ON client_services(host, port); diff --git 
a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index 27840b092b..4f70efcf2f 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -909,12 +909,12 @@ nsubResponse_ = \case {-# INLINE nsubResponse_ #-} -- This command is always sent in background request mode -subscribeService :: forall p. (PartyI p, ServiceParty p) => SMPClient -> SParty p -> ExceptT SMPClientError IO Int64 +subscribeService :: forall p. (PartyI p, ServiceParty p) => SMPClient -> SParty p -> ExceptT SMPClientError IO (Int64, IdsHash) subscribeService c party = case smpClientService c of Just THClientService {serviceId, serviceKey} -> do liftIO $ enablePings c sendSMPCommand c NRMBackground (Just (C.APrivateAuthKey C.SEd25519 serviceKey)) serviceId subCmd >>= \case - SOKS n -> pure n + SOKS n idsHash -> pure (n, idsHash) r -> throwE $ unexpectedResponse r where subCmd :: Command p diff --git a/src/Simplex/Messaging/Client/Agent.hs b/src/Simplex/Messaging/Client/Agent.hs index 6049603606..722a86c7ef 100644 --- a/src/Simplex/Messaging/Client/Agent.hs +++ b/src/Simplex/Messaging/Client/Agent.hs @@ -479,14 +479,14 @@ smpSubscribeService ca smp srv serviceSub@(serviceId, _) = case smpClientService (True <$ processSubscription r) (pure False) if ok - then case r of - Right n -> notify ca $ CAServiceSubscribed srv serviceSub n + then case r of -- TODO [certs rcv] compare hash + Right (n, _idsHash) -> notify ca $ CAServiceSubscribed srv serviceSub n Left e | smpClientServiceError e -> notifyUnavailable | temporaryClientError e -> reconnectClient ca srv | otherwise -> notify ca $ CAServiceSubError srv serviceSub e else reconnectClient ca srv - processSubscription = mapM_ $ \n -> do + processSubscription = mapM_ $ \(n, _idsHash) -> do -- TODO [certs rcv] validate hash here? 
setActiveServiceSub ca srv $ Just ((serviceId, n), sessId) setPendingServiceSub ca srv Nothing serviceAvailable THClientService {serviceRole, serviceId = serviceId'} = diff --git a/src/Simplex/Messaging/Crypto.hs b/src/Simplex/Messaging/Crypto.hs index 9cc78acb30..3d24f0bcba 100644 --- a/src/Simplex/Messaging/Crypto.hs +++ b/src/Simplex/Messaging/Crypto.hs @@ -87,6 +87,8 @@ module Simplex.Messaging.Crypto signatureKeyPair, publicToX509, encodeASNObj, + decodeASNKey, + asnKeyError, -- * key encoding/decoding encodePubKey, @@ -1493,11 +1495,11 @@ encodeASNObj k = toStrict . encodeASN1 DER $ toASN1 k [] -- Decoding of binary X509 'CryptoPublicKey'. decodePubKey :: CryptoPublicKey k => ByteString -> Either String k -decodePubKey = decodeKey >=> x509ToPublic >=> pubKey +decodePubKey = decodeASNKey >=> x509ToPublic >=> pubKey -- Decoding of binary PKCS8 'PrivateKey'. decodePrivKey :: CryptoPrivateKey k => ByteString -> Either String k -decodePrivKey = decodeKey >=> x509ToPrivate >=> privKey +decodePrivKey = decodeASNKey >=> x509ToPrivate >=> privKey x509ToPublic :: (X.PubKey, [ASN1]) -> Either String APublicKey x509ToPublic = \case @@ -1505,7 +1507,7 @@ x509ToPublic = \case (X.PubKeyEd448 k, []) -> Right . APublicKey SEd448 $ PublicKeyEd448 k (X.PubKeyX25519 k, []) -> Right . APublicKey SX25519 $ PublicKeyX25519 k (X.PubKeyX448 k, []) -> Right . 
APublicKey SX448 $ PublicKeyX448 k - r -> keyError r + r -> asnKeyError r x509ToPublic' :: CryptoPublicKey k => X.PubKey -> Either String k x509ToPublic' k = x509ToPublic (k, []) >>= pubKey @@ -1517,16 +1519,16 @@ x509ToPrivate = \case (X.PrivKeyEd448 k, []) -> Right $ APrivateKey SEd448 $ PrivateKeyEd448 k (X.PrivKeyX25519 k, []) -> Right $ APrivateKey SX25519 $ PrivateKeyX25519 k (X.PrivKeyX448 k, []) -> Right $ APrivateKey SX448 $ PrivateKeyX448 k - r -> keyError r + r -> asnKeyError r x509ToPrivate' :: CryptoPrivateKey k => X.PrivKey -> Either String k x509ToPrivate' pk = x509ToPrivate (pk, []) >>= privKey {-# INLINE x509ToPrivate' #-} -decodeKey :: ASN1Object a => ByteString -> Either String (a, [ASN1]) -decodeKey = fromASN1 <=< first show . decodeASN1 DER . fromStrict +decodeASNKey :: ASN1Object a => ByteString -> Either String (a, [ASN1]) +decodeASNKey = fromASN1 <=< first show . decodeASN1 DER . fromStrict -keyError :: (a, [ASN1]) -> Either String b -keyError = \case +asnKeyError :: (a, [ASN1]) -> Either String b +asnKeyError = \case (_, []) -> Left "unknown key algorithm" _ -> Left "more than one key" diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index 13ac3f1820..3be4515cce 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -140,6 +140,7 @@ module Simplex.Messaging.Protocol RcvMessage (..), MsgId, MsgBody, + IdsHash, MaxMessageLen, MaxRcvMessageLen, EncRcvMsgBody (..), @@ -698,11 +699,13 @@ data BrokerMsg where -- | Service subscription success - confirms when queue was associated with the service SOK :: Maybe ServiceId -> BrokerMsg -- | The number of queues subscribed with SUBS command - SOKS :: Int64 -> BrokerMsg + SOKS :: Int64 -> IdsHash -> BrokerMsg -- MSG v1/2 has to be supported for encoding/decoding -- v1: MSG :: MsgId -> SystemTime -> MsgBody -> BrokerMsg -- v2: MsgId -> SystemTime -> MsgFlags -> MsgBody -> BrokerMsg MSG :: RcvMessage -> BrokerMsg + -- sent once 
delivering messages to SUBS command is complete + SALL :: BrokerMsg NID :: NotifierId -> RcvNtfPublicDhKey -> BrokerMsg NMSG :: C.CbNonce -> EncNMsgMeta -> BrokerMsg -- Should include certificate chain @@ -939,6 +942,7 @@ data BrokerMsgTag | SOK_ | SOKS_ | MSG_ + | SALL_ | NID_ | NMSG_ | PKEY_ @@ -1031,6 +1035,7 @@ instance Encoding BrokerMsgTag where SOK_ -> "SOK" SOKS_ -> "SOKS" MSG_ -> "MSG" + SALL_ -> "SALL" NID_ -> "NID" NMSG_ -> "NMSG" PKEY_ -> "PKEY" @@ -1052,6 +1057,7 @@ instance ProtocolMsgTag BrokerMsgTag where "SOK" -> Just SOK_ "SOKS" -> Just SOKS_ "MSG" -> Just MSG_ + "SALL" -> Just SALL_ "NID" -> Just NID_ "NMSG" -> Just NMSG_ "PKEY" -> Just PKEY_ @@ -1454,6 +1460,8 @@ type MsgId = ByteString -- | SMP message body. type MsgBody = ByteString +type IdsHash = ByteString + data ProtocolErrorType = PECmdSyntax | PECmdUnknown | PESession | PEBlock -- | Type for protocol errors. @@ -1834,9 +1842,12 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where SOK serviceId_ | v >= serviceCertsSMPVersion -> e (SOK_, ' ', serviceId_) | otherwise -> e OK_ -- won't happen, the association with the service requires v >= serviceCertsSMPVersion - SOKS n -> e (SOKS_, ' ', n) + SOKS n idsHash + | v >= rcvServiceSMPVersion -> e (SOKS_, ' ', n, idsHash) + | otherwise -> e (SOKS_, ' ', n) MSG RcvMessage {msgId, msgBody = EncRcvMsgBody body} -> e (MSG_, ' ', msgId, Tail body) + SALL -> e SALL_ NID nId srvNtfDh -> e (NID_, ' ', nId, srvNtfDh) NMSG nmsgNonce encNMsgMeta -> e (NMSG_, ' ', nmsgNonce, encNMsgMeta) PKEY sid vr certKey -> e (PKEY_, ' ', sid, vr, certKey) @@ -1867,6 +1878,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where MSG . RcvMessage msgId <$> bodyP where bodyP = EncRcvMsgBody . 
unTail <$> smpP + SALL_ -> pure SALL IDS_ | v >= newNtfCredsSMPVersion -> ids smpP smpP smpP smpP | v >= serviceCertsSMPVersion -> ids smpP smpP smpP nothing @@ -1887,7 +1899,9 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where pure $ IDS QIK {rcvId, sndId, rcvPublicDhKey, queueMode, linkId, serviceId, serverNtfCreds} LNK_ -> LNK <$> _smpP <*> smpP SOK_ -> SOK <$> _smpP - SOKS_ -> SOKS <$> _smpP + SOKS_ + | v >= rcvServiceSMPVersion -> SOKS <$> _smpP <*> smpP + | otherwise -> SOKS <$> _smpP <*> pure B.empty NID_ -> NID <$> _smpP <*> smpP NMSG_ -> NMSG <$> _smpP <*> smpP PKEY_ -> PKEY <$> _smpP <*> smpP <*> smpP @@ -1917,6 +1931,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where PONG -> noEntityMsg PKEY {} -> noEntityMsg RRES _ -> noEntityMsg + SALL -> noEntityMsg -- other broker responses must have queue ID _ | B.null entId -> Left $ CMD NO_ENTITY diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index ec75a07d43..1e5e94fd6a 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -1359,7 +1359,7 @@ client -- TODO [certs rcv] rcv subscriptions Server {subscribers, ntfSubscribers} ms - clnt@Client {clientId, ntfSubscriptions, ntfServiceSubscribed, serviceSubsCount = _todo', ntfServiceSubsCount, rcvQ, sndQ, clientTHParams = thParams'@THandleParams {sessionId}, procThreads} = do + clnt@Client {clientId, rcvQ, sndQ, msgQ, clientTHParams = thParams'@THandleParams {sessionId}, procThreads} = do labelMyThread . 
B.unpack $ "client $" <> encode sessionId <> " commands" let THandleParams {thVersion} = thParams' clntServiceId = (\THClientService {serviceId} -> serviceId) <$> (peerClientService =<< thAuth thParams') @@ -1495,7 +1495,9 @@ client OFF -> response <$> maybe (pure $ err INTERNAL) suspendQueue_ q_ DEL -> response <$> maybe (pure $ err INTERNAL) delQueueAndMsgs q_ QUE -> withQueue $ \q qr -> (corrId,entId,) <$> getQueueInfo q qr - Cmd SRecipientService SUBS -> pure $ response $ err (CMD PROHIBITED) -- "TODO [certs rcv]" + Cmd SRecipientService SUBS -> response . (corrId,entId,) <$> case clntServiceId of + Just serviceId -> subscribeServiceMessages serviceId + Nothing -> pure $ ERR INTERNAL -- it's "internal" because it should never get to this branch where createQueue :: NewQueueReq -> M s (Transmission BrokerMsg) createQueue NewQueueReq {rcvAuthKey, rcvDhKey, subMode, queueReqData, ntfCreds} @@ -1615,11 +1617,13 @@ client suspendQueue_ :: (StoreQueue s, QueueRec) -> M s (Transmission BrokerMsg) suspendQueue_ (q, _) = liftIO $ either err (const ok) <$> suspendQueue (queueStore ms) q - -- TODO [certs rcv] if serviceId is passed, associate with the service and respond with SOK subscribeQueueAndDeliver :: StoreQueue s -> QueueRec -> M s ResponseAndMessage - subscribeQueueAndDeliver q qr = + subscribeQueueAndDeliver q qr@QueueRec {rcvServiceId} = liftIO (TM.lookupIO entId $ subscriptions clnt) >>= \case - Nothing -> subscribeRcvQueue qr >>= deliver False + Nothing -> + sharedSubscribeQueue q SRecipientService rcvServiceId subscribers subscriptions serviceSubsCount (newSubscription NoSub) rcvServices >>= \case + Left e -> pure (err e, Nothing) + Right s -> deliver s Just s@Sub {subThread} -> do stats <- asks serverStats case subThread of @@ -1629,27 +1633,29 @@ client pure (err (CMD PROHIBITED), Nothing) _ -> do incStat $ qSubDuplicate stats - atomically (writeTVar (delivered s) Nothing) >> deliver True s + atomically (writeTVar (delivered s) Nothing) >> deliver (True, 
Just s) where - deliver :: Bool -> Sub -> M s ResponseAndMessage - deliver hasSub sub = do + deliver :: (Bool, Maybe Sub) -> M s ResponseAndMessage + deliver (hasSub, sub_) = do stats <- asks serverStats fmap (either ((,Nothing) . err) id) $ liftIO $ runExceptT $ do msg_ <- tryPeekMsg ms q msg' <- forM msg_ $ \msg -> liftIO $ do ts <- getSystemSeconds + sub <- maybe (atomically getSub) pure sub_ atomically $ setDelivered sub msg ts unless hasSub $ incStat $ qSub stats pure (NoCorrId, entId, MSG (encryptMsg qr msg)) pure ((corrId, entId, SOK clntServiceId), msg') - -- TODO [certs rcv] combine with subscribing ntf queues - subscribeRcvQueue :: QueueRec -> M s Sub - subscribeRcvQueue QueueRec {rcvServiceId} = atomically $ do - writeTQueue (subQ subscribers) (CSClient entId rcvServiceId Nothing, clientId) - sub <- newSubscription NoSub - TM.insert entId sub $ subscriptions clnt - pure sub + getSub :: STM Sub + getSub = + TM.lookup entId (subscriptions clnt) >>= \case + Just sub -> pure sub + Nothing -> do + sub <- newSubscription NoSub + TM.insert entId sub $ subscriptions clnt + pure sub subscribeNewQueue :: RecipientId -> QueueRec -> M s () subscribeNewQueue rId QueueRec {rcvServiceId} = do @@ -1719,74 +1725,131 @@ client else liftIO (updateQueueTime (queueStore ms) q t) >>= either (pure . err') (action q) subscribeNotifications :: StoreQueue s -> NtfCreds -> M s BrokerMsg - subscribeNotifications q NtfCreds {ntfServiceId} = do + subscribeNotifications q NtfCreds {ntfServiceId} = + sharedSubscribeQueue q SNotifierService ntfServiceId ntfSubscribers ntfSubscriptions ntfServiceSubsCount (pure ()) ntfServices >>= \case + Left e -> pure $ ERR e + Right (hasSub, _) -> do + when (isNothing clntServiceId) $ + asks serverStats >>= incStat . 
(if hasSub then ntfSubDuplicate else ntfSub) + pure $ SOK clntServiceId + + sharedSubscribeQueue :: + (PartyI p, ServiceParty p) => + StoreQueue s -> + SParty p -> + Maybe ServiceId -> + ServerSubscribers s -> + (Client s -> TMap QueueId sub) -> + (Client s -> TVar Int64) -> + STM sub -> + (ServerStats -> ServiceStats) -> + M s (Either ErrorType (Bool, Maybe sub)) + sharedSubscribeQueue q party queueServiceId srvSubscribers clientSubs clientServiceSubs mkSub servicesSel = do stats <- asks serverStats - let incNtfSrvStat sel = incStat $ sel $ ntfServices stats - case clntServiceId of + let incSrvStat sel = incStat $ sel $ servicesSel stats + writeSub = writeTQueue (subQ srvSubscribers) (CSClient entId queueServiceId clntServiceId, clientId) + liftIO $ case clntServiceId of Just serviceId - | ntfServiceId == Just serviceId -> do + | queueServiceId == Just serviceId -> do -- duplicate queue-service association - can only happen in case of response error/timeout - hasSub <- atomically $ ifM hasServiceSub (pure True) (False <$ newServiceQueueSub) + hasSub <- atomically $ ifM hasServiceSub (pure True) (False <$ incServiceQueueSubs) unless hasSub $ do - incNtfSrvStat srvSubCount - incNtfSrvStat srvSubQueues - incNtfSrvStat srvAssocDuplicate - pure $ SOK $ Just serviceId - | otherwise -> + atomically writeSub + incSrvStat srvSubCount + incSrvStat srvSubQueues + incSrvStat srvAssocDuplicate + pure $ Right (hasSub, Nothing) + | otherwise -> runExceptT $ do -- new or updated queue-service association - liftIO (setQueueService (queueStore ms) q SNotifierService (Just serviceId)) >>= \case - Left e -> pure $ ERR e - Right () -> do - hasSub <- atomically $ (<$ newServiceQueueSub) =<< hasServiceSub - unless hasSub $ incNtfSrvStat srvSubCount - incNtfSrvStat srvSubQueues - incNtfSrvStat $ maybe srvAssocNew (const srvAssocUpdated) ntfServiceId - pure $ SOK $ Just serviceId + ExceptT $ setQueueService (queueStore ms) q party (Just serviceId) + hasSub <- atomically $ (<$ 
incServiceQueueSubs) =<< hasServiceSub + atomically writeSub + liftIO $ do + unless hasSub $ incSrvStat srvSubCount + incSrvStat srvSubQueues + incSrvStat $ maybe srvAssocNew (const srvAssocUpdated) queueServiceId + pure (hasSub, Nothing) where - hasServiceSub = (0 /=) <$> readTVar ntfServiceSubsCount - -- This function is used when queue is associated with the service. - newServiceQueueSub = do - writeTQueue (subQ ntfSubscribers) (CSClient entId ntfServiceId (Just serviceId), clientId) - modifyTVar' ntfServiceSubsCount (+ 1) -- service count - modifyTVar' (totalServiceSubs ntfSubscribers) (+ 1) -- server count for all services - Nothing -> case ntfServiceId of - Just _ -> - liftIO (setQueueService (queueStore ms) q SNotifierService Nothing) >>= \case - Left e -> pure $ ERR e - Right () -> do - -- hasSubscription should never be True in this branch, because queue was associated with service. - -- So unless storage and session states diverge, this check is redundant. - hasSub <- atomically $ hasSubscription >>= newSub - incNtfSrvStat srvAssocRemoved - sok hasSub + hasServiceSub = (0 /=) <$> readTVar (clientServiceSubs clnt) + -- This function is used when queue association with the service is created. + incServiceQueueSubs = modifyTVar' (clientServiceSubs clnt) (+ 1) -- service count + Nothing -> case queueServiceId of + Just _ -> runExceptT $ do + ExceptT $ setQueueService (queueStore ms) q party Nothing + liftIO $ incSrvStat srvAssocRemoved + -- getSubscription may be Just for receiving service, where clientSubs also hold active deliveries for service subscriptions. + -- For notification service it can only be Just if storage and session states diverge. 
+ r <- atomically $ getSubscription >>= newSub + atomically writeSub + pure r Nothing -> do - hasSub <- atomically $ ifM hasSubscription (pure True) (newSub False) - sok hasSub + r@(hasSub, _) <- atomically $ getSubscription >>= newSub + unless hasSub $ atomically writeSub + pure $ Right r where - hasSubscription = TM.member entId ntfSubscriptions - newSub hasSub = do - writeTQueue (subQ ntfSubscribers) (CSClient entId ntfServiceId Nothing, clientId) - unless (hasSub) $ TM.insert entId () ntfSubscriptions - pure hasSub - sok hasSub = do - incStat $ if hasSub then ntfSubDuplicate stats else ntfSub stats - pure $ SOK Nothing + getSubscription = TM.lookup entId $ clientSubs clnt + newSub = \case + Just sub -> pure (True, Just sub) + Nothing -> do + sub <- mkSub + TM.insert entId sub $ clientSubs clnt + pure (False, Just sub) + + subscribeServiceMessages :: ServiceId -> M s BrokerMsg + subscribeServiceMessages serviceId = + sharedSubscribeService SRecipientService serviceId subscribers serviceSubscribed serviceSubsCount >>= \case + Left e -> pure $ ERR e + Right (hasSub, (count, idsHash)) -> do + unless hasSub $ forkClient clnt "deliverServiceMessages" $ liftIO $ deliverServiceMessages count + pure $ SOKS count idsHash + where + deliverServiceMessages expectedCnt = do + (qCnt, _msgCnt, _dupCnt, _errCnt) <- foldRcvServiceMessages ms serviceId deliverQueueMsg (0, 0, 0, 0) + atomically $ writeTBQueue msgQ [(NoCorrId, NoEntity, SALL)] + -- TODO [certs rcv] compare with expected + logNote $ "Service subscriptions for " <> tshow serviceId <> " (" <> tshow qCnt <> " queues)" + deliverQueueMsg :: (Int, Int, Int, Int) -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO (Int, Int, Int, Int) + deliverQueueMsg (!qCnt, !msgCnt, !dupCnt, !errCnt) rId = \case + Left e -> pure (qCnt + 1, msgCnt, dupCnt, errCnt + 1) -- TODO [certs rcv] deliver subscription error + Right qMsg_ -> case qMsg_ of + Nothing -> pure (qCnt + 1, msgCnt, dupCnt, errCnt) + Just (qr, msg) -> + 
atomically (getSubscription rId) >>= \case + Nothing -> pure (qCnt + 1, msgCnt, dupCnt + 1, errCnt) + Just sub -> do + ts <- getSystemSeconds + atomically $ setDelivered sub msg ts + atomically $ writeTBQueue msgQ [(NoCorrId, rId, MSG (encryptMsg qr msg))] + pure (qCnt + 1, msgCnt + 1, dupCnt, errCnt) + getSubscription rId = + TM.lookup rId (subscriptions clnt) >>= \case + -- If delivery subscription already exists, then there is no need to deliver message. + -- It may have been created when the message is sent after service subscription is created. + Just _sub -> pure Nothing + Nothing -> do + sub <- newSubscription NoSub + TM.insert rId sub $ subscriptions clnt + pure $ Just sub subscribeServiceNotifications :: ServiceId -> M s BrokerMsg - subscribeServiceNotifications serviceId = do - subscribed <- readTVarIO ntfServiceSubscribed - if subscribed - then SOKS <$> readTVarIO ntfServiceSubsCount - else - liftIO (getServiceQueueCount @(StoreQueue s) (queueStore ms) SNotifierService serviceId) >>= \case - Left e -> pure $ ERR e - Right !count' -> do + subscribeServiceNotifications serviceId = + either ERR (uncurry SOKS . 
snd) <$> sharedSubscribeService SNotifierService serviceId ntfSubscribers ntfServiceSubscribed ntfServiceSubsCount + + sharedSubscribeService :: (PartyI p, ServiceParty p) => SParty p -> ServiceId -> ServerSubscribers s -> (Client s -> TVar Bool) -> (Client s -> TVar Int64) -> M s (Either ErrorType (Bool, (Int64, IdsHash))) + sharedSubscribeService party serviceId srvSubscribers clientServiceSubscribed clientServiceSubs = do + subscribed <- readTVarIO $ clientServiceSubscribed clnt + liftIO $ runExceptT $ + (subscribed,) + <$> if subscribed + then (,B.empty) <$> readTVarIO (clientServiceSubs clnt) -- TODO [certs rcv] get IDs hash + else do + count' <- ExceptT $ getServiceQueueCount @(StoreQueue s) (queueStore ms) party serviceId incCount <- atomically $ do - writeTVar ntfServiceSubscribed True - count <- swapTVar ntfServiceSubsCount count' + writeTVar (clientServiceSubscribed clnt) True + count <- swapTVar (clientServiceSubs clnt) count' pure $ count' - count - atomically $ writeTQueue (subQ ntfSubscribers) (CSService serviceId incCount, clientId) - pure $ SOKS count' + atomically $ writeTQueue (subQ srvSubscribers) (CSService serviceId incCount, clientId) + pure (count', B.empty) -- TODO [certs rcv] get IDs hash acknowledgeMsg :: MsgId -> StoreQueue s -> QueueRec -> M s (Transmission BrokerMsg) acknowledgeMsg msgId q qr = @@ -1904,10 +1967,13 @@ client tryDeliverMessage msg = -- the subscribed client var is read outside of STM to avoid transaction cost -- in case no client is subscribed. - getSubscribedClient rId (queueSubscribers subscribers) + getSubscribed $>>= deliverToSub >>= mapM_ forkDeliver where + getSubscribed = case rcvServiceId qr of + Just serviceId -> getSubscribedClient serviceId $ serviceSubscribers subscribers + Nothing -> getSubscribedClient rId $ queueSubscribers subscribers rId = recipientId q deliverToSub rcv = do ts <- getSystemSeconds @@ -1918,6 +1984,7 @@ client -- the new client will receive message in response to SUB. 
readTVar rcv $>>= \rc@Client {subscriptions = subs, sndQ = sndQ'} -> TM.lookup rId subs + >>= maybe (newServiceDeliverySub subs) (pure . Just) $>>= \s@Sub {subThread, delivered} -> case subThread of ProhibitSub -> pure Nothing ServerSub st -> readTVar st >>= \case @@ -1930,6 +1997,12 @@ client (writeTVar st SubPending $> Just (rc, s, st)) (deliver sndQ' s ts $> Nothing) _ -> pure Nothing + newServiceDeliverySub subs + | isJust (rcvServiceId qr) = do + sub <- newSubscription NoSub + TM.insert rId sub subs + pure $ Just sub + | otherwise = pure Nothing deliver sndQ' s ts = do let encMsg = encryptMsg qr msg writeTBQueue sndQ' ([(NoCorrId, rId, MSG encMsg)], []) @@ -2051,6 +2124,7 @@ client -- we delete subscription here, so the client with no subscriptions can be disconnected. sub <- atomically $ TM.lookupDelete entId $ subscriptions clnt liftIO $ mapM_ cancelSub sub + when (isJust rcvServiceId) $ atomically $ modifyTVar' (serviceSubsCount clnt) $ \n -> max 0 (n - 1) atomically $ writeTQueue (subQ subscribers) (CSDeleted entId rcvServiceId, clientId) forM_ (notifier qr) $ \NtfCreds {notifierId = nId, ntfServiceId} -> do -- queue is deleted by a different client from the one subscribed to notifications, diff --git a/src/Simplex/Messaging/Server/MsgStore/Journal.hs b/src/Simplex/Messaging/Server/MsgStore/Journal.hs index 5038c8826c..d9a1ff6ecd 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Journal.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Journal.hs @@ -444,6 +444,26 @@ instance MsgStoreClass (JournalMsgStore s) where getLoadedQueue :: JournalQueue s -> IO (JournalQueue s) getLoadedQueue q = fromMaybe q <$> TM.lookupIO (recipientId q) (loadedQueues $ queueStore_ ms) + foldRcvServiceMessages :: JournalMsgStore s -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a + foldRcvServiceMessages ms serviceId f acc = case queueStore_ ms of + MQStore st -> foldRcvServiceQueues st serviceId f' acc + where + f' a (q, qr) = 
runExceptT (tryPeekMsg ms q) >>= f a (recipientId q) . ((qr,) <$$>) +#if defined(dbServerPostgres) + PQStore st -> foldRcvServiceQueueRecs st serviceId f' acc + where + JournalMsgStore {queueLocks, sharedLock} = ms + f' a (rId, qr) = do + q <- mkQueue ms False rId qr + qMsg_ <- + withSharedWaitLock rId queueLocks sharedLock $ runExceptT $ tryStore' "foldRcvServiceMessages" rId $ + (qr,) . snd <$$> (getLoadedQueue q >>= unStoreIO . getPeekMsgQueue ms) + f a rId qMsg_ + -- Use cached queue if available. + -- Also see the comment in loadQueue in PostgresQueueStore + getLoadedQueue q = fromMaybe q <$> TM.lookupIO (recipientId q) (loadedQueues $ queueStore_ ms) +#endif + logQueueStates :: JournalMsgStore s -> IO () logQueueStates ms = withActiveMsgQueues ms $ unStoreIO . logQueueState diff --git a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs index a0eb1d1ca1..f3000811b5 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs @@ -119,6 +119,34 @@ instance MsgStoreClass PostgresMsgStore where toMessageStats (expiredMsgsCount, storedMsgsCount, storedQueues) = MessageStats {expiredMsgsCount, storedMsgsCount, storedQueues} + foldRcvServiceMessages :: PostgresMsgStore -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a + foldRcvServiceMessages ms serviceId f acc = + withTransaction (dbStore $ queueStore_ ms) $ \db -> + DB.fold + db + [sql| + SELECT q.recipient_id, q.recipient_keys, q.rcv_dh_secret, + q.sender_id, q.sender_key, q.queue_mode, + q.notifier_id, q.notifier_key, q.rcv_ntf_dh_secret, q.ntf_service_id, + q.status, q.updated_at, q.link_id, q.rcv_service_id, + m.msg_id, m.msg_ts, m.msg_quota, m.msg_ntf_flag, m.msg_body + FROM msg_queues q + LEFT JOIN ( + SELECT recipient_id, msg_id, msg_ts, msg_quota, msg_ntf_flag, msg_body, + ROW_NUMBER() OVER (PARTITION BY recipient_id ORDER BY message_id ASC) AS 
row_num + FROM messages + ) m ON q.recipient_id = m.recipient_id AND m.row_num = 1 + WHERE q.rcv_service_id = ? AND q.deleted_at IS NULL; + |] + (Only serviceId) + acc + f' + where + f' a (qRow :. mRow) = + let (rId, qr) = rowToQueueRec qRow + msg_ = toMaybeMessage mRow + in f a rId $ Right ((qr,) <$> msg_) + logQueueStates _ = error "logQueueStates not used" logQueueState _ = error "logQueueState not used" @@ -247,6 +275,11 @@ uninterruptibleMask_ :: ExceptT ErrorType IO a -> ExceptT ErrorType IO a uninterruptibleMask_ = ExceptT . E.uninterruptibleMask_ . runExceptT {-# INLINE uninterruptibleMask_ #-} +toMaybeMessage :: (Maybe (Binary MsgId), Maybe Int64, Maybe Bool, Maybe Bool, Maybe (Binary MsgBody)) -> Maybe Message +toMaybeMessage = \case + (Just msgId, Just ts, Just msgQuota, Just ntf, Just body) -> Just $ toMessage (msgId, ts, msgQuota, ntf, body) + _ -> Nothing + toMessage :: (Binary MsgId, Int64, Bool, Bool, Binary MsgBody) -> Message toMessage (Binary msgId, ts, msgQuota, ntf, Binary body) | msgQuota = MessageQuota {msgId, msgTs} diff --git a/src/Simplex/Messaging/Server/MsgStore/STM.hs b/src/Simplex/Messaging/Server/MsgStore/STM.hs index 73e1bf3980..24d489accb 100644 --- a/src/Simplex/Messaging/Server/MsgStore/STM.hs +++ b/src/Simplex/Messaging/Server/MsgStore/STM.hs @@ -87,6 +87,11 @@ instance MsgStoreClass STMMsgStore where expireOldMessages _tty ms now ttl = withLoadedQueues (queueStore_ ms) $ atomically . expireQueueMsgs ms now (now - ttl) + foldRcvServiceMessages :: STMMsgStore -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a + foldRcvServiceMessages ms serviceId f= + foldRcvServiceQueues (queueStore_ ms) serviceId $ \a (q, qr) -> + runExceptT (tryPeekMsg ms q) >>= f a (recipientId q) . 
((qr,) <$$>) + logQueueStates _ = pure () {-# INLINE logQueueStates #-} logQueueState _ = pure () diff --git a/src/Simplex/Messaging/Server/MsgStore/Types.hs b/src/Simplex/Messaging/Server/MsgStore/Types.hs index 98c12d4be7..e186da05a1 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Types.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Types.hs @@ -45,6 +45,7 @@ class (Monad (StoreMonad s), QueueStoreClass (StoreQueue s) (QueueStore s)) => M unsafeWithAllMsgQueues :: Monoid a => Bool -> s -> (StoreQueue s -> IO a) -> IO a -- tty, store, now, ttl expireOldMessages :: Bool -> s -> Int64 -> Int64 -> IO MessageStats + foldRcvServiceMessages :: s -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a logQueueStates :: s -> IO () logQueueState :: StoreQueue s -> StoreMonad s () queueStore :: s -> QueueStore s diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs index e86bec07b4..2fabbfa33b 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs @@ -24,9 +24,11 @@ module Simplex.Messaging.Server.QueueStore.Postgres batchInsertServices, batchInsertQueues, foldServiceRecs, + foldRcvServiceQueueRecs, foldQueueRecs, foldRecentQueueRecs, handleDuplicate, + rowToQueueRec, withLog_, withDB, withDB', @@ -577,12 +579,17 @@ insertServiceQuery = VALUES (?,?,?,?,?) |] -foldServiceRecs :: forall a q. Monoid a => PostgresQueueStore q -> (ServiceRec -> IO a) -> IO a +foldServiceRecs :: Monoid a => PostgresQueueStore q -> (ServiceRec -> IO a) -> IO a foldServiceRecs st f = withTransaction (dbStore st) $ \db -> DB.fold_ db "SELECT service_id, service_role, service_cert, service_cert_hash, created_at FROM services" mempty $ \ !acc -> fmap (acc <>) . f . 
rowToServiceRec +foldRcvServiceQueueRecs :: PostgresQueueStore q -> ServiceId -> (a -> (RecipientId, QueueRec) -> IO a) -> a -> IO a +foldRcvServiceQueueRecs st serviceId f acc = + withTransaction (dbStore st) $ \db -> + DB.fold db (queueRecQuery <> " WHERE rcv_service_id = ? AND deleted_at IS NULL") (Only serviceId) acc $ \a -> f a . rowToQueueRec + foldQueueRecs :: Monoid a => Bool -> Bool -> PostgresQueueStore q -> ((RecipientId, QueueRec) -> IO a) -> IO a foldQueueRecs withData = foldQueueRecs_ foldRecs where @@ -769,10 +776,6 @@ instance ToField SMPServiceRole where toField = toField . decodeLatin1 . smpEnco instance FromField SMPServiceRole where fromField = fromTextField_ $ eitherToMaybe . smpDecode . encodeUtf8 -instance ToField X.CertificateChain where toField = toField . Binary . smpEncode . C.encodeCertChain - -instance FromField X.CertificateChain where fromField = blobFieldDecoder (parseAll C.certChainP) - #if !defined(dbPostgres) instance ToField EntityId where toField (EntityId s) = toField $ Binary s @@ -797,4 +800,8 @@ deriving newtype instance FromField EncDataBytes deriving newtype instance ToField (RoundedSystemTime t) deriving newtype instance FromField (RoundedSystemTime t) + +instance ToField X.CertificateChain where toField = toField . Binary . smpEncode . 
C.encodeCertChain + +instance FromField X.CertificateChain where fromField = blobFieldDecoder (parseAll C.certChainP) #endif diff --git a/src/Simplex/Messaging/Server/QueueStore/STM.hs b/src/Simplex/Messaging/Server/QueueStore/STM.hs index ad98698db8..ad3e00a03e 100644 --- a/src/Simplex/Messaging/Server/QueueStore/STM.hs +++ b/src/Simplex/Messaging/Server/QueueStore/STM.hs @@ -17,6 +17,7 @@ module Simplex.Messaging.Server.QueueStore.STM ( STMQueueStore (..), STMService (..), + foldRcvServiceQueues, setStoreLog, withLog', readQueueRecIO, @@ -45,7 +46,7 @@ import Simplex.Messaging.SystemTime import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM import Simplex.Messaging.Transport (SMPServiceRole (..)) -import Simplex.Messaging.Util (anyM, ifM, tshow, ($>>), ($>>=), (<$$)) +import Simplex.Messaging.Util (anyM, ifM, tshow, ($>>), ($>>=), (<$$), (<$$>)) import System.IO import UnliftIO.STM @@ -359,6 +360,16 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where SRecipientService -> serviceRcvQueues SNotifierService -> serviceNtfQueues +foldRcvServiceQueues :: StoreQueueClass q => STMQueueStore q -> ServiceId -> (a -> (q, QueueRec) -> IO a) -> a -> IO a +foldRcvServiceQueues st serviceId f acc = + TM.lookupIO serviceId (services st) >>= \case + Nothing -> pure acc + Just s -> + readTVarIO (serviceRcvQueues s) + >>= foldM (\a -> get >=> maybe (pure a) (f a)) acc + where + get rId = TM.lookupIO rId (queues st) $>>= \q -> (q,) <$$> readTVarIO (queueRec q) + withQueueRec :: TVar (Maybe QueueRec) -> (QueueRec -> STM a) -> IO (Either ErrorType a) withQueueRec qr a = atomically $ readQueueRec qr >>= mapM a diff --git a/src/Simplex/Messaging/Transport.hs b/src/Simplex/Messaging/Transport.hs index e2e912875f..2d959410da 100644 --- a/src/Simplex/Messaging/Transport.hs +++ b/src/Simplex/Messaging/Transport.hs @@ -56,6 +56,7 @@ module Simplex.Messaging.Transport serviceCertsSMPVersion, newNtfCredsSMPVersion, clientNoticesSMPVersion, + 
rcvServiceSMPVersion, simplexMQVersion, smpBlockSize, TransportConfig (..), @@ -170,6 +171,7 @@ smpBlockSize = 16384 -- 16 - service certificates (5/31/2025) -- 17 - create notification credentials with NEW (7/12/2025) -- 18 - support client notices (10/10/2025) +-- 19 - service subscriptions to messages (10/20/2025) data SMPVersion @@ -218,6 +220,9 @@ newNtfCredsSMPVersion = VersionSMP 17 clientNoticesSMPVersion :: VersionSMP clientNoticesSMPVersion = VersionSMP 18 +rcvServiceSMPVersion :: VersionSMP +rcvServiceSMPVersion = VersionSMP 19 + minClientSMPRelayVersion :: VersionSMP minClientSMPRelayVersion = VersionSMP 6 @@ -225,13 +230,13 @@ minServerSMPRelayVersion :: VersionSMP minServerSMPRelayVersion = VersionSMP 6 currentClientSMPRelayVersion :: VersionSMP -currentClientSMPRelayVersion = VersionSMP 18 +currentClientSMPRelayVersion = VersionSMP 19 legacyServerSMPRelayVersion :: VersionSMP legacyServerSMPRelayVersion = VersionSMP 6 currentServerSMPRelayVersion :: VersionSMP -currentServerSMPRelayVersion = VersionSMP 18 +currentServerSMPRelayVersion = VersionSMP 19 -- Max SMP protocol version to be used in e2e encrypted -- connection between client and server, as defined by SMP proxy. @@ -239,7 +244,7 @@ currentServerSMPRelayVersion = VersionSMP 18 -- to prevent client version fingerprinting by the -- destination relays when clients upgrade at different times. 
proxiedSMPRelayVersion :: VersionSMP -proxiedSMPRelayVersion = VersionSMP 17 +proxiedSMPRelayVersion = VersionSMP 18 -- minimal supported protocol version is 6 -- TODO remove code that supports sending commands without batching @@ -823,7 +828,7 @@ smpClientHandshake c ks_ keyHash@(C.KeyHash kh) vRange proxyServer serviceKeys_ serviceKeys = case serviceKeys_ of Just sks | v >= serviceCertsSMPVersion && certificateSent c -> Just sks _ -> Nothing - clientService = mkClientService <$> serviceKeys + clientService = mkClientService v =<< serviceKeys hs = SMPClientHandshake {smpVersion = v, keyHash, authPubKey = fst <$> ks_, proxyServer, clientService} sendHandshake th hs service <- mapM getClientService serviceKeys @@ -831,10 +836,12 @@ smpClientHandshake c ks_ keyHash@(C.KeyHash kh) vRange proxyServer serviceKeys_ Nothing -> throwE TEVersion where th@THandle {params = THandleParams {sessionId}} = smpTHandle c - mkClientService :: (ServiceCredentials, C.KeyPairEd25519) -> SMPClientHandshakeService - mkClientService (ServiceCredentials {serviceRole, serviceCreds, serviceSignKey}, (k, _)) = - let sk = C.signX509 serviceSignKey $ C.publicToX509 k - in SMPClientHandshakeService {serviceRole, serviceCertKey = CertChainPubKey (fst serviceCreds) sk} + mkClientService :: VersionSMP -> (ServiceCredentials, C.KeyPairEd25519) -> Maybe SMPClientHandshakeService + mkClientService v (ServiceCredentials {serviceRole, serviceCreds, serviceSignKey}, (k, _)) + | serviceRole == SRMessaging && v < rcvServiceSMPVersion = Nothing + | otherwise = + let sk = C.signX509 serviceSignKey $ C.publicToX509 k + in Just SMPClientHandshakeService {serviceRole, serviceCertKey = CertChainPubKey (fst serviceCreds) sk} getClientService :: (ServiceCredentials, C.KeyPairEd25519) -> ExceptT TransportError IO THClientService getClientService (ServiceCredentials {serviceRole, serviceCertHash}, (_, pk)) = getHandshake th >>= \case diff --git a/tests/AgentTests/FunctionalAPITests.hs 
b/tests/AgentTests/FunctionalAPITests.hs index fcdd5be294..017958890b 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -85,7 +85,7 @@ import Simplex.Messaging.Agent hiding (acceptContact, createConnection, deleteCo import qualified Simplex.Messaging.Agent as A import Simplex.Messaging.Agent.Client (ProtocolTestFailure (..), ProtocolTestStep (..), ServerQueueInfo (..), UserNetworkInfo (..), UserNetworkType (..), waitForUserNetwork) import Simplex.Messaging.Agent.Env.SQLite (AgentConfig (..), Env (..), InitialAgentServers (..), createAgentStore) -import Simplex.Messaging.Agent.Protocol hiding (CON, CONF, INFO, REQ, SENT, INV, JOINED) +import Simplex.Messaging.Agent.Protocol hiding (CON, CONF, INFO, REQ, SENT) import qualified Simplex.Messaging.Agent.Protocol as A import Simplex.Messaging.Agent.Store (Connection' (..), SomeConn' (..), StoredRcvQueue (..)) import Simplex.Messaging.Agent.Store.AgentStore (getConn) @@ -219,12 +219,6 @@ pattern SENT msgId = A.SENT msgId Nothing pattern Rcvd :: AgentMsgId -> AEvent 'AEConn pattern Rcvd agentMsgId <- RCVD MsgMeta {integrity = MsgOk} [MsgReceipt {agentMsgId, msgRcptStatus = MROk}] -pattern INV :: AConnectionRequestUri -> AEvent 'AEConn -pattern INV cReq = A.INV cReq Nothing - -pattern JOINED :: SndQueueSecured -> AEvent 'AEConn -pattern JOINED sndSecure = A.JOINED sndSecure Nothing - smpCfgVPrev :: ProtocolClientConfig SMPVersion smpCfgVPrev = (smpCfg agentCfg) {serverVRange = prevRange $ serverVRange $ smpCfg agentCfg} @@ -282,16 +276,16 @@ inAnyOrder g rs = withFrozenCallStack $ do createConnection :: ConnectionModeI c => AgentClient -> UserId -> Bool -> SConnectionMode c -> Maybe CRClientData -> SubscriptionMode -> AE (ConnId, ConnectionRequestUri c) createConnection c userId enableNtfs cMode clientData subMode = do - (connId, (CCLink cReq _, Nothing)) <- A.createConnection c NRMInteractive userId enableNtfs True cMode Nothing clientData IKPQOn subMode + (connId, CCLink 
cReq _) <- A.createConnection c NRMInteractive userId enableNtfs True cMode Nothing clientData IKPQOn subMode pure (connId, cReq) joinConnection :: AgentClient -> UserId -> Bool -> ConnectionRequestUri c -> ConnInfo -> SubscriptionMode -> AE (ConnId, SndQueueSecured) joinConnection c userId enableNtfs cReq connInfo subMode = do connId <- A.prepareConnectionToJoin c userId enableNtfs cReq PQSupportOn - (sndSecure, Nothing) <- A.joinConnection c NRMInteractive userId connId enableNtfs cReq connInfo PQSupportOn subMode + sndSecure <- A.joinConnection c NRMInteractive userId connId enableNtfs cReq connInfo PQSupportOn subMode pure (connId, sndSecure) -acceptContact :: AgentClient -> UserId -> ConnId -> Bool -> ConfirmationId -> ConnInfo -> PQSupport -> SubscriptionMode -> AE (SndQueueSecured, Maybe ClientServiceId) +acceptContact :: AgentClient -> UserId -> ConnId -> Bool -> ConfirmationId -> ConnInfo -> PQSupport -> SubscriptionMode -> AE SndQueueSecured acceptContact c = A.acceptContact c NRMInteractive subscribeConnection :: AgentClient -> ConnId -> AE () @@ -708,9 +702,9 @@ runAgentClientTest pqSupport sqSecured viaProxy alice bob baseId = runAgentClientTestPQ :: HasCallStack => SndQueueSecured -> Bool -> (AgentClient, InitialKeys) -> (AgentClient, PQSupport) -> AgentMsgId -> IO () runAgentClientTestPQ sqSecured viaProxy (alice, aPQ) (bob, bPQ) baseId = runRight_ $ do - (bobId, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing aPQ SMSubscribe + (bobId, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing aPQ SMSubscribe aliceId <- A.prepareConnectionToJoin bob 1 True qInfo bPQ - (sqSecured', Nothing) <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" bPQ SMSubscribe + sqSecured' <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" bPQ SMSubscribe liftIO $ sqSecured' `shouldBe` sqSecured ("", _, 
A.CONF confId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` CR.connPQEncryption aPQ @@ -910,14 +904,14 @@ runAgentClientContactTest pqSupport sqSecured viaProxy alice bob baseId = runAgentClientContactTestPQ :: HasCallStack => SndQueueSecured -> Bool -> PQSupport -> (AgentClient, InitialKeys) -> (AgentClient, PQSupport) -> AgentMsgId -> IO () runAgentClientContactTestPQ sqSecured viaProxy reqPQSupport (alice, aPQ) (bob, bPQ) baseId = runRight_ $ do - (_, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMContact Nothing Nothing aPQ SMSubscribe + (_, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True SCMContact Nothing Nothing aPQ SMSubscribe aliceId <- A.prepareConnectionToJoin bob 1 True qInfo bPQ - (sqSecuredJoin, Nothing) <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" bPQ SMSubscribe + sqSecuredJoin <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" bPQ SMSubscribe liftIO $ sqSecuredJoin `shouldBe` False -- joining via contact address connection ("", _, A.REQ invId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` reqPQSupport bobId <- A.prepareConnectionToAccept alice 1 True invId (CR.connPQEncryption aPQ) - (sqSecured', Nothing) <- acceptContact alice 1 bobId True invId "alice's connInfo" (CR.connPQEncryption aPQ) SMSubscribe + sqSecured' <- acceptContact alice 1 bobId True invId "alice's connInfo" (CR.connPQEncryption aPQ) SMSubscribe liftIO $ sqSecured' `shouldBe` sqSecured ("", _, A.CONF confId pqSup'' _ "alice's connInfo") <- get bob liftIO $ pqSup'' `shouldBe` bPQ @@ -954,7 +948,7 @@ runAgentClientContactTestPQ sqSecured viaProxy reqPQSupport (alice, aPQ) (bob, b runAgentClientContactTestPQ3 :: HasCallStack => Bool -> (AgentClient, InitialKeys) -> (AgentClient, PQSupport) -> (AgentClient, PQSupport) -> AgentMsgId -> IO () runAgentClientContactTestPQ3 viaProxy (alice, aPQ) (bob, bPQ) (tom, tPQ) 
baseId = runRight_ $ do - (_, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMContact Nothing Nothing aPQ SMSubscribe + (_, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True SCMContact Nothing Nothing aPQ SMSubscribe (bAliceId, bobId, abPQEnc) <- connectViaContact bob bPQ qInfo sentMessages abPQEnc alice bobId bob bAliceId (tAliceId, tomId, atPQEnc) <- connectViaContact tom tPQ qInfo @@ -963,12 +957,12 @@ runAgentClientContactTestPQ3 viaProxy (alice, aPQ) (bob, bPQ) (tom, tPQ) baseId msgId = subtract baseId . fst connectViaContact b pq qInfo = do aId <- A.prepareConnectionToJoin b 1 True qInfo pq - (sqSecuredJoin, Nothing) <- A.joinConnection b NRMInteractive 1 aId True qInfo "bob's connInfo" pq SMSubscribe + sqSecuredJoin <- A.joinConnection b NRMInteractive 1 aId True qInfo "bob's connInfo" pq SMSubscribe liftIO $ sqSecuredJoin `shouldBe` False -- joining via contact address connection ("", _, A.REQ invId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` PQSupportOn bId <- A.prepareConnectionToAccept alice 1 True invId (CR.connPQEncryption aPQ) - (sqSecuredAccept, Nothing) <- acceptContact alice 1 bId True invId "alice's connInfo" (CR.connPQEncryption aPQ) SMSubscribe + sqSecuredAccept <- acceptContact alice 1 bId True invId "alice's connInfo" (CR.connPQEncryption aPQ) SMSubscribe liftIO $ sqSecuredAccept `shouldBe` False -- agent cfg is v8 ("", _, A.CONF confId pqSup'' _ "alice's connInfo") <- get b liftIO $ pqSup'' `shouldBe` pq @@ -1007,9 +1001,9 @@ noMessages_ ingoreQCONT c err = tryGet `shouldReturn` () testRejectContactRequest :: HasCallStack => IO () testRejectContactRequest = withAgentClients2 $ \alice bob -> runRight_ $ do - (_addrConnId, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMContact Nothing Nothing IKPQOn SMSubscribe + (_addrConnId, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True 
SCMContact Nothing Nothing IKPQOn SMSubscribe aliceId <- A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn - (sqSecured, Nothing) <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe + sqSecured <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe liftIO $ sqSecured `shouldBe` False -- joining via contact address connection ("", _, A.REQ invId PQSupportOn _ "bob's connInfo") <- get alice rejectContact alice invId @@ -1022,7 +1016,7 @@ testUpdateConnectionUserId = newUserId <- createUser alice [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] _ <- changeConnectionUser alice 1 connId newUserId aliceId <- A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn - (sqSecured', Nothing) <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe + sqSecured' <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe liftIO $ sqSecured' `shouldBe` True ("", _, A.CONF confId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` PQSupportOn @@ -1206,7 +1200,7 @@ testInvitationErrors ps restart = do threadDelay 200000 let loopConfirm n = runExceptT (A.joinConnection b' NRMInteractive 1 aId True cReq "bob's connInfo" PQSupportOn SMSubscribe) >>= \case - Right (True, Nothing) -> pure n + Right True -> pure n Right r -> error $ "unexpected result " <> show r Left _ -> putStrLn "retrying confirm" >> threadDelay 200000 >> loopConfirm (n + 1) n <- loopConfirm 1 @@ -1268,7 +1262,7 @@ testContactErrors ps restart = do let loopSend = do -- sends the invitation to testPort runExceptT (A.joinConnection b'' NRMInteractive 1 aId True cReq "bob's connInfo" PQSupportOn SMSubscribe) >>= \case - Right (False, Nothing) -> pure () + Right False -> pure () Right r -> error $ "unexpected result " <> show r Left _ -> putStrLn "retrying send" >> threadDelay 200000 >> loopSend loopSend @@ -1297,7 
+1291,7 @@ testContactErrors ps restart = do ("", "", UP _ [_]) <- nGet b'' let loopConfirm n = runExceptT (acceptContact a' 1 bId True invId "alice's connInfo" PQSupportOn SMSubscribe) >>= \case - Right (True, Nothing) -> pure n + Right True -> pure n Right r -> error $ "unexpected result " <> show r Left _ -> putStrLn "retrying accept confirm" >> threadDelay 200000 >> loopConfirm (n + 1) n <- loopConfirm 1 @@ -1334,7 +1328,7 @@ testInvitationShortLink viaProxy a b = withAgent 3 agentCfg initAgentServers testDB3 $ \c -> do let userData = UserLinkData "some user data" newLinkData = UserInvLinkData userData - (bId, (CCLink connReq (Just shortLink), Nothing)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKUsePQ SMSubscribe + (bId, CCLink connReq (Just shortLink)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKUsePQ SMSubscribe (connReq', connData') <- runRight $ getConnShortLink b 1 shortLink strDecode (strEncode shortLink) `shouldBe` Right shortLink connReq' `shouldBe` connReq @@ -1356,7 +1350,7 @@ testInvitationShortLink viaProxy a b = testJoinConn_ :: Bool -> Bool -> AgentClient -> ConnId -> AgentClient -> ConnectionRequestUri c -> ExceptT AgentErrorType IO () testJoinConn_ viaProxy sndSecure a bId b connReq = do aId <- A.prepareConnectionToJoin b 1 True connReq PQSupportOn - (sndSecure', Nothing) <- A.joinConnection b NRMInteractive 1 aId True connReq "bob's connInfo" PQSupportOn SMSubscribe + sndSecure' <- A.joinConnection b NRMInteractive 1 aId True connReq "bob's connInfo" PQSupportOn SMSubscribe liftIO $ sndSecure' `shouldBe` sndSecure ("", _, CONF confId _ "bob's connInfo") <- get a allowConnection a bId confId "alice's connInfo" @@ -1370,14 +1364,14 @@ testInvitationShortLinkPrev viaProxy sndSecure a b = runRight_ $ do let userData = UserLinkData "some user data" newLinkData = UserInvLinkData userData -- can't create short link with 
previous version - (bId, (CCLink connReq Nothing, Nothing)) <- A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKPQOn SMSubscribe + (bId, CCLink connReq Nothing) <- A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKPQOn SMSubscribe testJoinConn_ viaProxy sndSecure a bId b connReq testInvitationShortLinkAsync :: HasCallStack => Bool -> AgentClient -> AgentClient -> IO () testInvitationShortLinkAsync viaProxy a b = do let userData = UserLinkData "some user data" newLinkData = UserInvLinkData userData - (bId, (CCLink connReq (Just shortLink), Nothing)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKUsePQ SMSubscribe + (bId, CCLink connReq (Just shortLink)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKUsePQ SMSubscribe (connReq', connData') <- runRight $ getConnShortLink b 1 shortLink strDecode (strEncode shortLink) `shouldBe` Right shortLink connReq' `shouldBe` connReq @@ -1404,7 +1398,7 @@ testContactShortLink viaProxy a b = let userData = UserLinkData "some user data" userCtData = UserContactData {direct = True, owners = [], relays = [], userData} newLinkData = UserContactLinkData userCtData - (contactId, (CCLink connReq0 (Just shortLink), Nothing)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMContact (Just newLinkData) Nothing CR.IKPQOn SMSubscribe + (contactId, CCLink connReq0 (Just shortLink)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMContact (Just newLinkData) Nothing CR.IKPQOn SMSubscribe Right connReq <- pure $ smpDecode (smpEncode connReq0) (connReq', ContactLinkData _ userCtData') <- runRight $ getConnShortLink b 1 shortLink strDecode (strEncode shortLink) `shouldBe` Right shortLink @@ -1423,7 +1417,7 @@ testContactShortLink viaProxy a b = liftIO $ sndSecure `shouldBe` False ("", _, REQ invId _ "bob's 
connInfo") <- get a bId <- A.prepareConnectionToAccept a 1 True invId PQSupportOn - (sndSecure', Nothing) <- acceptContact a 1 bId True invId "alice's connInfo" PQSupportOn SMSubscribe + sndSecure' <- acceptContact a 1 bId True invId "alice's connInfo" PQSupportOn SMSubscribe liftIO $ sndSecure' `shouldBe` True ("", _, CONF confId _ "alice's connInfo") <- get b allowConnection b aId confId "bob's connInfo" @@ -1451,7 +1445,7 @@ testContactShortLink viaProxy a b = testAddContactShortLink :: HasCallStack => Bool -> AgentClient -> AgentClient -> IO () testAddContactShortLink viaProxy a b = withAgent 3 agentCfg initAgentServers testDB3 $ \c -> do - (contactId, (CCLink connReq0 Nothing, Nothing)) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMContact Nothing Nothing CR.IKPQOn SMSubscribe + (contactId, CCLink connReq0 Nothing) <- runRight $ A.createConnection a NRMInteractive 1 True True SCMContact Nothing Nothing CR.IKPQOn SMSubscribe Right connReq <- pure $ smpDecode (smpEncode connReq0) -- let userData = UserLinkData "some user data" userCtData = UserContactData {direct = True, owners = [], relays = [], userData} @@ -1474,7 +1468,7 @@ testAddContactShortLink viaProxy a b = liftIO $ sndSecure `shouldBe` False ("", _, REQ invId _ "bob's connInfo") <- get a bId <- A.prepareConnectionToAccept a 1 True invId PQSupportOn - (sndSecure', Nothing) <- acceptContact a 1 bId True invId "alice's connInfo" PQSupportOn SMSubscribe + sndSecure' <- acceptContact a 1 bId True invId "alice's connInfo" PQSupportOn SMSubscribe liftIO $ sndSecure' `shouldBe` True ("", _, CONF confId _ "alice's connInfo") <- get b allowConnection b aId confId "bob's connInfo" @@ -1496,7 +1490,7 @@ testInvitationShortLinkRestart :: HasCallStack => (ASrvTransport, AStoreType) -> testInvitationShortLinkRestart ps = withAgentClients2 $ \a b -> do let userData = UserLinkData "some user data" newLinkData = UserInvLinkData userData - (bId, (CCLink connReq (Just shortLink), Nothing)) <- 
withSmpServer ps $ + (bId, CCLink connReq (Just shortLink)) <- withSmpServer ps $ runRight $ A.createConnection a NRMInteractive 1 True True SCMInvitation (Just newLinkData) Nothing CR.IKUsePQ SMOnlyCreate withSmpServer ps $ do runRight_ $ subscribeConnection a bId @@ -1510,7 +1504,7 @@ testContactShortLinkRestart ps = withAgentClients2 $ \a b -> do let userData = UserLinkData "some user data" userCtData = UserContactData {direct = True, owners = [], relays = [], userData} newLinkData = UserContactLinkData userCtData - (contactId, (CCLink connReq0 (Just shortLink), Nothing)) <- withSmpServer ps $ + (contactId, CCLink connReq0 (Just shortLink)) <- withSmpServer ps $ runRight $ A.createConnection a NRMInteractive 1 True True SCMContact (Just newLinkData) Nothing CR.IKPQOn SMOnlyCreate Right connReq <- pure $ smpDecode (smpEncode connReq0) let updatedData = UserLinkData "updated user data" @@ -1534,7 +1528,7 @@ testAddContactShortLinkRestart ps = withAgentClients2 $ \a b -> do let userData = UserLinkData "some user data" userCtData = UserContactData {direct = True, owners = [], relays = [], userData} newLinkData = UserContactLinkData userCtData - ((contactId, (CCLink connReq0 Nothing, Nothing)), shortLink) <- withSmpServer ps $ runRight $ do + ((contactId, CCLink connReq0 Nothing), shortLink) <- withSmpServer ps $ runRight $ do r@(contactId, _) <- A.createConnection a NRMInteractive 1 True True SCMContact Nothing Nothing CR.IKPQOn SMOnlyCreate (r,) <$> setConnShortLink a contactId SCMContact newLinkData Nothing Right connReq <- pure $ smpDecode (smpEncode connReq0) @@ -1556,7 +1550,7 @@ testAddContactShortLinkRestart ps = withAgentClients2 $ \a b -> do testOldContactQueueShortLink :: HasCallStack => (ASrvTransport, AStoreType) -> IO () testOldContactQueueShortLink ps@(_, msType) = withAgentClients2 $ \a b -> do - (contactId, (CCLink connReq Nothing, Nothing)) <- withSmpServer ps $ runRight $ + (contactId, CCLink connReq Nothing) <- withSmpServer ps $ runRight $ 
A.createConnection a NRMInteractive 1 True True SCMContact Nothing Nothing CR.IKPQOn SMOnlyCreate -- make it an "old" queue let updateStoreLog f = replaceSubstringInFile f " queue_mode=C" "" @@ -2301,9 +2295,9 @@ makeConnectionForUsers = makeConnectionForUsers_ PQSupportOn True makeConnectionForUsers_ :: HasCallStack => PQSupport -> SndQueueSecured -> AgentClient -> UserId -> AgentClient -> UserId -> ExceptT AgentErrorType IO (ConnId, ConnId) makeConnectionForUsers_ pqSupport sqSecured alice aliceUserId bob bobUserId = do - (bobId, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive aliceUserId True True SCMInvitation Nothing Nothing (IKLinkPQ pqSupport) SMSubscribe + (bobId, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive aliceUserId True True SCMInvitation Nothing Nothing (IKLinkPQ pqSupport) SMSubscribe aliceId <- A.prepareConnectionToJoin bob bobUserId True qInfo pqSupport - (sqSecured', Nothing) <- A.joinConnection bob NRMInteractive bobUserId aliceId True qInfo "bob's connInfo" pqSupport SMSubscribe + sqSecured' <- A.joinConnection bob NRMInteractive bobUserId aliceId True qInfo "bob's connInfo" pqSupport SMSubscribe liftIO $ sqSecured' `shouldBe` sqSecured ("", _, A.CONF confId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` pqSupport diff --git a/tests/AgentTests/SQLiteTests.hs b/tests/AgentTests/SQLiteTests.hs index dff79c8617..f66dfe5dfc 100644 --- a/tests/AgentTests/SQLiteTests.hs +++ b/tests/AgentTests/SQLiteTests.hs @@ -227,7 +227,7 @@ rcvQueue1 = sndId = EntityId "2345", queueMode = Just QMMessaging, shortLink = Nothing, - clientService = Nothing, + rcvServiceAssoc = False, status = New, enableNtfs = True, clientNoticeId = Nothing, @@ -441,7 +441,7 @@ testUpgradeSndConnToDuplex = sndId = EntityId "4567", queueMode = Just QMMessaging, shortLink = Nothing, - clientService = Nothing, + rcvServiceAssoc = False, status = New, enableNtfs = True, clientNoticeId = Nothing, diff --git 
a/tests/AgentTests/ServerChoice.hs b/tests/AgentTests/ServerChoice.hs index a27678cb69..8412c6761a 100644 --- a/tests/AgentTests/ServerChoice.hs +++ b/tests/AgentTests/ServerChoice.hs @@ -64,6 +64,7 @@ initServers = ntf = [testNtfServer], xftp = userServers [testXFTPServer], netCfg = defaultNetworkConfig, + useServices = M.empty, presetDomains = [], presetServers = [] } diff --git a/tests/SMPAgentClient.hs b/tests/SMPAgentClient.hs index 02bee9ae7e..9357750506 100644 --- a/tests/SMPAgentClient.hs +++ b/tests/SMPAgentClient.hs @@ -65,6 +65,7 @@ initAgentServers = ntf = [testNtfServer], xftp = userServers [testXFTPServer], netCfg = defaultNetworkConfig {tcpTimeout = NetworkTimeout 500000 500000, tcpConnectTimeout = NetworkTimeout 500000 500000}, + useServices = M.empty, presetDomains = [], presetServers = [] } diff --git a/tests/SMPClient.hs b/tests/SMPClient.hs index 3c1ac0150a..361bc4f1d7 100644 --- a/tests/SMPClient.hs +++ b/tests/SMPClient.hs @@ -15,10 +15,14 @@ module SMPClient where +import Control.Monad import Control.Monad.Except (runExceptT) import Data.ByteString.Char8 (ByteString) import Data.List.NonEmpty (NonEmpty) +import qualified Data.X509 as X +import qualified Data.X509.Validation as XV import Network.Socket +import qualified Network.TLS as TLS import Simplex.Messaging.Agent.Store.Postgres.Options (DBOpts (..)) import Simplex.Messaging.Agent.Store.Shared (MigrationConfirmation (..)) import Simplex.Messaging.Client (ProtocolClientConfig (..), chooseTransportHost, defaultNetworkConfig) @@ -33,6 +37,7 @@ import Simplex.Messaging.Server.QueueStore.Postgres.Config (PostgresStoreCfg (.. 
import Simplex.Messaging.Transport import Simplex.Messaging.Transport.Client import Simplex.Messaging.Transport.Server +import Simplex.Messaging.Transport.Shared (ChainCertificates (..), chainIdCaCerts) import Simplex.Messaging.Util (ifM) import Simplex.Messaging.Version import Simplex.Messaging.Version.Internal @@ -151,13 +156,26 @@ testSMPClient = testSMPClientVR supportedClientSMPRelayVRange testSMPClientVR :: Transport c => VersionRangeSMP -> (THandleSMP c 'TClient -> IO a) -> IO a testSMPClientVR vr client = do Right useHost <- pure $ chooseTransportHost defaultNetworkConfig testHost - testSMPClient_ useHost testPort vr client + testSMPClient_ useHost testPort vr Nothing client -testSMPClient_ :: Transport c => TransportHost -> ServiceName -> VersionRangeSMP -> (THandleSMP c 'TClient -> IO a) -> IO a -testSMPClient_ host port vr client = do - let tcConfig = defaultTransportClientConfig {clientALPN} :: TransportClientConfig +testSMPServiceClient :: Transport c => (TLS.Credential, C.KeyPairEd25519) -> (THandleSMP c 'TClient -> IO a) -> IO a +testSMPServiceClient serviceCreds client = do + Right useHost <- pure $ chooseTransportHost defaultNetworkConfig testHost + testSMPClient_ useHost testPort supportedClientSMPRelayVRange (Just serviceCreds) client + +testSMPClient_ :: Transport c => TransportHost -> ServiceName -> VersionRangeSMP -> Maybe (TLS.Credential, C.KeyPairEd25519) -> (THandleSMP c 'TClient -> IO a) -> IO a +testSMPClient_ host port vr serviceCreds_ client = do + serviceAndKeys_ <- forM serviceCreds_ $ \(serviceCreds@(cc, pk), keys) -> do + Right serviceSignKey <- pure $ C.x509ToPrivate' pk + let idCert' = case chainIdCaCerts cc of + CCSelf cert -> cert + CCValid {idCert} -> idCert + _ -> error "bad certificate" + serviceCertHash = XV.getFingerprint idCert' X.HashSHA256 + pure (ServiceCredentials {serviceRole = SRMessaging, serviceCreds, serviceCertHash, serviceSignKey}, keys) + let tcConfig = defaultTransportClientConfig {clientALPN, 
clientCredentials = fst <$> serviceCreds_} :: TransportClientConfig runTransportClient tcConfig Nothing host port (Just testKeyHash) $ \h -> - runExceptT (smpClientHandshake h Nothing testKeyHash vr False Nothing) >>= \case + runExceptT (smpClientHandshake h Nothing testKeyHash vr False serviceAndKeys_) >>= \case Right th -> client th Left e -> error $ show e where @@ -165,6 +183,12 @@ testSMPClient_ host port vr client = do | authCmdsSMPVersion `isCompatible` vr = Just alpnSupportedSMPHandshakes | otherwise = Nothing +runSMPClient :: Transport c => TProxy c 'TServer -> (THandleSMP c 'TClient -> IO a) -> IO a +runSMPClient _ test' = testSMPClient test' + +runSMPServiceClient :: Transport c => TProxy c 'TServer -> (TLS.Credential, C.KeyPairEd25519) -> (THandleSMP c 'TClient -> IO a) -> IO a +runSMPServiceClient _ serviceCreds test' = testSMPServiceClient serviceCreds test' + testNtfServiceClient :: Transport c => TProxy c 'TServer -> C.KeyPairEd25519 -> (THandleSMP c 'TClient -> IO a) -> IO a testNtfServiceClient _ keys client = do tlsNtfServerCreds <- loadServerCredential ntfTestServerCredentials diff --git a/tests/SMPProxyTests.hs b/tests/SMPProxyTests.hs index b756ce7c99..09f20c1dd6 100644 --- a/tests/SMPProxyTests.hs +++ b/tests/SMPProxyTests.hs @@ -224,9 +224,9 @@ agentDeliverMessageViaProxy :: (C.AlgorithmI a, C.AuthAlgorithm a) => (NonEmpty agentDeliverMessageViaProxy aTestCfg@(aSrvs, _, aViaProxy) bTestCfg@(bSrvs, _, bViaProxy) alg msg1 msg2 baseId = withAgent 1 aCfg (servers aTestCfg) testDB $ \alice -> withAgent 2 aCfg (servers bTestCfg) testDB2 $ \bob -> runRight_ $ do - (bobId, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe + (bobId, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe aliceId <- A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn - (sqSecured, Nothing) <- 
A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe + sqSecured <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe liftIO $ sqSecured `shouldBe` True ("", _, A.CONF confId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` PQSupportOn @@ -280,9 +280,9 @@ agentDeliverMessagesViaProxyConc agentServers msgs = -- agent connections have to be set up in advance -- otherwise the CONF messages would get mixed with MSG prePair alice bob = do - (bobId, (CCLink qInfo Nothing, Nothing)) <- runExceptT' $ A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe + (bobId, CCLink qInfo Nothing) <- runExceptT' $ A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe aliceId <- runExceptT' $ A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn - (sqSecured, Nothing) <- runExceptT' $ A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe + sqSecured <- runExceptT' $ A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe liftIO $ sqSecured `shouldBe` True confId <- get alice >>= \case @@ -331,7 +331,7 @@ agentViaProxyVersionError = withAgent 1 agentCfg (servers [SMPServer testHost testPort testKeyHash]) testDB $ \alice -> do Left (A.BROKER _ (TRANSPORT TEVersion)) <- withAgent 2 agentCfg (servers [SMPServer testHost2 testPort2 testKeyHash]) testDB2 $ \bob -> runExceptT $ do - (_bobId, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe + (_bobId, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe aliceId <- A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's 
connInfo" PQSupportOn SMSubscribe pure () @@ -351,9 +351,9 @@ agentViaProxyRetryOffline = do let pqEnc = CR.PQEncOn withServer $ \_ -> do (aliceId, bobId) <- withServer2 $ \_ -> runRight $ do - (bobId, (CCLink qInfo Nothing, Nothing)) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe + (bobId, CCLink qInfo Nothing) <- A.createConnection alice NRMInteractive 1 True True SCMInvitation Nothing Nothing CR.IKPQOn SMSubscribe aliceId <- A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn - (sqSecured, Nothing) <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe + sqSecured <- A.joinConnection bob NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe liftIO $ sqSecured `shouldBe` True ("", _, A.CONF confId pqSup' _ "bob's connInfo") <- get alice liftIO $ pqSup' `shouldBe` PQSupportOn @@ -434,14 +434,14 @@ agentViaProxyRetryNoSession = do testNoProxy :: AStoreType -> IO () testNoProxy msType = do withSmpServerConfigOn (transport @TLS) (cfgMS msType) testPort2 $ \_ -> do - testSMPClient_ "127.0.0.1" testPort2 proxyVRangeV8 $ \(th :: THandleSMP TLS 'TClient) -> do + testSMPClient_ "127.0.0.1" testPort2 proxyVRangeV8 Nothing $ \(th :: THandleSMP TLS 'TClient) -> do (_, _, reply) <- sendRecv th (Nothing, "0", NoEntity, SMP.PRXY testSMPServer Nothing) reply `shouldBe` Right (SMP.ERR $ SMP.PROXY SMP.BASIC_AUTH) testProxyAuth :: AStoreType -> IO () testProxyAuth msType = do withSmpServerConfigOn (transport @TLS) proxyCfgAuth testPort $ \_ -> do - testSMPClient_ "127.0.0.1" testPort proxyVRangeV8 $ \(th :: THandleSMP TLS 'TClient) -> do + testSMPClient_ "127.0.0.1" testPort proxyVRangeV8 Nothing $ \(th :: THandleSMP TLS 'TClient) -> do (_, _, reply) <- sendRecv th (Nothing, "0", NoEntity, SMP.PRXY testSMPServer2 $ Just "wrong") reply `shouldBe` Right (SMP.ERR $ SMP.PROXY SMP.BASIC_AUTH) where diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs 
index b2c2d997c9..39009794c0 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -29,9 +29,11 @@ import Data.Bifunctor (first) import qualified Data.ByteString.Base64 as B64 import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B +import Data.Foldable (foldrM) import Data.Hashable (hash) import qualified Data.IntSet as IS import Data.List.NonEmpty (NonEmpty) +import Data.Maybe (catMaybes) import Data.String (IsString (..)) import Data.Type.Equality import qualified Data.X509.Validation as XV @@ -50,6 +52,7 @@ import Simplex.Messaging.Server.MsgStore.Types (MsgStoreClass (..), QSType (..), import Simplex.Messaging.Server.Stats (PeriodStatsData (..), ServerStatsData (..)) import Simplex.Messaging.Server.StoreLog (StoreLogRecord (..), closeStoreLog) import Simplex.Messaging.Transport +import Simplex.Messaging.Transport.Credentials import Simplex.Messaging.Util (whenM) import Simplex.Messaging.Version (mkVersionRange) import System.Directory (doesDirectoryExist, doesFileExist, removeDirectoryRecursive, removeFile) @@ -84,6 +87,9 @@ serverTests = do describe "GET & SUB commands" testGetSubCommands describe "Exceeding queue quota" testExceedQueueQuota describe "Concurrent sending and delivery" testConcurrentSendDelivery + describe "Service message subscriptions" $ do + testServiceDeliverSubscribe + testServiceUpgradeAndDowngrade describe "Store log" testWithStoreLog describe "Restore messages" testRestoreMessages describe "Restore messages (old / v2)" testRestoreExpireMessages @@ -111,6 +117,9 @@ pattern New rPub dhPub = NEW (NewQueueReq rPub dhPub Nothing SMSubscribe (Just ( pattern Ids :: RecipientId -> SenderId -> RcvPublicDhKey -> BrokerMsg pattern Ids rId sId srvDh <- IDS (QIK rId sId srvDh _sndSecure _linkId Nothing Nothing) +pattern Ids_ :: RecipientId -> SenderId -> RcvPublicDhKey -> ServiceId -> BrokerMsg +pattern Ids_ rId sId srvDh serviceId <- IDS (QIK rId sId srvDh _sndSecure _linkId (Just serviceId) Nothing) + 
pattern Msg :: MsgId -> MsgBody -> BrokerMsg pattern Msg msgId body <- MSG RcvMessage {msgId, msgBody = EncRcvMsgBody body} @@ -135,11 +144,21 @@ serviceSignSendRecv h pk serviceKey t = do [r] <- signSendRecv_ h pk (Just serviceKey) t pure r +serviceSignSendRecv2 :: forall c p. (Transport c, PartyI p) => THandleSMP c 'TClient -> C.APrivateAuthKey -> C.PrivateKeyEd25519 -> (ByteString, EntityId, Command p) -> IO (Transmission (Either ErrorType BrokerMsg), Transmission (Either ErrorType BrokerMsg)) +serviceSignSendRecv2 h pk serviceKey t = do + [r1, r2] <- signSendRecv_ h pk (Just serviceKey) t + pure (r1, r2) + signSendRecv_ :: forall c p. (Transport c, PartyI p) => THandleSMP c 'TClient -> C.APrivateAuthKey -> Maybe C.PrivateKeyEd25519 -> (ByteString, EntityId, Command p) -> IO (NonEmpty (Transmission (Either ErrorType BrokerMsg))) -signSendRecv_ h@THandle {params} (C.APrivateAuthKey a pk) serviceKey_ (corrId, qId, cmd) = do +signSendRecv_ h pk serviceKey_ t = do + signSend_ h pk serviceKey_ t + tGetClient h + +signSend_ :: forall c p. (Transport c, PartyI p) => THandleSMP c 'TClient -> C.APrivateAuthKey -> Maybe C.PrivateKeyEd25519 -> (ByteString, EntityId, Command p) -> IO () +signSend_ h@THandle {params} (C.APrivateAuthKey a pk) serviceKey_ (corrId, qId, cmd) = do let TransmissionForAuth {tForAuth, tToSend} = encodeTransmissionForAuth params (CorrId corrId, qId, cmd) Right () <- tPut1 h (authorize tForAuth, tToSend) - liftIO $ tGetClient h + pure () where authorize t = (,(`C.sign'` t) <$> serviceKey_) <$> case a of C.SEd25519 -> Just . TASignature . 
C.ASignature C.SEd25519 $ C.sign' pk t' @@ -660,6 +679,194 @@ testConcurrentSendDelivery = Resp "4" _ OK <- signSendRecv rh rKey ("4", rId, ACK mId2) pure () +testServiceDeliverSubscribe :: SpecWith (ASrvTransport, AStoreType) +testServiceDeliverSubscribe = + it "should create queue as service and subscribe with SUBS after reconnect" $ \(at@(ATransport t), msType) -> do + g <- C.newRandom + creds <- genCredentials g Nothing (0, 2400) "localhost" + let (_fp, tlsCred) = tlsCredentials [creds] + serviceKeys@(_, servicePK) <- atomically $ C.generateKeyPair g + let aServicePK = C.APrivateAuthKey C.SEd25519 servicePK + withSmpServerConfigOn at (cfgMS msType) testPort $ \_ -> runSMPClient t $ \h -> do + (rPub, rKey) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub, dhPriv :: C.PrivateKeyX25519) <- atomically $ C.generateKeyPair g + (sPub, sKey) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + + (rId, sId, dec, serviceId) <- runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + Resp "1" NoEntity (ERR SERVICE) <- signSendRecv sh rKey ("1", NoEntity, New rPub dhPub) + Resp "2" NoEntity (Ids_ rId sId srvDh serviceId) <- serviceSignSendRecv sh rKey servicePK ("2", NoEntity, New rPub dhPub) + let dec = decryptMsgV3 $ C.dh' srvDh dhPriv + Resp "3" sId' OK <- signSendRecv h sKey ("3", sId, SKEY sPub) + sId' `shouldBe` sId + Resp "4" _ OK <- signSendRecv h sKey ("4", sId, _SEND "hello") + Resp "5" _ OK <- signSendRecv h sKey ("5", sId, _SEND "hello 2") + Resp "" rId' (Msg mId1 msg1) <- tGet1 sh + rId' `shouldBe` rId + dec mId1 msg1 `shouldBe` Right "hello" + -- ACK doesn't need service signature + Resp "6" _ (Msg mId2 msg2) <- signSendRecv sh rKey ("6", rId, ACK mId1) + dec mId2 msg2 `shouldBe` Right "hello 2" + Resp "7" _ (ERR NO_MSG) <- signSendRecv sh rKey ("7", rId, ACK mId1) + Resp "8" _ OK <- signSendRecv sh rKey ("8", rId, ACK mId2) + Resp "9" _ OK <- signSendRecv h sKey ("9", sId, _SEND "hello 3") + pure (rId, sId, dec, serviceId) + + 
runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + Resp "10" NoEntity (ERR (CMD NO_AUTH)) <- signSendRecv sh aServicePK ("10", NoEntity, SUBS) + signSend_ sh aServicePK Nothing ("11", serviceId, SUBS) + [mId3] <- + fmap catMaybes $ + receiveInAnyOrder -- race between SOKS and MSG, clients can handle it + sh + [ \case + Resp "11" serviceId' (SOKS n _) -> do + n `shouldBe` 1 + serviceId' `shouldBe` serviceId + pure $ Just Nothing + _ -> pure Nothing, + \case + Resp "" rId'' (Msg mId3 msg3) -> do + rId'' `shouldBe` rId + dec mId3 msg3 `shouldBe` Right "hello 3" + pure $ Just $ Just mId3 + _ -> pure Nothing + ] + Resp "" NoEntity SALL <- tGet1 sh + Resp "12" _ OK <- signSendRecv sh rKey ("12", rId, ACK mId3) + Resp "14" _ OK <- signSendRecv h sKey ("14", sId, _SEND "hello 4") + Resp "" _ (Msg mId4 msg4) <- tGet1 sh + dec mId4 msg4 `shouldBe` Right "hello 4" + Resp "15" _ OK <- signSendRecv sh rKey ("15", rId, ACK mId4) + pure () + +testServiceUpgradeAndDowngrade :: SpecWith (ASrvTransport, AStoreType) +testServiceUpgradeAndDowngrade = + it "should create queue as client and switch to service and back" $ \(at@(ATransport t), msType) -> do + g <- C.newRandom + creds <- genCredentials g Nothing (0, 2400) "localhost" + let (_fp, tlsCred) = tlsCredentials [creds] + serviceKeys@(_, servicePK) <- atomically $ C.generateKeyPair g + let aServicePK = C.APrivateAuthKey C.SEd25519 servicePK + withSmpServerConfigOn at (cfgMS msType) testPort $ \_ -> runSMPClient t $ \h -> do + (rPub, rKey) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub, dhPriv :: C.PrivateKeyX25519) <- atomically $ C.generateKeyPair g + (sPub, sKey) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (rPub2, rKey2) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub2, dhPriv2 :: C.PrivateKeyX25519) <- atomically $ C.generateKeyPair g + (sPub2, sKey2) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (rPub3, rKey3) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub3, dhPriv3 
:: C.PrivateKeyX25519) <- atomically $ C.generateKeyPair g + (sPub3, sKey3) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + + (rId, sId, dec) <- runSMPClient t $ \sh -> do + Resp "1" NoEntity (Ids rId sId srvDh) <- signSendRecv sh rKey ("1", NoEntity, New rPub dhPub) + let dec = decryptMsgV3 $ C.dh' srvDh dhPriv + Resp "2" sId' OK <- signSendRecv h sKey ("2", sId, SKEY sPub) + sId' `shouldBe` sId + Resp "3" _ OK <- signSendRecv h sKey ("3", sId, _SEND "hello") + Resp "" rId' (Msg mId1 msg1) <- tGet1 sh + rId' `shouldBe` rId + dec mId1 msg1 `shouldBe` Right "hello" + Resp "4" _ OK <- signSendRecv sh rKey ("4", rId, ACK mId1) + Resp "5" _ OK <- signSendRecv h sKey ("5", sId, _SEND "hello 2") + pure (rId, sId, dec) + + -- split to prevent message delivery + (rId2, sId2, dec2) <- runSMPClient t $ \sh -> do + Resp "6" NoEntity (Ids rId2 sId2 srvDh2) <- signSendRecv sh rKey2 ("6", NoEntity, New rPub2 dhPub2) + let dec2 = decryptMsgV3 $ C.dh' srvDh2 dhPriv2 + Resp "7" sId2' OK <- signSendRecv h sKey2 ("7", sId2, SKEY sPub2) + sId2' `shouldBe` sId2 + pure (rId2, sId2, dec2) + + (rId3, _sId3, _dec3) <- runSMPClient t $ \sh -> do + Resp "6" NoEntity (Ids rId3 sId3 srvDh3) <- signSendRecv sh rKey3 ("6", NoEntity, New rPub3 dhPub3) + let dec3 = decryptMsgV3 $ C.dh' srvDh3 dhPriv3 + Resp "7" sId3' OK <- signSendRecv h sKey3 ("7", sId3, SKEY sPub3) + sId3' `shouldBe` sId3 + pure (rId3, sId3, dec3) + + serviceId <- runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + Resp "8" _ (ERR SERVICE) <- signSendRecv sh rKey ("8", rId, SUB) + (Resp "9" rId' (SOK (Just serviceId)), Resp "" rId'' (Msg mId2 msg2)) <- serviceSignSendRecv2 sh rKey servicePK ("9", rId, SUB) + rId' `shouldBe` rId + rId'' `shouldBe` rId + dec mId2 msg2 `shouldBe` Right "hello 2" + (Resp "10" rId2' (SOK (Just serviceId'))) <- serviceSignSendRecv sh rKey2 servicePK ("10", rId2, SUB) + rId2' `shouldBe` rId2 + serviceId' `shouldBe` serviceId + Resp "10.1" _ OK <- signSendRecv sh rKey ("10.1", rId, ACK mId2) 
+ (Resp "10.2" rId3' (SOK (Just serviceId''))) <- serviceSignSendRecv sh rKey3 servicePK ("10.2", rId3, SUB) + rId3' `shouldBe` rId3 + serviceId'' `shouldBe` serviceId + pure serviceId + + Resp "11" _ OK <- signSendRecv h sKey ("11", sId, _SEND "hello 3.1") + Resp "12" _ OK <- signSendRecv h sKey2 ("12", sId2, _SEND "hello 3.2") + + runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + signSend_ sh aServicePK Nothing ("14", serviceId, SUBS) + [(rKey3_1, rId3_1, mId3_1), (rKey3_2, rId3_2, mId3_2)] <- + fmap catMaybes $ + receiveInAnyOrder -- race between SOKS and MSG, clients can handle it + sh + [ \case + Resp "14" serviceId' (SOKS n _) -> do + n `shouldBe` 3 + serviceId' `shouldBe` serviceId + pure $ Just Nothing + _ -> pure Nothing, + \case + Resp "" rId'' (Msg mId3 msg3) | rId'' == rId -> do + dec mId3 msg3 `shouldBe` Right "hello 3.1" + pure $ Just $ Just (rKey, rId, mId3) + _ -> pure Nothing, + \case + Resp "" rId'' (Msg mId3 msg3) | rId'' == rId2 -> do + dec2 mId3 msg3 `shouldBe` Right "hello 3.2" + pure $ Just $ Just (rKey2, rId2, mId3) + _ -> pure Nothing + ] + Resp "" NoEntity SALL <- tGet1 sh + Resp "15" _ OK <- signSendRecv sh rKey3_1 ("15", rId3_1, ACK mId3_1) + Resp "16" _ OK <- signSendRecv sh rKey3_2 ("16", rId3_2, ACK mId3_2) + pure () + + Resp "17" _ OK <- signSendRecv h sKey ("17", sId, _SEND "hello 4") + + runSMPClient t $ \sh -> do + Resp "18" _ (ERR SERVICE) <- signSendRecv sh aServicePK ("18", serviceId, SUBS) + (Resp "19" rId' (SOK Nothing), Resp "" rId'' (Msg mId4 msg4)) <- signSendRecv2 sh rKey ("19", rId, SUB) + rId' `shouldBe` rId + rId'' `shouldBe` rId + dec mId4 msg4 `shouldBe` Right "hello 4" + Resp "20" _ OK <- signSendRecv sh rKey ("20", rId, ACK mId4) + Resp "21" _ OK <- signSendRecv h sKey ("21", sId, _SEND "hello 5") + Resp "" _ (Msg mId5 msg5) <- tGet1 sh + dec mId5 msg5 `shouldBe` Right "hello 5" + Resp "22" _ OK <- signSendRecv sh rKey ("22", rId, ACK mId5) + + Resp "23" rId2' (SOK Nothing) <- signSendRecv sh rKey2 ("23", 
rId2, SUB) + rId2' `shouldBe` rId2 + Resp "24" _ OK <- signSendRecv h sKey ("24", sId, _SEND "hello 6") + Resp "" _ (Msg mId6 msg6) <- tGet1 sh + dec mId6 msg6 `shouldBe` Right "hello 6" + Resp "25" _ OK <- signSendRecv sh rKey ("25", rId, ACK mId6) + pure () + +receiveInAnyOrder :: (HasCallStack, Transport c) => THandleSMP c 'TClient -> [(CorrId, EntityId, Either ErrorType BrokerMsg) -> IO (Maybe b)] -> IO [b] +receiveInAnyOrder h = fmap reverse . go [] + where + go rs [] = pure rs + go rs ps = withFrozenCallStack $ do + r <- 5000000 `timeout` tGet1 h >>= maybe (error "inAnyOrder timeout") pure + (r_, ps') <- foldrM (choose r) (Nothing, []) ps + case r_ of + Just r' -> go (r' : rs) ps' + Nothing -> error $ "unexpected event: " <> show r + choose r p (Nothing, ps') = (maybe (Nothing, p : ps') ((,ps') . Just)) <$> p r + choose _ p (Just r, ps') = pure (Just r, p : ps') + testWithStoreLog :: SpecWith (ASrvTransport, AStoreType) testWithStoreLog = it "should store simplex queues to log and restore them after server restart" $ \(at@(ATransport t), msType) -> do @@ -1159,7 +1366,7 @@ testMessageServiceNotifications = deliverMessage rh rId rKey sh sId sKey nh2 "connection 1" dec deliverMessage rh rId'' rKey'' sh sId'' sKey'' nh2 "connection 2" dec'' -- -- another client makes service subscription - Resp "12" serviceId5 (SOKS 2) <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS) + Resp "12" serviceId5 (SOKS 2 _) <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS) serviceId5 `shouldBe` serviceId Resp "" serviceId6 (ENDS 2) <- tGet1 nh2 serviceId6 `shouldBe` serviceId @@ -1193,7 +1400,7 @@ testServiceNotificationsTwoRestarts = threadDelay 250000 withSmpServerStoreLogOn ps testPort $ runTest2 t $ \sh rh -> testNtfServiceClient t serviceKeys $ \nh -> do - Resp "2.1" serviceId' (SOKS n) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("2.1", serviceId, NSUBS) + Resp "2.1" serviceId' (SOKS n _) <- 
signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("2.1", serviceId, NSUBS) n `shouldBe` 1 Resp "2.2" _ (SOK Nothing) <- signSendRecv rh rKey ("2.2", rId, SUB) serviceId' `shouldBe` serviceId @@ -1201,7 +1408,7 @@ testServiceNotificationsTwoRestarts = threadDelay 250000 withSmpServerStoreLogOn ps testPort $ runTest2 t $ \sh rh -> testNtfServiceClient t serviceKeys $ \nh -> do - Resp "3.1" _ (SOKS n) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("3.1", serviceId, NSUBS) + Resp "3.1" _ (SOKS n _) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("3.1", serviceId, NSUBS) n `shouldBe` 1 Resp "3.2" _ (SOK Nothing) <- signSendRecv rh rKey ("3.2", rId, SUB) deliverMessage rh rId rKey sh sId sKey nh "hello 3" dec From 3ccf8548658d809b0eaaf64c95208cc1b0f7a5ea Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 25 Nov 2025 16:55:59 +0000 Subject: [PATCH 02/91] servers: maintain xor-hash of all associated queue IDs in PostgreSQL (#1668) * servers: maintain xor-hash of all associated queue IDs in PostgreSQL (#1615) * ntf server: maintain xor-hash of all associated queue IDs via PostgreSQL triggers * smp server: xor hash with triggers * fix sql and using pgcrypto extension in tests * track counts and hashes in smp/ntf servers via triggers, smp server stats for service subscription, update SMP protocol to pass expected count and hash in SSUB/NSSUB commands * agent migrations with functions/triggers * remove agent triggers * try tracking service subs in the agent (WIP, does not compile) * Revert "try tracking service subs in the agent (WIP, does not compile)" This reverts commit 59e908100d21ddb6eb95c75d49821d2349fc4d6c. 
* comment * agent database triggers * service subscriptions in the client * test / fix client services * update schema * fix postgres migration * update schema * move schema test to the end * use static function with SQLite to avoid dynamic wrapper --- simplexmq.cabal | 3 + src/Simplex/Messaging/Agent.hs | 27 +- src/Simplex/Messaging/Agent/Client.hs | 124 +- .../Messaging/Agent/NtfSubSupervisor.hs | 2 +- .../Messaging/Agent/Store/AgentStore.hs | 41 +- .../Agent/Store/Postgres/Migrations/App.hs | 4 +- .../Migrations/M20251020_service_certs.hs | 114 ++ .../Agent/Store/Postgres/Migrations/Util.hs | 46 + .../Migrations/agent_postgres_schema.sql | 1469 +++++++++++++++++ .../Messaging/Agent/Store/Postgres/Util.hs | 112 +- src/Simplex/Messaging/Agent/Store/SQLite.hs | 35 +- .../Messaging/Agent/Store/SQLite/Common.hs | 6 + .../Migrations/M20251020_service_certs.hs | 63 +- .../Store/SQLite/Migrations/agent_schema.sql | 52 +- .../Messaging/Agent/Store/SQLite/Util.hs | 41 + src/Simplex/Messaging/Agent/TSessionSubs.hs | 84 +- src/Simplex/Messaging/Client.hs | 10 +- src/Simplex/Messaging/Client/Agent.hs | 57 +- src/Simplex/Messaging/Crypto.hs | 6 +- .../Messaging/Notifications/Protocol.hs | 14 +- src/Simplex/Messaging/Notifications/Server.hs | 19 +- .../Messaging/Notifications/Server/Stats.hs | 1 + .../Notifications/Server/Store/Migrations.hs | 126 +- .../Notifications/Server/Store/Postgres.hs | 35 +- .../Server/Store/ntf_server_schema.sql | 133 +- src/Simplex/Messaging/Protocol.hs | 72 +- src/Simplex/Messaging/Server.hs | 49 +- .../Messaging/Server/MsgStore/Journal.hs | 4 +- src/Simplex/Messaging/Server/Prometheus.hs | 1 + src/Simplex/Messaging/Server/QueueStore.hs | 1 + .../Messaging/Server/QueueStore/Postgres.hs | 18 +- .../Server/QueueStore/Postgres/Migrations.hs | 140 +- .../QueueStore/Postgres/server_schema.sql | 146 +- .../Messaging/Server/QueueStore/STM.hs | 44 +- .../Messaging/Server/QueueStore/Types.hs | 2 +- src/Simplex/Messaging/Server/Stats.hs | 82 +- 
.../Messaging/Server/StoreLog/ReadWrite.hs | 2 +- tests/AgentTests/EqInstances.hs | 5 + tests/AgentTests/FunctionalAPITests.hs | 28 + tests/CoreTests/TSessionSubs.hs | 24 +- tests/Fixtures.hs | 5 + tests/SMPAgentClient.hs | 3 + tests/ServerTests.hs | 29 +- tests/Test.hs | 19 +- 44 files changed, 2968 insertions(+), 330 deletions(-) create mode 100644 src/Simplex/Messaging/Agent/Store/Postgres/Migrations/M20251020_service_certs.hs create mode 100644 src/Simplex/Messaging/Agent/Store/Postgres/Migrations/Util.hs create mode 100644 src/Simplex/Messaging/Agent/Store/Postgres/Migrations/agent_postgres_schema.sql create mode 100644 src/Simplex/Messaging/Agent/Store/SQLite/Util.hs diff --git a/simplexmq.cabal b/simplexmq.cabal index 081c05bca8..0eeec3cfd1 100644 --- a/simplexmq.cabal +++ b/simplexmq.cabal @@ -167,6 +167,7 @@ library Simplex.Messaging.Agent.Store.Postgres.Migrations.M20250702_conn_invitations_remove_cascade_delete Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251009_queue_to_subscribe Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251010_client_notices + Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251020_service_certs else exposed-modules: Simplex.Messaging.Agent.Store.SQLite @@ -217,12 +218,14 @@ library Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251009_queue_to_subscribe Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251010_client_notices Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251020_service_certs + Simplex.Messaging.Agent.Store.SQLite.Util if flag(client_postgres) || flag(server_postgres) exposed-modules: Simplex.Messaging.Agent.Store.Postgres Simplex.Messaging.Agent.Store.Postgres.Common Simplex.Messaging.Agent.Store.Postgres.DB Simplex.Messaging.Agent.Store.Postgres.Migrations + Simplex.Messaging.Agent.Store.Postgres.Migrations.Util Simplex.Messaging.Agent.Store.Postgres.Util if !flag(client_library) exposed-modules: diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 
f9f1dc0894..63516ada45 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -211,7 +211,6 @@ import Simplex.Messaging.Protocol ErrorType (AUTH), MsgBody, MsgFlags (..), - IdsHash, NtfServer, ProtoServerWithAuth (..), ProtocolServer (..), @@ -222,6 +221,7 @@ import Simplex.Messaging.Protocol SMPMsgMeta, SParty (..), SProtocolType (..), + ServiceSub (..), SndPublicAuthKey, SubscriptionMode (..), UserProtocol, @@ -500,7 +500,7 @@ resubscribeConnections :: AgentClient -> [ConnId] -> AE (Map ConnId (Either Agen resubscribeConnections c = withAgentEnv c . resubscribeConnections' c {-# INLINE resubscribeConnections #-} -subscribeClientServices :: AgentClient -> UserId -> AE (Map SMPServer (Either AgentErrorType (Int64, IdsHash))) +subscribeClientServices :: AgentClient -> UserId -> AE (Map SMPServer (Either AgentErrorType ServiceSub)) subscribeClientServices c = withAgentEnv c . subscribeClientServices' c {-# INLINE subscribeClientServices #-} @@ -594,6 +594,7 @@ testProtocolServer c nm userId srv = withAgentEnv' c $ case protocolTypeI @p of SPNTF -> runNTFServerTest c nm userId srv -- | set SOCKS5 proxy on/off and optionally set TCP timeouts for fast network +-- TODO [certs rcv] should fail if any user is enabled to use services and per-connection isolation is chosen setNetworkConfig :: AgentClient -> NetworkConfig -> IO () setNetworkConfig c@AgentClient {useNetworkConfig, proxySessTs} cfg' = do ts <- getCurrentTime @@ -771,6 +772,7 @@ deleteUser' c@AgentClient {smpServersStats, xftpServersStats} userId delSMPQueue whenM (withStore' c (`deleteUserWithoutConns` userId)) . 
atomically $ writeTBQueue (subQ c) ("", "", AEvt SAENone $ DEL_USER userId) +-- TODO [certs rcv] should fail enabling if per-connection isolation is set setUserService' :: AgentClient -> UserId -> Bool -> AM () setUserService' c userId enable = do wasEnabled <- liftIO $ fromMaybe False <$> TM.lookupIO userId (useClientServices c) @@ -1507,15 +1509,15 @@ resubscribeConnections' c connIds = do [] -> pure True rqs' -> anyM $ map (atomically . hasActiveSubscription c) rqs' --- TODO [certs rcv] compare hash with lock -subscribeClientServices' :: AgentClient -> UserId -> AM (Map SMPServer (Either AgentErrorType (Int64, IdsHash))) +-- TODO [certs rcv] compare hash. possibly, it should return both expected and returned counts +subscribeClientServices' :: AgentClient -> UserId -> AM (Map SMPServer (Either AgentErrorType ServiceSub)) subscribeClientServices' c userId = ifM useService subscribe $ throwError $ CMD PROHIBITED "no user service allowed" where useService = liftIO $ (Just True ==) <$> TM.lookupIO userId (useClientServices c) subscribe = do srvs <- withStore' c (`getClientServiceServers` userId) - lift $ M.fromList . zip srvs <$> mapConcurrently (tryAllErrors' . 
subscribeClientService c userId) srvs + lift $ M.fromList <$> mapConcurrently (\(srv, ServiceSub _ n idsHash) -> fmap (srv,) $ tryAllErrors' $ subscribeClientService c userId srv n idsHash) srvs -- requesting messages sequentially, to reduce memory usage getConnectionMessages' :: AgentClient -> NonEmpty ConnMsgReq -> AM' (NonEmpty (Either AgentErrorType (Maybe SMPMsgMeta))) @@ -2829,12 +2831,13 @@ processSMPTransmissions :: AgentClient -> ServerTransmissionBatch SMPVersion Err processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId, ts) = do upConnIds <- newTVarIO [] forM_ ts $ \(entId, t) -> case t of - STEvent msgOrErr -> - withRcvConn entId $ \rq@RcvQueue {connId} conn -> case msgOrErr of - Right msg -> runProcessSMP rq conn (toConnData conn) msg - Left e -> lift $ do - processClientNotice rq e - notifyErr connId e + STEvent msgOrErr + | entId == SMP.NoEntity -> pure () -- TODO [certs rcv] process SALL + | otherwise -> withRcvConn entId $ \rq@RcvQueue {connId} conn -> case msgOrErr of + Right msg -> runProcessSMP rq conn (toConnData conn) msg + Left e -> lift $ do + processClientNotice rq e + notifyErr connId e STResponse (Cmd SRecipient cmd) respOrErr -> withRcvConn entId $ \rq conn -> case cmd of SMP.SUB -> case respOrErr of @@ -2870,7 +2873,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId processSubOk :: RcvQueue -> TVar [ConnId] -> IO () processSubOk rq@RcvQueue {connId} upConnIds = atomically . 
whenM (isPendingSub rq) $ do - SS.addActiveSub tSess sessId (rcvQueueSub rq) $ currentSubs c + SS.addActiveSub tSess sessId rq $ currentSubs c modifyTVar' upConnIds (connId :) processSubErr :: RcvQueue -> SMPClientError -> AM' () processSubErr rq@RcvQueue {connId} e = do diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 4a10d07efb..68d7ef62b2 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -241,7 +241,7 @@ import Simplex.Messaging.Agent.RetryInterval import Simplex.Messaging.Agent.Stats import Simplex.Messaging.Agent.Store import Simplex.Messaging.Agent.Store.AgentStore -import Simplex.Messaging.Agent.Store.Common (DBStore, withTransaction) +import Simplex.Messaging.Agent.Store.Common (DBStore) import qualified Simplex.Messaging.Agent.Store.DB as DB import Simplex.Messaging.Agent.Store.Entity import Simplex.Messaging.Agent.TSessionSubs (TSessionSubs) @@ -279,6 +279,7 @@ import Simplex.Messaging.Protocol RcvNtfPublicDhKey, SMPMsgMeta (..), SProtocolType (..), + ServiceSub (..), SndPublicAuthKey, SubscriptionMode (..), NewNtfCreds (..), @@ -499,6 +500,7 @@ data UserNetworkType = UNNone | UNCellular | UNWifi | UNEthernet | UNOther deriving (Eq, Show) -- | Creates an SMP agent client instance that receives commands and sends responses via 'TBQueue's. 
+-- TODO [certs rcv] should fail if both per-connection isolation is set and any users use services newAgentClient :: Int -> InitialAgentServers -> UTCTime -> Map (Maybe SMPServer) (Maybe SystemSeconds) -> Env -> IO AgentClient newAgentClient clientId InitialAgentServers {smp, ntf, xftp, netCfg, useServices, presetDomains, presetServers} currentTs notices agentEnv = do let cfg = config agentEnv @@ -622,9 +624,8 @@ getServiceCredentials c userId srv = let tlsCreds = tlsCredentials [cred] createClientService db userId srv tlsCreds pure (tlsCreds, Nothing) - (_, pk) <- atomically $ C.generateKeyPair g - let serviceSignKey = C.APrivateSignKey C.SEd25519 pk - creds = ServiceCredentials {serviceRole = SRMessaging, serviceCreds, serviceCertHash = XV.Fingerprint kh, serviceSignKey} + serviceSignKey <- liftEitherWith INTERNAL $ C.x509ToPrivate' $ snd serviceCreds + let creds = ServiceCredentials {serviceRole = SRMessaging, serviceCreds, serviceCertHash = XV.Fingerprint kh, serviceSignKey} pure (creds, serviceId_) class (Encoding err, Show err) => ProtocolServerClient v err msg | msg -> v, msg -> err where @@ -744,9 +745,11 @@ smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm smp <- liftError (protocolClientError SMP $ B.unpack $ strEncode srv) $ do ts <- readTVarIO proxySessTs ExceptT $ getProtocolClient g nm tSess cfg' presetDomains (Just msgQ) ts $ smpClientDisconnected c tSess env v' prs + -- TODO [certs rcv] add service to SS, possibly combine with SS.setSessionId atomically $ SS.setSessionId tSess (sessionId $ thParams smp) $ currentSubs c updateClientService service smp pure SMPConnectedClient {connectedClient = smp, proxiedRelays = prs} + -- TODO [certs rcv] this should differentiate between service ID just set and service ID changed, and in the latter case disassociate the queue updateClientService service smp = case (service, smpClientService smp) of (Just (_, serviceId_), Just THClientService {serviceId}) | serviceId_ /= Just 
serviceId -> withStore' c $ \db -> setClientServiceId db userId srv serviceId @@ -763,32 +766,34 @@ smpClientDisconnected c@AgentClient {active, smpClients, smpProxiedRelays} tSess -- we make active subscriptions pending only if the client for tSess was current (in the map) and active, -- because we can have a race condition when a new current client could have already -- made subscriptions active, and the old client would be processing diconnection later. - removeClientAndSubs :: IO ([RcvQueueSub], [ConnId]) + removeClientAndSubs :: IO ([RcvQueueSub], [ConnId], Maybe ServiceSub) removeClientAndSubs = atomically $ do removeSessVar v tSess smpClients - ifM (readTVar active) removeSubs (pure ([], [])) + ifM (readTVar active) removeSubs (pure ([], [], Nothing)) where sessId = sessionId $ thParams client removeSubs = do mode <- getSessionMode c - subs <- SS.setSubsPending mode tSess sessId $ currentSubs c + (subs, serviceSub_) <- SS.setSubsPending mode tSess sessId $ currentSubs c let qs = M.elems subs cs = nubOrd $ map qConnId qs -- this removes proxied relays that this client created sessions to destSrvs <- M.keys <$> readTVar prs forM_ destSrvs $ \destSrv -> TM.delete (userId, destSrv, cId) smpProxiedRelays - pure (qs, cs) + pure (qs, cs, serviceSub_) - serverDown :: ([RcvQueueSub], [ConnId]) -> IO () - serverDown (qs, conns) = whenM (readTVarIO active) $ do + serverDown :: ([RcvQueueSub], [ConnId], Maybe ServiceSub) -> IO () + serverDown (qs, conns, serviceSub_) = whenM (readTVarIO active) $ do notifySub c $ hostEvent' DISCONNECT client unless (null conns) $ notifySub c $ DOWN srv conns - unless (null qs) $ do + unless (null qs && isNothing serviceSub_) $ do releaseGetLocksIO c qs mode <- getSessionModeIO c let resubscribe | (mode == TSMEntity) == isJust cId = resubscribeSMPSession c tSess - | otherwise = void $ subscribeQueues c True qs + | otherwise = do + mapM_ (runExceptT . 
resubscribeClientService c tSess) serviceSub_ + unless (null qs) $ void $ subscribeQueues c True qs runReaderT resubscribe env resubscribeSMPSession :: AgentClient -> SMPTransportSession -> AM' () @@ -807,11 +812,12 @@ resubscribeSMPSession c@AgentClient {smpSubWorkers, workerSeq} tSess = do runSubWorker = do ri <- asks $ reconnectInterval . config withRetryForeground ri isForeground (isNetworkOnline c) $ \_ loop -> do - pending <- atomically $ SS.getPendingSubs tSess $ currentSubs c - unless (M.null pending) $ do + (pendingSubs, pendingSS) <- atomically $ SS.getPendingSubs tSess $ currentSubs c + unless (M.null pendingSubs && isNothing pendingSS) $ do liftIO $ waitUntilForeground c liftIO $ waitForUserNetwork c - handleNotify $ resubscribeSessQueues c tSess $ M.elems pending + mapM_ (handleNotify . void . runExceptT . resubscribeClientService c tSess) pendingSS + unless (M.null pendingSubs) $ handleNotify $ resubscribeSessQueues c tSess $ M.elems pendingSubs loop isForeground = (ASForeground ==) <$> readTVar (agentState c) cleanup :: SessionVar (Async ()) -> STM () @@ -1508,25 +1514,25 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl newErr :: String -> AM (Maybe ShortLinkCreds) newErr = throwE . BROKER (B.unpack $ strEncode srv) . UNEXPECTED . 
("Create queue: " <>) -processSubResults :: AgentClient -> SMPTransportSession -> SessionId -> NonEmpty (RcvQueueSub, Either SMPClientError (Maybe ServiceId)) -> STM [(RcvQueueSub, Maybe ClientNotice)] -processSubResults c tSess@(userId, srv, _) sessId rs = do - pendingSubs <- SS.getPendingSubs tSess $ currentSubs c - let (failed, subscribed, notices, ignored) = foldr (partitionResults pendingSubs) (M.empty, [], [], 0) rs +processSubResults :: AgentClient -> SMPTransportSession -> SessionId -> Maybe ServiceId -> NonEmpty (RcvQueueSub, Either SMPClientError (Maybe ServiceId)) -> STM ([RcvQueueSub], [(RcvQueueSub, Maybe ClientNotice)]) +processSubResults c tSess@(userId, srv, _) sessId smpServiceId rs = do + pending <- SS.getPendingSubs tSess $ currentSubs c + let (failed, subscribed@(qs, sQs), notices, ignored) = foldr (partitionResults pending) (M.empty, ([], []), [], 0) rs unless (M.null failed) $ do incSMPServerStat' c userId srv connSubErrs $ M.size failed failSubscriptions c tSess failed - unless (null subscribed) $ do - incSMPServerStat' c userId srv connSubscribed $ length subscribed + unless (null qs && null sQs) $ do + incSMPServerStat' c userId srv connSubscribed $ length qs + length sQs SS.batchAddActiveSubs tSess sessId subscribed $ currentSubs c unless (ignored == 0) $ incSMPServerStat' c userId srv connSubIgnored ignored - pure notices + pure (sQs, notices) where partitionResults :: - Map SMP.RecipientId RcvQueueSub -> + (Map SMP.RecipientId RcvQueueSub, Maybe ServiceSub) -> (RcvQueueSub, Either SMPClientError (Maybe ServiceId)) -> - (Map SMP.RecipientId SMPClientError, [RcvQueueSub], [(RcvQueueSub, Maybe ClientNotice)], Int) -> - (Map SMP.RecipientId SMPClientError, [RcvQueueSub], [(RcvQueueSub, Maybe ClientNotice)], Int) - partitionResults pendingSubs (rq@RcvQueueSub {rcvId, clientNoticeId}, r) acc@(failed, subscribed, notices, ignored) = case r of + (Map SMP.RecipientId SMPClientError, ([RcvQueueSub], [RcvQueueSub]), [(RcvQueueSub, Maybe 
ClientNotice)], Int) -> + (Map SMP.RecipientId SMPClientError, ([RcvQueueSub], [RcvQueueSub]), [(RcvQueueSub, Maybe ClientNotice)], Int) + partitionResults (pendingSubs, pendingSS) (rq@RcvQueueSub {rcvId, clientNoticeId}, r) acc@(failed, subscribed@(qs, sQs), notices, ignored) = case r of Left e -> case smpErrorClientNotice e of Just notice_ -> (failed', subscribed, (rq, notice_) : notices, ignored) where @@ -1536,8 +1542,12 @@ processSubResults c tSess@(userId, srv, _) sessId rs = do | otherwise -> (failed', subscribed, notices, ignored) where failed' = M.insert rcvId e failed - Right _serviceId -- TODO [certs rcv] store association with the service - | rcvId `M.member` pendingSubs -> (failed, rq : subscribed, notices', ignored) + Right serviceId_ + | rcvId `M.member` pendingSubs -> + let subscribed' = case (smpServiceId, serviceId_, pendingSS) of + (Just sId, Just sId', Just ServiceSub {serviceId}) | sId == sId' && sId == serviceId -> (qs, rq : sQs) + _ -> (rq : qs, sQs) + in (failed, subscribed', notices', ignored) | otherwise -> (failed, subscribed, notices', ignored + 1) where notices' = if isJust clientNoticeId then (rq, Nothing) : notices else notices @@ -1576,6 +1586,7 @@ serverHostError = \case -- | Batch by transport session and subscribe queues. The list of results can have a different order. subscribeQueues :: AgentClient -> Bool -> [RcvQueueSub] -> AM' [(RcvQueueSub, Either AgentErrorType (Maybe ServiceId))] +subscribeQueues _ _ [] = pure [] subscribeQueues c withEvents qs = do (errs, qs') <- checkQueues c qs atomically $ modifyTVar' (subscrConns c) (`S.union` S.fromList (map qConnId qs')) @@ -1632,6 +1643,7 @@ checkQueues c = fmap partitionEithers . mapM checkQueue -- This function expects that all queues belong to one transport session, -- and that they are already added to pending subscriptions. 
resubscribeSessQueues :: AgentClient -> SMPTransportSession -> [RcvQueueSub] -> AM' () +resubscribeSessQueues _ _ [] = pure () resubscribeSessQueues c tSess qs = do (errs, qs_) <- checkQueues c qs forM_ (L.nonEmpty qs_) $ \qs' -> void $ subscribeSessQueues_ c True (tSess, qs') @@ -1650,13 +1662,15 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c then Just . S.fromList . map qConnId . M.elems <$> atomically (SS.getActiveSubs tSess $ currentSubs c) else pure Nothing active <- E.uninterruptibleMask_ $ do - (active, notices) <- atomically $ do - r@(_, notices) <- ifM + (active, (serviceQs, notices)) <- atomically $ do + r@(_, (_, notices)) <- ifM (activeClientSession c tSess sessId) - ((True,) <$> processSubResults c tSess sessId rs) - ((False, []) <$ incSMPServerStat' c userId srv connSubIgnored (length rs)) + ((True,) <$> processSubResults c tSess sessId smpServiceId rs) + ((False, ([], [])) <$ incSMPServerStat' c userId srv connSubIgnored (length rs)) unless (null notices) $ takeTMVar $ clientNoticesLock c pure r + unless (null serviceQs) $ void $ + processRcvServiceAssocs c serviceQs `runReaderT` agentEnv c unless (null notices) $ void $ (processClientNotices c tSess notices `runReaderT` agentEnv c) `E.finally` atomically (putTMVar (clientNoticesLock c) ()) @@ -1677,6 +1691,13 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c where tSess = transportSession' smp sessId = sessionId $ thParams smp + smpServiceId = (\THClientService {serviceId} -> serviceId) <$> smpClientService smp + +processRcvServiceAssocs :: AgentClient -> [RcvQueueSub] -> AM' () +processRcvServiceAssocs c serviceQs = + withStore' c (`setRcvServiceAssocs` serviceQs) `catchAllErrors'` \e -> do + logError $ "processRcvServiceAssocs error: " <> tshow e + notifySub' c "" $ ERR e processClientNotices :: AgentClient -> SMPTransportSession -> [(RcvQueueSub, Maybe ClientNotice)] -> AM' () processClientNotices c@AgentClient {presetServers}
tSess notices = do @@ -1689,10 +1710,35 @@ processClientNotices c@AgentClient {presetServers} tSess notices = do logError $ "processClientNotices error: " <> tshow e notifySub' c "" $ ERR e -subscribeClientService :: AgentClient -> UserId -> SMPServer -> AM (Int64, IdsHash) -subscribeClientService c userId srv = - withLogClient c NRMBackground (userId, srv, Nothing) B.empty "SUBS" $ - (`subscribeService` SMP.SRecipientService) . connectedClient +resubscribeClientService :: AgentClient -> SMPTransportSession -> ServiceSub -> AM ServiceSub +resubscribeClientService c tSess (ServiceSub _ n idsHash) = + withServiceClient c tSess $ \smp _ -> do + subscribeClientService_ c tSess smp n idsHash + +subscribeClientService :: AgentClient -> UserId -> SMPServer -> Int64 -> IdsHash -> AM ServiceSub +subscribeClientService c userId srv n idsHash = + withServiceClient c tSess $ \smp smpServiceId -> do + let serviceSub = ServiceSub smpServiceId n idsHash + atomically $ SS.setPendingServiceSub tSess serviceSub $ currentSubs c + subscribeClientService_ c tSess smp n idsHash + where + tSess = (userId, srv, Nothing) + +withServiceClient :: AgentClient -> SMPTransportSession -> (SMPClient -> ServiceId -> ExceptT SMPClientError IO a) -> AM a +withServiceClient c tSess action = + withLogClient c NRMBackground tSess B.empty "SUBS" $ \(SMPConnectedClient smp _) -> + case (\THClientService {serviceId} -> serviceId) <$> smpClientService smp of + Just smpServiceId -> action smp smpServiceId + Nothing -> throwE PCEServiceUnavailable + +subscribeClientService_ :: AgentClient -> SMPTransportSession -> SMPClient -> Int64 -> IdsHash -> ExceptT SMPClientError IO ServiceSub +subscribeClientService_ c tSess smp n idsHash = do + -- TODO [certs rcv] handle error + serviceSub' <- subscribeService smp SMP.SRecipientService n idsHash + let sessId = sessionId $ thParams smp + atomically $ whenM (activeClientSession c tSess sessId) $ + SS.setActiveServiceSub tSess sessId serviceSub' $ currentSubs c + pure 
serviceSub' activeClientSession :: AgentClient -> SMPTransportSession -> SessionId -> STM Bool activeClientSession c tSess sessId = sameSess <$> tryReadSessVar tSess (smpClients c) @@ -1762,7 +1808,7 @@ addNewQueueSubscription c rq' tSess sessId = do modifyTVar' (subscrConns c) $ S.insert $ qConnId rq active <- activeClientSession c tSess sessId if active - then SS.addActiveSub tSess sessId rq $ currentSubs c + then SS.addActiveSub tSess sessId rq' $ currentSubs c else SS.addPendingSub tSess rq $ currentSubs c pure active unless same $ resubscribeSMPSession c tSess @@ -1951,6 +1997,7 @@ releaseGetLock c rq = {-# INLINE releaseGetLock #-} releaseGetLocksIO :: SomeRcvQueue q => AgentClient -> [q] -> IO () +releaseGetLocksIO _ [] = pure () releaseGetLocksIO c rqs = do locks <- readTVarIO $ getMsgLocks c forM_ rqs $ \rq -> @@ -2301,7 +2348,8 @@ withStore c action = do [ E.Handler $ \(e :: SQL.SQLError) -> let se = SQL.sqlError e busy = se == SQL.ErrorBusy || se == SQL.ErrorLocked - in pure . Left . (if busy then SEDatabaseBusy else SEInternal) $ bshow se, + err = tshow se <> ": " <> SQL.sqlErrorDetails e <> ", " <> SQL.sqlErrorContext e + in pure . Left . (if busy then SEDatabaseBusy else SEInternal) $ encodeUtf8 err, E.Handler $ \(E.SomeException e) -> pure . 
Left $ SEInternal $ bshow e ] #endif diff --git a/src/Simplex/Messaging/Agent/NtfSubSupervisor.hs b/src/Simplex/Messaging/Agent/NtfSubSupervisor.hs index fe852ac64c..f5a2b281d6 100644 --- a/src/Simplex/Messaging/Agent/NtfSubSupervisor.hs +++ b/src/Simplex/Messaging/Agent/NtfSubSupervisor.hs @@ -314,7 +314,7 @@ runNtfWorker c srv Worker {doWork} = _ -> ((ntfSubConnId sub, INTERNAL "NSACheck - no subscription ID") : errs, subs, subIds) updateSub :: DB.Connection -> NtfServer -> UTCTime -> UTCTime -> (NtfSubscription, NtfSubStatus) -> IO (Maybe SMPServer) updateSub db ntfServer ts nextCheckTs (sub, status) - | ntfShouldSubscribe status = + | status `elem` subscribeNtfStatuses = let sub' = sub {ntfSubStatus = NASCreated status} in Nothing <$ updateNtfSubscription db sub' (NSANtf NSACheck) nextCheckTs -- ntf server stopped subscribing to this queue diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 0b2c632fa4..b519f381ea 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -53,6 +53,7 @@ module Simplex.Messaging.Agent.Store.AgentStore getSubscriptionServers, getUserServerRcvQueueSubs, unsetQueuesToSubscribe, + setRcvServiceAssocs, getConnIds, getConn, getDeletedConn, @@ -401,29 +402,31 @@ deleteUsersWithoutConns db = do pure userIds createClientService :: DB.Connection -> UserId -> SMPServer -> (C.KeyHash, TLS.Credential) -> IO () -createClientService db userId srv (kh, (cert, pk)) = +createClientService db userId srv (kh, (cert, pk)) = do + serverKeyHash_ <- createServer_ db srv DB.execute db [sql| INSERT INTO client_services - (user_id, host, port, service_cert_hash, service_cert, service_priv_key) - VALUES (?,?,?,?,?,?) - ON CONFLICT (user_id, host, port) + (user_id, host, port, server_key_hash, service_cert_hash, service_cert, service_priv_key) + VALUES (?,?,?,?,?,?,?) 
+ ON CONFLICT (user_id, host, port, server_key_hash) DO UPDATE SET service_cert_hash = EXCLUDED.service_cert_hash, service_cert = EXCLUDED.service_cert, service_priv_key = EXCLUDED.service_priv_key, - rcv_service_id = NULL + service_id = NULL |] - (userId, host srv, port srv, kh, cert, pk) + (userId, host srv, port srv, serverKeyHash_, kh, cert, pk) +-- TODO [certs rcv] get correct service based on key hash of the server getClientService :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ((C.KeyHash, TLS.Credential), Maybe ServiceId)) getClientService db userId srv = maybeFirstRow toService $ DB.query db [sql| - SELECT service_cert_hash, service_cert, service_priv_key, rcv_service_id + SELECT service_cert_hash, service_cert, service_priv_key, service_id FROM client_services WHERE user_id = ? AND host = ? AND port = ? |] @@ -431,19 +434,21 @@ getClientService db userId srv = where toService (kh, cert, pk, serviceId_) = ((kh, (cert, pk)), serviceId_) -getClientServiceServers :: DB.Connection -> UserId -> IO [SMPServer] +getClientServiceServers :: DB.Connection -> UserId -> IO [(SMPServer, ServiceSub)] getClientServiceServers db userId = map toServer <$> DB.query db [sql| - SELECT c.host, c.port, s.key_hash + SELECT c.host, c.port, s.key_hash, c.service_id, c.service_queue_count, c.service_queue_ids_hash FROM client_services c JOIN servers s ON s.host = c.host AND s.port = c.port + WHERE c.user_id = ? |] (Only userId) where - toServer (host, port, kh) = SMPServer host port kh + toServer (host, port, kh, serviceId, n, Binary idsHash) = + (SMPServer host port kh, ServiceSub serviceId n (IdsHash idsHash)) setClientServiceId :: DB.Connection -> UserId -> SMPServer -> ServiceId -> IO () setClientServiceId db userId srv serviceId = @@ -451,7 +456,7 @@ setClientServiceId db userId srv serviceId = db [sql| UPDATE client_services - SET rcv_service_id = ? + SET service_id = ? WHERE user_id = ? AND host = ? AND port = ? 
|] (serviceId, userId, host srv, port srv) @@ -2099,7 +2104,7 @@ insertRcvQueue_ db connId' rq@RcvQueue {..} subMode serverKeyHash_ = do ntf_public_key, ntf_private_key, ntf_id, rcv_ntf_dh_secret ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?); |] - ( (host server, port server, rcvId, rcvServiceAssoc, connId', rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret) + ( (host server, port server, rcvId, BI rcvServiceAssoc, connId', rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret) :. (sndId, queueMode, status, BI toSubscribe, qId, BI primary, dbReplaceQueueId, smpClientVersion, serverKeyHash_) :. (shortLinkId <$> shortLink, shortLinkKey <$> shortLink, linkPrivSigKey <$> shortLink, linkEncFixedData <$> shortLink) :. ntfCredsFields @@ -2248,6 +2253,14 @@ getUserServerRcvQueueSubs db userId srv onlyNeeded = unsetQueuesToSubscribe :: DB.Connection -> IO () unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = 0 WHERE to_subscribe = 1" +setRcvServiceAssocs :: DB.Connection -> [RcvQueueSub] -> IO () +setRcvServiceAssocs db rqs = +#if defined(dbPostgres) + DB.execute db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id IN ?" $ Only $ In (map queueId rqs) +#else + DB.executeMany db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id = ?" $ map (Only . queueId) rqs +#endif + -- * getConn helpers getConnIds :: DB.Connection -> IO [ConnId] @@ -2468,13 +2481,13 @@ rcvQueueQuery = toRcvQueue :: (UserId, C.KeyHash, ConnId, NonEmpty TransportHost, ServiceName, SMP.RecipientId, SMP.RcvPrivateAuthKey, SMP.RcvDhSecret, C.PrivateKeyX25519, Maybe C.DhSecretX25519, SMP.SenderId, Maybe QueueMode) - :. (QueueStatus, Maybe BoolInt, Maybe NoticeId, DBEntityId, BoolInt, Maybe Int64, Maybe RcvSwitchStatus, Maybe VersionSMPC, Int, ServiceAssoc) + :. (QueueStatus, Maybe BoolInt, Maybe NoticeId, DBEntityId, BoolInt, Maybe Int64, Maybe RcvSwitchStatus, Maybe VersionSMPC, Int, BoolInt) :.
(Maybe SMP.NtfPublicAuthKey, Maybe SMP.NtfPrivateAuthKey, Maybe SMP.NotifierId, Maybe RcvNtfDhSecret) :. (Maybe SMP.LinkId, Maybe LinkKey, Maybe C.PrivateKeyEd25519, Maybe EncDataBytes) -> RcvQueue toRcvQueue ( (userId, keyHash, connId, host, port, rcvId, rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret, sndId, queueMode) - :. (status, enableNtfs_, clientNoticeId, dbQueueId, BI primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion_, deleteErrors, rcvServiceAssoc) + :. (status, enableNtfs_, clientNoticeId, dbQueueId, BI primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion_, deleteErrors, BI rcvServiceAssoc) :. (ntfPublicKey_, ntfPrivateKey_, notifierId_, rcvNtfDhSecret_) :. (shortLinkId_, shortLinkKey_, linkPrivSigKey_, linkEncFixedData_) ) = diff --git a/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/App.hs b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/App.hs index 011d890318..41090aa203 100644 --- a/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/App.hs +++ b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/App.hs @@ -10,6 +10,7 @@ import Simplex.Messaging.Agent.Store.Postgres.Migrations.M20250322_short_links import Simplex.Messaging.Agent.Store.Postgres.Migrations.M20250702_conn_invitations_remove_cascade_delete import Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251009_queue_to_subscribe import Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251010_client_notices +import Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251020_service_certs import Simplex.Messaging.Agent.Store.Shared (Migration (..)) schemaMigrations :: [(String, Text, Maybe Text)] @@ -19,7 +20,8 @@ schemaMigrations = ("20250322_short_links", m20250322_short_links, Just down_m20250322_short_links), ("20250702_conn_invitations_remove_cascade_delete", m20250702_conn_invitations_remove_cascade_delete, Just down_m20250702_conn_invitations_remove_cascade_delete), ("20251009_queue_to_subscribe", m20251009_queue_to_subscribe, Just 
down_m20251009_queue_to_subscribe), - ("20251010_client_notices", m20251010_client_notices, Just down_m20251010_client_notices) + ("20251010_client_notices", m20251010_client_notices, Just down_m20251010_client_notices), + ("20251020_service_certs", m20251020_service_certs, Just down_m20251020_service_certs) ] -- | The list of migrations in ascending order by date diff --git a/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/M20251020_service_certs.hs b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/M20251020_service_certs.hs new file mode 100644 index 0000000000..aee45de825 --- /dev/null +++ b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/M20251020_service_certs.hs @@ -0,0 +1,114 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} + +module Simplex.Messaging.Agent.Store.Postgres.Migrations.M20251020_service_certs where + +import Data.Text (Text) +import Simplex.Messaging.Agent.Store.Postgres.Migrations.Util +import Text.RawString.QQ (r) + +m20251020_service_certs :: Text +m20251020_service_certs = + createXorHashFuncs <> [r| +CREATE TABLE client_services( + user_id BIGINT NOT NULL REFERENCES users ON UPDATE RESTRICT ON DELETE CASCADE, + host TEXT NOT NULL, + port TEXT NOT NULL, + server_key_hash BYTEA, + service_cert BYTEA NOT NULL, + service_cert_hash BYTEA NOT NULL, + service_priv_key BYTEA NOT NULL, + service_id BYTEA, + service_queue_count BIGINT NOT NULL DEFAULT 0, + service_queue_ids_hash BYTEA NOT NULL DEFAULT '\x00000000000000000000000000000000', + FOREIGN KEY(host, port) REFERENCES servers ON DELETE RESTRICT +); + +CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON client_services(user_id, host, port, server_key_hash); +CREATE INDEX idx_server_certs_host_port ON client_services(host, port); + +ALTER TABLE rcv_queues ADD COLUMN rcv_service_assoc SMALLINT NOT NULL DEFAULT 0; + +CREATE FUNCTION update_aggregates(p_conn_id BYTEA, p_host TEXT, p_port TEXT, p_change BIGINT, p_rcv_id BYTEA) RETURNS VOID +LANGUAGE 
plpgsql +AS $$ +DECLARE q_user_id BIGINT; +BEGIN + SELECT user_id INTO q_user_id FROM connections WHERE conn_id = p_conn_id; + UPDATE client_services + SET service_queue_count = service_queue_count + p_change, + service_queue_ids_hash = xor_combine(service_queue_ids_hash, public.digest(p_rcv_id, 'md5')) + WHERE user_id = q_user_id AND host = p_host AND port = p_port; +END; +$$; + +CREATE FUNCTION on_rcv_queue_insert() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 THEN + PERFORM update_aggregates(NEW.conn_id, NEW.host, NEW.port, 1, NEW.rcv_id); + END IF; + RETURN NEW; +END; +$$; + +CREATE FUNCTION on_rcv_queue_delete() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 THEN + PERFORM update_aggregates(OLD.conn_id, OLD.host, OLD.port, -1, OLD.rcv_id); + END IF; + RETURN OLD; +END; +$$; + +CREATE FUNCTION on_rcv_queue_update() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 THEN + IF NOT (NEW.rcv_service_assoc != 0 AND NEW.deleted = 0) THEN + PERFORM update_aggregates(OLD.conn_id, OLD.host, OLD.port, -1, OLD.rcv_id); + END IF; + ELSIF NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 THEN + PERFORM update_aggregates(NEW.conn_id, NEW.host, NEW.port, 1, NEW.rcv_id); + END IF; + RETURN NEW; +END; +$$; + +CREATE TRIGGER tr_rcv_queue_insert +AFTER INSERT ON rcv_queues +FOR EACH ROW EXECUTE PROCEDURE on_rcv_queue_insert(); + +CREATE TRIGGER tr_rcv_queue_delete +AFTER DELETE ON rcv_queues +FOR EACH ROW EXECUTE PROCEDURE on_rcv_queue_delete(); + +CREATE TRIGGER tr_rcv_queue_update +AFTER UPDATE ON rcv_queues +FOR EACH ROW EXECUTE PROCEDURE on_rcv_queue_update(); + |] + +down_m20251020_service_certs :: Text +down_m20251020_service_certs = + [r| +DROP TRIGGER tr_rcv_queue_insert ON rcv_queues; +DROP TRIGGER tr_rcv_queue_delete ON rcv_queues; +DROP TRIGGER tr_rcv_queue_update ON rcv_queues; + +DROP FUNCTION on_rcv_queue_insert; 
+DROP FUNCTION on_rcv_queue_delete; +DROP FUNCTION on_rcv_queue_update; + +DROP FUNCTION update_aggregates; + +ALTER TABLE rcv_queues DROP COLUMN rcv_service_assoc; + +DROP INDEX idx_server_certs_host_port; +DROP INDEX idx_server_certs_user_id_host_port; +DROP TABLE client_services; + |] + <> dropXorHashFuncs diff --git a/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/Util.hs b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/Util.hs new file mode 100644 index 0000000000..b51d487e4c --- /dev/null +++ b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/Util.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE QuasiQuotes #-} + +module Simplex.Messaging.Agent.Store.Postgres.Migrations.Util where + +import Data.Text (Text) +import qualified Data.Text as T +import Text.RawString.QQ (r) + +-- xor_combine is only applied to locally computed md5 hashes (128 bits/16 bytes), +-- so it is safe to require that all values are of the same length. +createXorHashFuncs :: Text +createXorHashFuncs = + T.pack + [r| +CREATE OR REPLACE FUNCTION xor_combine(state BYTEA, value BYTEA) RETURNS BYTEA +LANGUAGE plpgsql IMMUTABLE STRICT +AS $$ +DECLARE + result BYTEA := state; + i INTEGER; + len INTEGER := octet_length(value); +BEGIN + IF octet_length(state) != len THEN + RAISE EXCEPTION 'Inputs must be equal length (% != %)', octet_length(state), len; + END IF; + FOR i IN 0..len-1 LOOP + result := set_byte(result, i, get_byte(state, i) # get_byte(value, i)); + END LOOP; + RETURN result; +END; +$$; + +CREATE OR REPLACE AGGREGATE xor_aggregate(BYTEA) ( + SFUNC = xor_combine, + STYPE = BYTEA, + INITCOND = '\x00000000000000000000000000000000' -- 16 bytes +); + |] + +dropXorHashFuncs :: Text +dropXorHashFuncs = + T.pack + [r| +DROP AGGREGATE xor_aggregate(BYTEA); +DROP FUNCTION xor_combine; + |] diff --git a/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/agent_postgres_schema.sql b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/agent_postgres_schema.sql new file mode 100644 index 
0000000000..c56efb2267 --- /dev/null +++ b/src/Simplex/Messaging/Agent/Store/Postgres/Migrations/agent_postgres_schema.sql @@ -0,0 +1,1469 @@ + + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + + +CREATE SCHEMA smp_agent_test_protocol_schema; + + + +CREATE FUNCTION smp_agent_test_protocol_schema.on_rcv_queue_delete() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 THEN + PERFORM update_aggregates(OLD.conn_id, OLD.host, OLD.port, -1, OLD.rcv_id); + END IF; + RETURN OLD; +END; +$$; + + + +CREATE FUNCTION smp_agent_test_protocol_schema.on_rcv_queue_insert() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 THEN + PERFORM update_aggregates(NEW.conn_id, NEW.host, NEW.port, 1, NEW.rcv_id); + END IF; + RETURN NEW; +END; +$$; + + + +CREATE FUNCTION smp_agent_test_protocol_schema.on_rcv_queue_update() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 THEN + IF NOT (NEW.rcv_service_assoc != 0 AND NEW.deleted = 0) THEN + PERFORM update_aggregates(OLD.conn_id, OLD.host, OLD.port, -1, OLD.rcv_id); + END IF; + ELSIF NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 THEN + PERFORM update_aggregates(NEW.conn_id, NEW.host, NEW.port, 1, NEW.rcv_id); + END IF; + RETURN NEW; +END; +$$; + + + +CREATE FUNCTION smp_agent_test_protocol_schema.update_aggregates(p_conn_id bytea, p_host text, p_port text, p_change bigint, p_rcv_id bytea) RETURNS void + LANGUAGE plpgsql + AS $$ +DECLARE q_user_id BIGINT; +BEGIN + SELECT user_id INTO q_user_id FROM connections WHERE conn_id = p_conn_id; + UPDATE client_services + SET service_queue_count = 
service_queue_count + p_change, + service_queue_ids_hash = xor_combine(service_queue_ids_hash, public.digest(p_rcv_id, 'md5')) + WHERE user_id = q_user_id AND host = p_host AND port = p_port; +END; +$$; + + + +CREATE FUNCTION smp_agent_test_protocol_schema.xor_combine(state bytea, value bytea) RETURNS bytea + LANGUAGE plpgsql IMMUTABLE STRICT + AS $$ +DECLARE + result BYTEA := state; + i INTEGER; + len INTEGER := octet_length(value); +BEGIN + IF octet_length(state) != len THEN + RAISE EXCEPTION 'Inputs must be equal length (% != %)', octet_length(state), len; + END IF; + FOR i IN 0..len-1 LOOP + result := set_byte(result, i, get_byte(state, i) # get_byte(value, i)); + END LOOP; + RETURN result; +END; +$$; + + + +CREATE AGGREGATE smp_agent_test_protocol_schema.xor_aggregate(bytea) ( + SFUNC = smp_agent_test_protocol_schema.xor_combine, + STYPE = bytea, + INITCOND = '\x00000000000000000000000000000000' +); + + +SET default_table_access_method = heap; + + +CREATE TABLE smp_agent_test_protocol_schema.client_notices ( + client_notice_id bigint NOT NULL, + protocol text NOT NULL, + host text NOT NULL, + port text NOT NULL, + entity_id bytea NOT NULL, + server_key_hash bytea, + notice_ttl bigint, + created_at bigint NOT NULL, + updated_at bigint NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.client_notices ALTER COLUMN client_notice_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.client_notices_client_notice_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.client_services ( + user_id bigint NOT NULL, + host text NOT NULL, + port text NOT NULL, + server_key_hash bytea, + service_cert bytea NOT NULL, + service_cert_hash bytea NOT NULL, + service_priv_key bytea NOT NULL, + service_id bytea, + service_queue_count bigint DEFAULT 0 NOT NULL, + service_queue_ids_hash bytea DEFAULT '\x00000000000000000000000000000000'::bytea NOT NULL +); + + + 
+CREATE TABLE smp_agent_test_protocol_schema.commands ( + command_id bigint NOT NULL, + conn_id bytea NOT NULL, + host text, + port text, + corr_id bytea NOT NULL, + command_tag bytea NOT NULL, + command bytea NOT NULL, + agent_version integer DEFAULT 1 NOT NULL, + server_key_hash bytea, + created_at timestamp with time zone DEFAULT '1970-01-01 00:00:00+01'::timestamp with time zone NOT NULL, + failed smallint DEFAULT 0 +); + + + +ALTER TABLE smp_agent_test_protocol_schema.commands ALTER COLUMN command_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.commands_command_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.conn_confirmations ( + confirmation_id bytea NOT NULL, + conn_id bytea NOT NULL, + e2e_snd_pub_key bytea NOT NULL, + sender_key bytea, + ratchet_state bytea NOT NULL, + sender_conn_info bytea NOT NULL, + accepted smallint NOT NULL, + own_conn_info bytea, + created_at timestamp with time zone DEFAULT now() NOT NULL, + smp_reply_queues bytea, + smp_client_version integer +); + + + +CREATE TABLE smp_agent_test_protocol_schema.conn_invitations ( + invitation_id bytea NOT NULL, + contact_conn_id bytea, + cr_invitation bytea NOT NULL, + recipient_conn_info bytea NOT NULL, + accepted smallint DEFAULT 0 NOT NULL, + own_conn_info bytea, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +CREATE TABLE smp_agent_test_protocol_schema.connections ( + conn_id bytea NOT NULL, + conn_mode text NOT NULL, + last_internal_msg_id bigint DEFAULT 0 NOT NULL, + last_internal_rcv_msg_id bigint DEFAULT 0 NOT NULL, + last_internal_snd_msg_id bigint DEFAULT 0 NOT NULL, + last_external_snd_msg_id bigint DEFAULT 0 NOT NULL, + last_rcv_msg_hash bytea DEFAULT '\x'::bytea NOT NULL, + last_snd_msg_hash bytea DEFAULT '\x'::bytea NOT NULL, + smp_agent_version integer DEFAULT 1 NOT NULL, + duplex_handshake smallint DEFAULT 0, + enable_ntfs smallint, + 
deleted smallint DEFAULT 0 NOT NULL, + user_id bigint NOT NULL, + ratchet_sync_state text DEFAULT 'ok'::text NOT NULL, + deleted_at_wait_delivery timestamp with time zone, + pq_support smallint DEFAULT 0 NOT NULL +); + + + +CREATE TABLE smp_agent_test_protocol_schema.deleted_snd_chunk_replicas ( + deleted_snd_chunk_replica_id bigint NOT NULL, + user_id bigint NOT NULL, + xftp_server_id bigint NOT NULL, + replica_id bytea NOT NULL, + replica_key bytea NOT NULL, + chunk_digest bytea NOT NULL, + delay bigint, + retries bigint DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + failed smallint DEFAULT 0 +); + + + +ALTER TABLE smp_agent_test_protocol_schema.deleted_snd_chunk_replicas ALTER COLUMN deleted_snd_chunk_replica_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.deleted_snd_chunk_replicas_deleted_snd_chunk_replica_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.encrypted_rcv_message_hashes ( + encrypted_rcv_message_hash_id bigint NOT NULL, + conn_id bytea NOT NULL, + hash bytea NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.encrypted_rcv_message_hashes ALTER COLUMN encrypted_rcv_message_hash_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.encrypted_rcv_message_hashes_encrypted_rcv_message_hash_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.inv_short_links ( + inv_short_link_id bigint NOT NULL, + host text NOT NULL, + port text NOT NULL, + server_key_hash bytea, + link_id bytea NOT NULL, + link_key bytea NOT NULL, + snd_private_key bytea NOT NULL, + snd_id bytea +); + + + +ALTER TABLE 
smp_agent_test_protocol_schema.inv_short_links ALTER COLUMN inv_short_link_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.inv_short_links_inv_short_link_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.messages ( + conn_id bytea NOT NULL, + internal_id bigint NOT NULL, + internal_ts timestamp with time zone NOT NULL, + internal_rcv_id bigint, + internal_snd_id bigint, + msg_type bytea NOT NULL, + msg_body bytea DEFAULT '\x'::bytea NOT NULL, + msg_flags text, + pq_encryption smallint DEFAULT 0 NOT NULL +); + + + +CREATE TABLE smp_agent_test_protocol_schema.migrations ( + name text NOT NULL, + ts timestamp without time zone NOT NULL, + down text +); + + + +CREATE TABLE smp_agent_test_protocol_schema.ntf_servers ( + ntf_host text NOT NULL, + ntf_port text NOT NULL, + ntf_key_hash bytea NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +CREATE TABLE smp_agent_test_protocol_schema.ntf_subscriptions ( + conn_id bytea NOT NULL, + smp_host text, + smp_port text, + smp_ntf_id bytea, + ntf_host text NOT NULL, + ntf_port text NOT NULL, + ntf_sub_id bytea, + ntf_sub_status text NOT NULL, + ntf_sub_action bytea, + ntf_sub_smp_action bytea, + ntf_sub_action_ts timestamp with time zone, + updated_by_supervisor smallint DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + smp_server_key_hash bytea, + ntf_failed smallint DEFAULT 0, + smp_failed smallint DEFAULT 0 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.ntf_tokens ( + provider text NOT NULL, + device_token text NOT NULL, + ntf_host text NOT NULL, + ntf_port text NOT NULL, + tkn_id bytea, + tkn_pub_key bytea NOT NULL, + tkn_priv_key bytea NOT NULL, + tkn_pub_dh_key bytea NOT NULL, + tkn_priv_dh_key bytea NOT NULL, + 
tkn_dh_secret bytea, + tkn_status text NOT NULL, + tkn_action bytea, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + ntf_mode bytea +); + + + +CREATE TABLE smp_agent_test_protocol_schema.ntf_tokens_to_delete ( + ntf_token_to_delete_id bigint NOT NULL, + ntf_host text NOT NULL, + ntf_port text NOT NULL, + ntf_key_hash bytea NOT NULL, + tkn_id bytea NOT NULL, + tkn_priv_key bytea NOT NULL, + del_failed smallint DEFAULT 0, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.ntf_tokens_to_delete ALTER COLUMN ntf_token_to_delete_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.ntf_tokens_to_delete_ntf_token_to_delete_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.processed_ratchet_key_hashes ( + processed_ratchet_key_hash_id bigint NOT NULL, + conn_id bytea NOT NULL, + hash bytea NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.processed_ratchet_key_hashes ALTER COLUMN processed_ratchet_key_hash_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.processed_ratchet_key_hashes_processed_ratchet_key_hash_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.ratchets ( + conn_id bytea NOT NULL, + x3dh_priv_key_1 bytea, + x3dh_priv_key_2 bytea, + ratchet_state bytea, + e2e_version integer DEFAULT 1 NOT NULL, + x3dh_pub_key_1 bytea, + x3dh_pub_key_2 bytea, + pq_priv_kem bytea, + pq_pub_kem bytea +); + + + +CREATE TABLE smp_agent_test_protocol_schema.rcv_file_chunk_replicas ( + rcv_file_chunk_replica_id bigint NOT NULL, + rcv_file_chunk_id bigint NOT NULL, + replica_number 
bigint NOT NULL, + xftp_server_id bigint NOT NULL, + replica_id bytea NOT NULL, + replica_key bytea NOT NULL, + received smallint DEFAULT 0 NOT NULL, + delay bigint, + retries bigint DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.rcv_file_chunk_replicas ALTER COLUMN rcv_file_chunk_replica_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.rcv_file_chunk_replicas_rcv_file_chunk_replica_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.rcv_file_chunks ( + rcv_file_chunk_id bigint NOT NULL, + rcv_file_id bigint NOT NULL, + chunk_no bigint NOT NULL, + chunk_size bigint NOT NULL, + digest bytea NOT NULL, + tmp_path text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.rcv_file_chunks ALTER COLUMN rcv_file_chunk_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.rcv_file_chunks_rcv_file_chunk_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.rcv_files ( + rcv_file_id bigint NOT NULL, + rcv_file_entity_id bytea NOT NULL, + user_id bigint NOT NULL, + size bigint NOT NULL, + digest bytea NOT NULL, + key bytea NOT NULL, + nonce bytea NOT NULL, + chunk_size bigint NOT NULL, + prefix_path text NOT NULL, + tmp_path text, + save_path text NOT NULL, + status text NOT NULL, + deleted smallint DEFAULT 0 NOT NULL, + error text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + save_file_key bytea, + save_file_nonce bytea, + failed smallint DEFAULT 0, + redirect_id bigint, + redirect_entity_id bytea, + 
redirect_size bigint, + redirect_digest bytea, + approved_relays smallint DEFAULT 0 NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.rcv_files ALTER COLUMN rcv_file_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.rcv_files_rcv_file_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.rcv_messages ( + conn_id bytea NOT NULL, + internal_rcv_id bigint NOT NULL, + internal_id bigint NOT NULL, + external_snd_id bigint NOT NULL, + broker_id bytea NOT NULL, + broker_ts timestamp with time zone NOT NULL, + internal_hash bytea NOT NULL, + external_prev_snd_hash bytea NOT NULL, + integrity bytea NOT NULL, + user_ack smallint DEFAULT 0, + rcv_queue_id bigint NOT NULL +); + + + +CREATE TABLE smp_agent_test_protocol_schema.rcv_queues ( + host text NOT NULL, + port text NOT NULL, + rcv_id bytea NOT NULL, + conn_id bytea NOT NULL, + rcv_private_key bytea NOT NULL, + rcv_dh_secret bytea NOT NULL, + e2e_priv_key bytea NOT NULL, + e2e_dh_secret bytea, + snd_id bytea NOT NULL, + snd_key bytea, + status text NOT NULL, + smp_server_version integer DEFAULT 1 NOT NULL, + smp_client_version integer, + ntf_public_key bytea, + ntf_private_key bytea, + ntf_id bytea, + rcv_ntf_dh_secret bytea, + rcv_queue_id bigint NOT NULL, + rcv_primary smallint NOT NULL, + replace_rcv_queue_id bigint, + delete_errors bigint DEFAULT 0 NOT NULL, + server_key_hash bytea, + switch_status text, + deleted smallint DEFAULT 0 NOT NULL, + last_broker_ts timestamp with time zone, + link_id bytea, + link_key bytea, + link_priv_sig_key bytea, + link_enc_fixed_data bytea, + queue_mode text, + to_subscribe smallint DEFAULT 0 NOT NULL, + client_notice_id bigint, + rcv_service_assoc smallint DEFAULT 0 NOT NULL +); + + + +CREATE TABLE smp_agent_test_protocol_schema.servers ( + host text NOT NULL, + port text NOT NULL, + key_hash bytea NOT NULL +); + + + +CREATE TABLE 
smp_agent_test_protocol_schema.servers_stats ( + servers_stats_id bigint NOT NULL, + servers_stats text, + started_at timestamp with time zone DEFAULT now() NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.servers_stats ALTER COLUMN servers_stats_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.servers_stats_servers_stats_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.skipped_messages ( + skipped_message_id bigint NOT NULL, + conn_id bytea NOT NULL, + header_key bytea NOT NULL, + msg_n bigint NOT NULL, + msg_key bytea NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.skipped_messages ALTER COLUMN skipped_message_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.skipped_messages_skipped_message_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_file_chunk_replica_recipients ( + snd_file_chunk_replica_recipient_id bigint NOT NULL, + snd_file_chunk_replica_id bigint NOT NULL, + rcv_replica_id bytea NOT NULL, + rcv_replica_key bytea NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.snd_file_chunk_replica_recipients ALTER COLUMN snd_file_chunk_replica_recipient_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.snd_file_chunk_replica_recipi_snd_file_chunk_replica_recipi_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_file_chunk_replicas ( + snd_file_chunk_replica_id bigint NOT NULL, + snd_file_chunk_id bigint NOT NULL, + 
replica_number bigint NOT NULL, + xftp_server_id bigint NOT NULL, + replica_id bytea NOT NULL, + replica_key bytea NOT NULL, + replica_status text NOT NULL, + delay bigint, + retries bigint DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.snd_file_chunk_replicas ALTER COLUMN snd_file_chunk_replica_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.snd_file_chunk_replicas_snd_file_chunk_replica_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_file_chunks ( + snd_file_chunk_id bigint NOT NULL, + snd_file_id bigint NOT NULL, + chunk_no bigint NOT NULL, + chunk_offset bigint NOT NULL, + chunk_size bigint NOT NULL, + digest bytea NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.snd_file_chunks ALTER COLUMN snd_file_chunk_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.snd_file_chunks_snd_file_chunk_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_files ( + snd_file_id bigint NOT NULL, + snd_file_entity_id bytea NOT NULL, + user_id bigint NOT NULL, + num_recipients bigint NOT NULL, + digest bytea, + key bytea NOT NULL, + nonce bytea NOT NULL, + path text NOT NULL, + prefix_path text, + status text NOT NULL, + deleted smallint DEFAULT 0 NOT NULL, + error text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + src_file_key bytea, + src_file_nonce bytea, + failed smallint DEFAULT 0, + redirect_size bigint, + redirect_digest bytea +); + + + +ALTER TABLE 
smp_agent_test_protocol_schema.snd_files ALTER COLUMN snd_file_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.snd_files_snd_file_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_message_bodies ( + snd_message_body_id bigint NOT NULL, + agent_msg bytea DEFAULT '\x'::bytea NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.snd_message_bodies ALTER COLUMN snd_message_body_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.snd_message_bodies_snd_message_body_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_message_deliveries ( + snd_message_delivery_id bigint NOT NULL, + conn_id bytea NOT NULL, + snd_queue_id bigint NOT NULL, + internal_id bigint NOT NULL, + failed smallint DEFAULT 0 +); + + + +ALTER TABLE smp_agent_test_protocol_schema.snd_message_deliveries ALTER COLUMN snd_message_delivery_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.snd_message_deliveries_snd_message_delivery_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_messages ( + conn_id bytea NOT NULL, + internal_snd_id bigint NOT NULL, + internal_id bigint NOT NULL, + internal_hash bytea NOT NULL, + previous_msg_hash bytea DEFAULT '\x'::bytea NOT NULL, + retry_int_slow bigint, + retry_int_fast bigint, + rcpt_internal_id bigint, + rcpt_status text, + msg_encrypt_key bytea, + padded_msg_len bigint, + snd_message_body_id bigint +); + + + +CREATE TABLE smp_agent_test_protocol_schema.snd_queues ( + host text NOT NULL, + port text NOT NULL, + snd_id bytea NOT NULL, + conn_id bytea NOT NULL, + snd_private_key bytea NOT NULL, + e2e_dh_secret bytea NOT NULL, + status text NOT NULL, + smp_server_version integer DEFAULT 1 NOT NULL, + 
smp_client_version integer DEFAULT 1 NOT NULL, + snd_public_key bytea, + e2e_pub_key bytea, + snd_queue_id bigint NOT NULL, + snd_primary smallint NOT NULL, + replace_snd_queue_id bigint, + server_key_hash bytea, + switch_status text, + queue_mode text +); + + + +CREATE TABLE smp_agent_test_protocol_schema.users ( + user_id bigint NOT NULL, + deleted smallint DEFAULT 0 NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.users ALTER COLUMN user_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.users_user_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +CREATE TABLE smp_agent_test_protocol_schema.xftp_servers ( + xftp_server_id bigint NOT NULL, + xftp_host text NOT NULL, + xftp_port text NOT NULL, + xftp_key_hash bytea NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + + +ALTER TABLE smp_agent_test_protocol_schema.xftp_servers ALTER COLUMN xftp_server_id ADD GENERATED ALWAYS AS IDENTITY ( + SEQUENCE NAME smp_agent_test_protocol_schema.xftp_servers_xftp_server_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1 +); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.client_notices + ADD CONSTRAINT client_notices_pkey PRIMARY KEY (client_notice_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.commands + ADD CONSTRAINT commands_pkey PRIMARY KEY (command_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.conn_confirmations + ADD CONSTRAINT conn_confirmations_pkey PRIMARY KEY (confirmation_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.conn_invitations + ADD CONSTRAINT conn_invitations_pkey PRIMARY KEY (invitation_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.connections + ADD CONSTRAINT connections_pkey PRIMARY KEY (conn_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.deleted_snd_chunk_replicas + ADD CONSTRAINT 
deleted_snd_chunk_replicas_pkey PRIMARY KEY (deleted_snd_chunk_replica_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.encrypted_rcv_message_hashes + ADD CONSTRAINT encrypted_rcv_message_hashes_pkey PRIMARY KEY (encrypted_rcv_message_hash_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.inv_short_links + ADD CONSTRAINT inv_short_links_pkey PRIMARY KEY (inv_short_link_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.messages + ADD CONSTRAINT messages_pkey PRIMARY KEY (conn_id, internal_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.migrations + ADD CONSTRAINT migrations_pkey PRIMARY KEY (name); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_servers + ADD CONSTRAINT ntf_servers_pkey PRIMARY KEY (ntf_host, ntf_port); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_subscriptions + ADD CONSTRAINT ntf_subscriptions_pkey PRIMARY KEY (conn_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_tokens + ADD CONSTRAINT ntf_tokens_pkey PRIMARY KEY (provider, device_token, ntf_host, ntf_port); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_tokens_to_delete + ADD CONSTRAINT ntf_tokens_to_delete_pkey PRIMARY KEY (ntf_token_to_delete_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.processed_ratchet_key_hashes + ADD CONSTRAINT processed_ratchet_key_hashes_pkey PRIMARY KEY (processed_ratchet_key_hash_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ratchets + ADD CONSTRAINT ratchets_pkey PRIMARY KEY (conn_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_file_chunk_replicas + ADD CONSTRAINT rcv_file_chunk_replicas_pkey PRIMARY KEY (rcv_file_chunk_replica_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_file_chunks + ADD CONSTRAINT rcv_file_chunks_pkey PRIMARY KEY (rcv_file_chunk_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_files + ADD CONSTRAINT rcv_files_pkey PRIMARY KEY (rcv_file_id); + + + +ALTER TABLE ONLY 
smp_agent_test_protocol_schema.rcv_files + ADD CONSTRAINT rcv_files_rcv_file_entity_id_key UNIQUE (rcv_file_entity_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_messages + ADD CONSTRAINT rcv_messages_pkey PRIMARY KEY (conn_id, internal_rcv_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_queues + ADD CONSTRAINT rcv_queues_host_port_snd_id_key UNIQUE (host, port, snd_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_queues + ADD CONSTRAINT rcv_queues_pkey PRIMARY KEY (host, port, rcv_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.servers + ADD CONSTRAINT servers_pkey PRIMARY KEY (host, port); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.servers_stats + ADD CONSTRAINT servers_stats_pkey PRIMARY KEY (servers_stats_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.skipped_messages + ADD CONSTRAINT skipped_messages_pkey PRIMARY KEY (skipped_message_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunk_replica_recipients + ADD CONSTRAINT snd_file_chunk_replica_recipients_pkey PRIMARY KEY (snd_file_chunk_replica_recipient_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunk_replicas + ADD CONSTRAINT snd_file_chunk_replicas_pkey PRIMARY KEY (snd_file_chunk_replica_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunks + ADD CONSTRAINT snd_file_chunks_pkey PRIMARY KEY (snd_file_chunk_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_files + ADD CONSTRAINT snd_files_pkey PRIMARY KEY (snd_file_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_message_bodies + ADD CONSTRAINT snd_message_bodies_pkey PRIMARY KEY (snd_message_body_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_message_deliveries + ADD CONSTRAINT snd_message_deliveries_pkey PRIMARY KEY (snd_message_delivery_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_messages + ADD CONSTRAINT snd_messages_pkey PRIMARY KEY 
(conn_id, internal_snd_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_queues + ADD CONSTRAINT snd_queues_pkey PRIMARY KEY (host, port, snd_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.users + ADD CONSTRAINT users_pkey PRIMARY KEY (user_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.xftp_servers + ADD CONSTRAINT xftp_servers_pkey PRIMARY KEY (xftp_server_id); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.xftp_servers + ADD CONSTRAINT xftp_servers_xftp_host_xftp_port_xftp_key_hash_key UNIQUE (xftp_host, xftp_port, xftp_key_hash); + + + +CREATE UNIQUE INDEX idx_client_notices_entity ON smp_agent_test_protocol_schema.client_notices USING btree (protocol, host, port, entity_id); + + + +CREATE INDEX idx_commands_conn_id ON smp_agent_test_protocol_schema.commands USING btree (conn_id); + + + +CREATE INDEX idx_commands_host_port ON smp_agent_test_protocol_schema.commands USING btree (host, port); + + + +CREATE INDEX idx_commands_server_commands ON smp_agent_test_protocol_schema.commands USING btree (host, port, created_at, command_id); + + + +CREATE INDEX idx_conn_confirmations_conn_id ON smp_agent_test_protocol_schema.conn_confirmations USING btree (conn_id); + + + +CREATE INDEX idx_conn_invitations_contact_conn_id ON smp_agent_test_protocol_schema.conn_invitations USING btree (contact_conn_id); + + + +CREATE INDEX idx_connections_user ON smp_agent_test_protocol_schema.connections USING btree (user_id); + + + +CREATE INDEX idx_deleted_snd_chunk_replicas_pending ON smp_agent_test_protocol_schema.deleted_snd_chunk_replicas USING btree (created_at); + + + +CREATE INDEX idx_deleted_snd_chunk_replicas_user_id ON smp_agent_test_protocol_schema.deleted_snd_chunk_replicas USING btree (user_id); + + + +CREATE INDEX idx_deleted_snd_chunk_replicas_xftp_server_id ON smp_agent_test_protocol_schema.deleted_snd_chunk_replicas USING btree (xftp_server_id); + + + +CREATE INDEX idx_encrypted_rcv_message_hashes_created_at ON 
smp_agent_test_protocol_schema.encrypted_rcv_message_hashes USING btree (created_at); + + + +CREATE INDEX idx_encrypted_rcv_message_hashes_hash ON smp_agent_test_protocol_schema.encrypted_rcv_message_hashes USING btree (conn_id, hash); + + + +CREATE UNIQUE INDEX idx_inv_short_links_link_id ON smp_agent_test_protocol_schema.inv_short_links USING btree (host, port, link_id); + + + +CREATE INDEX idx_messages_conn_id ON smp_agent_test_protocol_schema.messages USING btree (conn_id); + + + +CREATE INDEX idx_messages_conn_id_internal_rcv_id ON smp_agent_test_protocol_schema.messages USING btree (conn_id, internal_rcv_id); + + + +CREATE INDEX idx_messages_conn_id_internal_snd_id ON smp_agent_test_protocol_schema.messages USING btree (conn_id, internal_snd_id); + + + +CREATE INDEX idx_messages_internal_ts ON smp_agent_test_protocol_schema.messages USING btree (internal_ts); + + + +CREATE INDEX idx_messages_snd_expired ON smp_agent_test_protocol_schema.messages USING btree (conn_id, internal_snd_id, internal_ts); + + + +CREATE INDEX idx_ntf_subscriptions_ntf_host_ntf_port ON smp_agent_test_protocol_schema.ntf_subscriptions USING btree (ntf_host, ntf_port); + + + +CREATE INDEX idx_ntf_subscriptions_smp_host_smp_port ON smp_agent_test_protocol_schema.ntf_subscriptions USING btree (smp_host, smp_port); + + + +CREATE INDEX idx_ntf_tokens_ntf_host_ntf_port ON smp_agent_test_protocol_schema.ntf_tokens USING btree (ntf_host, ntf_port); + + + +CREATE INDEX idx_processed_ratchet_key_hashes_created_at ON smp_agent_test_protocol_schema.processed_ratchet_key_hashes USING btree (created_at); + + + +CREATE INDEX idx_processed_ratchet_key_hashes_hash ON smp_agent_test_protocol_schema.processed_ratchet_key_hashes USING btree (conn_id, hash); + + + +CREATE INDEX idx_ratchets_conn_id ON smp_agent_test_protocol_schema.ratchets USING btree (conn_id); + + + +CREATE INDEX idx_rcv_file_chunk_replicas_pending ON smp_agent_test_protocol_schema.rcv_file_chunk_replicas USING btree (received, 
replica_number); + + + +CREATE INDEX idx_rcv_file_chunk_replicas_rcv_file_chunk_id ON smp_agent_test_protocol_schema.rcv_file_chunk_replicas USING btree (rcv_file_chunk_id); + + + +CREATE INDEX idx_rcv_file_chunk_replicas_xftp_server_id ON smp_agent_test_protocol_schema.rcv_file_chunk_replicas USING btree (xftp_server_id); + + + +CREATE INDEX idx_rcv_file_chunks_rcv_file_id ON smp_agent_test_protocol_schema.rcv_file_chunks USING btree (rcv_file_id); + + + +CREATE INDEX idx_rcv_files_redirect_id ON smp_agent_test_protocol_schema.rcv_files USING btree (redirect_id); + + + +CREATE INDEX idx_rcv_files_status_created_at ON smp_agent_test_protocol_schema.rcv_files USING btree (status, created_at); + + + +CREATE INDEX idx_rcv_files_user_id ON smp_agent_test_protocol_schema.rcv_files USING btree (user_id); + + + +CREATE INDEX idx_rcv_messages_conn_id_internal_id ON smp_agent_test_protocol_schema.rcv_messages USING btree (conn_id, internal_id); + + + +CREATE UNIQUE INDEX idx_rcv_queue_id ON smp_agent_test_protocol_schema.rcv_queues USING btree (conn_id, rcv_queue_id); + + + +CREATE INDEX idx_rcv_queues_client_notice_id ON smp_agent_test_protocol_schema.rcv_queues USING btree (client_notice_id); + + + +CREATE UNIQUE INDEX idx_rcv_queues_link_id ON smp_agent_test_protocol_schema.rcv_queues USING btree (host, port, link_id); + + + +CREATE UNIQUE INDEX idx_rcv_queues_ntf ON smp_agent_test_protocol_schema.rcv_queues USING btree (host, port, ntf_id); + + + +CREATE INDEX idx_rcv_queues_to_subscribe ON smp_agent_test_protocol_schema.rcv_queues USING btree (to_subscribe); + + + +CREATE INDEX idx_server_certs_host_port ON smp_agent_test_protocol_schema.client_services USING btree (host, port); + + + +CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON smp_agent_test_protocol_schema.client_services USING btree (user_id, host, port, server_key_hash); + + + +CREATE INDEX idx_skipped_messages_conn_id ON smp_agent_test_protocol_schema.skipped_messages USING btree (conn_id); + + + 
+CREATE INDEX idx_snd_file_chunk_replica_recipients_snd_file_chunk_replica_id ON smp_agent_test_protocol_schema.snd_file_chunk_replica_recipients USING btree (snd_file_chunk_replica_id); + + + +CREATE INDEX idx_snd_file_chunk_replicas_pending ON smp_agent_test_protocol_schema.snd_file_chunk_replicas USING btree (replica_status, replica_number); + + + +CREATE INDEX idx_snd_file_chunk_replicas_snd_file_chunk_id ON smp_agent_test_protocol_schema.snd_file_chunk_replicas USING btree (snd_file_chunk_id); + + + +CREATE INDEX idx_snd_file_chunk_replicas_xftp_server_id ON smp_agent_test_protocol_schema.snd_file_chunk_replicas USING btree (xftp_server_id); + + + +CREATE INDEX idx_snd_file_chunks_snd_file_id ON smp_agent_test_protocol_schema.snd_file_chunks USING btree (snd_file_id); + + + +CREATE INDEX idx_snd_files_snd_file_entity_id ON smp_agent_test_protocol_schema.snd_files USING btree (snd_file_entity_id); + + + +CREATE INDEX idx_snd_files_status_created_at ON smp_agent_test_protocol_schema.snd_files USING btree (status, created_at); + + + +CREATE INDEX idx_snd_files_user_id ON smp_agent_test_protocol_schema.snd_files USING btree (user_id); + + + +CREATE INDEX idx_snd_message_deliveries ON smp_agent_test_protocol_schema.snd_message_deliveries USING btree (conn_id, snd_queue_id); + + + +CREATE INDEX idx_snd_message_deliveries_conn_id_internal_id ON smp_agent_test_protocol_schema.snd_message_deliveries USING btree (conn_id, internal_id); + + + +CREATE INDEX idx_snd_message_deliveries_expired ON smp_agent_test_protocol_schema.snd_message_deliveries USING btree (conn_id, snd_queue_id, failed, internal_id); + + + +CREATE INDEX idx_snd_messages_conn_id_internal_id ON smp_agent_test_protocol_schema.snd_messages USING btree (conn_id, internal_id); + + + +CREATE INDEX idx_snd_messages_rcpt_internal_id ON smp_agent_test_protocol_schema.snd_messages USING btree (conn_id, rcpt_internal_id); + + + +CREATE INDEX idx_snd_messages_snd_message_body_id ON 
smp_agent_test_protocol_schema.snd_messages USING btree (snd_message_body_id); + + + +CREATE UNIQUE INDEX idx_snd_queue_id ON smp_agent_test_protocol_schema.snd_queues USING btree (conn_id, snd_queue_id); + + + +CREATE INDEX idx_snd_queues_host_port ON smp_agent_test_protocol_schema.snd_queues USING btree (host, port); + + + +CREATE TRIGGER tr_rcv_queue_delete AFTER DELETE ON smp_agent_test_protocol_schema.rcv_queues FOR EACH ROW EXECUTE FUNCTION smp_agent_test_protocol_schema.on_rcv_queue_delete(); + + + +CREATE TRIGGER tr_rcv_queue_insert AFTER INSERT ON smp_agent_test_protocol_schema.rcv_queues FOR EACH ROW EXECUTE FUNCTION smp_agent_test_protocol_schema.on_rcv_queue_insert(); + + + +CREATE TRIGGER tr_rcv_queue_update AFTER UPDATE ON smp_agent_test_protocol_schema.rcv_queues FOR EACH ROW EXECUTE FUNCTION smp_agent_test_protocol_schema.on_rcv_queue_update(); + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.client_services + ADD CONSTRAINT client_services_host_port_fkey FOREIGN KEY (host, port) REFERENCES smp_agent_test_protocol_schema.servers(host, port) ON DELETE RESTRICT; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.client_services + ADD CONSTRAINT client_services_user_id_fkey FOREIGN KEY (user_id) REFERENCES smp_agent_test_protocol_schema.users(user_id) ON UPDATE RESTRICT ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.commands + ADD CONSTRAINT commands_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.commands + ADD CONSTRAINT commands_host_port_fkey FOREIGN KEY (host, port) REFERENCES smp_agent_test_protocol_schema.servers(host, port) ON UPDATE CASCADE ON DELETE RESTRICT; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.conn_confirmations + ADD CONSTRAINT conn_confirmations_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + 
+ + +ALTER TABLE ONLY smp_agent_test_protocol_schema.conn_invitations + ADD CONSTRAINT conn_invitations_contact_conn_id_fkey FOREIGN KEY (contact_conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE SET NULL; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.connections + ADD CONSTRAINT connections_user_id_fkey FOREIGN KEY (user_id) REFERENCES smp_agent_test_protocol_schema.users(user_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.deleted_snd_chunk_replicas + ADD CONSTRAINT deleted_snd_chunk_replicas_user_id_fkey FOREIGN KEY (user_id) REFERENCES smp_agent_test_protocol_schema.users(user_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.deleted_snd_chunk_replicas + ADD CONSTRAINT deleted_snd_chunk_replicas_xftp_server_id_fkey FOREIGN KEY (xftp_server_id) REFERENCES smp_agent_test_protocol_schema.xftp_servers(xftp_server_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.encrypted_rcv_message_hashes + ADD CONSTRAINT encrypted_rcv_message_hashes_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.messages + ADD CONSTRAINT fk_messages_rcv_messages FOREIGN KEY (conn_id, internal_rcv_id) REFERENCES smp_agent_test_protocol_schema.rcv_messages(conn_id, internal_rcv_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.messages + ADD CONSTRAINT fk_messages_snd_messages FOREIGN KEY (conn_id, internal_snd_id) REFERENCES smp_agent_test_protocol_schema.snd_messages(conn_id, internal_snd_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.inv_short_links + ADD CONSTRAINT inv_short_links_host_port_fkey FOREIGN KEY (host, port) REFERENCES smp_agent_test_protocol_schema.servers(host, port) ON UPDATE CASCADE ON DELETE RESTRICT; + + + 
+ALTER TABLE ONLY smp_agent_test_protocol_schema.messages + ADD CONSTRAINT messages_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_subscriptions + ADD CONSTRAINT ntf_subscriptions_ntf_host_ntf_port_fkey FOREIGN KEY (ntf_host, ntf_port) REFERENCES smp_agent_test_protocol_schema.ntf_servers(ntf_host, ntf_port) ON UPDATE CASCADE ON DELETE RESTRICT; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_subscriptions + ADD CONSTRAINT ntf_subscriptions_smp_host_smp_port_fkey FOREIGN KEY (smp_host, smp_port) REFERENCES smp_agent_test_protocol_schema.servers(host, port) ON UPDATE CASCADE ON DELETE SET NULL; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ntf_tokens + ADD CONSTRAINT ntf_tokens_ntf_host_ntf_port_fkey FOREIGN KEY (ntf_host, ntf_port) REFERENCES smp_agent_test_protocol_schema.ntf_servers(ntf_host, ntf_port) ON UPDATE CASCADE ON DELETE RESTRICT; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.processed_ratchet_key_hashes + ADD CONSTRAINT processed_ratchet_key_hashes_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.ratchets + ADD CONSTRAINT ratchets_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_file_chunk_replicas + ADD CONSTRAINT rcv_file_chunk_replicas_rcv_file_chunk_id_fkey FOREIGN KEY (rcv_file_chunk_id) REFERENCES smp_agent_test_protocol_schema.rcv_file_chunks(rcv_file_chunk_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_file_chunk_replicas + ADD CONSTRAINT rcv_file_chunk_replicas_xftp_server_id_fkey FOREIGN KEY (xftp_server_id) REFERENCES smp_agent_test_protocol_schema.xftp_servers(xftp_server_id) ON DELETE CASCADE; + + + +ALTER 
TABLE ONLY smp_agent_test_protocol_schema.rcv_file_chunks + ADD CONSTRAINT rcv_file_chunks_rcv_file_id_fkey FOREIGN KEY (rcv_file_id) REFERENCES smp_agent_test_protocol_schema.rcv_files(rcv_file_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_files + ADD CONSTRAINT rcv_files_redirect_id_fkey FOREIGN KEY (redirect_id) REFERENCES smp_agent_test_protocol_schema.rcv_files(rcv_file_id) ON DELETE SET NULL; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_files + ADD CONSTRAINT rcv_files_user_id_fkey FOREIGN KEY (user_id) REFERENCES smp_agent_test_protocol_schema.users(user_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_messages + ADD CONSTRAINT rcv_messages_conn_id_internal_id_fkey FOREIGN KEY (conn_id, internal_id) REFERENCES smp_agent_test_protocol_schema.messages(conn_id, internal_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_queues + ADD CONSTRAINT rcv_queues_client_notice_id_fkey FOREIGN KEY (client_notice_id) REFERENCES smp_agent_test_protocol_schema.client_notices(client_notice_id) ON UPDATE RESTRICT ON DELETE SET NULL; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_queues + ADD CONSTRAINT rcv_queues_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.rcv_queues + ADD CONSTRAINT rcv_queues_host_port_fkey FOREIGN KEY (host, port) REFERENCES smp_agent_test_protocol_schema.servers(host, port) ON UPDATE CASCADE ON DELETE RESTRICT; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.skipped_messages + ADD CONSTRAINT skipped_messages_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.ratchets(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunk_replica_recipients + ADD CONSTRAINT snd_file_chunk_replica_recipient_snd_file_chunk_replica_id_fkey FOREIGN KEY 
(snd_file_chunk_replica_id) REFERENCES smp_agent_test_protocol_schema.snd_file_chunk_replicas(snd_file_chunk_replica_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunk_replicas + ADD CONSTRAINT snd_file_chunk_replicas_snd_file_chunk_id_fkey FOREIGN KEY (snd_file_chunk_id) REFERENCES smp_agent_test_protocol_schema.snd_file_chunks(snd_file_chunk_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunk_replicas + ADD CONSTRAINT snd_file_chunk_replicas_xftp_server_id_fkey FOREIGN KEY (xftp_server_id) REFERENCES smp_agent_test_protocol_schema.xftp_servers(xftp_server_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_file_chunks + ADD CONSTRAINT snd_file_chunks_snd_file_id_fkey FOREIGN KEY (snd_file_id) REFERENCES smp_agent_test_protocol_schema.snd_files(snd_file_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_files + ADD CONSTRAINT snd_files_user_id_fkey FOREIGN KEY (user_id) REFERENCES smp_agent_test_protocol_schema.users(user_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_message_deliveries + ADD CONSTRAINT snd_message_deliveries_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_message_deliveries + ADD CONSTRAINT snd_message_deliveries_conn_id_internal_id_fkey FOREIGN KEY (conn_id, internal_id) REFERENCES smp_agent_test_protocol_schema.messages(conn_id, internal_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_messages + ADD CONSTRAINT snd_messages_conn_id_internal_id_fkey FOREIGN KEY (conn_id, internal_id) REFERENCES smp_agent_test_protocol_schema.messages(conn_id, internal_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_messages + ADD CONSTRAINT 
snd_messages_snd_message_body_id_fkey FOREIGN KEY (snd_message_body_id) REFERENCES smp_agent_test_protocol_schema.snd_message_bodies(snd_message_body_id) ON DELETE SET NULL; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_queues + ADD CONSTRAINT snd_queues_conn_id_fkey FOREIGN KEY (conn_id) REFERENCES smp_agent_test_protocol_schema.connections(conn_id) ON DELETE CASCADE; + + + +ALTER TABLE ONLY smp_agent_test_protocol_schema.snd_queues + ADD CONSTRAINT snd_queues_host_port_fkey FOREIGN KEY (host, port) REFERENCES smp_agent_test_protocol_schema.servers(host, port) ON UPDATE CASCADE ON DELETE RESTRICT; + + + diff --git a/src/Simplex/Messaging/Agent/Store/Postgres/Util.hs b/src/Simplex/Messaging/Agent/Store/Postgres/Util.hs index 0913c76e3c..bcbb0e281d 100644 --- a/src/Simplex/Messaging/Agent/Store/Postgres/Util.hs +++ b/src/Simplex/Messaging/Agent/Store/Postgres/Util.hs @@ -21,30 +21,32 @@ import Database.PostgreSQL.Simple.SqlQQ (sql) createDBAndUserIfNotExists :: ConnectInfo -> IO () createDBAndUserIfNotExists ConnectInfo {connectUser = user, connectDatabase = dbName} = do -- connect to the default "postgres" maintenance database - bracket (PSQL.connect defaultConnectInfo {connectUser = "postgres", connectDatabase = "postgres"}) PSQL.close $ - \postgresDB -> do - void $ PSQL.execute_ postgresDB "SET client_min_messages TO WARNING" - -- check if the user exists, create if not - [Only userExists] <- - PSQL.query - postgresDB - [sql| - SELECT EXISTS ( - SELECT 1 FROM pg_catalog.pg_roles - WHERE rolname = ? 
- ) - |] - (Only user) - unless userExists $ void $ PSQL.execute_ postgresDB (fromString $ "CREATE USER " <> user) - -- check if the database exists, create if not - dbExists <- checkDBExists postgresDB dbName - unless dbExists $ void $ PSQL.execute_ postgresDB (fromString $ "CREATE DATABASE " <> dbName <> " OWNER " <> user) + bracket (PSQL.connect defaultConnectInfo {connectUser = "postgres", connectDatabase = "postgres"}) PSQL.close $ \db -> do + execSQL db "SET client_min_messages TO WARNING" + -- check if the user exists, create if not + [Only userExists] <- + PSQL.query + db + [sql| + SELECT EXISTS ( + SELECT 1 FROM pg_catalog.pg_roles + WHERE rolname = ? + ) + |] + (Only user) + unless userExists $ execSQL db $ "CREATE USER " <> user + -- check if the database exists, create if not + dbExists <- checkDBExists db dbName + unless dbExists $ do + execSQL db $ "CREATE DATABASE " <> dbName <> " OWNER " <> user + bracket (PSQL.connect defaultConnectInfo {connectUser = "postgres", connectDatabase = dbName}) PSQL.close $ + (`execSQL` "CREATE EXTENSION IF NOT EXISTS pgcrypto") checkDBExists :: PSQL.Connection -> String -> IO Bool -checkDBExists postgresDB dbName = do +checkDBExists db dbName = do [Only dbExists] <- PSQL.query - postgresDB + db [sql| SELECT EXISTS ( SELECT 1 FROM pg_catalog.pg_database @@ -56,45 +58,45 @@ checkDBExists postgresDB dbName = do dropSchema :: ConnectInfo -> String -> IO () dropSchema connectInfo schema = - bracket (PSQL.connect connectInfo) PSQL.close $ - \db -> do - void $ PSQL.execute_ db "SET client_min_messages TO WARNING" - void $ PSQL.execute_ db (fromString $ "DROP SCHEMA IF EXISTS " <> schema <> " CASCADE") + bracket (PSQL.connect connectInfo) PSQL.close $ \db -> do + execSQL db "SET client_min_messages TO WARNING" + execSQL db $ "DROP SCHEMA IF EXISTS " <> schema <> " CASCADE" dropAllSchemasExceptSystem :: ConnectInfo -> IO () dropAllSchemasExceptSystem connectInfo = - bracket (PSQL.connect connectInfo) PSQL.close $ - \db -> do - 
void $ PSQL.execute_ db "SET client_min_messages TO WARNING" - schemaNames :: [Only String] <- - PSQL.query_ + bracket (PSQL.connect connectInfo) PSQL.close $ \db -> do + execSQL db "SET client_min_messages TO WARNING" + schemaNames :: [Only String] <- + PSQL.query_ + db + [sql| + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name NOT IN ('public', 'pg_catalog', 'information_schema') + |] + forM_ schemaNames $ \(Only schema) -> + execSQL db $ "DROP SCHEMA " <> schema <> " CASCADE" + +dropDatabaseAndUser :: ConnectInfo -> IO () +dropDatabaseAndUser ConnectInfo {connectUser = user, connectDatabase = dbName} = + bracket (PSQL.connect defaultConnectInfo {connectUser = "postgres", connectDatabase = "postgres"}) PSQL.close $ \db -> do + execSQL db "SET client_min_messages TO WARNING" + dbExists <- checkDBExists db dbName + when dbExists $ do + execSQL db $ "ALTER DATABASE " <> dbName <> " WITH ALLOW_CONNECTIONS false" + -- terminate all connections to the database + _r :: [Only Bool] <- + PSQL.query db [sql| - SELECT schema_name - FROM information_schema.schemata - WHERE schema_name NOT IN ('public', 'pg_catalog', 'information_schema') + SELECT pg_terminate_backend(pg_stat_activity.pid) + FROM pg_stat_activity + WHERE datname = ? 
+ AND pid <> pg_backend_pid() |] - forM_ schemaNames $ \(Only schema) -> - PSQL.execute_ db (fromString $ "DROP SCHEMA " <> schema <> " CASCADE") + (Only dbName) + execSQL db $ "DROP DATABASE " <> dbName + execSQL db $ "DROP USER IF EXISTS " <> user -dropDatabaseAndUser :: ConnectInfo -> IO () -dropDatabaseAndUser ConnectInfo {connectUser = user, connectDatabase = dbName} = - bracket (PSQL.connect defaultConnectInfo {connectUser = "postgres", connectDatabase = "postgres"}) PSQL.close $ - \postgresDB -> do - void $ PSQL.execute_ postgresDB "SET client_min_messages TO WARNING" - dbExists <- checkDBExists postgresDB dbName - when dbExists $ do - void $ PSQL.execute_ postgresDB (fromString $ "ALTER DATABASE " <> dbName <> " WITH ALLOW_CONNECTIONS false") - -- terminate all connections to the database - _r :: [Only Bool] <- - PSQL.query - postgresDB - [sql| - SELECT pg_terminate_backend(pg_stat_activity.pid) - FROM pg_stat_activity - WHERE datname = ? - AND pid <> pg_backend_pid() - |] - (Only dbName) - void $ PSQL.execute_ postgresDB (fromString $ "DROP DATABASE " <> dbName) - void $ PSQL.execute_ postgresDB (fromString $ "DROP USER IF EXISTS " <> user) +execSQL :: PSQL.Connection -> String -> IO () +execSQL db = void . PSQL.execute_ db . 
fromString diff --git a/src/Simplex/Messaging/Agent/Store/SQLite.hs b/src/Simplex/Messaging/Agent/Store/SQLite.hs index 688eae0d20..45c1f26ad4 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite.hs @@ -42,9 +42,15 @@ module Simplex.Messaging.Agent.Store.SQLite ) where +import Control.Concurrent.MVar +import Control.Concurrent.STM +import Control.Exception (bracketOnError, onException, throwIO) import Control.Monad +import Data.Bits (xor) import Data.ByteArray (ScrubbedBytes) import qualified Data.ByteArray as BA +import Data.ByteString (ByteString) +import qualified Data.ByteString as B import Data.Functor (($>)) import Data.IORef import Data.Maybe (fromMaybe) @@ -54,17 +60,19 @@ import Database.SQLite.Simple (Query (..)) import qualified Database.SQLite.Simple as SQL import Database.SQLite.Simple.QQ (sql) import qualified Database.SQLite3 as SQLite3 +import Database.SQLite3.Bindings +import Foreign.C.Types +import Foreign.Ptr import Simplex.Messaging.Agent.Store.Migrations (DBMigrate (..), sharedMigrateSchema) import qualified Simplex.Messaging.Agent.Store.SQLite.Migrations as Migrations import Simplex.Messaging.Agent.Store.SQLite.Common import qualified Simplex.Messaging.Agent.Store.SQLite.DB as DB import Simplex.Messaging.Agent.Store.Shared (Migration (..), MigrationConfig (..), MigrationError (..)) +import Simplex.Messaging.Agent.Store.SQLite.Util (SQLiteFunc, createStaticFunction, mkSQLiteFunc) +import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Util (ifM, safeDecodeUtf8) import System.Directory (copyFile, createDirectoryIfMissing, doesFileExist) import System.FilePath (takeDirectory, takeFileName, ()) -import UnliftIO.Exception (bracketOnError, onException) -import UnliftIO.MVar -import UnliftIO.STM -- * SQLite Store implementation @@ -109,9 +117,9 @@ connectDB path key track = do pure db where prepare db = do - let exec = SQLite3.exec $ SQL.connectionHandle $ DB.conn db - unless 
(BA.null key) . exec $ "PRAGMA key = " <> keyString key <> ";" - exec . fromQuery $ + let db' = SQL.connectionHandle $ DB.conn db + unless (BA.null key) . SQLite3.exec db' $ "PRAGMA key = " <> keyString key <> ";" + SQLite3.exec db' . fromQuery $ [sql| PRAGMA busy_timeout = 100; PRAGMA foreign_keys = ON; @@ -119,6 +127,21 @@ connectDB path key track = do PRAGMA secure_delete = ON; PRAGMA auto_vacuum = FULL; |] + createStaticFunction db' "simplex_xor_md5_combine" 2 True sqliteXorMd5CombinePtr + >>= either (throwIO . userError . show) pure + +foreign export ccall "simplex_xor_md5_combine" sqliteXorMd5Combine :: SQLiteFunc + +foreign import ccall "&simplex_xor_md5_combine" sqliteXorMd5CombinePtr :: FunPtr SQLiteFunc + +sqliteXorMd5Combine :: SQLiteFunc +sqliteXorMd5Combine = mkSQLiteFunc $ \cxt args -> do + idsHash <- SQLite3.funcArgBlob args 0 + rId <- SQLite3.funcArgBlob args 1 + SQLite3.funcResultBlob cxt $ xorMd5Combine idsHash rId + +xorMd5Combine :: ByteString -> ByteString -> ByteString +xorMd5Combine idsHash rId = B.packZipWith xor idsHash $ C.md5Hash rId closeDBStore :: DBStore -> IO () closeDBStore st@DBStore {dbClosed} = diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs index 3800dc3626..af70c41f56 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs @@ -53,6 +53,12 @@ withConnectionPriority DBStore {dbSem, dbConnection} priority action | priority = E.bracket_ signal release $ withMVar dbConnection action | otherwise = lowPriority where + -- To debug FK errors, set foreign_keys = OFF in Simplex.Messaging.Agent.Store.SQLite and use action' instead of action + -- action' conn = do + -- r <- action conn + -- violations <- DB.query_ conn "PRAGMA foreign_key_check" :: IO [ (String, Int, String, Int)] + -- unless (null violations) $ print violations + -- pure r lowPriority = wait >> withMVar dbConnection (\db -> ifM free (Just <$> 
action db) (pure Nothing)) >>= maybe lowPriority pure signal = atomically $ modifyTVar' dbSem (+ 1) release = atomically $ modifyTVar' dbSem $ \sem -> if sem > 0 then sem - 1 else 0 diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs index 780ced1d4a..ee6a0095a1 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/M20251020_service_certs.hs @@ -5,7 +5,6 @@ module Simplex.Messaging.Agent.Store.SQLite.Migrations.M20251020_service_certs w import Database.SQLite.Simple (Query) import Database.SQLite.Simple.QQ (sql) --- TODO move date forward, create migration for postgres m20251020_service_certs :: Query m20251020_service_certs = [sql| @@ -13,27 +12,81 @@ CREATE TABLE client_services( user_id INTEGER NOT NULL REFERENCES users ON DELETE CASCADE, host TEXT NOT NULL, port TEXT NOT NULL, + server_key_hash BLOB, service_cert BLOB NOT NULL, service_cert_hash BLOB NOT NULL, service_priv_key BLOB NOT NULL, - rcv_service_id BLOB, + service_id BLOB, + service_queue_count INTEGER NOT NULL DEFAULT 0, + service_queue_ids_hash BLOB NOT NULL DEFAULT x'00000000000000000000000000000000', FOREIGN KEY(host, port) REFERENCES servers ON UPDATE CASCADE ON DELETE RESTRICT ); -CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON client_services(user_id, host, port); - +CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON client_services(user_id, host, port, server_key_hash); CREATE INDEX idx_server_certs_host_port ON client_services(host, port); ALTER TABLE rcv_queues ADD COLUMN rcv_service_assoc INTEGER NOT NULL DEFAULT 0; + +CREATE TRIGGER tr_rcv_queue_insert +AFTER INSERT ON rcv_queues +FOR EACH ROW +WHEN NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count + 1, + service_queue_ids_hash = 
simplex_xor_md5_combine(service_queue_ids_hash, NEW.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = NEW.conn_id) + AND host = NEW.host AND port = NEW.port; +END; + +CREATE TRIGGER tr_rcv_queue_delete +AFTER DELETE ON rcv_queues +FOR EACH ROW +WHEN OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count - 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, OLD.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = OLD.conn_id) + AND host = OLD.host AND port = OLD.port; +END; + +CREATE TRIGGER tr_rcv_queue_update_remove +AFTER UPDATE ON rcv_queues +FOR EACH ROW +WHEN OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 AND NOT (NEW.rcv_service_assoc != 0 AND NEW.deleted = 0) +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count - 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, OLD.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = OLD.conn_id) + AND host = OLD.host AND port = OLD.port; +END; + +CREATE TRIGGER tr_rcv_queue_update_add +AFTER UPDATE ON rcv_queues +FOR EACH ROW +WHEN NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 AND NOT (OLD.rcv_service_assoc != 0 AND OLD.deleted = 0) +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count + 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, NEW.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = NEW.conn_id) + AND host = NEW.host AND port = NEW.port; +END; |] down_m20251020_service_certs :: Query down_m20251020_service_certs = [sql| +DROP TRIGGER tr_rcv_queue_insert; +DROP TRIGGER tr_rcv_queue_delete; +DROP TRIGGER tr_rcv_queue_update_remove; +DROP TRIGGER tr_rcv_queue_update_add; + ALTER TABLE rcv_queues DROP COLUMN rcv_service_assoc; DROP INDEX idx_server_certs_host_port; - DROP INDEX idx_server_certs_user_id_host_port; DROP TABLE 
client_services; diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql index 8013313ac3..339e3a8ee8 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Migrations/agent_schema.sql @@ -455,10 +455,13 @@ CREATE TABLE client_services( user_id INTEGER NOT NULL REFERENCES users ON DELETE CASCADE, host TEXT NOT NULL, port TEXT NOT NULL, + server_key_hash BLOB, service_cert BLOB NOT NULL, service_cert_hash BLOB NOT NULL, service_priv_key BLOB NOT NULL, - rcv_service_id BLOB, + service_id BLOB, + service_queue_count INTEGER NOT NULL DEFAULT 0, + service_queue_ids_hash BLOB NOT NULL DEFAULT x'00000000000000000000000000000000', FOREIGN KEY(host, port) REFERENCES servers ON UPDATE CASCADE ON DELETE RESTRICT ); CREATE UNIQUE INDEX idx_rcv_queues_ntf ON rcv_queues(host, port, ntf_id); @@ -607,6 +610,51 @@ CREATE INDEX idx_rcv_queues_client_notice_id ON rcv_queues(client_notice_id); CREATE UNIQUE INDEX idx_server_certs_user_id_host_port ON client_services( user_id, host, - port + port, + server_key_hash ); CREATE INDEX idx_server_certs_host_port ON client_services(host, port); +CREATE TRIGGER tr_rcv_queue_insert +AFTER INSERT ON rcv_queues +FOR EACH ROW +WHEN NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count + 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, NEW.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = NEW.conn_id) + AND host = NEW.host AND port = NEW.port; +END; +CREATE TRIGGER tr_rcv_queue_delete +AFTER DELETE ON rcv_queues +FOR EACH ROW +WHEN OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count - 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, OLD.rcv_id) + WHERE 
user_id = (SELECT user_id FROM connections WHERE conn_id = OLD.conn_id) + AND host = OLD.host AND port = OLD.port; +END; +CREATE TRIGGER tr_rcv_queue_update_remove +AFTER UPDATE ON rcv_queues +FOR EACH ROW +WHEN OLD.rcv_service_assoc != 0 AND OLD.deleted = 0 AND NOT (NEW.rcv_service_assoc != 0 AND NEW.deleted = 0) +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count - 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, OLD.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = OLD.conn_id) + AND host = OLD.host AND port = OLD.port; +END; +CREATE TRIGGER tr_rcv_queue_update_add +AFTER UPDATE ON rcv_queues +FOR EACH ROW +WHEN NEW.rcv_service_assoc != 0 AND NEW.deleted = 0 AND NOT (OLD.rcv_service_assoc != 0 AND OLD.deleted = 0) +BEGIN + UPDATE client_services + SET service_queue_count = service_queue_count + 1, + service_queue_ids_hash = simplex_xor_md5_combine(service_queue_ids_hash, NEW.rcv_id) + WHERE user_id = (SELECT user_id FROM connections WHERE conn_id = NEW.conn_id) + AND host = NEW.host AND port = NEW.port; +END; diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs new file mode 100644 index 0000000000..a3c3b94ac4 --- /dev/null +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs @@ -0,0 +1,41 @@ +module Simplex.Messaging.Agent.Store.SQLite.Util where + +import Control.Exception (SomeException, catch, mask_) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import Database.SQLite3.Direct (Database (..), FuncArgs (..), FuncContext (..)) +import Database.SQLite3.Bindings +import Foreign.C.String +import Foreign.Ptr +import Foreign.StablePtr + +data CFuncPtrs = CFuncPtrs (FunPtr CFunc) (FunPtr CFunc) (FunPtr CFuncFinal) + +type SQLiteFunc = Ptr CContext -> CArgCount -> Ptr (Ptr CValue) -> IO () + +mkSQLiteFunc :: (FuncContext -> FuncArgs -> IO ()) -> SQLiteFunc +mkSQLiteFunc f cxt nArgs cvals = 
catchAsResultError cxt $ f (FuncContext cxt) (FuncArgs nArgs cvals) +{-# INLINE mkSQLiteFunc #-} + +-- Based on createFunction from Database.SQLite3.Direct, but uses static function pointer to avoid dynamic wrapper that triggers DCL. +createStaticFunction :: Database -> ByteString -> CArgCount -> Bool -> FunPtr SQLiteFunc -> IO (Either Error ()) +createStaticFunction (Database db) name nArgs isDet funPtr = mask_ $ do + u <- newStablePtr $ CFuncPtrs funPtr nullFunPtr nullFunPtr + let flags = if isDet then c_SQLITE_DETERMINISTIC else 0 + B.useAsCString name $ \namePtr -> + toResult () <$> c_sqlite3_create_function_v2 db namePtr nArgs flags (castStablePtrToPtr u) funPtr nullFunPtr nullFunPtr nullFunPtr + +-- Convert a 'CError' to a 'Either Error', in the common case where +-- SQLITE_OK signals success and anything else signals an error. +-- +-- Note that SQLITE_OK == 0. +toResult :: a -> CError -> Either Error a +toResult a (CError 0) = Right a +toResult _ code = Left $ decodeError code + +-- call c_sqlite3_result_error in the event of an error +catchAsResultError :: Ptr CContext -> IO () -> IO () +catchAsResultError ctx action = catch action $ \exn -> do + let msg = show (exn :: SomeException) + withCAStringLen msg $ \(ptr, len) -> + c_sqlite3_result_error ctx ptr (fromIntegral len) diff --git a/src/Simplex/Messaging/Agent/TSessionSubs.hs b/src/Simplex/Messaging/Agent/TSessionSubs.hs index cce103fe6a..ab15b9793c 100644 --- a/src/Simplex/Messaging/Agent/TSessionSubs.hs +++ b/src/Simplex/Messaging/Agent/TSessionSubs.hs @@ -2,6 +2,7 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} module Simplex.Messaging.Agent.TSessionSubs ( TSessionSubs (sessionSubs), @@ -12,7 +13,10 @@ module Simplex.Messaging.Agent.TSessionSubs hasPendingSub, addPendingSub, setSessionId, + setPendingServiceSub, + setActiveServiceSub, addActiveSub, + addActiveSub', batchAddActiveSubs, batchAddPendingSubs, 
deletePendingSub, @@ -38,13 +42,13 @@ import qualified Data.Map.Strict as M import Data.Maybe (isJust) import qualified Data.Set as S import Simplex.Messaging.Agent.Protocol (SMPQueue (..)) -import Simplex.Messaging.Agent.Store (RcvQueueSub (..), SomeRcvQueue) +import Simplex.Messaging.Agent.Store (RcvQueue, RcvQueueSub (..), SomeRcvQueue, StoredRcvQueue (rcvServiceAssoc), rcvQueueSub) import Simplex.Messaging.Client (SMPTransportSession, TransportSessionMode (..)) -import Simplex.Messaging.Protocol (RecipientId) +import Simplex.Messaging.Protocol (RecipientId, ServiceSub (..), queueIdHash) import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM import Simplex.Messaging.Transport -import Simplex.Messaging.Util (($>>=)) +import Simplex.Messaging.Util (anyM, ($>>=)) data TSessionSubs = TSessionSubs { sessionSubs :: TMap SMPTransportSession SessSubs @@ -53,7 +57,9 @@ data TSessionSubs = TSessionSubs data SessSubs = SessSubs { subsSessId :: TVar (Maybe SessionId), activeSubs :: TMap RecipientId RcvQueueSub, - pendingSubs :: TMap RecipientId RcvQueueSub + pendingSubs :: TMap RecipientId RcvQueueSub, + activeServiceSub :: TVar (Maybe ServiceSub), + pendingServiceSub :: TVar (Maybe ServiceSub) } emptyIO :: IO TSessionSubs @@ -72,7 +78,7 @@ getSessSubs :: SMPTransportSession -> TSessionSubs -> STM SessSubs getSessSubs tSess ss = lookupSubs tSess ss >>= maybe new pure where new = do - s <- SessSubs <$> newTVar Nothing <*> newTVar M.empty <*> newTVar M.empty + s <- SessSubs <$> newTVar Nothing <*> newTVar M.empty <*> newTVar M.empty <*> newTVar Nothing <*> newTVar Nothing TM.insert tSess s $ sessionSubs ss pure s @@ -98,8 +104,27 @@ setSessionId tSess sessId ss = do Nothing -> writeTVar (subsSessId s) (Just sessId) Just sessId' -> unless (sessId == sessId') $ void $ setSubsPending_ s $ Just sessId -addActiveSub :: SMPTransportSession -> SessionId -> RcvQueueSub -> TSessionSubs -> STM () -addActiveSub tSess sessId rq ss = do +setPendingServiceSub :: 
SMPTransportSession -> ServiceSub -> TSessionSubs -> STM () +setPendingServiceSub tSess serviceSub ss = do + s <- getSessSubs tSess ss + writeTVar (pendingServiceSub s) $ Just serviceSub + +setActiveServiceSub :: SMPTransportSession -> SessionId -> ServiceSub -> TSessionSubs -> STM () +setActiveServiceSub tSess sessId serviceSub ss = do + s <- getSessSubs tSess ss + sessId' <- readTVar $ subsSessId s + if Just sessId == sessId' + then do + writeTVar (activeServiceSub s) $ Just serviceSub + writeTVar (pendingServiceSub s) Nothing + else writeTVar (pendingServiceSub s) $ Just serviceSub + +addActiveSub :: SMPTransportSession -> SessionId -> RcvQueue -> TSessionSubs -> STM () +addActiveSub tSess sessId rq = addActiveSub' tSess sessId (rcvQueueSub rq) (rcvServiceAssoc rq) +{-# INLINE addActiveSub #-} + +addActiveSub' :: SMPTransportSession -> SessionId -> RcvQueueSub -> Bool -> TSessionSubs -> STM () +addActiveSub' tSess sessId rq serviceAssoc ss = do s <- getSessSubs tSess ss sessId' <- readTVar $ subsSessId s let rId = rcvId rq @@ -107,10 +132,13 @@ addActiveSub tSess sessId rq ss = do then do TM.insert rId rq $ activeSubs s TM.delete rId $ pendingSubs s + when serviceAssoc $ + let updateServiceSub (ServiceSub serviceId n idsHash) = ServiceSub serviceId (n + 1) (idsHash <> queueIdHash rId) + in modifyTVar' (activeServiceSub s) (updateServiceSub <$>) else TM.insert rId rq $ pendingSubs s -batchAddActiveSubs :: SMPTransportSession -> SessionId -> [RcvQueueSub] -> TSessionSubs -> STM () -batchAddActiveSubs tSess sessId rqs ss = do +batchAddActiveSubs :: SMPTransportSession -> SessionId -> ([RcvQueueSub], [RcvQueueSub]) -> TSessionSubs -> STM () +batchAddActiveSubs tSess sessId (rqs, serviceRQs) ss = do s <- getSessSubs tSess ss sessId' <- readTVar $ subsSessId s let qs = M.fromList $ map (\rq -> (rcvId rq, rq)) rqs @@ -118,6 +146,12 @@ batchAddActiveSubs tSess sessId rqs ss = do then do TM.union qs $ activeSubs s modifyTVar' (pendingSubs s) (`M.difference` qs) + 
serviceSub_ <- readTVar $ activeServiceSub s + forM_ serviceSub_ $ \(ServiceSub serviceId n idsHash) -> do + unless (null serviceRQs) $ do + let idsHash' = idsHash <> mconcat (map (queueIdHash . rcvId) serviceRQs) + n' = n + fromIntegral (length serviceRQs) + writeTVar (activeServiceSub s) $ Just $ ServiceSub serviceId n' idsHash' else TM.union qs $ pendingSubs s batchAddPendingSubs :: SMPTransportSession -> [RcvQueueSub] -> TSessionSubs -> STM () @@ -143,11 +177,15 @@ batchDeleteSubs tSess rqs = lookupSubs tSess >=> mapM_ (\s -> delete (activeSubs delete = (`modifyTVar'` (`M.withoutKeys` rIds)) hasPendingSubs :: SMPTransportSession -> TSessionSubs -> STM Bool -hasPendingSubs tSess = lookupSubs tSess >=> maybe (pure False) (fmap (not . null) . readTVar . pendingSubs) +hasPendingSubs tSess = lookupSubs tSess >=> maybe (pure False) (\s -> anyM [hasSubs s, hasServiceSub s]) + where + hasSubs = fmap (not . null) . readTVar . pendingSubs + hasServiceSub = fmap isJust . readTVar . pendingServiceSub -getPendingSubs :: SMPTransportSession -> TSessionSubs -> STM (Map RecipientId RcvQueueSub) -getPendingSubs = getSubs_ pendingSubs -{-# INLINE getPendingSubs #-} +getPendingSubs :: SMPTransportSession -> TSessionSubs -> STM (Map RecipientId RcvQueueSub, Maybe ServiceSub) +getPendingSubs tSess = lookupSubs tSess >=> maybe (pure (M.empty, Nothing)) get + where + get s = liftM2 (,) (readTVar $ pendingSubs s) (readTVar $ pendingServiceSub s) getActiveSubs :: SMPTransportSession -> TSessionSubs -> STM (Map RecipientId RcvQueueSub) getActiveSubs = getSubs_ activeSubs @@ -156,7 +194,7 @@ getActiveSubs = getSubs_ activeSubs getSubs_ :: (SessSubs -> TMap RecipientId RcvQueueSub) -> SMPTransportSession -> TSessionSubs -> STM (Map RecipientId RcvQueueSub) getSubs_ subs tSess = lookupSubs tSess >=> maybe (pure M.empty) (readTVar . 
subs) -setSubsPending :: TransportSessionMode -> SMPTransportSession -> SessionId -> TSessionSubs -> STM (Map RecipientId RcvQueueSub) +setSubsPending :: TransportSessionMode -> SMPTransportSession -> SessionId -> TSessionSubs -> STM (Map RecipientId RcvQueueSub, Maybe ServiceSub) setSubsPending mode tSess@(uId, srv, connId_) sessId tss@(TSessionSubs ss) | entitySession == isJust connId_ = TM.lookup tSess ss >>= withSessSubs (`setSubsPending_` Nothing) @@ -166,17 +204,17 @@ setSubsPending mode tSess@(uId, srv, connId_) sessId tss@(TSessionSubs ss) entitySession = mode == TSMEntity sessEntId = if entitySession then Just else const Nothing withSessSubs run = \case - Nothing -> pure M.empty + Nothing -> pure (M.empty, Nothing) Just s -> do sessId' <- readTVar $ subsSessId s - if Just sessId == sessId' then run s else pure M.empty + if Just sessId == sessId' then run s else pure (M.empty, Nothing) setPendingChangeMode s = do subs <- M.union <$> readTVar (activeSubs s) <*> readTVar (pendingSubs s) unless (null subs) $ forM_ subs $ \rq -> addPendingSub (uId, srv, sessEntId (connId rq)) rq tss - pure subs + (subs,) <$> setServiceSubPending_ s -setSubsPending_ :: SessSubs -> Maybe SessionId -> STM (Map RecipientId RcvQueueSub) +setSubsPending_ :: SessSubs -> Maybe SessionId -> STM (Map RecipientId RcvQueueSub, Maybe ServiceSub) setSubsPending_ s sessId_ = do writeTVar (subsSessId s) sessId_ let as = activeSubs s @@ -184,7 +222,15 @@ setSubsPending_ s sessId_ = do unless (null subs) $ do writeTVar as M.empty modifyTVar' (pendingSubs s) $ M.union subs - pure subs + (subs,) <$> setServiceSubPending_ s + +setServiceSubPending_ :: SessSubs -> STM (Maybe ServiceSub) +setServiceSubPending_ s = do + serviceSub_ <- readTVar $ activeServiceSub s + forM_ serviceSub_ $ \serviceSub -> do + writeTVar (activeServiceSub s) Nothing + writeTVar (pendingServiceSub s) $ Just serviceSub + pure serviceSub_ updateClientNotices :: SMPTransportSession -> [(RecipientId, Maybe Int64)] -> 
TSessionSubs -> STM () updateClientNotices tSess noticeIds ss = do diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index 4f70efcf2f..58ffd14189 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -909,18 +909,18 @@ nsubResponse_ = \case {-# INLINE nsubResponse_ #-} -- This command is always sent in background request mode -subscribeService :: forall p. (PartyI p, ServiceParty p) => SMPClient -> SParty p -> ExceptT SMPClientError IO (Int64, IdsHash) -subscribeService c party = case smpClientService c of +subscribeService :: forall p. (PartyI p, ServiceParty p) => SMPClient -> SParty p -> Int64 -> IdsHash -> ExceptT SMPClientError IO ServiceSub +subscribeService c party n idsHash = case smpClientService c of Just THClientService {serviceId, serviceKey} -> do liftIO $ enablePings c sendSMPCommand c NRMBackground (Just (C.APrivateAuthKey C.SEd25519 serviceKey)) serviceId subCmd >>= \case - SOKS n idsHash -> pure (n, idsHash) + SOKS n' idsHash' -> pure $ ServiceSub serviceId n' idsHash' r -> throwE $ unexpectedResponse r where subCmd :: Command p subCmd = case party of - SRecipientService -> SUBS - SNotifierService -> NSUBS + SRecipientService -> SUBS n idsHash + SNotifierService -> NSUBS n idsHash Nothing -> throwE PCEServiceUnavailable smpClientService :: SMPClient -> Maybe THClientService diff --git a/src/Simplex/Messaging/Client/Agent.hs b/src/Simplex/Messaging/Client/Agent.hs index 722a86c7ef..45d747d210 100644 --- a/src/Simplex/Messaging/Client/Agent.hs +++ b/src/Simplex/Messaging/Client/Agent.hs @@ -45,7 +45,6 @@ import Crypto.Random (ChaChaDRG) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B import Data.Constraint (Dict (..)) -import Data.Int (Int64) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as L import Data.Map.Strict (Map) @@ -69,10 +68,12 @@ import Simplex.Messaging.Protocol ProtocolServer (..), QueueId, SMPServer, + ServiceSub 
(..), SParty (..), ServiceParty, serviceParty, - partyServiceRole + partyServiceRole, + queueIdsHash, ) import Simplex.Messaging.Session import Simplex.Messaging.TMap (TMap) @@ -91,14 +92,14 @@ data SMPClientAgentEvent | CADisconnected SMPServer (NonEmpty QueueId) | CASubscribed SMPServer (Maybe ServiceId) (NonEmpty QueueId) | CASubError SMPServer (NonEmpty (QueueId, SMPClientError)) - | CAServiceDisconnected SMPServer (ServiceId, Int64) - | CAServiceSubscribed SMPServer (ServiceId, Int64) Int64 - | CAServiceSubError SMPServer (ServiceId, Int64) SMPClientError + | CAServiceDisconnected SMPServer ServiceSub + | CAServiceSubscribed {subServer :: SMPServer, expected :: ServiceSub, subscribed :: ServiceSub} + | CAServiceSubError SMPServer ServiceSub SMPClientError -- CAServiceUnavailable is used when service ID in pending subscription is different from the current service in connection. -- This will require resubscribing to all queues associated with this service ID individually, creating new associations. -- It may happen if, for example, SMP server deletes service information (e.g. via downgrade and upgrade) -- and assigns different service ID to the service certificate. - | CAServiceUnavailable SMPServer (ServiceId, Int64) + | CAServiceUnavailable SMPServer ServiceSub data SMPClientAgentConfig = SMPClientAgentConfig { smpCfg :: ProtocolClientConfig SMPVersion, @@ -142,11 +143,11 @@ data SMPClientAgent p = SMPClientAgent -- Only one service subscription can exist per server with this agent. -- With correctly functioning SMP server, queue and service subscriptions can't be -- active at the same time. 
- activeServiceSubs :: TMap SMPServer (TVar (Maybe ((ServiceId, Int64), SessionId))), + activeServiceSubs :: TMap SMPServer (TVar (Maybe (ServiceSub, SessionId))), activeQueueSubs :: TMap SMPServer (TMap QueueId (SessionId, C.APrivateAuthKey)), -- Pending service subscriptions can co-exist with pending queue subscriptions -- on the same SMP server during subscriptions being transitioned from per-queue to service. - pendingServiceSubs :: TMap SMPServer (TVar (Maybe (ServiceId, Int64))), + pendingServiceSubs :: TMap SMPServer (TVar (Maybe ServiceSub)), pendingQueueSubs :: TMap SMPServer (TMap QueueId C.APrivateAuthKey), smpSubWorkers :: TMap SMPServer (SessionVar (Async ())), workerSeq :: TVar Int @@ -256,7 +257,7 @@ connectClient ca@SMPClientAgent {agentCfg, smpClients, smpSessions, msgQ, random removeClientAndSubs smp >>= serverDown logInfo . decodeUtf8 $ "Agent disconnected from " <> showServer srv - removeClientAndSubs :: SMPClient -> IO (Maybe (ServiceId, Int64), Maybe (Map QueueId C.APrivateAuthKey)) + removeClientAndSubs :: SMPClient -> IO (Maybe ServiceSub, Maybe (Map QueueId C.APrivateAuthKey)) removeClientAndSubs smp = do -- Looking up subscription vars outside of STM transaction to reduce re-evaluation. -- It is possible because these vars are never removed, they are only added. @@ -287,7 +288,7 @@ connectClient ca@SMPClientAgent {agentCfg, smpClients, smpSessions, msgQ, random then pure Nothing else Just subs <$ addSubs_ (pendingQueueSubs ca) srv subs - serverDown :: (Maybe (ServiceId, Int64), Maybe (Map QueueId C.APrivateAuthKey)) -> IO () + serverDown :: (Maybe ServiceSub, Maybe (Map QueueId C.APrivateAuthKey)) -> IO () serverDown (sSub, qSubs) = do mapM_ (notify ca . CAServiceDisconnected srv) sSub let qIds = L.nonEmpty . 
M.keys =<< qSubs @@ -317,7 +318,7 @@ reconnectClient ca@SMPClientAgent {active, agentCfg, smpSubWorkers, workerSeq} s loop ProtocolClientConfig {networkConfig = NetworkConfig {tcpConnectTimeout}} = smpCfg agentCfg noPending (sSub, qSubs) = isNothing sSub && maybe True M.null qSubs - getPending :: Monad m => (forall a. SMPServer -> TMap SMPServer a -> m (Maybe a)) -> (forall a. TVar a -> m a) -> m (Maybe (ServiceId, Int64), Maybe (Map QueueId C.APrivateAuthKey)) + getPending :: Monad m => (forall a. SMPServer -> TMap SMPServer a -> m (Maybe a)) -> (forall a. TVar a -> m a) -> m (Maybe ServiceSub, Maybe (Map QueueId C.APrivateAuthKey)) getPending lkup rd = do sSub <- lkup srv (pendingServiceSubs ca) $>>= rd qSubs <- lkup srv (pendingQueueSubs ca) >>= mapM rd @@ -329,7 +330,7 @@ reconnectClient ca@SMPClientAgent {active, agentCfg, smpSubWorkers, workerSeq} s whenM (isEmptyTMVar $ sessionVar v) retry removeSessVar v srv smpSubWorkers -reconnectSMPClient :: forall p. SMPClientAgent p -> SMPServer -> (Maybe (ServiceId, Int64), Maybe (Map QueueId C.APrivateAuthKey)) -> ExceptT SMPClientError IO () +reconnectSMPClient :: forall p. 
SMPClientAgent p -> SMPServer -> (Maybe ServiceSub, Maybe (Map QueueId C.APrivateAuthKey)) -> ExceptT SMPClientError IO () reconnectSMPClient ca@SMPClientAgent {agentCfg, agentParty} srv (sSub_, qSubs_) = withSMP ca srv $ \smp -> liftIO $ case serviceParty agentParty of Just Dict -> resubscribe smp @@ -430,7 +431,7 @@ smpSubscribeQueues ca smp srv subs = do let acc@(_, _, (qOks, sQs), notPending) = foldr (groupSub pending) (False, [], ([], []), []) (L.zip subs rs) unless (null qOks) $ addActiveSubs ca srv qOks unless (null sQs) $ forM_ smpServiceId $ \serviceId -> - updateActiveServiceSub ca srv ((serviceId, fromIntegral $ length sQs), sessId) + updateActiveServiceSub ca srv (ServiceSub serviceId (fromIntegral $ length sQs) (queueIdsHash sQs), sessId) unless (null notPending) $ removePendingSubs ca srv notPending pure acc sessId = sessionId $ thParams smp @@ -454,24 +455,24 @@ smpSubscribeQueues ca smp srv subs = do notify_ :: (SMPServer -> NonEmpty a -> SMPClientAgentEvent) -> [a] -> IO () notify_ evt qs = mapM_ (notify ca . 
evt srv) $ L.nonEmpty qs -subscribeServiceNtfs :: SMPClientAgent 'NotifierService -> SMPServer -> (ServiceId, Int64) -> IO () +subscribeServiceNtfs :: SMPClientAgent 'NotifierService -> SMPServer -> ServiceSub -> IO () subscribeServiceNtfs = subscribeService_ {-# INLINE subscribeServiceNtfs #-} -subscribeService_ :: (PartyI p, ServiceParty p) => SMPClientAgent p -> SMPServer -> (ServiceId, Int64) -> IO () +subscribeService_ :: (PartyI p, ServiceParty p) => SMPClientAgent p -> SMPServer -> ServiceSub -> IO () subscribeService_ ca srv serviceSub = do atomically $ setPendingServiceSub ca srv $ Just serviceSub runExceptT (getSMPServerClient' ca srv) >>= \case Right smp -> smpSubscribeService ca smp srv serviceSub Left _ -> pure () -- no call to reconnectClient - failing getSMPServerClient' does that -smpSubscribeService :: (PartyI p, ServiceParty p) => SMPClientAgent p -> SMPClient -> SMPServer -> (ServiceId, Int64) -> IO () -smpSubscribeService ca smp srv serviceSub@(serviceId, _) = case smpClientService smp of +smpSubscribeService :: (PartyI p, ServiceParty p) => SMPClientAgent p -> SMPClient -> SMPServer -> ServiceSub -> IO () +smpSubscribeService ca smp srv serviceSub@(ServiceSub serviceId n idsHash) = case smpClientService smp of Just service | serviceAvailable service -> subscribe _ -> notifyUnavailable where subscribe = do - r <- runExceptT $ subscribeService smp $ agentParty ca + r <- runExceptT $ subscribeService smp (agentParty ca) n idsHash ok <- atomically $ ifM @@ -479,15 +480,15 @@ smpSubscribeService ca smp srv serviceSub@(serviceId, _) = case smpClientService (True <$ processSubscription r) (pure False) if ok - then case r of -- TODO [certs rcv] compare hash - Right (n, _idsHash) -> notify ca $ CAServiceSubscribed srv serviceSub n + then case r of + Right serviceSub' -> notify ca $ CAServiceSubscribed srv serviceSub serviceSub' Left e | smpClientServiceError e -> notifyUnavailable | temporaryClientError e -> reconnectClient ca srv | otherwise -> notify 
ca $ CAServiceSubError srv serviceSub e else reconnectClient ca srv - processSubscription = mapM_ $ \(n, _idsHash) -> do -- TODO [certs rcv] validate hash here? - setActiveServiceSub ca srv $ Just ((serviceId, n), sessId) + processSubscription = mapM_ $ \serviceSub' -> do -- TODO [certs rcv] validate hash here? + setActiveServiceSub ca srv $ Just (serviceSub', sessId) setPendingServiceSub ca srv Nothing serviceAvailable THClientService {serviceRole, serviceId = serviceId'} = serviceId == serviceId' && partyServiceRole (agentParty ca) == serviceRole @@ -529,11 +530,11 @@ addSubs_ subs srv ss = Just m -> TM.union ss m _ -> TM.insertM srv (newTVar ss) subs -setActiveServiceSub :: SMPClientAgent p -> SMPServer -> Maybe ((ServiceId, Int64), SessionId) -> STM () +setActiveServiceSub :: SMPClientAgent p -> SMPServer -> Maybe (ServiceSub, SessionId) -> STM () setActiveServiceSub = setServiceSub_ activeServiceSubs {-# INLINE setActiveServiceSub #-} -setPendingServiceSub :: SMPClientAgent p -> SMPServer -> Maybe (ServiceId, Int64) -> STM () +setPendingServiceSub :: SMPClientAgent p -> SMPServer -> Maybe ServiceSub -> STM () setPendingServiceSub = setServiceSub_ pendingServiceSubs {-# INLINE setPendingServiceSub #-} @@ -548,12 +549,12 @@ setServiceSub_ subsSel ca srv sub = Just v -> writeTVar v sub Nothing -> TM.insertM srv (newTVar sub) (subsSel ca) -updateActiveServiceSub :: SMPClientAgent p -> SMPServer -> ((ServiceId, Int64), SessionId) -> STM () -updateActiveServiceSub ca srv sub@((serviceId', n'), sessId') = +updateActiveServiceSub :: SMPClientAgent p -> SMPServer -> (ServiceSub, SessionId) -> STM () +updateActiveServiceSub ca srv sub@(ServiceSub serviceId' n' idsHash', sessId') = TM.lookup srv (activeServiceSubs ca) >>= \case Just v -> modifyTVar' v $ \case - Just ((serviceId, n), sessId) | serviceId == serviceId' && sessId == sessId' -> - Just ((serviceId, n + n'), sessId) + Just (ServiceSub serviceId n idsHash, sessId) | serviceId == serviceId' && sessId == sessId' 
-> + Just (ServiceSub serviceId (n + n') (idsHash <> idsHash'), sessId) _ -> Just sub Nothing -> TM.insertM srv (newTVar $ Just sub) (activeServiceSubs ca) diff --git a/src/Simplex/Messaging/Crypto.hs b/src/Simplex/Messaging/Crypto.hs index 3d24f0bcba..c7b5396416 100644 --- a/src/Simplex/Messaging/Crypto.hs +++ b/src/Simplex/Messaging/Crypto.hs @@ -178,6 +178,7 @@ module Simplex.Messaging.Crypto sha512Hash, sha3_256, sha3_384, + md5Hash, -- * Message padding / un-padding canPad, @@ -216,7 +217,7 @@ import Crypto.Cipher.AES (AES256) import qualified Crypto.Cipher.Types as AES import qualified Crypto.Cipher.XSalsa as XSalsa import qualified Crypto.Error as CE -import Crypto.Hash (Digest, SHA3_256, SHA3_384, SHA256 (..), SHA512 (..), hash, hashDigestSize) +import Crypto.Hash (Digest, MD5, SHA3_256, SHA3_384, SHA256 (..), SHA512 (..), hash, hashDigestSize) import qualified Crypto.KDF.HKDF as H import qualified Crypto.MAC.Poly1305 as Poly1305 import qualified Crypto.PubKey.Curve25519 as X25519 @@ -1024,6 +1025,9 @@ sha3_384 :: ByteString -> ByteString sha3_384 = BA.convert . (hash :: ByteString -> Digest SHA3_384) {-# INLINE sha3_384 #-} +md5Hash :: ByteString -> ByteString +md5Hash = BA.convert . (hash :: ByteString -> Digest MD5) + -- | AEAD-GCM encryption with associated data. -- -- Used as part of double ratchet encryption. 
diff --git a/src/Simplex/Messaging/Notifications/Protocol.hs b/src/Simplex/Messaging/Notifications/Protocol.hs index 0b5889bb7f..7acb714c03 100644 --- a/src/Simplex/Messaging/Notifications/Protocol.hs +++ b/src/Simplex/Messaging/Notifications/Protocol.hs @@ -489,17 +489,9 @@ data NtfSubStatus NSErr ByteString deriving (Eq, Ord, Show) -ntfShouldSubscribe :: NtfSubStatus -> Bool -ntfShouldSubscribe = \case - NSNew -> True - NSPending -> True - NSActive -> True - NSInactive -> True - NSEnd -> False - NSDeleted -> False - NSAuth -> False - NSService -> True - NSErr _ -> False +-- if these statuses change, the queue ID hashes for services need to be updated in a new migration (see m20250830_queue_ids_hash) +subscribeNtfStatuses :: [NtfSubStatus] +subscribeNtfStatuses = [NSNew, NSPending, NSActive, NSInactive] instance Encoding NtfSubStatus where smpEncode = \case diff --git a/src/Simplex/Messaging/Notifications/Server.hs b/src/Simplex/Messaging/Notifications/Server.hs index 43d97988e2..f06e9c7b1d 100644 --- a/src/Simplex/Messaging/Notifications/Server.hs +++ b/src/Simplex/Messaging/Notifications/Server.hs @@ -62,7 +62,7 @@ import Simplex.Messaging.Notifications.Server.Store (NtfSTMStore, TokenNtfMessag import Simplex.Messaging.Notifications.Server.Store.Postgres import Simplex.Messaging.Notifications.Server.Store.Types import Simplex.Messaging.Notifications.Transport -import Simplex.Messaging.Protocol (EntityId (..), ErrorType (..), NotifierId, Party (..), ProtocolServer (host), SMPServer, ServiceId, SignedTransmission, Transmission, pattern NoEntity, pattern SMPServer, encodeTransmission, tGetServer, tPut) +import Simplex.Messaging.Protocol (EntityId (..), ErrorType (..), NotifierId, Party (..), ProtocolServer (host), SMPServer, ServiceSub (..), SignedTransmission, Transmission, pattern NoEntity, pattern SMPServer, encodeTransmission, tGetServer, tPut) import qualified Simplex.Messaging.Protocol as SMP import Simplex.Messaging.Server import 
Simplex.Messaging.Server.Control (CPClientRole (..)) @@ -257,9 +257,9 @@ ntfServer cfg@NtfServerConfig {transports, transportConfig = tCfg, startOptions} srvSubscribers <- getSMPWorkerMetrics a smpSubscribers srvClients <- getSMPWorkerMetrics a smpClients srvSubWorkers <- getSMPWorkerMetrics a smpSubWorkers - ntfActiveServiceSubs <- getSMPServiceSubMetrics a activeServiceSubs $ snd . fst + ntfActiveServiceSubs <- getSMPServiceSubMetrics a activeServiceSubs $ smpQueueCount . fst ntfActiveQueueSubs <- getSMPSubMetrics a activeQueueSubs - ntfPendingServiceSubs <- getSMPServiceSubMetrics a pendingServiceSubs snd + ntfPendingServiceSubs <- getSMPServiceSubMetrics a pendingServiceSubs smpQueueCount ntfPendingQueueSubs <- getSMPSubMetrics a pendingQueueSubs smpSessionCount <- M.size <$> readTVarIO smpSessions apnsPushQLength <- atomically $ lengthTBQueue pushQ @@ -452,13 +452,13 @@ resubscribe NtfSubscriber {smpAgent = ca} = do counts <- mapConcurrently (subscribeSrvSubs ca st batchSize) srvs logNote $ "Completed all SMP resubscriptions for " <> tshow (length srvs) <> " servers (" <> tshow (sum counts) <> " subscriptions)" -subscribeSrvSubs :: SMPClientAgent 'NotifierService -> NtfPostgresStore -> Int -> (SMPServer, Int64, Maybe (ServiceId, Int64)) -> IO Int +subscribeSrvSubs :: SMPClientAgent 'NotifierService -> NtfPostgresStore -> Int -> (SMPServer, Int64, Maybe ServiceSub) -> IO Int subscribeSrvSubs ca st batchSize (srv, srvId, service_) = do let srvStr = safeDecodeUtf8 (strEncode $ L.head $ host srv) logNote $ "Starting SMP resubscriptions for " <> srvStr - forM_ service_ $ \(serviceId, n) -> do - logNote $ "Subscribing service to " <> srvStr <> " with " <> tshow n <> " associated queues" - subscribeServiceNtfs ca srv (serviceId, n) + forM_ service_ $ \serviceSub -> do + logNote $ "Subscribing service to " <> srvStr <> " with " <> tshow (smpQueueCount serviceSub) <> " associated queues" + subscribeServiceNtfs ca srv serviceSub n <- subscribeLoop 0 Nothing logNote $ 
"Completed SMP resubscriptions for " <> srvStr <> " (" <> tshow n <> " subscriptions)" pure n @@ -576,7 +576,7 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = -- TODO [certs] resubscribe queues with statuses NSErr and NSService CAServiceDisconnected srv serviceSub -> logNote $ "SMP server service disconnected " <> showService srv serviceSub - CAServiceSubscribed srv serviceSub@(_, expected) n + CAServiceSubscribed srv serviceSub@(ServiceSub _ expected _) (ServiceSub _ n _) -- TODO [certs rcv] compare hash | expected == n -> logNote msg | otherwise -> logWarn $ msg <> ", confirmed subs: " <> tshow n where @@ -593,7 +593,8 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = void $ subscribeSrvSubs ca st batchSize (srv, srvId, Nothing) Left e -> logError $ "SMP server update and resubscription error " <> tshow e where - showService srv (serviceId, n) = showServer' srv <> ", service ID " <> decodeLatin1 (strEncode serviceId) <> ", " <> tshow n <> " subs" + -- TODO [certs rcv] compare hash + showService srv (ServiceSub serviceId n _idsHash) = showServer' srv <> ", service ID " <> decodeLatin1 (strEncode serviceId) <> ", " <> tshow n <> " subs" logSubErrors :: SMPServer -> NonEmpty (SMP.NotifierId, NtfSubStatus) -> Int -> IO () logSubErrors srv subs updated = forM_ (L.group $ L.sort $ L.map snd subs) $ \ss -> do diff --git a/src/Simplex/Messaging/Notifications/Server/Stats.hs b/src/Simplex/Messaging/Notifications/Server/Stats.hs index a20e41c342..7125ce2906 100644 --- a/src/Simplex/Messaging/Notifications/Server/Stats.hs +++ b/src/Simplex/Messaging/Notifications/Server/Stats.hs @@ -17,6 +17,7 @@ import Simplex.Messaging.Server.Stats import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM +-- TODO [certs rcv] track service subscriptions and count/hash diffs for own and other servers + prometheus data NtfServerStats = NtfServerStats { fromTime :: IORef UTCTime, tknCreated :: IORef Int, 
diff --git a/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs b/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs index 6a53ff4a22..8c0da7c07e 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs @@ -6,13 +6,15 @@ module Simplex.Messaging.Notifications.Server.Store.Migrations where import Data.List (sortOn) import Data.Text (Text) +import Simplex.Messaging.Agent.Store.Postgres.Migrations.Util import Simplex.Messaging.Agent.Store.Shared import Text.RawString.QQ (r) ntfServerSchemaMigrations :: [(String, Text, Maybe Text)] ntfServerSchemaMigrations = [ ("20250417_initial", m20250417_initial, Nothing), - ("20250517_service_cert", m20250517_service_cert, Just down_m20250517_service_cert) + ("20250517_service_cert", m20250517_service_cert, Just down_m20250517_service_cert), + ("20250830_queue_ids_hash", m20250830_queue_ids_hash, Just down_m20250830_queue_ids_hash) ] -- | The list of migrations in ascending order by date @@ -101,3 +103,125 @@ ALTER TABLE smp_servers DROP COLUMN ntf_service_id; ALTER TABLE subscriptions DROP COLUMN ntf_service_assoc; |] + +m20250830_queue_ids_hash :: Text +m20250830_queue_ids_hash = + createXorHashFuncs + <> [r| +ALTER TABLE smp_servers + ADD COLUMN smp_notifier_count BIGINT NOT NULL DEFAULT 0, + ADD COLUMN smp_notifier_ids_hash BYTEA NOT NULL DEFAULT '\x00000000000000000000000000000000'; + +CREATE FUNCTION should_subscribe_status(p_status TEXT) RETURNS BOOLEAN +LANGUAGE plpgsql IMMUTABLE STRICT +AS $$ +BEGIN + RETURN p_status IN ('NEW', 'PENDING', 'ACTIVE', 'INACTIVE'); +END; +$$; + +CREATE FUNCTION update_all_aggregates() RETURNS VOID +LANGUAGE plpgsql +AS $$ +BEGIN + WITH acc AS ( + SELECT + s.smp_server_id, + count(smp_notifier_id) as notifier_count, + xor_aggregate(public.digest(s.smp_notifier_id, 'md5')) AS notifier_hash + FROM subscriptions s + WHERE s.ntf_service_assoc = true AND should_subscribe_status(s.status) 
+ GROUP BY s.smp_server_id + ) + UPDATE smp_servers srv + SET smp_notifier_count = COALESCE(acc.notifier_count, 0), + smp_notifier_ids_hash = COALESCE(acc.notifier_hash, '\x00000000000000000000000000000000') + FROM acc + WHERE srv.smp_server_id = acc.smp_server_id; +END; +$$; + +SELECT update_all_aggregates(); + +CREATE FUNCTION update_aggregates(p_server_id BIGINT, p_change BIGINT, p_notifier_id BYTEA) RETURNS VOID +LANGUAGE plpgsql +AS $$ +BEGIN + UPDATE smp_servers + SET smp_notifier_count = smp_notifier_count + p_change, + smp_notifier_ids_hash = xor_combine(smp_notifier_ids_hash, public.digest(p_notifier_id, 'md5')) + WHERE smp_server_id = p_server_id; +END; +$$; + +CREATE FUNCTION on_subscription_insert() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF NEW.ntf_service_assoc = true AND should_subscribe_status(NEW.status) THEN + PERFORM update_aggregates(NEW.smp_server_id, 1, NEW.smp_notifier_id); + END IF; + RETURN NEW; +END; +$$; + +CREATE FUNCTION on_subscription_delete() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF OLD.ntf_service_assoc = true AND should_subscribe_status(OLD.status) THEN + PERFORM update_aggregates(OLD.smp_server_id, -1, OLD.smp_notifier_id); + END IF; + RETURN OLD; +END; +$$; + +CREATE FUNCTION on_subscription_update() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF OLD.ntf_service_assoc = true AND should_subscribe_status(OLD.status) THEN + IF NOT (NEW.ntf_service_assoc = true AND should_subscribe_status(NEW.status)) THEN + PERFORM update_aggregates(OLD.smp_server_id, -1, OLD.smp_notifier_id); + END IF; + ELSIF NEW.ntf_service_assoc = true AND should_subscribe_status(NEW.status) THEN + PERFORM update_aggregates(NEW.smp_server_id, 1, NEW.smp_notifier_id); + END IF; + RETURN NEW; +END; +$$; + +CREATE TRIGGER tr_subscriptions_insert +AFTER INSERT ON subscriptions +FOR EACH ROW EXECUTE PROCEDURE on_subscription_insert(); + +CREATE TRIGGER tr_subscriptions_delete +AFTER DELETE ON subscriptions +FOR EACH ROW EXECUTE PROCEDURE 
on_subscription_delete(); + +CREATE TRIGGER tr_subscriptions_update +AFTER UPDATE ON subscriptions +FOR EACH ROW EXECUTE PROCEDURE on_subscription_update(); + |] + +down_m20250830_queue_ids_hash :: Text +down_m20250830_queue_ids_hash = + [r| +DROP TRIGGER tr_subscriptions_insert ON subscriptions; +DROP TRIGGER tr_subscriptions_delete ON subscriptions; +DROP TRIGGER tr_subscriptions_update ON subscriptions; + +DROP FUNCTION on_subscription_insert; +DROP FUNCTION on_subscription_delete; +DROP FUNCTION on_subscription_update; + +DROP FUNCTION update_aggregates; +DROP FUNCTION update_all_aggregates; + +DROP FUNCTION should_subscribe_status; + +ALTER TABLE smp_servers + DROP COLUMN smp_notifier_count, + DROP COLUMN smp_notifier_ids_hash; + |] + <> dropXorHashFuncs diff --git a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs index 80d946c8b3..60e81a68b7 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs @@ -64,7 +64,7 @@ import Simplex.Messaging.Notifications.Server.Store (NtfSTMStore (..), NtfSubDat import Simplex.Messaging.Notifications.Server.Store.Migrations import Simplex.Messaging.Notifications.Server.Store.Types import Simplex.Messaging.Notifications.Server.StoreLog -import Simplex.Messaging.Protocol (EntityId (..), EncNMsgMeta, ErrorType (..), NotifierId, NtfPrivateAuthKey, NtfPublicAuthKey, SMPServer, ServiceId, pattern SMPServer) +import Simplex.Messaging.Protocol (EntityId (..), EncNMsgMeta, ErrorType (..), IdsHash (..), NotifierId, NtfPrivateAuthKey, NtfPublicAuthKey, SMPServer, ServiceId, ServiceSub (..), pattern SMPServer) import Simplex.Messaging.Server.QueueStore.Postgres (handleDuplicate, withLog_) import Simplex.Messaging.Server.QueueStore.Postgres.Config (PostgresStoreCfg (..)) import Simplex.Messaging.Server.StoreLog (openWriteStoreLog) @@ -239,7 +239,7 @@ updateTknCronInterval st tknId 
cronInt = -- Reads servers that have subscriptions that need subscribing. -- It is executed on server start, and it is supposed to crash on database error -getUsedSMPServers :: NtfPostgresStore -> IO [(SMPServer, Int64, Maybe (ServiceId, Int64))] +getUsedSMPServers :: NtfPostgresStore -> IO [(SMPServer, Int64, Maybe ServiceSub)] getUsedSMPServers st = withTransaction (dbStore st) $ \db -> map rowToSrvSubs <$> @@ -247,25 +247,17 @@ getUsedSMPServers st = db [sql| SELECT - p.smp_host, p.smp_port, p.smp_keyhash, p.smp_server_id, p.ntf_service_id, - SUM(CASE WHEN s.ntf_service_assoc THEN s.subs_count ELSE 0 END) :: BIGINT as service_subs_count - FROM smp_servers p - JOIN ( - SELECT - smp_server_id, - ntf_service_assoc, - COUNT(1) as subs_count - FROM subscriptions - WHERE status IN ? - GROUP BY smp_server_id, ntf_service_assoc - ) s ON s.smp_server_id = p.smp_server_id - GROUP BY p.smp_host, p.smp_port, p.smp_keyhash, p.smp_server_id, p.ntf_service_id + smp_host, smp_port, smp_keyhash, smp_server_id, + ntf_service_id, smp_notifier_count, smp_notifier_ids_hash + FROM smp_servers + WHERE EXISTS (SELECT 1 FROM subscriptions WHERE status IN ?) |] - (Only (In [NSNew, NSPending, NSActive, NSInactive])) + (Only (In subscribeNtfStatuses)) where - rowToSrvSubs :: SMPServerRow :. (Int64, Maybe ServiceId, Int64) -> (SMPServer, Int64, Maybe (ServiceId, Int64)) - rowToSrvSubs ((host, port, kh) :. (srvId, serviceId_, subsCount)) = - (SMPServer host port kh, srvId, (,subsCount) <$> serviceId_) + rowToSrvSubs :: SMPServerRow :. (Int64, Maybe ServiceId, Int64, IdsHash) -> (SMPServer, Int64, Maybe ServiceSub) + rowToSrvSubs ((host, port, kh) :. 
(srvId, serviceId_, n, idsHash)) = + let service_ = (\serviceId -> ServiceSub serviceId n idsHash) <$> serviceId_ + in (SMPServer host port kh, srvId, service_) getServerNtfSubscriptions :: NtfPostgresStore -> Int64 -> Maybe NtfSubscriptionId -> Int -> IO (Either ErrorType [ServerNtfSub]) getServerNtfSubscriptions st srvId afterSubId_ count = @@ -273,9 +265,9 @@ getServerNtfSubscriptions st srvId afterSubId_ count = subs <- map toServerNtfSub <$> case afterSubId_ of Nothing -> - DB.query db (query <> orderLimit) (srvId, statusIn, count) + DB.query db (query <> orderLimit) (srvId, In subscribeNtfStatuses, count) Just afterSubId -> - DB.query db (query <> " AND subscription_id > ?" <> orderLimit) (srvId, statusIn, afterSubId, count) + DB.query db (query <> " AND subscription_id > ?" <> orderLimit) (srvId, In subscribeNtfStatuses, afterSubId, count) void $ DB.executeMany db @@ -296,7 +288,6 @@ getServerNtfSubscriptions st srvId afterSubId_ count = WHERE smp_server_id = ? AND NOT ntf_service_assoc AND status IN ? |] orderLimit = " ORDER BY subscription_id LIMIT ?" - statusIn = In [NSNew, NSPending, NSActive, NSInactive] toServerNtfSub (ntfSubId, notifierId, notifierKey) = (ntfSubId, (notifierId, notifierKey)) -- Returns token and subscription. 
diff --git a/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql b/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql index 3b155fa1a9..b739956840 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql +++ b/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql @@ -15,6 +15,123 @@ SET row_security = off; CREATE SCHEMA ntf_server; + +CREATE FUNCTION ntf_server.on_subscription_delete() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF OLD.ntf_service_assoc = true AND should_subscribe_status(OLD.status) THEN + PERFORM update_aggregates(OLD.smp_server_id, -1, OLD.smp_notifier_id); + END IF; + RETURN OLD; +END; +$$; + + + +CREATE FUNCTION ntf_server.on_subscription_insert() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF NEW.ntf_service_assoc = true AND should_subscribe_status(NEW.status) THEN + PERFORM update_aggregates(NEW.smp_server_id, 1, NEW.smp_notifier_id); + END IF; + RETURN NEW; +END; +$$; + + + +CREATE FUNCTION ntf_server.on_subscription_update() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF OLD.ntf_service_assoc = true AND should_subscribe_status(OLD.status) THEN + IF NOT (NEW.ntf_service_assoc = true AND should_subscribe_status(NEW.status)) THEN + PERFORM update_aggregates(OLD.smp_server_id, -1, OLD.smp_notifier_id); + END IF; + ELSIF NEW.ntf_service_assoc = true AND should_subscribe_status(NEW.status) THEN + PERFORM update_aggregates(NEW.smp_server_id, 1, NEW.smp_notifier_id); + END IF; + RETURN NEW; +END; +$$; + + + +CREATE FUNCTION ntf_server.should_subscribe_status(p_status text) RETURNS boolean + LANGUAGE plpgsql IMMUTABLE STRICT + AS $$ +BEGIN + RETURN p_status IN ('NEW', 'PENDING', 'ACTIVE', 'INACTIVE'); +END; +$$; + + + +CREATE FUNCTION ntf_server.update_aggregates(p_server_id bigint, p_change bigint, p_notifier_id bytea) RETURNS void + LANGUAGE plpgsql + AS $$ +BEGIN + UPDATE smp_servers + SET smp_notifier_count = smp_notifier_count + p_change, 
+ smp_notifier_ids_hash = xor_combine(smp_notifier_ids_hash, public.digest(p_notifier_id, 'md5')) + WHERE smp_server_id = p_server_id; +END; +$$; + + + +CREATE FUNCTION ntf_server.update_all_aggregates() RETURNS void + LANGUAGE plpgsql + AS $$ +BEGIN + WITH acc AS ( + SELECT + s.smp_server_id, + count(smp_notifier_id) as notifier_count, + xor_aggregate(public.digest(s.smp_notifier_id, 'md5')) AS notifier_hash + FROM subscriptions s + WHERE s.ntf_service_assoc = true AND should_subscribe_status(s.status) + GROUP BY s.smp_server_id + ) + UPDATE smp_servers srv + SET smp_notifier_count = COALESCE(acc.notifier_count, 0), + smp_notifier_ids_hash = COALESCE(acc.notifier_hash, '\x00000000000000000000000000000000') + FROM acc + WHERE srv.smp_server_id = acc.smp_server_id; +END; +$$; + + + +CREATE FUNCTION ntf_server.xor_combine(state bytea, value bytea) RETURNS bytea + LANGUAGE plpgsql IMMUTABLE STRICT + AS $$ +DECLARE + result BYTEA := state; + i INTEGER; + len INTEGER := octet_length(value); +BEGIN + IF octet_length(state) != len THEN + RAISE EXCEPTION 'Inputs must be equal length (% != %)', octet_length(state), len; + END IF; + FOR i IN 0..len-1 LOOP + result := set_byte(result, i, get_byte(state, i) # get_byte(value, i)); + END LOOP; + RETURN result; +END; +$$; + + + +CREATE AGGREGATE ntf_server.xor_aggregate(bytea) ( + SFUNC = ntf_server.xor_combine, + STYPE = bytea, + INITCOND = '\x00000000000000000000000000000000' +); + + SET default_table_access_method = heap; @@ -53,7 +170,9 @@ CREATE TABLE ntf_server.smp_servers ( smp_host text NOT NULL, smp_port text NOT NULL, smp_keyhash bytea NOT NULL, - ntf_service_id bytea + ntf_service_id bytea, + smp_notifier_count bigint DEFAULT 0 NOT NULL, + smp_notifier_ids_hash bytea DEFAULT '\x00000000000000000000000000000000'::bytea NOT NULL ); @@ -158,6 +277,18 @@ CREATE INDEX idx_tokens_status_cron_interval_sent_at ON ntf_server.tokens USING +CREATE TRIGGER tr_subscriptions_delete AFTER DELETE ON ntf_server.subscriptions FOR EACH 
ROW EXECUTE FUNCTION ntf_server.on_subscription_delete(); + + + +CREATE TRIGGER tr_subscriptions_insert AFTER INSERT ON ntf_server.subscriptions FOR EACH ROW EXECUTE FUNCTION ntf_server.on_subscription_insert(); + + + +CREATE TRIGGER tr_subscriptions_update AFTER UPDATE ON ntf_server.subscriptions FOR EACH ROW EXECUTE FUNCTION ntf_server.on_subscription_update(); + + + ALTER TABLE ONLY ntf_server.last_notifications ADD CONSTRAINT last_notifications_subscription_id_fkey FOREIGN KEY (subscription_id) REFERENCES ntf_server.subscriptions(subscription_id) ON UPDATE RESTRICT ON DELETE CASCADE; diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index 3be4515cce..c00899e1c1 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -140,7 +140,10 @@ module Simplex.Messaging.Protocol RcvMessage (..), MsgId, MsgBody, - IdsHash, + IdsHash (..), + ServiceSub (..), + queueIdsHash, + queueIdHash, MaxMessageLen, MaxRcvMessageLen, EncRcvMsgBody (..), @@ -223,6 +226,8 @@ import qualified Data.Aeson.TH as J import Data.Attoparsec.ByteString.Char8 (Parser, ()) import qualified Data.Attoparsec.ByteString.Char8 as A import Data.Bifunctor (bimap, first) +import Data.Bits (xor) +import qualified Data.ByteString as BS import qualified Data.ByteString.Base64 as B64 import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B @@ -232,6 +237,7 @@ import Data.Constraint (Dict (..)) import Data.Functor (($>)) import Data.Int (Int64) import Data.Kind +import Data.List (foldl') import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as L import Data.Maybe (isJust, isNothing) @@ -241,7 +247,7 @@ import qualified Data.Text as T import Data.Text.Encoding (decodeLatin1, encodeUtf8) import Data.Time.Clock.System (SystemTime (..), systemToUTCTime) import Data.Type.Equality -import Data.Word (Word16) +import Data.Word (Word8, Word16) import GHC.TypeLits (ErrorMessage (..), TypeError, type (+)) 
import qualified GHC.TypeLits as TE import qualified GHC.TypeLits as Type @@ -548,7 +554,8 @@ data Command (p :: Party) where NEW :: NewQueueReq -> Command Creator SUB :: Command Recipient -- | subscribe all associated queues. Service ID must be used as entity ID, and service session key must sign the command. - SUBS :: Command RecipientService + -- Parameters are expected queue count and hash of all subscribed queues, it allows to monitor "state drift" on the server + SUBS :: Int64 -> IdsHash -> Command RecipientService KEY :: SndPublicAuthKey -> Command Recipient RKEY :: NonEmpty RcvPublicAuthKey -> Command Recipient LSET :: LinkId -> QueueLinkData -> Command Recipient @@ -572,7 +579,7 @@ data Command (p :: Party) where -- SMP notification subscriber commands NSUB :: Command Notifier -- | subscribe all associated queues. Service ID must be used as entity ID, and service session key must sign the command. - NSUBS :: Command NotifierService + NSUBS :: Int64 -> IdsHash -> Command NotifierService PRXY :: SMPServer -> Maybe BasicAuth -> Command ProxiedClient -- request a relay server connection by URI -- Transmission to proxy: -- - entity ID: ID of the session with relay returned in PKEY (response to PRXY) @@ -698,7 +705,7 @@ data BrokerMsg where LNK :: SenderId -> QueueLinkData -> BrokerMsg -- | Service subscription success - confirms when queue was associated with the service SOK :: Maybe ServiceId -> BrokerMsg - -- | The number of queues subscribed with SUBS command + -- | The number of queues and XOR-hash of their IDs subscribed with SUBS command SOKS :: Int64 -> IdsHash -> BrokerMsg -- MSG v1/2 has to be supported for encoding/decoding -- v1: MSG :: MsgId -> SystemTime -> MsgBody -> BrokerMsg @@ -1460,7 +1467,42 @@ type MsgId = ByteString -- | SMP message body. 
type MsgBody = ByteString -type IdsHash = ByteString +data ServiceSub = ServiceSub + { serviceId :: ServiceId, + smpQueueCount :: Int64, + smpQueueIdsHash :: IdsHash + } + +newtype IdsHash = IdsHash {unIdsHash :: BS.ByteString} + deriving (Eq, Show) + deriving newtype (Encoding, FromField) + +instance ToField IdsHash where + toField (IdsHash s) = toField (Binary s) + {-# INLINE toField #-} + +instance Semigroup IdsHash where -- combines two hashes with byte-wise XOR + (IdsHash s1) <> (IdsHash s2) = IdsHash $! BS.pack $ BS.zipWith xor s1 s2 + +instance Monoid IdsHash where + mempty = IdsHash $ BS.replicate 16 0 -- 16 zero bytes: the XOR identity for 16-byte MD5 digests + mconcat ss = + let !s' = BS.pack $ foldl' (\ !r (IdsHash s) -> let r' = zipWith xor' r (BS.unpack s) in foldr seq () r' `seq` r') (replicate 16 0) ss -- to prevent packing/unpacking in <> on each step with default mappend; every byte is forced on each step, because foldl' alone only evaluates the first list cell and would leave a thunk chain per byte + in IdsHash s' + +xor' :: Word8 -> Word8 -> Word8 +xor' x y = let !r = xor x y in r + +noIdsHash :: IdsHash -- zero-length hash (not the 16-byte mempty), a placeholder for "no hash provided" +noIdsHash = IdsHash B.empty +{-# INLINE noIdsHash #-} + +queueIdsHash :: [QueueId] -> IdsHash -- XOR of the hashes of all given queue IDs; 16 zero bytes for an empty list +queueIdsHash = mconcat . map queueIdHash + +queueIdHash :: QueueId -> IdsHash -- MD5 digest (C.md5Hash) of the unwrapped queue ID +queueIdHash = IdsHash . C.md5Hash . unEntityId +{-# INLINE queueIdHash #-} data ProtocolErrorType = PECmdSyntax | PECmdUnknown | PESession | PEBlock @@ -1695,7 +1737,9 @@ instance PartyI p => ProtocolEncoding SMPVersion ErrorType (Command p) where new = e (NEW_, ' ', rKey, dhKey) auth = maybe "" (e .
('A',)) auth_ SUB -> e SUB_ - SUBS -> e SUBS_ + SUBS n idsHash + | v >= rcvServiceSMPVersion -> e (SUBS_, ' ', n, idsHash) + | otherwise -> e SUBS_ KEY k -> e (KEY_, ' ', k) RKEY ks -> e (RKEY_, ' ', ks) LSET lnkId d -> e (LSET_, ' ', lnkId, d) @@ -1711,7 +1755,9 @@ instance PartyI p => ProtocolEncoding SMPVersion ErrorType (Command p) where SEND flags msg -> e (SEND_, ' ', flags, ' ', Tail msg) PING -> e PING_ NSUB -> e NSUB_ - NSUBS -> e NSUBS_ + NSUBS n idsHash + | v >= rcvServiceSMPVersion -> e (NSUBS_, ' ', n, idsHash) + | otherwise -> e NSUBS_ LKEY k -> e (LKEY_, ' ', k) LGET -> e LGET_ PRXY host auth_ -> e (PRXY_, ' ', host, auth_) @@ -1802,7 +1848,9 @@ instance ProtocolEncoding SMPVersion ErrorType Cmd where OFF_ -> pure OFF DEL_ -> pure DEL QUE_ -> pure QUE - CT SRecipientService SUBS_ -> pure $ Cmd SRecipientService SUBS + CT SRecipientService SUBS_ + | v >= rcvServiceSMPVersion -> Cmd SRecipientService <$> (SUBS <$> _smpP <*> smpP) + | otherwise -> pure $ Cmd SRecipientService $ SUBS (-1) noIdsHash CT SSender tag -> Cmd SSender <$> case tag of SKEY_ -> SKEY <$> _smpP @@ -1819,7 +1867,9 @@ instance ProtocolEncoding SMPVersion ErrorType Cmd where PFWD_ -> PFWD <$> _smpP <*> smpP <*> (EncTransmission . 
unTail <$> smpP) PRXY_ -> PRXY <$> _smpP <*> smpP CT SNotifier NSUB_ -> pure $ Cmd SNotifier NSUB - CT SNotifierService NSUBS_ -> pure $ Cmd SNotifierService NSUBS + CT SNotifierService NSUBS_ + | v >= rcvServiceSMPVersion -> Cmd SNotifierService <$> (NSUBS <$> _smpP <*> smpP) + | otherwise -> pure $ Cmd SNotifierService $ NSUBS (-1) noIdsHash fromProtocolError = fromProtocolError @SMPVersion @ErrorType @BrokerMsg {-# INLINE fromProtocolError #-} @@ -1901,7 +1951,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where SOK_ -> SOK <$> _smpP SOKS_ | v >= rcvServiceSMPVersion -> SOKS <$> _smpP <*> smpP - | otherwise -> SOKS <$> _smpP <*> pure B.empty + | otherwise -> SOKS <$> _smpP <*> pure noIdsHash NID_ -> NID <$> _smpP <*> smpP NMSG_ -> NMSG <$> _smpP <*> smpP PKEY_ -> PKEY <$> _smpP <*> smpP <*> smpP diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 1e5e94fd6a..a05743a06b 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -6,6 +6,7 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE OverloadedLists #-} @@ -1247,7 +1248,7 @@ verifyQueueTransmission service thAuth (tAuth, authorized, (corrId, entId, comma vc SCreator (NEW NewQueueReq {rcvAuthKey = k}) = verifiedWith k vc SRecipient SUB = verifyQueue $ \q -> verifiedWithKeys $ recipientKeys (snd q) vc SRecipient _ = verifyQueue $ \q -> verifiedWithKeys $ recipientKeys (snd q) - vc SRecipientService SUBS = verifyServiceCmd + vc SRecipientService SUBS {} = verifyServiceCmd vc SSender (SKEY k) = verifySecure k -- SEND will be accepted without authorization before the queue is secured with KEY, SKEY or LSKEY command vc SSender SEND {} = verifyQueue $ \q -> if maybe (isNothing tAuth) verify (senderKey $ snd q) then VRVerified q_ else VRFailed AUTH @@ -1255,7 +1256,7 @@ verifyQueueTransmission service 
thAuth (tAuth, authorized, (corrId, entId, comma vc SSenderLink (LKEY k) = verifySecure k vc SSenderLink LGET = verifyQueue $ \q -> if isContactQueue (snd q) then VRVerified q_ else VRFailed AUTH vc SNotifier NSUB = verifyQueue $ \q -> maybe dummyVerify (\n -> verifiedWith $ notifierKey n) (notifier $ snd q) - vc SNotifierService NSUBS = verifyServiceCmd + vc SNotifierService NSUBS {} = verifyServiceCmd vc SProxiedClient _ = VRVerified Nothing vc SProxyService (RFWD _) = VRVerified Nothing checkRole = case (service, partyClientRole p) of @@ -1465,8 +1466,8 @@ client Cmd SNotifier NSUB -> response . (corrId,entId,) <$> case q_ of Just (q, QueueRec {notifier = Just ntfCreds}) -> subscribeNotifications q ntfCreds _ -> pure $ ERR INTERNAL - Cmd SNotifierService NSUBS -> response . (corrId,entId,) <$> case clntServiceId of - Just serviceId -> subscribeServiceNotifications serviceId + Cmd SNotifierService (NSUBS n idsHash) -> response . (corrId,entId,) <$> case clntServiceId of + Just serviceId -> subscribeServiceNotifications serviceId (n, idsHash) Nothing -> pure $ ERR INTERNAL Cmd SCreator (NEW nqr@NewQueueReq {auth_}) -> response <$> ifM allowNew (createQueue nqr) (pure (corrId, entId, ERR AUTH)) @@ -1495,8 +1496,8 @@ client OFF -> response <$> maybe (pure $ err INTERNAL) suspendQueue_ q_ DEL -> response <$> maybe (pure $ err INTERNAL) delQueueAndMsgs q_ QUE -> withQueue $ \q qr -> (corrId,entId,) <$> getQueueInfo q qr - Cmd SRecipientService SUBS -> response . (corrId,entId,) <$> case clntServiceId of - Just serviceId -> subscribeServiceMessages serviceId + Cmd SRecipientService (SUBS n idsHash)-> response . 
(corrId,entId,) <$> case clntServiceId of + Just serviceId -> subscribeServiceMessages serviceId (n, idsHash) Nothing -> pure $ ERR INTERNAL -- it's "internal" because it should never get to this branch where createQueue :: NewQueueReq -> M s (Transmission BrokerMsg) @@ -1795,9 +1796,9 @@ client TM.insert entId sub $ clientSubs clnt pure (False, Just sub) - subscribeServiceMessages :: ServiceId -> M s BrokerMsg - subscribeServiceMessages serviceId = - sharedSubscribeService SRecipientService serviceId subscribers serviceSubscribed serviceSubsCount >>= \case + subscribeServiceMessages :: ServiceId -> (Int64, IdsHash) -> M s BrokerMsg + subscribeServiceMessages serviceId expected = + sharedSubscribeService SRecipientService serviceId expected subscribers serviceSubscribed serviceSubsCount rcvServices >>= \case Left e -> pure $ ERR e Right (hasSub, (count, idsHash)) -> do unless hasSub $ forkClient clnt "deliverServiceMessages" $ liftIO $ deliverServiceMessages count @@ -1806,7 +1807,7 @@ client deliverServiceMessages expectedCnt = do (qCnt, _msgCnt, _dupCnt, _errCnt) <- foldRcvServiceMessages ms serviceId deliverQueueMsg (0, 0, 0, 0) atomically $ writeTBQueue msgQ [(NoCorrId, NoEntity, SALL)] - -- TODO [cert rcv] compare with expected + -- TODO [certs rcv] compare with expected logNote $ "Service subscriptions for " <> tshow serviceId <> " (" <> tshow qCnt <> " queues)" deliverQueueMsg :: (Int, Int, Int, Int) -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO (Int, Int, Int, Int) deliverQueueMsg (!qCnt, !msgCnt, !dupCnt, !errCnt) rId = \case @@ -1831,25 +1832,33 @@ client TM.insert rId sub $ subscriptions clnt pure $ Just sub - subscribeServiceNotifications :: ServiceId -> M s BrokerMsg - subscribeServiceNotifications serviceId = - either ERR (uncurry SOKS . 
snd) <$> sharedSubscribeService SNotifierService serviceId ntfSubscribers ntfServiceSubscribed ntfServiceSubsCount + subscribeServiceNotifications :: ServiceId -> (Int64, IdsHash) -> M s BrokerMsg + subscribeServiceNotifications serviceId expected = + either ERR (uncurry SOKS . snd) <$> sharedSubscribeService SNotifierService serviceId expected ntfSubscribers ntfServiceSubscribed ntfServiceSubsCount ntfServices - sharedSubscribeService :: (PartyI p, ServiceParty p) => SParty p -> ServiceId -> ServerSubscribers s -> (Client s -> TVar Bool) -> (Client s -> TVar Int64) -> M s (Either ErrorType (Bool, (Int64, IdsHash))) - sharedSubscribeService party serviceId srvSubscribers clientServiceSubscribed clientServiceSubs = do + sharedSubscribeService :: (PartyI p, ServiceParty p) => SParty p -> ServiceId -> (Int64, IdsHash) -> ServerSubscribers s -> (Client s -> TVar Bool) -> (Client s -> TVar Int64) -> (ServerStats -> ServiceStats) -> M s (Either ErrorType (Bool, (Int64, IdsHash))) + sharedSubscribeService party serviceId (count, idsHash) srvSubscribers clientServiceSubscribed clientServiceSubs servicesSel = do subscribed <- readTVarIO $ clientServiceSubscribed clnt + stats <- asks serverStats liftIO $ runExceptT $ (subscribed,) <$> if subscribed - then (,B.empty) <$> readTVarIO (clientServiceSubs clnt) -- TODO [certs rcv] get IDs hash + then (,mempty) <$> readTVarIO (clientServiceSubs clnt) -- TODO [certs rcv] get IDs hash else do - count' <- ExceptT $ getServiceQueueCount @(StoreQueue s) (queueStore ms) party serviceId + (count', idsHash') <- ExceptT $ getServiceQueueCountHash @(StoreQueue s) (queueStore ms) party serviceId incCount <- atomically $ do writeTVar (clientServiceSubscribed clnt) True - count <- swapTVar (clientServiceSubs clnt) count' - pure $ count' - count + currCount <- swapTVar (clientServiceSubs clnt) count' -- TODO [certs rcv] maintain IDs hash here? 
+ pure $ count' - currCount + let incSrvStat sel n = liftIO $ atomicModifyIORef'_ (sel $ servicesSel stats) (+ n) + diff = fromIntegral $ count' - count + unless (count < 0) $ if -- a negative expected count means the peer sent no expectations (decoders substitute -1 below rcvServiceSMPVersion); that is not drift, so such subscriptions are excluded from the counters below + | diff == 0 && idsHash == idsHash' -> incSrvStat srvSubOk 1 + | diff > 0 -> incSrvStat srvSubMore 1 >> incSrvStat srvSubMoreTotal diff + | diff < 0 -> incSrvStat srvSubFewer 1 >> incSrvStat srvSubFewerTotal (- diff) + | otherwise -> incSrvStat srvSubDiff 1 atomically $ writeTQueue (subQ srvSubscribers) (CSService serviceId incCount, clientId) - pure (count', B.empty) -- TODO [certs rcv] get IDs hash + pure (count', idsHash') acknowledgeMsg :: MsgId -> StoreQueue s -> QueueRec -> M s (Transmission BrokerMsg) acknowledgeMsg msgId q qr = diff --git a/src/Simplex/Messaging/Server/MsgStore/Journal.hs b/src/Simplex/Messaging/Server/MsgStore/Journal.hs index d9a1ff6ecd..89e9f03831 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Journal.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Journal.hs @@ -355,8 +355,8 @@ instance QueueStoreClass (JournalQueue s) (QStore s) where {-# INLINE setQueueService #-} getQueueNtfServices = withQS (getQueueNtfServices @(JournalQueue s)) {-# INLINE getQueueNtfServices #-} - getServiceQueueCount = withQS (getServiceQueueCount @(JournalQueue s)) - {-# INLINE getServiceQueueCount #-} + getServiceQueueCountHash = withQS (getServiceQueueCountHash @(JournalQueue s)) + {-# INLINE getServiceQueueCountHash #-} makeQueue_ :: JournalMsgStore s -> RecipientId -> QueueRec -> Lock -> IO (JournalQueue s) makeQueue_ JournalMsgStore {sharedLock} rId qr queueLock = do diff --git a/src/Simplex/Messaging/Server/Prometheus.hs b/src/Simplex/Messaging/Server/Prometheus.hs index 859587b606..e4d6a2774f 100644 --- a/src/Simplex/Messaging/Server/Prometheus.hs +++ b/src/Simplex/Messaging/Server/Prometheus.hs @@ -21,6 +21,7 @@ import Simplex.Messaging.Transport (simplexMQVersion) import Simplex.Messaging.Transport.Server (SocketStats
(..)) import Simplex.Messaging.Util (tshow) +-- TODO [certs rcv] add service subscriptions and count/hash diffs data ServerMetrics = ServerMetrics { statsData :: ServerStatsData, activeQueueCounts :: PeriodStatCounts, diff --git a/src/Simplex/Messaging/Server/QueueStore.hs b/src/Simplex/Messaging/Server/QueueStore.hs index e05719cf6d..7caca76693 100644 --- a/src/Simplex/Messaging/Server/QueueStore.hs +++ b/src/Simplex/Messaging/Server/QueueStore.hs @@ -65,6 +65,7 @@ data ServiceRec = ServiceRec serviceCert :: X.CertificateChain, serviceCertHash :: XV.Fingerprint, -- SHA512 hash of long-term service client certificate. See comment for ClientHandshake. serviceCreatedAt :: SystemDate + -- entitiesHash :: IdsHash -- a xor-hash of all associated entities } deriving (Show) diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs index 2fabbfa33b..eb1ba3b2c0 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs @@ -524,15 +524,11 @@ instance StoreQueueClass q => QueueStoreClass q (PostgresQueueStore q) where let (sNtfs, restNtfs) = partition (\(nId, _) -> S.member nId snIds) ntfs' in ((serviceId, sNtfs) : ssNtfs, restNtfs) - getServiceQueueCount :: (PartyI p, ServiceParty p) => PostgresQueueStore q -> SParty p -> ServiceId -> IO (Either ErrorType Int64) - getServiceQueueCount st party serviceId = - E.uninterruptibleMask_ $ runExceptT $ withDB' "getServiceQueueCount" st $ \db -> - maybeFirstRow' 0 fromOnly $ - DB.query db query (Only serviceId) - where - query = case party of - SRecipientService -> "SELECT count(1) FROM msg_queues WHERE rcv_service_id = ? AND deleted_at IS NULL" - SNotifierService -> "SELECT count(1) FROM msg_queues WHERE ntf_service_id = ? 
AND deleted_at IS NULL" + getServiceQueueCountHash :: (PartyI p, ServiceParty p) => PostgresQueueStore q -> SParty p -> ServiceId -> IO (Either ErrorType (Int64, IdsHash)) + getServiceQueueCountHash st party serviceId = + E.uninterruptibleMask_ $ runExceptT $ withDB' "getServiceQueueCountHash" st $ \db -> + maybeFirstRow' (0, mempty) id $ + DB.query db ("SELECT queue_count, queue_ids_hash FROM services WHERE service_id = ? AND service_role = ?") (serviceId, partyServiceRole party) batchInsertServices :: [STMService] -> PostgresQueueStore q -> IO Int64 batchInsertServices services' toStore = @@ -793,6 +789,10 @@ instance ToField C.APublicAuthKey where toField = toField . Binary . C.encodePub instance FromField C.APublicAuthKey where fromField = blobFieldDecoder C.decodePubKey +instance ToField IdsHash where toField (IdsHash s) = toField (Binary s) + +deriving newtype instance FromField IdsHash + instance ToField EncDataBytes where toField (EncDataBytes s) = toField (Binary s) deriving newtype instance FromField EncDataBytes diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres/Migrations.hs b/src/Simplex/Messaging/Server/QueueStore/Postgres/Migrations.hs index 7ff8b98627..5a4d470eb3 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres/Migrations.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres/Migrations.hs @@ -7,6 +7,7 @@ module Simplex.Messaging.Server.QueueStore.Postgres.Migrations where import Data.List (sortOn) import Data.Text (Text) import Simplex.Messaging.Agent.Store.Shared +import Simplex.Messaging.Agent.Store.Postgres.Migrations.Util import Text.RawString.QQ (r) serverSchemaMigrations :: [(String, Text, Maybe Text)] @@ -15,7 +16,8 @@ serverSchemaMigrations = ("20250319_updated_index", m20250319_updated_index, Just down_m20250319_updated_index), ("20250320_short_links", m20250320_short_links, Just down_m20250320_short_links), ("20250514_service_certs", m20250514_service_certs, Just down_m20250514_service_certs), - 
("20250903_store_messages", m20250903_store_messages, Just down_m20250903_store_messages) + ("20250903_store_messages", m20250903_store_messages, Just down_m20250903_store_messages), + ("20250915_queue_ids_hash", m20250915_queue_ids_hash, Just down_m20250915_queue_ids_hash) ] -- | The list of migrations in ascending order by date @@ -447,3 +449,139 @@ ALTER TABLE msg_queues DROP TABLE messages; |] + +m20250915_queue_ids_hash :: Text +m20250915_queue_ids_hash = + createXorHashFuncs + <> [r| +ALTER TABLE services + ADD COLUMN queue_count BIGINT NOT NULL DEFAULT 0, + ADD COLUMN queue_ids_hash BYTEA NOT NULL DEFAULT '\x00000000000000000000000000000000'; + +CREATE FUNCTION update_all_aggregates() RETURNS VOID +LANGUAGE plpgsql +AS $$ +BEGIN + WITH acc AS ( + SELECT + s.service_id, + count(1) as q_count, + xor_aggregate(public.digest(CASE WHEN s.service_role = 'M' THEN q.recipient_id ELSE COALESCE(q.notifier_id, '\x00000000000000000000000000000000') END, 'md5')) AS q_ids_hash + FROM services s + JOIN msg_queues q ON (s.service_id = q.rcv_service_id AND s.service_role = 'M') OR (s.service_id = q.ntf_service_id AND s.service_role = 'N') + WHERE q.deleted_at IS NULL + GROUP BY s.service_id + ) + UPDATE services s + SET queue_count = COALESCE(acc.q_count, 0), + queue_ids_hash = COALESCE(acc.q_ids_hash, '\x00000000000000000000000000000000') + FROM acc + WHERE s.service_id = acc.service_id; +END; +$$; + +SELECT update_all_aggregates(); + +CREATE FUNCTION update_aggregates(p_service_id BYTEA, p_role TEXT, p_queue_id BYTEA, p_change BIGINT) RETURNS VOID +LANGUAGE plpgsql +AS $$ +BEGIN + UPDATE services + SET queue_count = queue_count + p_change, + queue_ids_hash = xor_combine(queue_ids_hash, public.digest(p_queue_id, 'md5')) + WHERE service_id = p_service_id AND service_role = p_role; +END; +$$; + +CREATE FUNCTION on_queue_insert() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF NEW.rcv_service_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.rcv_service_id, 'M', 
NEW.recipient_id, 1); + END IF; + IF NEW.ntf_service_id IS NOT NULL AND NEW.notifier_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.ntf_service_id, 'N', NEW.notifier_id, 1); + END IF; + RETURN NEW; +END; +$$; + +CREATE FUNCTION on_queue_delete() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF OLD.deleted_at IS NULL THEN + IF OLD.rcv_service_id IS NOT NULL THEN + PERFORM update_aggregates(OLD.rcv_service_id, 'M', OLD.recipient_id, -1); + END IF; + IF OLD.ntf_service_id IS NOT NULL AND OLD.notifier_id IS NOT NULL THEN + PERFORM update_aggregates(OLD.ntf_service_id, 'N', OLD.notifier_id, -1); + END IF; + END IF; + RETURN OLD; +END; +$$; + +CREATE FUNCTION on_queue_update() RETURNS TRIGGER +LANGUAGE plpgsql +AS $$ +BEGIN + IF OLD.deleted_at IS NULL AND OLD.rcv_service_id IS NOT NULL THEN + IF NOT (NEW.deleted_at IS NULL AND NEW.rcv_service_id IS NOT NULL) THEN + PERFORM update_aggregates(OLD.rcv_service_id, 'M', OLD.recipient_id, -1); + ELSIF OLD.rcv_service_id IS DISTINCT FROM NEW.rcv_service_id THEN + PERFORM update_aggregates(OLD.rcv_service_id, 'M', OLD.recipient_id, -1); + PERFORM update_aggregates(NEW.rcv_service_id, 'M', NEW.recipient_id, 1); + END IF; + ELSIF NEW.deleted_at IS NULL AND NEW.rcv_service_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.rcv_service_id, 'M', NEW.recipient_id, 1); + END IF; + + IF OLD.deleted_at IS NULL AND OLD.ntf_service_id IS NOT NULL AND OLD.notifier_id IS NOT NULL THEN + IF NOT (NEW.deleted_at IS NULL AND NEW.ntf_service_id IS NOT NULL AND NEW.notifier_id IS NOT NULL) THEN + PERFORM update_aggregates(OLD.ntf_service_id, 'N', OLD.notifier_id, -1); + ELSIF OLD.ntf_service_id IS DISTINCT FROM NEW.ntf_service_id OR OLD.notifier_id IS DISTINCT FROM NEW.notifier_id THEN + PERFORM update_aggregates(OLD.ntf_service_id, 'N', OLD.notifier_id, -1); + PERFORM update_aggregates(NEW.ntf_service_id, 'N', NEW.notifier_id, 1); + END IF; + ELSIF NEW.deleted_at IS NULL AND NEW.ntf_service_id IS NOT NULL AND NEW.notifier_id IS NOT NULL 
THEN + PERFORM update_aggregates(NEW.ntf_service_id, 'N', NEW.notifier_id, 1); + END IF; + RETURN NEW; +END; +$$; + +CREATE TRIGGER tr_queue_insert +AFTER INSERT ON msg_queues +FOR EACH ROW EXECUTE PROCEDURE on_queue_insert(); + +CREATE TRIGGER tr_queue_delete +AFTER DELETE ON msg_queues +FOR EACH ROW EXECUTE PROCEDURE on_queue_delete(); + +CREATE TRIGGER tr_queue_update +AFTER UPDATE ON msg_queues +FOR EACH ROW EXECUTE PROCEDURE on_queue_update(); + |] + +down_m20250915_queue_ids_hash :: Text +down_m20250915_queue_ids_hash = + [r| +DROP TRIGGER tr_queue_insert ON msg_queues; +DROP TRIGGER tr_queue_delete ON msg_queues; +DROP TRIGGER tr_queue_update ON msg_queues; + +DROP FUNCTION on_queue_insert; +DROP FUNCTION on_queue_delete; +DROP FUNCTION on_queue_update; + +DROP FUNCTION update_aggregates; + +DROP FUNCTION update_all_aggregates; + +ALTER TABLE services + DROP COLUMN queue_count, + DROP COLUMN queue_ids_hash; + |] + <> dropXorHashFuncs diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres/server_schema.sql b/src/Simplex/Messaging/Server/QueueStore/Postgres/server_schema.sql index 433d454739..f0da5272d3 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres/server_schema.sql +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres/server_schema.sql @@ -104,6 +104,71 @@ $$; +CREATE FUNCTION smp_server.on_queue_delete() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF OLD.deleted_at IS NULL THEN + IF OLD.rcv_service_id IS NOT NULL THEN + PERFORM update_aggregates(OLD.rcv_service_id, 'M', OLD.recipient_id, -1); + END IF; + IF OLD.ntf_service_id IS NOT NULL AND OLD.notifier_id IS NOT NULL THEN + PERFORM update_aggregates(OLD.ntf_service_id, 'N', OLD.notifier_id, -1); + END IF; + END IF; + RETURN OLD; +END; +$$; + + + +CREATE FUNCTION smp_server.on_queue_insert() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF NEW.rcv_service_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.rcv_service_id, 'M', NEW.recipient_id, 1); + END IF; + IF 
NEW.ntf_service_id IS NOT NULL AND NEW.notifier_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.ntf_service_id, 'N', NEW.notifier_id, 1); + END IF; + RETURN NEW; +END; +$$; + + + +CREATE FUNCTION smp_server.on_queue_update() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN + IF OLD.deleted_at IS NULL AND OLD.rcv_service_id IS NOT NULL THEN + IF NOT (NEW.deleted_at IS NULL AND NEW.rcv_service_id IS NOT NULL) THEN + PERFORM update_aggregates(OLD.rcv_service_id, 'M', OLD.recipient_id, -1); + ELSIF OLD.rcv_service_id IS DISTINCT FROM NEW.rcv_service_id THEN + PERFORM update_aggregates(OLD.rcv_service_id, 'M', OLD.recipient_id, -1); + PERFORM update_aggregates(NEW.rcv_service_id, 'M', NEW.recipient_id, 1); + END IF; + ELSIF NEW.deleted_at IS NULL AND NEW.rcv_service_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.rcv_service_id, 'M', NEW.recipient_id, 1); + END IF; + + IF OLD.deleted_at IS NULL AND OLD.ntf_service_id IS NOT NULL AND OLD.notifier_id IS NOT NULL THEN + IF NOT (NEW.deleted_at IS NULL AND NEW.ntf_service_id IS NOT NULL AND NEW.notifier_id IS NOT NULL) THEN + PERFORM update_aggregates(OLD.ntf_service_id, 'N', OLD.notifier_id, -1); + ELSIF OLD.ntf_service_id IS DISTINCT FROM NEW.ntf_service_id OR OLD.notifier_id IS DISTINCT FROM NEW.notifier_id THEN + PERFORM update_aggregates(OLD.ntf_service_id, 'N', OLD.notifier_id, -1); + PERFORM update_aggregates(NEW.ntf_service_id, 'N', NEW.notifier_id, 1); + END IF; + ELSIF NEW.deleted_at IS NULL AND NEW.ntf_service_id IS NOT NULL AND NEW.notifier_id IS NOT NULL THEN + PERFORM update_aggregates(NEW.ntf_service_id, 'N', NEW.notifier_id, 1); + END IF; + RETURN NEW; +END; +$$; + + + CREATE FUNCTION smp_server.try_del_msg(p_recipient_id bytea, p_msg_id bytea) RETURNS TABLE(r_msg_id bytea, r_msg_ts bigint, r_msg_quota boolean, r_msg_ntf_flag boolean, r_msg_body bytea) LANGUAGE plpgsql AS $$ @@ -225,6 +290,43 @@ $$; +CREATE FUNCTION smp_server.update_aggregates(p_service_id bytea, p_role text, p_queue_id bytea, 
p_change bigint) RETURNS void + LANGUAGE plpgsql + AS $$ +BEGIN + UPDATE services + SET queue_count = queue_count + p_change, + queue_ids_hash = xor_combine(queue_ids_hash, public.digest(p_queue_id, 'md5')) + WHERE service_id = p_service_id AND service_role = p_role; +END; +$$; + + + +CREATE FUNCTION smp_server.update_all_aggregates() RETURNS void + LANGUAGE plpgsql + AS $$ +BEGIN + WITH acc AS ( + SELECT + s.service_id, + count(1) as q_count, + xor_aggregate(public.digest(CASE WHEN s.service_role = 'M' THEN q.recipient_id ELSE COALESCE(q.notifier_id, '\x00000000000000000000000000000000') END, 'md5')) AS q_ids_hash + FROM services s + JOIN msg_queues q ON (s.service_id = q.rcv_service_id AND s.service_role = 'M') OR (s.service_id = q.ntf_service_id AND s.service_role = 'N') + WHERE q.deleted_at IS NULL + GROUP BY s.service_id + ) + UPDATE services s + SET queue_count = COALESCE(acc.q_count, 0), + queue_ids_hash = COALESCE(acc.q_ids_hash, '\x00000000000000000000000000000000') + FROM acc + WHERE s.service_id = acc.service_id; +END; +$$; + + + CREATE FUNCTION smp_server.write_message(p_recipient_id bytea, p_msg_id bytea, p_msg_ts bigint, p_msg_quota boolean, p_msg_ntf_flag boolean, p_msg_body bytea, p_quota integer) RETURNS TABLE(quota_written boolean, was_empty boolean) LANGUAGE plpgsql AS $$ @@ -256,6 +358,34 @@ END; $$; + +CREATE FUNCTION smp_server.xor_combine(state bytea, value bytea) RETURNS bytea + LANGUAGE plpgsql IMMUTABLE STRICT + AS $$ +DECLARE + result BYTEA := state; + i INTEGER; + len INTEGER := octet_length(value); +BEGIN + IF octet_length(state) != len THEN + RAISE EXCEPTION 'Inputs must be equal length (% != %)', octet_length(state), len; + END IF; + FOR i IN 0..len-1 LOOP + result := set_byte(result, i, get_byte(state, i) # get_byte(value, i)); + END LOOP; + RETURN result; +END; +$$; + + + +CREATE AGGREGATE smp_server.xor_aggregate(bytea) ( + SFUNC = smp_server.xor_combine, + STYPE = bytea, + INITCOND = '\x00000000000000000000000000000000' +); + + 
SET default_table_access_method = heap; @@ -320,7 +450,9 @@ CREATE TABLE smp_server.services ( service_role text NOT NULL, service_cert bytea NOT NULL, service_cert_hash bytea NOT NULL, - created_at bigint NOT NULL + created_at bigint NOT NULL, + queue_count bigint DEFAULT 0 NOT NULL, + queue_ids_hash bytea DEFAULT '\x00000000000000000000000000000000'::bytea NOT NULL ); @@ -390,6 +522,18 @@ CREATE INDEX idx_services_service_role ON smp_server.services USING btree (servi +CREATE TRIGGER tr_queue_delete AFTER DELETE ON smp_server.msg_queues FOR EACH ROW EXECUTE FUNCTION smp_server.on_queue_delete(); + + + +CREATE TRIGGER tr_queue_insert AFTER INSERT ON smp_server.msg_queues FOR EACH ROW EXECUTE FUNCTION smp_server.on_queue_insert(); + + + +CREATE TRIGGER tr_queue_update AFTER UPDATE ON smp_server.msg_queues FOR EACH ROW EXECUTE FUNCTION smp_server.on_queue_update(); + + + ALTER TABLE ONLY smp_server.messages ADD CONSTRAINT messages_recipient_id_fkey FOREIGN KEY (recipient_id) REFERENCES smp_server.msg_queues(recipient_id) ON UPDATE RESTRICT ON DELETE CASCADE; diff --git a/src/Simplex/Messaging/Server/QueueStore/STM.hs b/src/Simplex/Messaging/Server/QueueStore/STM.hs index ad3e00a03e..8b64db55a2 100644 --- a/src/Simplex/Messaging/Server/QueueStore/STM.hs +++ b/src/Simplex/Messaging/Server/QueueStore/STM.hs @@ -28,6 +28,7 @@ where import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad +import Data.Bifunctor (first) import Data.Bitraversable (bimapM) import Data.Functor (($>)) import Data.Int (Int64) @@ -62,8 +63,8 @@ data STMQueueStore q = STMQueueStore data STMService = STMService { serviceRec :: ServiceRec, - serviceRcvQueues :: TVar (Set RecipientId), - serviceNtfQueues :: TVar (Set NotifierId) + serviceRcvQueues :: TVar (Set RecipientId, IdsHash), -- TODO [certs rcv] get/maintain hash + serviceNtfQueues :: TVar (Set NotifierId, IdsHash) -- TODO [certs rcv] get/maintain hash } setStoreLog :: STMQueueStore q -> StoreLog 'WriteMode 
-> IO () @@ -113,7 +114,7 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where } where serviceCount role = M.foldl' (\ !n s -> if serviceRole (serviceRec s) == role then n + 1 else n) 0 - serviceQueuesCount serviceSel = foldM (\n s -> (n +) . S.size <$> readTVarIO (serviceSel s)) 0 + serviceQueuesCount serviceSel = foldM (\n s -> (n +) . S.size . fst <$> readTVarIO (serviceSel s)) 0 addQueue_ :: STMQueueStore q -> (RecipientId -> QueueRec -> IO q) -> RecipientId -> QueueRec -> IO (Either ErrorType q) addQueue_ st mkQ rId qr@QueueRec {senderId = sId, notifier, queueData, rcvServiceId} = do @@ -304,8 +305,8 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where TM.insert fp newSrvId serviceCerts pure $ Right (newSrvId, True) newSTMService = do - serviceRcvQueues <- newTVar S.empty - serviceNtfQueues <- newTVar S.empty + serviceRcvQueues <- newTVar (S.empty, mempty) + serviceNtfQueues <- newTVar (S.empty, mempty) pure STMService {serviceRec = sr, serviceRcvQueues, serviceNtfQueues} setQueueService :: (PartyI p, ServiceParty p) => STMQueueStore q -> q -> SParty p -> Maybe ServiceId -> IO (Either ErrorType ()) @@ -331,7 +332,7 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where let !q' = Just q {notifier = Just nc {ntfServiceId = serviceId}} updateServiceQueues serviceNtfQueues nId prevNtfSrvId writeTVar qr q' $> Right () - updateServiceQueues :: (STMService -> TVar (Set QueueId)) -> QueueId -> Maybe ServiceId -> STM () + updateServiceQueues :: (STMService -> TVar (Set QueueId, IdsHash)) -> QueueId -> Maybe ServiceId -> STM () updateServiceQueues serviceSel qId prevSrvId = do mapM_ (removeServiceQueue st serviceSel qId) prevSrvId mapM_ (addServiceQueue st serviceSel qId) serviceId @@ -346,16 +347,16 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where pure $ Right (ssNtfs', deleteNtfs) where addService (ssNtfs, ntfs') (serviceId, s) = do - snIds <- readTVarIO $ serviceNtfQueues s + 
(snIds, _) <- readTVarIO $ serviceNtfQueues s let (sNtfs, restNtfs) = partition (\(nId, _) -> S.member nId snIds) ntfs' pure ((Just serviceId, sNtfs) : ssNtfs, restNtfs) - getServiceQueueCount :: (PartyI p, ServiceParty p) => STMQueueStore q -> SParty p -> ServiceId -> IO (Either ErrorType Int64) - getServiceQueueCount st party serviceId = + getServiceQueueCountHash :: (PartyI p, ServiceParty p) => STMQueueStore q -> SParty p -> ServiceId -> IO (Either ErrorType (Int64, IdsHash)) + getServiceQueueCountHash st party serviceId = TM.lookupIO serviceId (services st) >>= - maybe (pure $ Left AUTH) (fmap (Right . fromIntegral . S.size) . readTVarIO . serviceSel) + maybe (pure $ Left AUTH) (fmap (Right . first (fromIntegral . S.size)) . readTVarIO . serviceSel) where - serviceSel :: STMService -> TVar (Set QueueId) + serviceSel :: STMService -> TVar (Set QueueId, IdsHash) serviceSel = case party of SRecipientService -> serviceRcvQueues SNotifierService -> serviceNtfQueues @@ -366,7 +367,7 @@ foldRcvServiceQueues st serviceId f acc = Nothing -> pure acc Just s -> readTVarIO (serviceRcvQueues s) - >>= foldM (\a -> get >=> maybe (pure a) (f a)) acc + >>= foldM (\a -> get >=> maybe (pure a) (f a)) acc . 
fst where get rId = TM.lookupIO rId (queues st) $>>= \q -> (q,) <$$> readTVarIO (queueRec q) @@ -379,16 +380,23 @@ setStatus qr status = Just q -> (Right (), Just q {status}) Nothing -> (Left AUTH, Nothing) -addServiceQueue :: STMQueueStore q -> (STMService -> TVar (Set QueueId)) -> QueueId -> ServiceId -> STM () -addServiceQueue st serviceSel qId serviceId = - TM.lookup serviceId (services st) >>= mapM_ (\s -> modifyTVar' (serviceSel s) (S.insert qId)) +addServiceQueue :: STMQueueStore q -> (STMService -> TVar (Set QueueId, IdsHash)) -> QueueId -> ServiceId -> STM () +addServiceQueue = setServiceQueues_ S.insert {-# INLINE addServiceQueue #-} -removeServiceQueue :: STMQueueStore q -> (STMService -> TVar (Set QueueId)) -> QueueId -> ServiceId -> STM () -removeServiceQueue st serviceSel qId serviceId = - TM.lookup serviceId (services st) >>= mapM_ (\s -> modifyTVar' (serviceSel s) (S.delete qId)) +removeServiceQueue :: STMQueueStore q -> (STMService -> TVar (Set QueueId, IdsHash)) -> QueueId -> ServiceId -> STM () +removeServiceQueue = setServiceQueues_ S.delete {-# INLINE removeServiceQueue #-} +setServiceQueues_ :: (QueueId -> Set QueueId -> Set QueueId) -> STMQueueStore q -> (STMService -> TVar (Set QueueId, IdsHash)) -> QueueId -> ServiceId -> STM () +setServiceQueues_ updateSet st serviceSel qId serviceId = + TM.lookup serviceId (services st) >>= mapM_ (\v -> modifyTVar' (serviceSel v) update) + where + update (s, idsHash) = + let !s' = updateSet qId s + !idsHash' = if S.size s' == S.size s then idsHash else queueIdHash qId <> idsHash {- toggle the ids hash only when set membership actually changed, so a duplicate insert or a delete of a non-member cannot desynchronise the hash from the set -} + in (s', idsHash') + removeNotifier :: STMQueueStore q -> NtfCreds -> STM () removeNotifier st NtfCreds {notifierId = nId, ntfServiceId} = do TM.delete nId $ notifiers st diff --git a/src/Simplex/Messaging/Server/QueueStore/Types.hs b/src/Simplex/Messaging/Server/QueueStore/Types.hs index 8de0154210..723930e9fe 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Types.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Types.hs @@ -47,7 +47,7 @@ class StoreQueueClass q => 
QueueStoreClass q s where getCreateService :: s -> ServiceRec -> IO (Either ErrorType ServiceId) setQueueService :: (PartyI p, ServiceParty p) => s -> q -> SParty p -> Maybe ServiceId -> IO (Either ErrorType ()) getQueueNtfServices :: s -> [(NotifierId, a)] -> IO (Either ErrorType ([(Maybe ServiceId, [(NotifierId, a)])], [(NotifierId, a)])) - getServiceQueueCount :: (PartyI p, ServiceParty p) => s -> SParty p -> ServiceId -> IO (Either ErrorType Int64) + getServiceQueueCountHash :: (PartyI p, ServiceParty p) => s -> SParty p -> ServiceId -> IO (Either ErrorType (Int64, IdsHash)) data EntityCounts = EntityCounts { queueCount :: Int, diff --git a/src/Simplex/Messaging/Server/Stats.hs b/src/Simplex/Messaging/Server/Stats.hs index e60f878153..120fad7b66 100644 --- a/src/Simplex/Messaging/Server/Stats.hs +++ b/src/Simplex/Messaging/Server/Stats.hs @@ -821,7 +821,15 @@ data ServiceStats = ServiceStats srvSubCount :: IORef Int, srvSubDuplicate :: IORef Int, srvSubQueues :: IORef Int, - srvSubEnd :: IORef Int + srvSubEnd :: IORef Int, + -- counts of subscriptions + srvSubOk :: IORef Int, -- server has the same queues as expected + srvSubMore :: IORef Int, -- server has more queues than expected + srvSubFewer :: IORef Int, -- server has fewer queues than expected + srvSubDiff :: IORef Int, -- server has the same count, but different queues than expected (based on xor-hash) + -- adds actual deviations + srvSubMoreTotal :: IORef Int, -- server has more queues than expected, adds diff + srvSubFewerTotal :: IORef Int } data ServiceStatsData = ServiceStatsData @@ -832,7 +840,13 @@ data ServiceStatsData = ServiceStatsData _srvSubCount :: Int, _srvSubDuplicate :: Int, _srvSubQueues :: Int, - _srvSubEnd :: Int + _srvSubEnd :: Int, + _srvSubOk :: Int, + _srvSubMore :: Int, + _srvSubFewer :: Int, + _srvSubDiff :: Int, + _srvSubMoreTotal :: Int, + _srvSubFewerTotal :: Int } deriving (Show) @@ -846,7 +860,13 @@ newServiceStatsData = _srvSubCount = 0, _srvSubDuplicate = 0, _srvSubQueues 
= 0, - _srvSubEnd = 0 + _srvSubEnd = 0, + _srvSubOk = 0, + _srvSubMore = 0, + _srvSubFewer = 0, + _srvSubDiff = 0, + _srvSubMoreTotal = 0, + _srvSubFewerTotal = 0 } newServiceStats :: IO ServiceStats @@ -859,6 +879,12 @@ newServiceStats = do srvSubDuplicate <- newIORef 0 srvSubQueues <- newIORef 0 srvSubEnd <- newIORef 0 + srvSubOk <- newIORef 0 + srvSubMore <- newIORef 0 + srvSubFewer <- newIORef 0 + srvSubDiff <- newIORef 0 + srvSubMoreTotal <- newIORef 0 + srvSubFewerTotal <- newIORef 0 pure ServiceStats { srvAssocNew, @@ -868,7 +894,13 @@ newServiceStats = do srvSubCount, srvSubDuplicate, srvSubQueues, - srvSubEnd + srvSubEnd, + srvSubOk, + srvSubMore, + srvSubFewer, + srvSubDiff, + srvSubMoreTotal, + srvSubFewerTotal } getServiceStatsData :: ServiceStats -> IO ServiceStatsData @@ -881,6 +913,12 @@ getServiceStatsData s = do _srvSubDuplicate <- readIORef $ srvSubDuplicate s _srvSubQueues <- readIORef $ srvSubQueues s _srvSubEnd <- readIORef $ srvSubEnd s + _srvSubOk <- readIORef $ srvSubOk s + _srvSubMore <- readIORef $ srvSubMore s + _srvSubFewer <- readIORef $ srvSubFewer s + _srvSubDiff <- readIORef $ srvSubDiff s + _srvSubMoreTotal <- readIORef $ srvSubMoreTotal s + _srvSubFewerTotal <- readIORef $ srvSubFewerTotal s pure ServiceStatsData { _srvAssocNew, @@ -890,7 +928,13 @@ getServiceStatsData s = do _srvSubCount, _srvSubDuplicate, _srvSubQueues, - _srvSubEnd + _srvSubEnd, + _srvSubOk, + _srvSubMore, + _srvSubFewer, + _srvSubDiff, + _srvSubMoreTotal, + _srvSubFewerTotal } getResetServiceStatsData :: ServiceStats -> IO ServiceStatsData @@ -903,6 +947,12 @@ getResetServiceStatsData s = do _srvSubDuplicate <- atomicSwapIORef (srvSubDuplicate s) 0 _srvSubQueues <- atomicSwapIORef (srvSubQueues s) 0 _srvSubEnd <- atomicSwapIORef (srvSubEnd s) 0 + _srvSubOk <- atomicSwapIORef (srvSubOk s) 0 + _srvSubMore <- atomicSwapIORef (srvSubMore s) 0 + _srvSubFewer <- atomicSwapIORef (srvSubFewer s) 0 + _srvSubDiff <- atomicSwapIORef (srvSubDiff s) 0 + _srvSubMoreTotal <- 
atomicSwapIORef (srvSubMoreTotal s) 0 + _srvSubFewerTotal <- atomicSwapIORef (srvSubFewerTotal s) 0 pure ServiceStatsData { _srvAssocNew, @@ -912,7 +962,13 @@ getResetServiceStatsData s = do _srvSubCount, _srvSubDuplicate, _srvSubQueues, - _srvSubEnd + _srvSubEnd, + _srvSubOk, + _srvSubMore, + _srvSubFewer, + _srvSubDiff, + _srvSubMoreTotal, + _srvSubFewerTotal } -- this function is not thread safe, it is used on server start only @@ -926,6 +982,12 @@ setServiceStats s d = do writeIORef (srvSubDuplicate s) $! _srvSubDuplicate d writeIORef (srvSubQueues s) $! _srvSubQueues d writeIORef (srvSubEnd s) $! _srvSubEnd d + writeIORef (srvSubOk s) $! _srvSubOk d + writeIORef (srvSubMore s) $! _srvSubMore d + writeIORef (srvSubFewer s) $! _srvSubFewer d + writeIORef (srvSubDiff s) $! _srvSubDiff d + writeIORef (srvSubMoreTotal s) $! _srvSubMoreTotal d + writeIORef (srvSubFewerTotal s) $! _srvSubFewerTotal d instance StrEncoding ServiceStatsData where strEncode ServiceStatsData {_srvAssocNew, _srvAssocDuplicate, _srvAssocUpdated, _srvAssocRemoved, _srvSubCount, _srvSubDuplicate, _srvSubQueues, _srvSubEnd} = @@ -963,7 +1025,13 @@ instance StrEncoding ServiceStatsData where _srvSubCount, _srvSubDuplicate, _srvSubQueues, - _srvSubEnd + _srvSubEnd, + _srvSubOk = 0, + _srvSubMore = 0, + _srvSubFewer = 0, + _srvSubDiff = 0, + _srvSubMoreTotal = 0, + _srvSubFewerTotal = 0 } data TimeBuckets = TimeBuckets diff --git a/src/Simplex/Messaging/Server/StoreLog/ReadWrite.hs b/src/Simplex/Messaging/Server/StoreLog/ReadWrite.hs index ea6c9ed4a9..2fd4ca6d8c 100644 --- a/src/Simplex/Messaging/Server/StoreLog/ReadWrite.hs +++ b/src/Simplex/Messaging/Server/StoreLog/ReadWrite.hs @@ -61,7 +61,7 @@ readQueueStore tty mkQ f st = readLogLines tty f $ \_ -> processLine Left e -> logError $ errPfx <> tshow e where errPfx = "STORE: getCreateService, stored service " <> decodeLatin1 (strEncode serviceId) <> ", " - QueueService rId (ASP party) serviceId -> withQueue rId "QueueService" $ \q -> 
setQueueService st q party serviceId + QueueService qId (ASP party) serviceId -> withQueue qId "QueueService" $ \q -> setQueueService st q party serviceId printError :: String -> IO () printError e = B.putStrLn $ "Error parsing log: " <> B.pack e <> " - " <> s withQueue :: forall a. RecipientId -> T.Text -> (q -> IO (Either ErrorType a)) -> IO () diff --git a/tests/AgentTests/EqInstances.hs b/tests/AgentTests/EqInstances.hs index 63c493861f..e142c61776 100644 --- a/tests/AgentTests/EqInstances.hs +++ b/tests/AgentTests/EqInstances.hs @@ -8,6 +8,7 @@ import Data.Type.Equality import Simplex.Messaging.Agent.Protocol (ConnLinkData (..), OwnerAuth (..), UserContactData (..), UserLinkData (..)) import Simplex.Messaging.Agent.Store import Simplex.Messaging.Client (ProxiedRelay (..)) +import Simplex.Messaging.Protocol (ServiceSub (..)) instance (Eq rq, Eq sq) => Eq (SomeConn' rq sq) where SomeConn d c == SomeConn d' c' = case testEquality d d' of @@ -47,3 +48,7 @@ deriving instance Eq OwnerAuth deriving instance Show ProxiedRelay deriving instance Eq ProxiedRelay + +deriving instance Show ServiceSub + +deriving instance Eq ServiceSub diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 017958890b..7f9641a5b5 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -476,6 +476,8 @@ functionalAPITests ps = do testUsersNoServer ps it "should connect two users and switch session mode" $ withSmpServer ps testTwoUsers + describe "Client service certificates" $ do + it "should connect, subscribe and reconnect as a service" $ testClientServiceConnection ps describe "Connection switch" $ do describe "should switch delivery to the new queue" $ testServerMatrix2 ps testSwitchConnection @@ -3664,6 +3666,32 @@ testTwoUsers = withAgentClients2 $ \a b -> do hasClients :: HasCallStack => AgentClient -> Int -> ExceptT AgentErrorType IO () hasClients c n = liftIO $ M.size <$> readTVarIO (smpClients c) 
`shouldReturn` n +testClientServiceConnection :: HasCallStack => (ASrvTransport, AStoreType) -> IO () +testClientServiceConnection ps = do + (sId, uId) <- withSmpServerStoreLogOn ps testPort $ \_ -> do + conns@(sId, uId) <- withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do + conns@(sId, uId) <- makeConnection service user + exchangeGreetings service uId user sId + pure conns + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do + subscribeClientServices service 1 + subscribeConnection user sId + exchangeGreetingsMsgId 4 service uId user sId + pure conns + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + subscribeClientServices service 1 + subscribeConnection user sId + exchangeGreetingsMsgId 6 service uId user sId + ("", "", DOWN _ [_]) <- nGet user + -- TODO [certs rcv] how to integrate service counts into stats + -- r <- nGet service -- TODO [certs rcv] some event when service disconnects with count + -- print r + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + ("", "", UP _ [_]) <- nGet user + -- r <- nGet service -- TODO [certs rcv] some event when service reconnects with count + exchangeGreetingsMsgId 8 service uId user sId + getSMPAgentClient' :: Int -> AgentConfig -> InitialAgentServers -> String -> IO AgentClient getSMPAgentClient' clientId cfg' initServers dbPath = do Right st <- liftIO $ createStore dbPath diff --git a/tests/CoreTests/TSessionSubs.hs b/tests/CoreTests/TSessionSubs.hs index e3f8193325..e9038b9d95 100644 --- a/tests/CoreTests/TSessionSubs.hs +++ b/tests/CoreTests/TSessionSubs.hs @@ -58,9 +58,9 @@ testSessionSubs = do atomically (SS.hasPendingSubs tSess2 ss) `shouldReturn` True atomically (SS.batchAddPendingSubs tSess1 [q1, q2] ss') atomically 
(SS.batchAddPendingSubs tSess2 [q3] ss') - atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` M.fromList [("r1", q1), ("r2", q2)] + atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` (M.fromList [("r1", q1), ("r2", q2)], Nothing) atomically (SS.getActiveSubs tSess1 ss) `shouldReturn` M.fromList [] - atomically (SS.getPendingSubs tSess2 ss) `shouldReturn` M.fromList [("r3", q3)] + atomically (SS.getPendingSubs tSess2 ss) `shouldReturn` (M.fromList [("r3", q3)], Nothing) st <- dumpSessionSubs ss dumpSessionSubs ss' `shouldReturn` st countSubs ss `shouldReturn` (0, 3) @@ -69,41 +69,41 @@ testSessionSubs = do atomically (SS.hasPendingSub tSess1 (rcvId q4) ss) `shouldReturn` False atomically (SS.hasActiveSub tSess1 (rcvId q4) ss) `shouldReturn` False -- setting active queue without setting session ID would keep it as pending - atomically $ SS.addActiveSub tSess1 "123" q1 ss + atomically $ SS.addActiveSub' tSess1 "123" q1 False ss atomically (SS.hasPendingSub tSess1 (rcvId q1) ss) `shouldReturn` True atomically (SS.hasActiveSub tSess1 (rcvId q1) ss) `shouldReturn` False dumpSessionSubs ss `shouldReturn` st countSubs ss `shouldReturn` (0, 3) -- setting active queues atomically $ SS.setSessionId tSess1 "123" ss - atomically $ SS.addActiveSub tSess1 "123" q1 ss + atomically $ SS.addActiveSub' tSess1 "123" q1 False ss atomically (SS.hasPendingSub tSess1 (rcvId q1) ss) `shouldReturn` False atomically (SS.hasActiveSub tSess1 (rcvId q1) ss) `shouldReturn` True atomically (SS.getActiveSubs tSess1 ss) `shouldReturn` M.fromList [("r1", q1)] - atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` M.fromList [("r2", q2)] + atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` (M.fromList [("r2", q2)], Nothing) countSubs ss `shouldReturn` (1, 2) atomically $ SS.setSessionId tSess2 "456" ss - atomically $ SS.addActiveSub tSess2 "456" q4 ss + atomically $ SS.addActiveSub' tSess2 "456" q4 False ss atomically (SS.hasPendingSub tSess2 (rcvId q4) ss) `shouldReturn` False 
atomically (SS.hasActiveSub tSess2 (rcvId q4) ss) `shouldReturn` True atomically (SS.hasActiveSub tSess1 (rcvId q4) ss) `shouldReturn` False -- wrong transport session atomically (SS.getActiveSubs tSess2 ss) `shouldReturn` M.fromList [("r4", q4)] - atomically (SS.getPendingSubs tSess2 ss) `shouldReturn` M.fromList [("r3", q3)] + atomically (SS.getPendingSubs tSess2 ss) `shouldReturn` (M.fromList [("r3", q3)], Nothing) countSubs ss `shouldReturn` (2, 2) -- setting pending queues st' <- dumpSessionSubs ss - atomically (SS.setSubsPending TSMUser tSess1 "abc" ss) `shouldReturn` M.empty -- wrong session + atomically (SS.setSubsPending TSMUser tSess1 "abc" ss) `shouldReturn` (M.empty, Nothing) -- wrong session dumpSessionSubs ss `shouldReturn` st' - atomically (SS.setSubsPending TSMUser tSess1 "123" ss) `shouldReturn` M.fromList [("r1", q1)] + atomically (SS.setSubsPending TSMUser tSess1 "123" ss) `shouldReturn` (M.fromList [("r1", q1)], Nothing) atomically (SS.getActiveSubs tSess1 ss) `shouldReturn` M.fromList [] - atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` M.fromList [("r1", q1), ("r2", q2)] + atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` (M.fromList [("r1", q1), ("r2", q2)], Nothing) countSubs ss `shouldReturn` (1, 3) -- delete subs atomically $ SS.deletePendingSub tSess1 (rcvId q1) ss - atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` M.fromList [("r2", q2)] + atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` (M.fromList [("r2", q2)], Nothing) countSubs ss `shouldReturn` (1, 2) atomically $ SS.deleteSub tSess1 (rcvId q2) ss - atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` M.fromList [] + atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` (M.fromList [], Nothing) countSubs ss `shouldReturn` (1, 1) atomically (SS.getActiveSubs tSess2 ss) `shouldReturn` M.fromList [("r4", q4)] atomically $ SS.deleteSub tSess2 (rcvId q4) ss diff --git a/tests/Fixtures.hs b/tests/Fixtures.hs index 2360a7ba69..f2f314fedc 100644 --- 
a/tests/Fixtures.hs +++ b/tests/Fixtures.hs @@ -3,7 +3,9 @@ module Fixtures where import Data.ByteString (ByteString) +import qualified Data.ByteString.Char8 as B import Database.PostgreSQL.Simple (ConnectInfo (..), defaultConnectInfo) +import Simplex.Messaging.Agent.Store.Postgres.Options testDBConnstr :: ByteString testDBConnstr = "postgresql://test_agent_user@/test_agent_db" @@ -14,3 +16,6 @@ testDBConnectInfo = connectUser = "test_agent_user", connectDatabase = "test_agent_db" } + +testDBOpts :: String -> DBOpts +testDBOpts schema' = DBOpts testDBConnstr (B.pack schema') 1 True diff --git a/tests/SMPAgentClient.hs b/tests/SMPAgentClient.hs index 9357750506..41aab20399 100644 --- a/tests/SMPAgentClient.hs +++ b/tests/SMPAgentClient.hs @@ -83,6 +83,9 @@ initAgentServersProxy_ smpProxyMode smpProxyFallback = initAgentServersProxy2 :: InitialAgentServers initAgentServersProxy2 = initAgentServersProxy {smp = userServers [testSMPServer2]} +initAgentServersClientService :: InitialAgentServers +initAgentServersClientService = initAgentServers {useServices = M.fromList [(1, True)]} + agentCfg :: AgentConfig agentCfg = defaultAgentConfig diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs index 39009794c0..d3e1b21d08 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -712,15 +712,17 @@ testServiceDeliverSubscribe = pure (rId, sId, dec, serviceId) runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do - Resp "10" NoEntity (ERR (CMD NO_AUTH)) <- signSendRecv sh aServicePK ("10", NoEntity, SUBS) - signSend_ sh aServicePK Nothing ("11", serviceId, SUBS) + let idsHash = queueIdsHash [rId] + Resp "10" NoEntity (ERR (CMD NO_AUTH)) <- signSendRecv sh aServicePK ("10", NoEntity, SUBS 1 idsHash) + signSend_ sh aServicePK Nothing ("11", serviceId, SUBS 1 idsHash) -- TODO [certs rcv] compute and compare hashes [mId3] <- fmap catMaybes $ receiveInAnyOrder -- race between SOKS and MSG, clients can handle it sh [ \case - Resp "11" serviceId' (SOKS n _) -> do + Resp 
"11" serviceId' (SOKS n idsHash') -> do n `shouldBe` 1 + idsHash' `shouldBe` idsHash serviceId' `shouldBe` serviceId pure $ Just Nothing _ -> pure Nothing, @@ -805,14 +807,16 @@ testServiceUpgradeAndDowngrade = Resp "12" _ OK <- signSendRecv h sKey2 ("12", sId2, _SEND "hello 3.2") runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do - signSend_ sh aServicePK Nothing ("14", serviceId, SUBS) + let idsHash = queueIdsHash [rId, rId2, rId3] + signSend_ sh aServicePK Nothing ("14", serviceId, SUBS 3 idsHash) -- TODO [certs rcv] compute hash [(rKey3_1, rId3_1, mId3_1), (rKey3_2, rId3_2, mId3_2)] <- fmap catMaybes $ receiveInAnyOrder -- race between SOKS and MSG, clients can handle it sh [ \case - Resp "14" serviceId' (SOKS n _) -> do + Resp "14" serviceId' (SOKS n idsHash') -> do n `shouldBe` 3 + idsHash' `shouldBe` idsHash serviceId' `shouldBe` serviceId pure $ Just Nothing _ -> pure Nothing, @@ -835,7 +839,7 @@ testServiceUpgradeAndDowngrade = Resp "17" _ OK <- signSendRecv h sKey ("17", sId, _SEND "hello 4") runSMPClient t $ \sh -> do - Resp "18" _ (ERR SERVICE) <- signSendRecv sh aServicePK ("18", serviceId, SUBS) + Resp "18" _ (ERR SERVICE) <- signSendRecv sh aServicePK ("18", serviceId, SUBS 3 mempty) (Resp "19" rId' (SOK Nothing), Resp "" rId'' (Msg mId4 msg4)) <- signSendRecv2 sh rKey ("19", rId, SUB) rId' `shouldBe` rId rId'' `shouldBe` rId @@ -1366,7 +1370,9 @@ testMessageServiceNotifications = deliverMessage rh rId rKey sh sId sKey nh2 "connection 1" dec deliverMessage rh rId'' rKey'' sh sId'' sKey'' nh2 "connection 2" dec'' -- -- another client makes service subscription - Resp "12" serviceId5 (SOKS 2 _) <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS) + let idsHash = queueIdsHash [nId', nId''] + Resp "12" serviceId5 (SOKS 2 idsHash') <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS 2 idsHash) -- TODO [certs rcv] compute and compare hashes + idsHash' `shouldBe` idsHash serviceId5 
`shouldBe` serviceId Resp "" serviceId6 (ENDS 2) <- tGet1 nh2 serviceId6 `shouldBe` serviceId @@ -1389,18 +1395,19 @@ testServiceNotificationsTwoRestarts = (nPub, nKey) <- atomically $ C.generateAuthKeyPair C.SEd25519 g serviceKeys@(_, servicePK) <- atomically $ C.generateKeyPair g (rcvNtfPubDhKey, _) <- atomically $ C.generateKeyPair g - (rId, rKey, sId, dec, serviceId) <- withSmpServerStoreLogOn ps testPort $ runTest2 t $ \sh rh -> do + (rId, rKey, sId, dec, nId, serviceId) <- withSmpServerStoreLogOn ps testPort $ runTest2 t $ \sh rh -> do (sId, rId, rKey, dhShared) <- createAndSecureQueue rh sPub let dec = decryptMsgV3 dhShared Resp "0" _ (NID nId _) <- signSendRecv rh rKey ("0", rId, NKEY nPub rcvNtfPubDhKey) testNtfServiceClient t serviceKeys $ \nh -> do Resp "1" _ (SOK (Just serviceId)) <- serviceSignSendRecv nh nKey servicePK ("1", nId, NSUB) deliverMessage rh rId rKey sh sId sKey nh "hello" dec - pure (rId, rKey, sId, dec, serviceId) + pure (rId, rKey, sId, dec, nId, serviceId) + let idsHash = queueIdsHash [nId] threadDelay 250000 withSmpServerStoreLogOn ps testPort $ runTest2 t $ \sh rh -> testNtfServiceClient t serviceKeys $ \nh -> do - Resp "2.1" serviceId' (SOKS n _) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("2.1", serviceId, NSUBS) + Resp "2.1" serviceId' (SOKS n _) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("2.1", serviceId, NSUBS 1 idsHash) n `shouldBe` 1 Resp "2.2" _ (SOK Nothing) <- signSendRecv rh rKey ("2.2", rId, SUB) serviceId' `shouldBe` serviceId @@ -1408,7 +1415,7 @@ testServiceNotificationsTwoRestarts = threadDelay 250000 withSmpServerStoreLogOn ps testPort $ runTest2 t $ \sh rh -> testNtfServiceClient t serviceKeys $ \nh -> do - Resp "3.1" _ (SOKS n _) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("3.1", serviceId, NSUBS) + Resp "3.1" _ (SOKS n _) <- signSendRecv nh (C.APrivateAuthKey C.SEd25519 servicePK) ("3.1", serviceId, NSUBS 1 idsHash) n `shouldBe` 1 Resp "3.2" _ (SOK Nothing) <- 
signSendRecv rh rKey ("3.2", rId, SUB) deliverMessage rh rId rKey sh sId sKey nh "hello 3" dec diff --git a/tests/Test.hs b/tests/Test.hs index 3e36e192d6..260366fc81 100644 --- a/tests/Test.hs +++ b/tests/Test.hs @@ -38,6 +38,8 @@ import XFTPServerTests (xftpServerTests) #if defined(dbPostgres) import Fixtures +import SMPAgentClient (testDB) +import Simplex.Messaging.Agent.Store.Postgres.Migrations.App #else import AgentTests.SchemaDump (schemaDumpTest) #endif @@ -45,13 +47,13 @@ import AgentTests.SchemaDump (schemaDumpTest) #if defined(dbServerPostgres) import NtfServerTests (ntfServerTests) import NtfClient (ntfTestServerDBConnectInfo, ntfTestStoreDBOpts) -import PostgresSchemaDump (postgresSchemaDumpTest) import SMPClient (testServerDBConnectInfo, testStoreDBOpts) import Simplex.Messaging.Notifications.Server.Store.Migrations (ntfServerMigrations) import Simplex.Messaging.Server.QueueStore.Postgres.Migrations (serverMigrations) #endif #if defined(dbPostgres) || defined(dbServerPostgres) +import PostgresSchemaDump (postgresSchemaDumpTest) import SMPClient (postgressBracket) #endif @@ -71,10 +73,6 @@ main = do . before_ (createDirectoryIfMissing False "tests/tmp") . 
after_ (eventuallyRemove "tests/tmp" 3) $ do --- TODO [postgres] schema dump for postgres -#if !defined(dbPostgres) - describe "Agent SQLite schema dump" schemaDumpTest -#endif describe "Core tests" $ do describe "Batching tests" batchingTests describe "Encoding tests" encodingTests @@ -151,6 +149,17 @@ main = do describe "XFTP agent" xftpAgentTests describe "XRCP" remoteControlTests describe "Server CLIs" cliTests +#if defined(dbPostgres) + around_ (postgressBracket testDBConnectInfo) $ + describe "Agent PostgreSQL schema dump" $ + postgresSchemaDumpTest + appMigrations + ["20250322_short_links"] -- snd_secure and last_broker_ts columns swap order on down migration + (testDBOpts testDB) + "src/Simplex/Messaging/Agent/Store/Postgres/Migrations/agent_postgres_schema.sql" +#else + describe "Agent SQLite schema dump" schemaDumpTest +#endif eventuallyRemove :: FilePath -> Int -> IO () eventuallyRemove path retries = case retries of From 5e9b164f4e81e8a28dd845d392a0a1563ae7dbb8 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 25 Nov 2025 23:17:47 +0000 Subject: [PATCH 03/91] agent: fail when per-connection transport isolation is used with services (#1670) --- src/Simplex/Messaging/Agent.hs | 54 +++++++++++-------- src/Simplex/Messaging/Agent/Client.hs | 3 +- .../Messaging/Agent/Store/AgentStore.hs | 4 -- src/Simplex/Messaging/Client.hs | 2 +- src/Simplex/Messaging/Notifications/Server.hs | 4 +- src/Simplex/Messaging/Server.hs | 2 +- src/Simplex/Messaging/Server/Main.hs | 2 +- src/Simplex/Messaging/Transport.hs | 2 +- tests/AgentTests/FunctionalAPITests.hs | 10 ++-- tests/CoreTests/BatchingTests.hs | 2 +- tests/ServerTests.hs | 6 +-- 11 files changed, 49 insertions(+), 42 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 63516ada45..18bc0afbb9 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -194,7 +194,7 @@ import Simplex.Messaging.Agent.Store.Entity import 
Simplex.Messaging.Agent.Store.Interface (closeDBStore, execSQL, getCurrentMigrations) import Simplex.Messaging.Agent.Store.Shared (UpMigration (..), upMigration) import qualified Simplex.Messaging.Agent.TSessionSubs as SS -import Simplex.Messaging.Client (NetworkRequestMode (..), SMPClientError, ServerTransmission (..), ServerTransmissionBatch, nonBlockingWriteTBQueue, smpErrorClientNotice, temporaryClientError, unexpectedResponse) +import Simplex.Messaging.Client (NetworkRequestMode (..), SMPClientError, ServerTransmission (..), ServerTransmissionBatch, TransportSessionMode (..), nonBlockingWriteTBQueue, smpErrorClientNotice, temporaryClientError, unexpectedResponse) import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Crypto.File (CryptoFile, CryptoFileArgs) import Simplex.Messaging.Crypto.Ratchet (PQEncryption, PQSupport (..), pattern PQEncOff, pattern PQEncOn, pattern PQSupportOff, pattern PQSupportOn) @@ -249,13 +249,15 @@ import UnliftIO.STM type AE a = ExceptT AgentErrorType IO a -- | Creates an SMP agent client instance -getSMPAgentClient :: AgentConfig -> InitialAgentServers -> DBStore -> Bool -> IO AgentClient +getSMPAgentClient :: AgentConfig -> InitialAgentServers -> DBStore -> Bool -> AE AgentClient getSMPAgentClient = getSMPAgentClient_ 1 {-# INLINE getSMPAgentClient #-} -getSMPAgentClient_ :: Int -> AgentConfig -> InitialAgentServers -> DBStore -> Bool -> IO AgentClient -getSMPAgentClient_ clientId cfg initServers@InitialAgentServers {smp, xftp, presetServers} store backgroundMode = - newSMPAgentEnv cfg store >>= runReaderT runAgent +getSMPAgentClient_ :: Int -> AgentConfig -> InitialAgentServers -> DBStore -> Bool -> AE AgentClient +getSMPAgentClient_ clientId cfg initServers@InitialAgentServers {smp, xftp, netCfg, useServices, presetServers} store backgroundMode = do + -- This error should be prevented in the app + when (any id useServices && sessionMode netCfg == TSMEntity) $ throwE $ CMD PROHIBITED "newAgentClient" + liftIO $ 
newSMPAgentEnv cfg store >>= runReaderT runAgent where runAgent = do liftIO $ checkServers "SMP" smp >> checkServers "XFTP" xftp @@ -594,18 +596,22 @@ testProtocolServer c nm userId srv = withAgentEnv' c $ case protocolTypeI @p of SPNTF -> runNTFServerTest c nm userId srv -- | set SOCKS5 proxy on/off and optionally set TCP timeouts for fast network --- TODO [certs rcv] should fail if any user is enabled to use services and per-connection isolation is chosen -setNetworkConfig :: AgentClient -> NetworkConfig -> IO () +setNetworkConfig :: AgentClient -> NetworkConfig -> AE () setNetworkConfig c@AgentClient {useNetworkConfig, proxySessTs} cfg' = do - ts <- getCurrentTime - changed <- atomically $ do - (_, cfg) <- readTVar useNetworkConfig - let changed = cfg /= cfg' - !cfgSlow = slowNetworkConfig cfg' - when changed $ writeTVar useNetworkConfig (cfgSlow, cfg') - when (socksProxy cfg /= socksProxy cfg') $ writeTVar proxySessTs ts - pure changed - when changed $ reconnectAllServers c + ts <- liftIO getCurrentTime + (ok, changed) <- atomically $ do + useServices <- readTVar $ useClientServices c + if any id useServices && sessionMode cfg' == TSMEntity + then pure (False, False) + else do + (_, cfg) <- readTVar useNetworkConfig + let changed = cfg /= cfg' + !cfgSlow = slowNetworkConfig cfg' + when changed $ writeTVar useNetworkConfig (cfgSlow, cfg') + when (socksProxy cfg /= socksProxy cfg') $ writeTVar proxySessTs ts + pure (True, changed) + unless ok $ throwE $ CMD PROHIBITED "setNetworkConfig" + when changed $ liftIO $ reconnectAllServers c setUserNetworkInfo :: AgentClient -> UserNetworkInfo -> IO () setUserNetworkInfo c@AgentClient {userNetworkInfo, userNetworkUpdated} ni = withAgentEnv' c $ do @@ -772,13 +778,19 @@ deleteUser' c@AgentClient {smpServersStats, xftpServersStats} userId delSMPQueue whenM (withStore' c (`deleteUserWithoutConns` userId)) . 
atomically $ writeTBQueue (subQ c) ("", "", AEvt SAENone $ DEL_USER userId) --- TODO [certs rcv] should fail enabling if per-connection isolation is set setUserService' :: AgentClient -> UserId -> Bool -> AM () setUserService' c userId enable = do - wasEnabled <- liftIO $ fromMaybe False <$> TM.lookupIO userId (useClientServices c) - when (enable /= wasEnabled) $ do - atomically $ TM.insert userId enable $ useClientServices c - unless enable $ withStore' c (`deleteClientServices` userId) + (ok, changed) <- atomically $ do + (cfg, _) <- readTVar $ useNetworkConfig c + if enable && sessionMode cfg == TSMEntity + then pure (False, False) + else do + wasEnabled <- fromMaybe False <$> TM.lookup userId (useClientServices c) + let changed = enable /= wasEnabled + when changed $ TM.insert userId enable $ useClientServices c + pure (True, changed) + unless ok $ throwE $ CMD PROHIBITED "setUserService" {- enabling services is rejected while the session mode is TSMEntity; the error context names this operation -} + when (changed && not enable) $ withStore' c (`deleteClientServices` userId) newConnAsync :: ConnectionModeI c => AgentClient -> UserId -> ACorrId -> Bool -> SConnectionMode c -> CR.InitialKeys -> SubscriptionMode -> AM ConnId newConnAsync c userId corrId enableNtfs cMode pqInitKeys subMode = do diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 68d7ef62b2..e4324e088b 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -500,7 +500,6 @@ data UserNetworkType = UNNone | UNCellular | UNWifi | UNEthernet | UNOther deriving (Eq, Show) -- | Creates an SMP agent client instance that receives commands and sends responses via 'TBQueue's. 
--- TODO [certs rcv] should fail if both per-connection isolation is set and any users use services newAgentClient :: Int -> InitialAgentServers -> UTCTime -> Map (Maybe SMPServer) (Maybe SystemSeconds) -> Env -> IO AgentClient newAgentClient clientId InitialAgentServers {smp, ntf, xftp, netCfg, useServices, presetDomains, presetServers} currentTs notices agentEnv = do let cfg = config agentEnv @@ -749,7 +748,7 @@ smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm atomically $ SS.setSessionId tSess (sessionId $ thParams smp) $ currentSubs c updateClientService service smp pure SMPConnectedClient {connectedClient = smp, proxiedRelays = prs} - -- TODO [certs rcv] this should differentiate between service ID just set and service ID changed, and in the latter case disassociate the queue + -- TODO [certs rcv] this should differentiate between service ID just set and service ID changed, and in the latter case disassociate the queues updateClientService service smp = case (service, smpClientService smp) of (Just (_, serviceId_), Just THClientService {serviceId}) | serviceId_ /= Just serviceId -> withStore' c $ \db -> setClientServiceId db userId srv serviceId diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index b519f381ea..6e42aac9dd 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -487,7 +487,6 @@ createNewConn :: DB.Connection -> TVar ChaChaDRG -> ConnData -> SConnectionMode createNewConn db gVar cData cMode = do fst <$$> createConn_ gVar cData (\connId -> createConnRecord db connId cData cMode) --- TODO [certs rcv] store clientServiceId from NewRcvQueue updateNewConnRcv :: DB.Connection -> ConnId -> NewRcvQueue -> SubscriptionMode -> IO (Either StoreError RcvQueue) updateNewConnRcv db connId rq subMode = getConn db connId $>>= \case @@ -577,7 +576,6 @@ upgradeRcvConnToDuplex db connId sq = (SomeConn _ 
RcvConnection {}) -> Right <$> addConnSndQueue_ db connId sq (SomeConn c _) -> pure . Left . SEBadConnType "upgradeRcvConnToDuplex" $ connType c --- TODO [certs rcv] store clientServiceId from NewRcvQueue upgradeSndConnToDuplex :: DB.Connection -> ConnId -> NewRcvQueue -> SubscriptionMode -> IO (Either StoreError RcvQueue) upgradeSndConnToDuplex db connId rq subMode = getConn db connId >>= \case @@ -585,7 +583,6 @@ upgradeSndConnToDuplex db connId rq subMode = Right (SomeConn c _) -> pure . Left . SEBadConnType "upgradeSndConnToDuplex" $ connType c _ -> pure $ Left SEConnNotFound --- TODO [certs rcv] store clientServiceId from NewRcvQueue addConnRcvQueue :: DB.Connection -> ConnId -> NewRcvQueue -> SubscriptionMode -> IO (Either StoreError RcvQueue) addConnRcvQueue db connId rq subMode = getConn db connId >>= \case @@ -2500,7 +2497,6 @@ toRcvQueue (Just shortLinkId, Just shortLinkKey, Just linkPrivSigKey, Just linkEncFixedData) -> Just ShortLinkCreds {shortLinkId, shortLinkKey, linkPrivSigKey, linkEncFixedData} _ -> Nothing enableNtfs = maybe True unBI enableNtfs_ - -- TODO [certs rcv] read client service in RcvQueue {userId, connId, server, rcvId, rcvPrivateKey, rcvDhSecret, e2ePrivKey, e2eDhSecret, sndId, queueMode, shortLink, rcvServiceAssoc, status, enableNtfs, clientNoticeId, dbQueueId, primary, dbReplaceQueueId, rcvSwchStatus, smpClientVersion, clientNtfCreds, deleteErrors} -- | returns all connection queue credentials, the first queue is the primary one diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index 58ffd14189..4d4086cfd3 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -781,7 +781,7 @@ temporaryClientError = \case smpClientServiceError :: SMPClientError -> Bool smpClientServiceError = \case PCEServiceUnavailable -> True - PCETransportError (TEHandshake BAD_SERVICE) -> True -- TODO [certs] this error may be temporary, so we should possibly resubscribe. 
+ PCETransportError (TEHandshake BAD_SERVICE) -> True -- TODO [certs rcv] this error may be temporary, so we should possibly resubscribe. PCEProtocolError SERVICE -> True PCEProtocolError (PROXY (BROKER NO_SERVICE)) -> True -- for completeness, it cannot happen. _ -> False diff --git a/src/Simplex/Messaging/Notifications/Server.hs b/src/Simplex/Messaging/Notifications/Server.hs index f06e9c7b1d..143d417c60 100644 --- a/src/Simplex/Messaging/Notifications/Server.hs +++ b/src/Simplex/Messaging/Notifications/Server.hs @@ -573,7 +573,7 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = forM_ (L.nonEmpty $ mapMaybe (\(nId, err) -> (nId,) <$> queueSubErrorStatus err) $ L.toList errs) $ \subStatuses -> do updated <- batchUpdateSrvSubErrors st srv subStatuses logSubErrors srv subStatuses updated - -- TODO [certs] resubscribe queues with statuses NSErr and NSService + -- TODO [certs rcv] resubscribe queues with statuses NSErr and NSService CAServiceDisconnected srv serviceSub -> logNote $ "SMP server service disconnected " <> showService srv serviceSub CAServiceSubscribed srv serviceSub@(ServiceSub _ expected _) (ServiceSub _ n _) -- TODO [certs rcv] compare hash @@ -603,7 +603,7 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = queueSubErrorStatus :: SMPClientError -> Maybe NtfSubStatus queueSubErrorStatus = \case PCEProtocolError AUTH -> Just NSAuth - -- TODO [certs] we could allow making individual subscriptions within service session to handle SERVICE error. + -- TODO [certs rcv] we could allow making individual subscriptions within service session to handle SERVICE error. -- This would require full stack changes in SMP server, SMP client and SMP service agent. 
PCEProtocolError SERVICE -> Just NSService PCEProtocolError e -> updateErr "SMP error " e diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index a05743a06b..0598f3c539 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -923,7 +923,7 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt putSubscribersInfo protoName ServerSubscribers {queueSubscribers, subClients} showIds = do activeSubs <- getSubscribedClients queueSubscribers hPutStrLn h $ protoName <> " subscriptions: " <> show (M.size activeSubs) - -- TODO [certs] service subscriptions + -- TODO [certs rcv] service subscriptions clnts <- countSubClients activeSubs hPutStrLn h $ protoName <> " subscribed clients: " <> show (IS.size clnts) <> (if showIds then " " <> show (IS.toList clnts) else "") clnts' <- readTVarIO subClients diff --git a/src/Simplex/Messaging/Server/Main.hs b/src/Simplex/Messaging/Server/Main.hs index 64d18088de..7de966c36b 100644 --- a/src/Simplex/Messaging/Server/Main.hs +++ b/src/Simplex/Messaging/Server/Main.hs @@ -556,7 +556,7 @@ smpServerCLI_ generateSite serveStaticFiles attachStaticFiles cfgPath logPath = mkTransportServerConfig (fromMaybe False $ iniOnOff "TRANSPORT" "log_tls_errors" ini) (Just $ alpnSupportedSMPHandshakes <> httpALPN) - (fromMaybe True $ iniOnOff "TRANSPORT" "accept_service_credentials" ini), -- TODO [certs] remove this option + (fromMaybe True $ iniOnOff "TRANSPORT" "accept_service_credentials" ini), -- TODO [certs rcv] remove this option controlPort = eitherToMaybe $ T.unpack <$> lookupValue "TRANSPORT" "control_port" ini, smpAgentCfg = defaultSMPClientAgentConfig diff --git a/src/Simplex/Messaging/Transport.hs b/src/Simplex/Messaging/Transport.hs index 2d959410da..a14118ce4d 100644 --- a/src/Simplex/Messaging/Transport.hs +++ b/src/Simplex/Messaging/Transport.hs @@ -560,7 +560,7 @@ data SMPClientHandshake = SMPClientHandshake keyHash :: C.KeyHash, -- | pub key to agree 
shared secret for entity ID encryption, shared secret for command authorization is agreed using per-queue keys. authPubKey :: Maybe C.PublicKeyX25519, - -- TODO [certs] remove proxyServer, as serviceInfo includes it as clientRole + -- TODO [certs rcv] remove proxyServer, as serviceInfo includes it as clientRole -- | Whether connecting client is a proxy server (send from SMP v12). -- This property, if True, disables additional transport encrytion inside TLS. -- (Proxy server connection already has additional encryption, so this layer is not needed there). diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 7f9641a5b5..f3f7e817c7 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -3607,7 +3607,7 @@ testTwoUsers = withAgentClients2 $ \a b -> do exchangeGreetings a bId1' b aId1' a `hasClients` 1 b `hasClients` 1 - liftIO $ setNetworkConfig a nc {sessionMode = TSMEntity} + setNetworkConfig a nc {sessionMode = TSMEntity} liftIO $ threadDelay 250000 ("", "", DOWN _ _) <- nGet a ("", "", UP _ _) <- nGet a @@ -3617,7 +3617,7 @@ testTwoUsers = withAgentClients2 $ \a b -> do exchangeGreetingsMsgId 4 a bId1 b aId1 exchangeGreetingsMsgId 4 a bId1' b aId1' liftIO $ threadDelay 250000 - liftIO $ setNetworkConfig a nc {sessionMode = TSMUser} + setNetworkConfig a nc {sessionMode = TSMUser} liftIO $ threadDelay 250000 ("", "", DOWN _ _) <- nGet a ("", "", DOWN _ _) <- nGet a @@ -3632,7 +3632,7 @@ testTwoUsers = withAgentClients2 $ \a b -> do exchangeGreetings a bId2' b aId2' a `hasClients` 2 b `hasClients` 1 - liftIO $ setNetworkConfig a nc {sessionMode = TSMEntity} + setNetworkConfig a nc {sessionMode = TSMEntity} liftIO $ threadDelay 250000 ("", "", DOWN _ _) <- nGet a ("", "", DOWN _ _) <- nGet a @@ -3646,7 +3646,7 @@ testTwoUsers = withAgentClients2 $ \a b -> do exchangeGreetingsMsgId 4 a bId2 b aId2 exchangeGreetingsMsgId 4 a bId2' b aId2' liftIO $ threadDelay 250000 - liftIO $ 
setNetworkConfig a nc {sessionMode = TSMUser} + setNetworkConfig a nc {sessionMode = TSMUser} liftIO $ threadDelay 250000 ("", "", DOWN _ _) <- nGet a ("", "", DOWN _ _) <- nGet a @@ -3695,7 +3695,7 @@ testClientServiceConnection ps = do getSMPAgentClient' :: Int -> AgentConfig -> InitialAgentServers -> String -> IO AgentClient getSMPAgentClient' clientId cfg' initServers dbPath = do Right st <- liftIO $ createStore dbPath - c <- getSMPAgentClient_ clientId cfg' initServers st False + Right c <- runExceptT $ getSMPAgentClient_ clientId cfg' initServers st False when (dbNew st) $ insertUser st pure c diff --git a/tests/CoreTests/BatchingTests.hs b/tests/CoreTests/BatchingTests.hs index d013c0db4c..8a285721b6 100644 --- a/tests/CoreTests/BatchingTests.hs +++ b/tests/CoreTests/BatchingTests.hs @@ -334,7 +334,7 @@ randomSUBv6 = randomSUB_ C.SEd25519 minServerSMPRelayVersion randomSUB :: ByteString -> IO (Either TransportError (Maybe TAuthorizations, ByteString)) randomSUB = randomSUB_ C.SEd25519 currentClientSMPRelayVersion --- TODO [certs] test with the additional certificate signature +-- TODO [certs rcv] test with the additional certificate signature randomSUB_ :: (C.AlgorithmI a, C.AuthAlgorithm a) => C.SAlgorithm a -> VersionSMP -> ByteString -> IO (Either TransportError (Maybe TAuthorizations, ByteString)) randomSUB_ a v sessId = do g <- C.newRandom diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs index d3e1b21d08..dd97781c2c 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -714,7 +714,7 @@ testServiceDeliverSubscribe = runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do let idsHash = queueIdsHash [rId] Resp "10" NoEntity (ERR (CMD NO_AUTH)) <- signSendRecv sh aServicePK ("10", NoEntity, SUBS 1 idsHash) - signSend_ sh aServicePK Nothing ("11", serviceId, SUBS 1 idsHash) -- TODO [certs rcv] compute and compare hashes + signSend_ sh aServicePK Nothing ("11", serviceId, SUBS 1 idsHash) [mId3] <- fmap catMaybes $ receiveInAnyOrder -- race 
between SOKS and MSG, clients can handle it @@ -808,7 +808,7 @@ testServiceUpgradeAndDowngrade = runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do let idsHash = queueIdsHash [rId, rId2, rId3] - signSend_ sh aServicePK Nothing ("14", serviceId, SUBS 3 idsHash) -- TODO [certs rcv] compute hash + signSend_ sh aServicePK Nothing ("14", serviceId, SUBS 3 idsHash) [(rKey3_1, rId3_1, mId3_1), (rKey3_2, rId3_2, mId3_2)] <- fmap catMaybes $ receiveInAnyOrder -- race between SOKS and MSG, clients can handle it @@ -1371,7 +1371,7 @@ testMessageServiceNotifications = deliverMessage rh rId'' rKey'' sh sId'' sKey'' nh2 "connection 2" dec'' -- -- another client makes service subscription let idsHash = queueIdsHash [nId', nId''] - Resp "12" serviceId5 (SOKS 2 idsHash') <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS 2 idsHash) -- TODO [certs rcv] compute and compare hashes + Resp "12" serviceId5 (SOKS 2 idsHash') <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS 2 idsHash) idsHash' `shouldBe` idsHash serviceId5 `shouldBe` serviceId Resp "" serviceId6 (ENDS 2) <- tGet1 nh2 From 38e899957f5c5618f46c4c70eb37d4a683d18917 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 27 Nov 2025 21:37:19 +0000 Subject: [PATCH 04/91] agent: service subscription events (#1671) * agent: use server keyhash when loading service record * agent: process queue/service associations with delayed subscription results * agent: service subscription events --- src/Simplex/Messaging/Agent.hs | 67 ++++++++++--------- src/Simplex/Messaging/Agent/Client.hs | 41 +++++++----- src/Simplex/Messaging/Agent/Protocol.hs | 11 +++ .../Messaging/Agent/Store/AgentStore.hs | 15 +++-- src/Simplex/Messaging/Client.hs | 5 +- src/Simplex/Messaging/Notifications/Server.hs | 2 +- src/Simplex/Messaging/Protocol.hs | 37 +++++++--- src/Simplex/Messaging/Server.hs | 2 +- tests/AgentTests/EqInstances.hs | 5 -- tests/AgentTests/FunctionalAPITests.hs | 16 +++-- 
tests/SMPProxyTests.hs | 4 +- tests/ServerTests.hs | 4 +- 12 files changed, 125 insertions(+), 84 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 18bc0afbb9..18e9d04659 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -194,7 +194,7 @@ import Simplex.Messaging.Agent.Store.Entity import Simplex.Messaging.Agent.Store.Interface (closeDBStore, execSQL, getCurrentMigrations) import Simplex.Messaging.Agent.Store.Shared (UpMigration (..), upMigration) import qualified Simplex.Messaging.Agent.TSessionSubs as SS -import Simplex.Messaging.Client (NetworkRequestMode (..), SMPClientError, ServerTransmission (..), ServerTransmissionBatch, TransportSessionMode (..), nonBlockingWriteTBQueue, smpErrorClientNotice, temporaryClientError, unexpectedResponse) +import Simplex.Messaging.Client (NetworkRequestMode (..), ProtocolClientError (..), SMPClientError, ServerTransmission (..), ServerTransmissionBatch, TransportSessionMode (..), nonBlockingWriteTBQueue, smpErrorClientNotice, temporaryClientError, unexpectedResponse) import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Crypto.File (CryptoFile, CryptoFileArgs) import Simplex.Messaging.Crypto.Ratchet (PQEncryption, PQSupport (..), pattern PQEncOff, pattern PQEncOn, pattern PQSupportOff, pattern PQSupportOn) @@ -222,6 +222,7 @@ import Simplex.Messaging.Protocol SParty (..), SProtocolType (..), ServiceSub (..), + ServiceSubResult, SndPublicAuthKey, SubscriptionMode (..), UserProtocol, @@ -232,7 +233,7 @@ import qualified Simplex.Messaging.Protocol as SMP import Simplex.Messaging.ServiceScheme (ServiceScheme (..)) import Simplex.Messaging.SystemTime import qualified Simplex.Messaging.TMap as TM -import Simplex.Messaging.Transport (SMPVersion) +import Simplex.Messaging.Transport (SMPVersion, THClientService' (..), THandleAuth (..), THandleParams (..)) import Simplex.Messaging.Util import Simplex.Messaging.Version import 
Simplex.RemoteControl.Client @@ -502,7 +503,7 @@ resubscribeConnections :: AgentClient -> [ConnId] -> AE (Map ConnId (Either Agen resubscribeConnections c = withAgentEnv c . resubscribeConnections' c {-# INLINE resubscribeConnections #-} -subscribeClientServices :: AgentClient -> UserId -> AE (Map SMPServer (Either AgentErrorType ServiceSub)) +subscribeClientServices :: AgentClient -> UserId -> AE (Map SMPServer (Either AgentErrorType ServiceSubResult)) subscribeClientServices c = withAgentEnv c . subscribeClientServices' c {-# INLINE subscribeClientServices #-} @@ -1355,11 +1356,7 @@ toConnResult connId rs = case M.lookup connId rs of Just (Left e) -> throwE e _ -> throwE $ INTERNAL $ "no result for connection " <> B.unpack connId -type QCmdResult a = (QueueStatus, Either AgentErrorType a) - -type QDelResult = QCmdResult () - -type QSubResult = QCmdResult (Maybe SMP.ServiceId) +type QCmdResult = (QueueStatus, Either AgentErrorType ()) subscribeConnections' :: AgentClient -> [ConnId] -> AM (Map ConnId (Either AgentErrorType ())) subscribeConnections' _ [] = pure M.empty @@ -1367,16 +1364,15 @@ subscribeConnections' c connIds = subscribeConnections_ c . zip connIds =<< with subscribeConnections_ :: AgentClient -> [(ConnId, Either StoreError SomeConnSub)] -> AM (Map ConnId (Either AgentErrorType ())) subscribeConnections_ c conns = do - -- TODO [certs rcv] - it should exclude connections already associated, and then if some don't deliver any response they may be unassociated let (subRs, cs) = foldr partitionResultsConns ([], []) conns resumeDelivery cs resumeConnCmds c $ map fst cs + -- queue/service association is handled in the client rcvRs <- lift $ connResults <$> subscribeQueues c False (concatMap rcvQueues cs) - rcvRs' <- storeClientServiceAssocs rcvRs ns <- asks ntfSupervisor - lift $ whenM (liftIO $ hasInstantNotifications ns) . void . forkIO . void $ sendNtfCreate ns rcvRs' cs + lift $ whenM (liftIO $ hasInstantNotifications ns) . void . forkIO . 
void $ sendNtfCreate ns rcvRs cs -- union is left-biased - let rs = rcvRs' `M.union` subRs + let rs = rcvRs `M.union` subRs notifyResultError rs pure rs where @@ -1400,24 +1396,21 @@ subscribeConnections_ c conns = do _ -> Left $ INTERNAL "unexpected queue status" rcvQueues :: (ConnId, SomeConnSub) -> [RcvQueueSub] rcvQueues (_, SomeConn _ conn) = connRcvQueues conn - connResults :: [(RcvQueueSub, Either AgentErrorType (Maybe SMP.ServiceId))] -> Map ConnId (Either AgentErrorType (Maybe SMP.ServiceId)) + connResults :: [(RcvQueueSub, Either AgentErrorType (Maybe SMP.ServiceId))] -> Map ConnId (Either AgentErrorType ()) connResults = M.map snd . foldl' addResult M.empty where -- collects results by connection ID - addResult :: Map ConnId QSubResult -> (RcvQueueSub, Either AgentErrorType (Maybe SMP.ServiceId)) -> Map ConnId QSubResult - addResult rs (RcvQueueSub {connId, status}, r) = M.alter (combineRes (status, r)) connId rs + addResult :: Map ConnId QCmdResult -> (RcvQueueSub, Either AgentErrorType (Maybe SMP.ServiceId)) -> Map ConnId QCmdResult + addResult rs (RcvQueueSub {connId, status}, r) = M.alter (combineRes (status, () <$ r)) connId rs -- combines two results for one connection, by using only Active queues (if there is at least one Active queue) - combineRes :: QSubResult -> Maybe QSubResult -> Maybe QSubResult + combineRes :: QCmdResult -> Maybe QCmdResult -> Maybe QCmdResult combineRes r' (Just r) = Just $ if order r <= order r' then r else r' combineRes r' _ = Just r' - order :: QSubResult -> Int + order :: QCmdResult -> Int order (Active, Right _) = 1 order (Active, _) = 2 order (_, Right _) = 3 order _ = 4 - -- TODO [certs rcv] store associations of queues with client service ID - storeClientServiceAssocs :: Map ConnId (Either AgentErrorType (Maybe SMP.ServiceId)) -> AM (Map ConnId (Either AgentErrorType ())) - storeClientServiceAssocs = pure . 
M.map (() <$) sendNtfCreate :: NtfSupervisor -> Map ConnId (Either AgentErrorType ()) -> [(ConnId, SomeConnSub)] -> AM' () sendNtfCreate ns rcvRs cs = do let oks = M.keysSet $ M.filter (either temporaryAgentError $ const True) rcvRs @@ -1522,14 +1515,14 @@ resubscribeConnections' c connIds = do rqs' -> anyM $ map (atomically . hasActiveSubscription c) rqs' -- TODO [certs rcv] compare hash. possibly, it should return both expected and returned counts -subscribeClientServices' :: AgentClient -> UserId -> AM (Map SMPServer (Either AgentErrorType ServiceSub)) +subscribeClientServices' :: AgentClient -> UserId -> AM (Map SMPServer (Either AgentErrorType ServiceSubResult)) subscribeClientServices' c userId = ifM useService subscribe $ throwError $ CMD PROHIBITED "no user service allowed" where useService = liftIO $ (Just True ==) <$> TM.lookupIO userId (useClientServices c) subscribe = do srvs <- withStore' c (`getClientServiceServers` userId) - lift $ M.fromList <$> mapConcurrently (\(srv, ServiceSub _ n idsHash) -> fmap (srv,) $ tryAllErrors' $ subscribeClientService c userId srv n idsHash) srvs + lift $ M.fromList <$> mapConcurrently (\(srv, ServiceSub _ n idsHash) -> fmap (srv,) $ tryAllErrors' $ subscribeClientService c False userId srv n idsHash) srvs -- requesting messages sequentially, to reduce memory usage getConnectionMessages' :: AgentClient -> NonEmpty ConnMsgReq -> AM' (NonEmpty (Either AgentErrorType (Maybe SMPMsgMeta))) @@ -2383,13 +2376,13 @@ deleteConnQueues c nm waitDelivery ntf rqs = do connResults = M.map snd . 
foldl' addResult M.empty where -- collects results by connection ID - addResult :: Map ConnId QDelResult -> (RcvQueue, Either AgentErrorType ()) -> Map ConnId QDelResult + addResult :: Map ConnId QCmdResult -> (RcvQueue, Either AgentErrorType ()) -> Map ConnId QCmdResult addResult rs (RcvQueue {connId, status}, r) = M.alter (combineRes (status, r)) connId rs -- combines two results for one connection, by prioritizing errors in Active queues - combineRes :: QDelResult -> Maybe QDelResult -> Maybe QDelResult + combineRes :: QCmdResult -> Maybe QCmdResult -> Maybe QCmdResult combineRes r' (Just r) = Just $ if order r <= order r' then r else r' combineRes r' _ = Just r' - order :: QDelResult -> Int + order :: QCmdResult -> Int order (Active, Left _) = 1 order (_, Left _) = 2 order _ = 3 @@ -2840,11 +2833,17 @@ data ACKd = ACKd | ACKPending -- It cannot be finally, as sometimes it needs to be ACK+DEL, -- and sometimes ACK has to be sent from the consumer. processSMPTransmissions :: AgentClient -> ServerTransmissionBatch SMPVersion ErrorType BrokerMsg -> AM' () -processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId, ts) = do +processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), THandleParams {thAuth, sessionId = sessId}, ts) = do upConnIds <- newTVarIO [] + serviceRQs <- newTVarIO ([] :: [RcvQueue]) forM_ ts $ \(entId, t) -> case t of STEvent msgOrErr - | entId == SMP.NoEntity -> pure () -- TODO [certs rcv] process SALL + | entId == SMP.NoEntity -> case msgOrErr of + Right msg -> case msg of + SMP.ALLS -> notifySub c $ SERVICE_ALL srv + SMP.ERR e -> notifyErr "" $ PCEProtocolError e + _ -> logError $ "unexpected event: " <> tshow msg + Left e -> notifyErr "" e | otherwise -> withRcvConn entId $ \rq@RcvQueue {connId} conn -> case msgOrErr of Right msg -> runProcessSMP rq conn (toConnData conn) msg Left e -> lift $ do @@ -2853,11 +2852,10 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId 
STResponse (Cmd SRecipient cmd) respOrErr -> withRcvConn entId $ \rq conn -> case cmd of SMP.SUB -> case respOrErr of - Right SMP.OK -> liftIO $ processSubOk rq upConnIds - -- TODO [certs rcv] associate queue with the service - Right (SMP.SOK _serviceId_) -> liftIO $ processSubOk rq upConnIds + Right SMP.OK -> liftIO $ processSubOk rq upConnIds serviceRQs Nothing + Right (SMP.SOK serviceId_) -> liftIO $ processSubOk rq upConnIds serviceRQs serviceId_ Right msg@SMP.MSG {} -> do - liftIO $ processSubOk rq upConnIds -- the connection is UP even when processing this particular message fails + liftIO $ processSubOk rq upConnIds serviceRQs Nothing -- the connection is UP even when processing this particular message fails runProcessSMP rq conn (toConnData conn) msg Right r -> lift $ processSubErr rq $ unexpectedResponse r Left e -> lift $ unless (temporaryClientError e) $ processSubErr rq e -- timeout/network was already reported @@ -2873,6 +2871,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId unless (null connIds) $ do notify' "" $ UP srv connIds atomically $ incSMPServerStat' c userId srv connSubscribed $ length connIds + readTVarIO serviceRQs >>= processRcvServiceAssocs c where withRcvConn :: SMP.RecipientId -> (forall c. RcvQueue -> Connection c -> AM ()) -> AM' () withRcvConn rId a = do @@ -2882,11 +2881,13 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), _v, sessId tryAllErrors' (a rq conn) >>= \case Left e -> notify' connId (ERR e) Right () -> pure () - processSubOk :: RcvQueue -> TVar [ConnId] -> IO () - processSubOk rq@RcvQueue {connId} upConnIds = + processSubOk :: RcvQueue -> TVar [ConnId] -> TVar [RcvQueue] -> Maybe SMP.ServiceId -> IO () + processSubOk rq@RcvQueue {connId} upConnIds serviceRQs serviceId_ = atomically . 
whenM (isPendingSub rq) $ do SS.addActiveSub tSess sessId rq $ currentSubs c modifyTVar' upConnIds (connId :) + when (isJust serviceId_ && serviceId_ == clientServiceId_) $ modifyTVar' serviceRQs (rq :) + clientServiceId_ = (\THClientService {serviceId} -> serviceId) <$> (clientService =<< thAuth) processSubErr :: RcvQueue -> SMPClientError -> AM' () processSubErr rq@RcvQueue {connId} e = do atomically . whenM (isPendingSub rq) $ diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index e4324e088b..77d73027d7 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -50,6 +50,7 @@ module Simplex.Messaging.Agent.Client subscribeQueues, subscribeUserServerQueues, subscribeClientService, + processRcvServiceAssocs, processClientNotices, getQueueMessage, decryptSMPMessage, @@ -280,6 +281,7 @@ import Simplex.Messaging.Protocol SMPMsgMeta (..), SProtocolType (..), ServiceSub (..), + ServiceSubResult (..), SndPublicAuthKey, SubscriptionMode (..), NewNtfCreds (..), @@ -292,6 +294,7 @@ import Simplex.Messaging.Protocol XFTPServerWithAuth, pattern NoEntity, senderCanSecure, + serviceSubResult, ) import qualified Simplex.Messaging.Protocol as SMP import Simplex.Messaging.Protocol.Types @@ -785,6 +788,7 @@ smpClientDisconnected c@AgentClient {active, smpClients, smpProxiedRelays} tSess serverDown (qs, conns, serviceSub_) = whenM (readTVarIO active) $ do notifySub c $ hostEvent' DISCONNECT client unless (null conns) $ notifySub c $ DOWN srv conns + mapM_ (notifySub c . SERVICE_DOWN srv) serviceSub_ unless (null qs && isNothing serviceSub_) $ do releaseGetLocksIO c qs mode <- getSessionModeIO c @@ -1514,7 +1518,7 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl newErr = throwE . BROKER (B.unpack $ strEncode srv) . UNEXPECTED . 
("Create queue: " <>) processSubResults :: AgentClient -> SMPTransportSession -> SessionId -> Maybe ServiceId -> NonEmpty (RcvQueueSub, Either SMPClientError (Maybe ServiceId)) -> STM ([RcvQueueSub], [(RcvQueueSub, Maybe ClientNotice)]) -processSubResults c tSess@(userId, srv, _) sessId smpServiceId rs = do +processSubResults c tSess@(userId, srv, _) sessId serviceId_ rs = do pending <- SS.getPendingSubs tSess $ currentSubs c let (failed, subscribed@(qs, sQs), notices, ignored) = foldr (partitionResults pending) (M.empty, ([], []), [], 0) rs unless (M.null failed) $ do @@ -1541,10 +1545,10 @@ processSubResults c tSess@(userId, srv, _) sessId smpServiceId rs = do | otherwise -> (failed', subscribed, notices, ignored) where failed' = M.insert rcvId e failed - Right serviceId_ + Right serviceId_' | rcvId `M.member` pendingSubs -> - let subscribed' = case (smpServiceId, serviceId_, pendingSS) of - (Just sId, Just sId', Just ServiceSub {serviceId}) | sId == sId' && sId == serviceId -> (qs, rq : sQs) + let subscribed' = case (serviceId_, serviceId_', pendingSS) of + (Just sId, Just sId', Just ServiceSub {smpServiceId}) | sId == sId' && sId == smpServiceId -> (qs, rq : sQs) _ -> (rq : qs, sQs) in (failed, subscribed', notices', ignored) | otherwise -> (failed, subscribed, notices', ignored + 1) @@ -1692,7 +1696,8 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c sessId = sessionId $ thParams smp smpServiceId = (\THClientService {serviceId} -> serviceId) <$> smpClientService smp -processRcvServiceAssocs :: AgentClient -> [RcvQueueSub] -> AM' () +processRcvServiceAssocs :: SMPQueue q => AgentClient -> [q] -> AM' () +processRcvServiceAssocs _ [] = pure () processRcvServiceAssocs c serviceQs = withStore' c (`setRcvServiceAssocs` serviceQs) `catchAllErrors'` \e -> do logError $ "processClientNotices error: " <> tshow e @@ -1709,17 +1714,16 @@ processClientNotices c@AgentClient {presetServers} tSess notices = do logError $ "processClientNotices 
error: " <> tshow e notifySub' c "" $ ERR e -resubscribeClientService :: AgentClient -> SMPTransportSession -> ServiceSub -> AM ServiceSub -resubscribeClientService c tSess (ServiceSub _ n idsHash) = - withServiceClient c tSess $ \smp _ -> do - subscribeClientService_ c tSess smp n idsHash +resubscribeClientService :: AgentClient -> SMPTransportSession -> ServiceSub -> AM ServiceSubResult +resubscribeClientService c tSess serviceSub = + withServiceClient c tSess $ \smp _ -> subscribeClientService_ c True tSess smp serviceSub -subscribeClientService :: AgentClient -> UserId -> SMPServer -> Int64 -> IdsHash -> AM ServiceSub -subscribeClientService c userId srv n idsHash = +subscribeClientService :: AgentClient -> Bool -> UserId -> SMPServer -> Int64 -> IdsHash -> AM ServiceSubResult +subscribeClientService c withEvent userId srv n idsHash = withServiceClient c tSess $ \smp smpServiceId -> do let serviceSub = ServiceSub smpServiceId n idsHash atomically $ SS.setPendingServiceSub tSess serviceSub $ currentSubs c - subscribeClientService_ c tSess smp n idsHash + subscribeClientService_ c withEvent tSess smp serviceSub where tSess = (userId, srv, Nothing) @@ -1730,14 +1734,15 @@ withServiceClient c tSess action = Just smpServiceId -> action smp smpServiceId Nothing -> throwE PCEServiceUnavailable -subscribeClientService_ :: AgentClient -> SMPTransportSession -> SMPClient -> Int64 -> IdsHash -> ExceptT SMPClientError IO ServiceSub -subscribeClientService_ c tSess smp n idsHash = do - -- TODO [certs rcv] handle error - serviceSub' <- subscribeService smp SMP.SRecipientService n idsHash +subscribeClientService_ :: AgentClient -> Bool -> SMPTransportSession -> SMPClient -> ServiceSub -> ExceptT SMPClientError IO ServiceSubResult +subscribeClientService_ c withEvent tSess@(_, srv, _) smp expected@(ServiceSub _ n idsHash) = do + subscribed <- subscribeService smp SMP.SRecipientService n idsHash let sessId = sessionId $ thParams smp + r = serviceSubResult expected subscribed 
atomically $ whenM (activeClientSession c tSess sessId) $ - SS.setActiveServiceSub tSess sessId serviceSub' $ currentSubs c - pure serviceSub' + SS.setActiveServiceSub tSess sessId subscribed $ currentSubs c + when withEvent $ notifySub c $ SERVICE_UP srv r + pure r activeClientSession :: AgentClient -> SMPTransportSession -> SessionId -> STM Bool activeClientSession c tSess sessId = sameSess <$> tryReadSessVar tSess (smpClients c) diff --git a/src/Simplex/Messaging/Agent/Protocol.hs b/src/Simplex/Messaging/Agent/Protocol.hs index 15d51aed91..d5b35611b6 100644 --- a/src/Simplex/Messaging/Agent/Protocol.hs +++ b/src/Simplex/Messaging/Agent/Protocol.hs @@ -234,6 +234,8 @@ import Simplex.Messaging.Protocol NMsgMeta, ProtocolServer (..), QueueMode (..), + ServiceSub, + ServiceSubResult, SMPClientVersion, SMPServer, SMPServerWithAuth, @@ -388,6 +390,9 @@ data AEvent (e :: AEntity) where DISCONNECT :: AProtocolType -> TransportHost -> AEvent AENone DOWN :: SMPServer -> [ConnId] -> AEvent AENone UP :: SMPServer -> [ConnId] -> AEvent AENone + SERVICE_ALL :: SMPServer -> AEvent AENone -- all service messages are delivered + SERVICE_DOWN :: SMPServer -> ServiceSub -> AEvent AENone + SERVICE_UP :: SMPServer -> ServiceSubResult -> AEvent AENone SWITCH :: QueueDirection -> SwitchPhase -> ConnectionStats -> AEvent AEConn RSYNC :: RatchetSyncState -> Maybe AgentCryptoError -> ConnectionStats -> AEvent AEConn SENT :: AgentMsgId -> Maybe SMPServer -> AEvent AEConn @@ -459,6 +464,9 @@ data AEventTag (e :: AEntity) where DISCONNECT_ :: AEventTag AENone DOWN_ :: AEventTag AENone UP_ :: AEventTag AENone + SERVICE_ALL_ :: AEventTag AENone + SERVICE_DOWN_ :: AEventTag AENone + SERVICE_UP_ :: AEventTag AENone SWITCH_ :: AEventTag AEConn RSYNC_ :: AEventTag AEConn SENT_ :: AEventTag AEConn @@ -514,6 +522,9 @@ aEventTag = \case DISCONNECT {} -> DISCONNECT_ DOWN {} -> DOWN_ UP {} -> UP_ + SERVICE_ALL _ -> SERVICE_ALL_ + SERVICE_DOWN {} -> SERVICE_DOWN_ + SERVICE_UP {} -> SERVICE_UP_ SWITCH 
{} -> SWITCH_ RSYNC {} -> RSYNC_ SENT {} -> SENT_ diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 6e42aac9dd..a732d28d4e 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -419,18 +419,19 @@ createClientService db userId srv (kh, (cert, pk)) = do |] (userId, host srv, port srv, serverKeyHash_, kh, cert, pk) --- TODO [certs rcv] get correct service based on key hash of the server getClientService :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ((C.KeyHash, TLS.Credential), Maybe ServiceId)) getClientService db userId srv = maybeFirstRow toService $ DB.query db [sql| - SELECT service_cert_hash, service_cert, service_priv_key, service_id - FROM client_services - WHERE user_id = ? AND host = ? AND port = ? + SELECT c.service_cert_hash, c.service_cert, c.service_priv_key, c.service_id + FROM client_services c + JOIN servers s ON c.host = s.host AND c.port = s.port + WHERE c.user_id = ? AND c.host = ? AND c.port = ? + AND COALESCE(c.server_key_hash, s.key_hash) = ? |] - (userId, host srv, port srv) + (userId, host srv, port srv, keyHash srv) where toService (kh, cert, pk, serviceId_) = ((kh, (cert, pk)), serviceId_) @@ -2250,12 +2251,12 @@ getUserServerRcvQueueSubs db userId srv onlyNeeded = unsetQueuesToSubscribe :: DB.Connection -> IO () unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = 0 WHERE to_subscribe = 1" -setRcvServiceAssocs :: DB.Connection -> [RcvQueueSub] -> IO () +setRcvServiceAssocs :: SMPQueue q => DB.Connection -> [q] -> IO () setRcvServiceAssocs db rqs = #if defined(dbPostgres) DB.execute db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id IN " $ Only $ In (map queueId rqs) #else - DB.executeMany db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id = " $ map (Only . queueId) rqs + DB.executeMany db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id = ?" 
$ map (Only . queueId) rqs #endif -- * getConn helpers diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index 4d4086cfd3..81e9820a2c 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -251,7 +251,7 @@ type ClientCommand msg = (EntityId, Maybe C.APrivateAuthKey, ProtoCommand msg) -- | Type synonym for transmission from SPM servers. -- Batch response is presented as a single `ServerTransmissionBatch` tuple. -type ServerTransmissionBatch v err msg = (TransportSession msg, Version v, SessionId, NonEmpty (EntityId, ServerTransmission err msg)) +type ServerTransmissionBatch v err msg = (TransportSession msg, THandleParams v 'TClient, NonEmpty (EntityId, ServerTransmission err msg)) data ServerTransmission err msg = STEvent (Either (ProtocolClientError err) msg) @@ -864,8 +864,7 @@ writeSMPMessage :: SMPClient -> RecipientId -> BrokerMsg -> IO () writeSMPMessage c rId msg = atomically $ mapM_ (`writeTBQueue` serverTransmission c [(rId, STEvent (Right msg))]) (msgQ $ client_ c) serverTransmission :: ProtocolClient v err msg -> NonEmpty (RecipientId, ServerTransmission err msg) -> ServerTransmissionBatch v err msg -serverTransmission ProtocolClient {thParams = THandleParams {thVersion, sessionId}, client_ = PClient {transportSession}} ts = - (transportSession, thVersion, sessionId, ts) +serverTransmission ProtocolClient {thParams, client_ = PClient {transportSession}} ts = (transportSession, thParams, ts) -- | Get message from SMP queue. 
The server returns ERR PROHIBITED if a client uses SUB and GET via the same transport connection for the same queue -- diff --git a/src/Simplex/Messaging/Notifications/Server.hs b/src/Simplex/Messaging/Notifications/Server.hs index 143d417c60..67ed89d715 100644 --- a/src/Simplex/Messaging/Notifications/Server.hs +++ b/src/Simplex/Messaging/Notifications/Server.hs @@ -524,7 +524,7 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = NtfPushServer {pushQ} <- asks pushServer stats <- asks serverStats liftIO $ forever $ do - ((_, srv@(SMPServer (h :| _) _ _), _), _thVersion, sessionId, ts) <- atomically $ readTBQueue msgQ + ((_, srv@(SMPServer (h :| _) _ _), _), THandleParams {sessionId}, ts) <- atomically $ readTBQueue msgQ forM ts $ \(ntfId, t) -> case t of STUnexpectedError e -> logError $ "SMP client unexpected error: " <> tshow e -- uncorrelated response, should not happen STResponse {} -> pure () -- it was already reported as timeout error diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index c00899e1c1..a5f94960e2 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -142,6 +142,8 @@ module Simplex.Messaging.Protocol MsgBody, IdsHash (..), ServiceSub (..), + ServiceSubResult (..), + serviceSubResult, queueIdsHash, queueIdHash, MaxMessageLen, @@ -712,7 +714,7 @@ data BrokerMsg where -- v2: MsgId -> SystemTime -> MsgFlags -> MsgBody -> BrokerMsg MSG :: RcvMessage -> BrokerMsg -- sent once delivering messages to SUBS command is complete - SALL :: BrokerMsg + ALLS :: BrokerMsg NID :: NotifierId -> RcvNtfPublicDhKey -> BrokerMsg NMSG :: C.CbNonce -> EncNMsgMeta -> BrokerMsg -- Should include certificate chain @@ -949,7 +951,7 @@ data BrokerMsgTag | SOK_ | SOKS_ | MSG_ - | SALL_ + | ALLS_ | NID_ | NMSG_ | PKEY_ @@ -1042,7 +1044,7 @@ instance Encoding BrokerMsgTag where SOK_ -> "SOK" SOKS_ -> "SOKS" MSG_ -> "MSG" - SALL_ -> "SALL" + ALLS_ -> "ALLS" NID_ -> "NID" NMSG_ -> 
"NMSG" PKEY_ -> "PKEY" @@ -1064,7 +1066,7 @@ instance ProtocolMsgTag BrokerMsgTag where "SOK" -> Just SOK_ "SOKS" -> Just SOKS_ "MSG" -> Just MSG_ - "SALL" -> Just SALL_ + "ALLS" -> Just ALLS_ "NID" -> Just NID_ "NMSG" -> Just NMSG_ "PKEY" -> Just PKEY_ @@ -1468,10 +1470,29 @@ type MsgId = ByteString type MsgBody = ByteString data ServiceSub = ServiceSub - { serviceId :: ServiceId, + { smpServiceId :: ServiceId, smpQueueCount :: Int64, smpQueueIdsHash :: IdsHash } + deriving (Eq, Show) + +data ServiceSubResult = ServiceSubResult (Maybe ServiceSubError) ServiceSub + deriving (Eq, Show) + +data ServiceSubError + = SSErrorServiceId {expectedServiceId :: ServiceId, subscribedServiceId :: ServiceId} + | SSErrorQueueCount {expectedQueueCount :: Int64, subscribedQueueCount :: Int64} + | SSErrorQueueIdsHash {expectedQueueIdsHash :: IdsHash, subscribedQueueIdsHash :: IdsHash} + deriving (Eq, Show) + +serviceSubResult :: ServiceSub -> ServiceSub -> ServiceSubResult +serviceSubResult s s' = ServiceSubResult subError_ s' + where + subError_ + | smpServiceId s /= smpServiceId s' = Just $ SSErrorServiceId (smpServiceId s) (smpServiceId s') + | smpQueueCount s /= smpQueueCount s' = Just $ SSErrorQueueCount (smpQueueCount s) (smpQueueCount s') + | smpQueueIdsHash s /= smpQueueIdsHash s' = Just $ SSErrorQueueIdsHash (smpQueueIdsHash s) (smpQueueIdsHash s') + | otherwise = Nothing newtype IdsHash = IdsHash {unIdsHash :: BS.ByteString} deriving (Eq, Show) @@ -1897,7 +1918,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where | otherwise -> e (SOKS_, ' ', n) MSG RcvMessage {msgId, msgBody = EncRcvMsgBody body} -> e (MSG_, ' ', msgId, Tail body) - SALL -> e SALL_ + ALLS -> e ALLS_ NID nId srvNtfDh -> e (NID_, ' ', nId, srvNtfDh) NMSG nmsgNonce encNMsgMeta -> e (NMSG_, ' ', nmsgNonce, encNMsgMeta) PKEY sid vr certKey -> e (PKEY_, ' ', sid, vr, certKey) @@ -1928,7 +1949,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where MSG . 
RcvMessage msgId <$> bodyP where bodyP = EncRcvMsgBody . unTail <$> smpP - SALL_ -> pure SALL + ALLS_ -> pure ALLS IDS_ | v >= newNtfCredsSMPVersion -> ids smpP smpP smpP smpP | v >= serviceCertsSMPVersion -> ids smpP smpP smpP nothing @@ -1981,7 +2002,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where PONG -> noEntityMsg PKEY {} -> noEntityMsg RRES _ -> noEntityMsg - SALL -> noEntityMsg + ALLS -> noEntityMsg -- other broker responses must have queue ID _ | B.null entId -> Left $ CMD NO_ENTITY diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 0598f3c539..0fc15b3e33 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -1806,7 +1806,7 @@ client where deliverServiceMessages expectedCnt = do (qCnt, _msgCnt, _dupCnt, _errCnt) <- foldRcvServiceMessages ms serviceId deliverQueueMsg (0, 0, 0, 0) - atomically $ writeTBQueue msgQ [(NoCorrId, NoEntity, SALL)] + atomically $ writeTBQueue msgQ [(NoCorrId, NoEntity, ALLS)] -- TODO [certs rcv] compare with expected logNote $ "Service subscriptions for " <> tshow serviceId <> " (" <> tshow qCnt <> " queues)" deliverQueueMsg :: (Int, Int, Int, Int) -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO (Int, Int, Int, Int) diff --git a/tests/AgentTests/EqInstances.hs b/tests/AgentTests/EqInstances.hs index e142c61776..63c493861f 100644 --- a/tests/AgentTests/EqInstances.hs +++ b/tests/AgentTests/EqInstances.hs @@ -8,7 +8,6 @@ import Data.Type.Equality import Simplex.Messaging.Agent.Protocol (ConnLinkData (..), OwnerAuth (..), UserContactData (..), UserLinkData (..)) import Simplex.Messaging.Agent.Store import Simplex.Messaging.Client (ProxiedRelay (..)) -import Simplex.Messaging.Protocol (ServiceSub (..)) instance (Eq rq, Eq sq) => Eq (SomeConn' rq sq) where SomeConn d c == SomeConn d' c' = case testEquality d d' of @@ -48,7 +47,3 @@ deriving instance Eq OwnerAuth deriving instance Show ProxiedRelay deriving instance Eq ProxiedRelay - 
-deriving instance Show ServiceSub - -deriving instance Eq ServiceSub diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index f3f7e817c7..cb74bc0b67 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -3668,27 +3668,35 @@ testTwoUsers = withAgentClients2 $ \a b -> do testClientServiceConnection :: HasCallStack => (ASrvTransport, AStoreType) -> IO () testClientServiceConnection ps = do - (sId, uId) <- withSmpServerStoreLogOn ps testPort $ \_ -> do + ((sId, uId), qIdHash) <- withSmpServerStoreLogOn ps testPort $ \_ -> do conns@(sId, uId) <- withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do conns@(sId, uId) <- makeConnection service user exchangeGreetings service uId user sId pure conns withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do - subscribeClientServices service 1 + [(_, Right (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash)))] <- M.toList <$> subscribeClientServices service 1 + ("", "", SERVICE_ALL _) <- nGet service subscribeConnection user sId exchangeGreetingsMsgId 4 service uId user sId - pure conns + pure (conns, qIdHash) withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do - subscribeClientServices service 1 + [(_, Right (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash')))] <- M.toList <$> subscribeClientServices service 1 + ("", "", SERVICE_ALL _) <- nGet service + liftIO $ qIdHash' `shouldBe` qIdHash subscribeConnection user sId exchangeGreetingsMsgId 6 service uId user sId ("", "", DOWN _ [_]) <- nGet user + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 1 qIdHash')) <- nGet service + qIdHash' `shouldBe` qIdHash -- TODO [certs rcv] how to integrate service counts into 
stats -- r <- nGet service -- TODO [certs rcv] some event when service disconnects with count -- print r withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do ("", "", UP _ [_]) <- nGet user + ("", "", SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash''))) <- nGet service + ("", "", SERVICE_ALL _) <- nGet service + liftIO $ qIdHash'' `shouldBe` qIdHash -- r <- nGet service -- TODO [certs rcv] some event when service reconnects with count exchangeGreetingsMsgId 8 service uId user sId diff --git a/tests/SMPProxyTests.hs b/tests/SMPProxyTests.hs index 09f20c1dd6..0d8ccdf89e 100644 --- a/tests/SMPProxyTests.hs +++ b/tests/SMPProxyTests.hs @@ -188,7 +188,7 @@ deliverMessagesViaProxy proxyServ relayServ alg unsecuredMsgs securedMsgs = do runExceptT' (proxySMPMessage pc NRMInteractive sess Nothing sndId noMsgFlags msg) `shouldReturn` Right () runExceptT' (proxySMPMessage pc NRMInteractive sess {prSessionId = "bad session"} Nothing sndId noMsgFlags msg) `shouldReturn` Left (ProxyProtocolError $ SMP.PROXY SMP.NO_SESSION) -- receive 1 - (_tSess, _v, _sid, [(_entId, STEvent (Right (SMP.MSG RcvMessage {msgId, msgBody = EncRcvMsgBody encBody})))]) <- atomically $ readTBQueue msgQ + (_tSess, _, [(_entId, STEvent (Right (SMP.MSG RcvMessage {msgId, msgBody = EncRcvMsgBody encBody})))]) <- atomically $ readTBQueue msgQ dec msgId encBody `shouldBe` Right msg runExceptT' $ ackSMPMessage rc rPriv rcvId msgId -- secure queue @@ -200,7 +200,7 @@ deliverMessagesViaProxy proxyServ relayServ alg unsecuredMsgs securedMsgs = do runExceptT' (proxySMPMessage pc NRMInteractive sess (Just sPriv) sndId noMsgFlags msg') `shouldReturn` Right () ) ( forM_ securedMsgs $ \msg' -> do - (_tSess, _v, _sid, [(_entId, STEvent (Right (SMP.MSG RcvMessage {msgId = msgId', msgBody = EncRcvMsgBody encBody'})))]) <- atomically $ readTBQueue msgQ + (_tSess, _, [(_entId, STEvent (Right (SMP.MSG RcvMessage {msgId = msgId', msgBody = EncRcvMsgBody encBody'})))]) <- atomically $ readTBQueue 
msgQ dec msgId' encBody' `shouldBe` Right msg' runExceptT' $ ackSMPMessage rc rPriv rcvId msgId' ) diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs index dd97781c2c..82a39af397 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -733,7 +733,7 @@ testServiceDeliverSubscribe = pure $ Just $ Just mId3 _ -> pure Nothing ] - Resp "" NoEntity SALL <- tGet1 sh + Resp "" NoEntity ALLS <- tGet1 sh Resp "12" _ OK <- signSendRecv sh rKey ("12", rId, ACK mId3) Resp "14" _ OK <- signSendRecv h sKey ("14", sId, _SEND "hello 4") Resp "" _ (Msg mId4 msg4) <- tGet1 sh @@ -831,7 +831,7 @@ testServiceUpgradeAndDowngrade = pure $ Just $ Just (rKey2, rId2, mId3) _ -> pure Nothing ] - Resp "" NoEntity SALL <- tGet1 sh + Resp "" NoEntity ALLS <- tGet1 sh Resp "15" _ OK <- signSendRecv sh rKey3_1 ("15", rId3_1, ACK mId3_1) Resp "16" _ OK <- signSendRecv sh rKey3_2 ("16", rId3_2, ACK mId3_2) pure () From 2ea9a9a143168f2b04933bf5d6ca13a3f9a170b0 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 5 Dec 2025 20:46:48 +0000 Subject: [PATCH 05/91] agent: finalize initial service subscriptions, remove associations on service ID changes (#1672) * agent: remove service/queue associations when service ID changes * agent: check that service ID in NEW response matches session ID in transport session * agent subscription WIP * test * comment * enable tests * update queries * agent: option to add SQLite aggregates to DB connection (#1673) * agent: add build_relations_vector function to sqlite * update aggregate * use static aggregate * remove relations --------- Co-authored-by: Evgeny Poberezkin * add test, treat BAD_SERVICE as temp error, only remove queue associations on service errors * add packZipWith for backward compatibility with GHC 8.10.7 --------- Co-authored-by: spaced4ndy <8711996+spaced4ndy@users.noreply.github.com> --- src/Simplex/Messaging/Agent.hs | 45 ++++++-- src/Simplex/Messaging/Agent/Client.hs | 52 ++++++--- .../Messaging/Agent/Store/AgentStore.hs | 107 
+++++++++++++++--- src/Simplex/Messaging/Agent/Store/SQLite.hs | 20 ++-- .../Messaging/Agent/Store/SQLite/Common.hs | 11 +- .../Messaging/Agent/Store/SQLite/Util.hs | 48 ++++++++ src/Simplex/Messaging/Client.hs | 2 +- src/Simplex/Messaging/Protocol.hs | 1 + src/Simplex/Messaging/Util.hs | 26 +++++ tests/AgentTests/FunctionalAPITests.hs | 88 +++++++++++--- 10 files changed, 330 insertions(+), 70 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 18e9d04659..f155ce77b4 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -153,7 +153,7 @@ import Data.Bifunctor (bimap, first) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B import Data.Composition -import Data.Either (isRight, partitionEithers, rights) +import Data.Either (fromRight, isRight, partitionEithers, rights) import Data.Foldable (foldl', toList) import Data.Functor (($>)) import Data.Functor.Identity @@ -221,7 +221,6 @@ import Simplex.Messaging.Protocol SMPMsgMeta, SParty (..), SProtocolType (..), - ServiceSub (..), ServiceSubResult, SndPublicAuthKey, SubscriptionMode (..), @@ -1451,7 +1450,23 @@ subscribeAllConnections' c onlyNeeded activeUserId_ = handleErr $ do let userSrvs' = case activeUserId_ of Just activeUserId -> sortOn (\(uId, _) -> if uId == activeUserId then 0 else 1 :: Int) userSrvs Nothing -> userSrvs - rs <- lift $ mapConcurrently (subscribeUserServer maxPending currPending) userSrvs' + useServices <- readTVarIO $ useClientServices c + -- These options are possible below: + -- 1) services fully disabled: + -- No service subscriptions will be attempted, and existing services and association will remain in in the database, + -- but they will be ignored because of hasService parameter set to False. + -- This approach preserves performance for all clients that do not use services. 
+ -- 2) at least one user ID has services enabled: + -- Service will be loaded for all user/server combinations: + -- a) service is enabled for user ID and service record exists: subscription will be attempted, + -- b) service is disabled and record exists: service record and all associations will be removed, + -- c) service is disabled or no record: no subscription attempt. + -- On successful service subscription, only unassociated queues will be subscribed. + userSrvs'' <- + if any id useServices + then lift $ mapConcurrently (subscribeService useServices) userSrvs' + else pure $ map (,False) userSrvs' + rs <- lift $ mapConcurrently (subscribeUserServer maxPending currPending) userSrvs'' let (errs, oks) = partitionEithers rs logInfo $ "subscribed " <> tshow (sum oks) <> " queues" forM_ (L.nonEmpty errs) $ notifySub c . ERRS . L.map ("",) @@ -1460,21 +1475,31 @@ subscribeAllConnections' c onlyNeeded activeUserId_ = handleErr $ do resumeAllCommands c where handleErr = (`catchAllErrors` \e -> notifySub' c "" (ERR e) >> throwE e) - subscribeUserServer :: Int -> TVar Int -> (UserId, SMPServer) -> AM' (Either AgentErrorType Int) - subscribeUserServer maxPending currPending (userId, srv) = do + subscribeService :: Map UserId Bool -> (UserId, SMPServer) -> AM' ((UserId, SMPServer), ServiceAssoc) + subscribeService useServices us@(userId, srv) = fmap ((us,) . 
fromRight False) $ tryAllErrors' $ do + withStore' c (\db -> getSubscriptionService db userId srv) >>= \case + Just serviceSub -> case M.lookup userId useServices of + Just True -> tryAllErrors (subscribeClientService c True userId srv serviceSub) >>= \case + Left e | clientServiceError e -> unassocQueues $> False + _ -> pure True + _ -> unassocQueues $> False + where + unassocQueues = withStore' c $ \db -> unassocUserServerRcvQueueSubs db userId srv + _ -> pure False + subscribeUserServer :: Int -> TVar Int -> ((UserId, SMPServer), ServiceAssoc) -> AM' (Either AgentErrorType Int) + subscribeUserServer maxPending currPending ((userId, srv), hasService) = do atomically $ whenM ((maxPending <=) <$> readTVar currPending) retry tryAllErrors' $ do qs <- withStore' c $ \db -> do - qs <- getUserServerRcvQueueSubs db userId srv onlyNeeded - atomically $ modifyTVar' currPending (+ length qs) -- update before leaving transaction + qs <- getUserServerRcvQueueSubs db userId srv onlyNeeded hasService + unless (null qs) $ atomically $ modifyTVar' currPending (+ length qs) -- update before leaving transaction pure qs let n = length qs - lift $ subscribe qs `E.finally` atomically (modifyTVar' currPending $ subtract n) + unless (null qs) $ lift $ subscribe qs `E.finally` atomically (modifyTVar' currPending $ subtract n) pure n where subscribe qs = do rs <- subscribeUserServerQueues c userId srv qs - -- TODO [certs rcv] storeClientServiceAssocs store associations of queues with client service ID ns <- asks ntfSupervisor whenM (liftIO $ hasInstantNotifications ns) $ sendNtfCreate ns rs sendNtfCreate :: NtfSupervisor -> [(RcvQueueSub, Either AgentErrorType (Maybe SMP.ServiceId))] -> AM' () @@ -1522,7 +1547,7 @@ subscribeClientServices' c userId = useService = liftIO $ (Just True ==) <$> TM.lookupIO userId (useClientServices c) subscribe = do srvs <- withStore' c (`getClientServiceServers` userId) - lift $ M.fromList <$> mapConcurrently (\(srv, ServiceSub _ n idsHash) -> fmap (srv,) $ 
tryAllErrors' $ subscribeClientService c False userId srv n idsHash) srvs + lift $ M.fromList <$> mapConcurrently (\(srv, serviceSub) -> fmap (srv,) $ tryAllErrors' $ subscribeClientService c False userId srv serviceSub) srvs -- requesting messages sequentially, to reduce memory usage getConnectionMessages' :: AgentClient -> NonEmpty ConnMsgReq -> AM' (NonEmpty (Either AgentErrorType (Maybe SMPMsgMeta))) diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 77d73027d7..7acfb0b490 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -120,6 +120,7 @@ module Simplex.Messaging.Agent.Client getAgentSubscriptions, slowNetworkConfig, protocolClientError, + clientServiceError, Worker (..), SessionVar (..), SubscriptionsInfo (..), @@ -303,7 +304,7 @@ import Simplex.Messaging.Session import Simplex.Messaging.SystemTime import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM -import Simplex.Messaging.Transport (SMPServiceRole (..), SMPVersion, ServiceCredentials (..), SessionId, THClientService' (..), THandleParams (sessionId, thVersion), TransportError (..), TransportPeer (..), sndAuthKeySMPVersion, shortLinksSMPVersion, newNtfCredsSMPVersion) +import Simplex.Messaging.Transport (HandshakeError (..), SMPServiceRole (..), SMPVersion, ServiceCredentials (..), SessionId, THClientService' (..), THandleAuth (..), THandleParams (sessionId, thAuth, thVersion), TransportError (..), TransportPeer (..), sndAuthKeySMPVersion, shortLinksSMPVersion, newNtfCredsSMPVersion) import Simplex.Messaging.Transport.Client (TransportHost (..)) import Simplex.Messaging.Transport.Credentials import Simplex.Messaging.Util @@ -619,7 +620,7 @@ getServiceCredentials c userId srv = let g = agentDRG c ((C.KeyHash kh, serviceCreds), serviceId_) <- withStore' c $ \db -> - getClientService db userId srv >>= \case + getClientServiceCredentials db userId srv >>= \case Just service -> pure service 
Nothing -> do cred <- genCredentials g Nothing (25, 24 * 999999) "simplex" @@ -747,15 +748,13 @@ smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm smp <- liftError (protocolClientError SMP $ B.unpack $ strEncode srv) $ do ts <- readTVarIO proxySessTs ExceptT $ getProtocolClient g nm tSess cfg' presetDomains (Just msgQ) ts $ smpClientDisconnected c tSess env v' prs - -- TODO [certs rcv] add service to SS, possibly combine with SS.setSessionId atomically $ SS.setSessionId tSess (sessionId $ thParams smp) $ currentSubs c updateClientService service smp pure SMPConnectedClient {connectedClient = smp, proxiedRelays = prs} - -- TODO [certs rcv] this should differentiate between service ID just set and service ID changed, and in the latter case disassociate the queues updateClientService service smp = case (service, smpClientService smp) of - (Just (_, serviceId_), Just THClientService {serviceId}) - | serviceId_ /= Just serviceId -> withStore' c $ \db -> setClientServiceId db userId srv serviceId - | otherwise -> pure () + (Just (_, serviceId_), Just THClientService {serviceId}) -> withStore' c $ \db -> do + setClientServiceId db userId srv serviceId + forM_ serviceId_ $ \sId -> when (sId /= serviceId) $ removeRcvServiceAssocs db userId srv (Just _, Nothing) -> withStore' c $ \db -> deleteClientService db userId srv -- e.g., server version downgrade (Nothing, Just _) -> logError "server returned serviceId without service credentials in request" (Nothing, Nothing) -> pure () @@ -1258,6 +1257,14 @@ protocolClientError protocolError_ host = \case PCEServiceUnavailable {} -> BROKER host NO_SERVICE PCEIOError e -> BROKER host $ NETWORK $ NEConnectError $ E.displayException e +-- it is consistent with smpClientServiceError +clientServiceError :: AgentErrorType -> Bool +clientServiceError = \case + BROKER _ NO_SERVICE -> True + SMP _ SMP.SERVICE -> True + SMP _ (SMP.PROXY (SMP.BROKER NO_SERVICE)) -> True -- for completeness, it cannot happen. 
+ _ -> False + data ProtocolTestStep = TSConnect | TSDisconnect @@ -1446,8 +1453,8 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl withClient c nm tSess $ \(SMPConnectedClient smp _) -> do (ntfKeys, ntfCreds) <- liftIO $ mkNtfCreds a g smp (thParams smp,ntfKeys,) <$> createSMPQueue smp nm nonce_ rKeys dhKey auth subMode (queueReqData cqrd) ntfCreds - -- TODO [certs rcv] validate that serviceId is the same as in the client session, fail otherwise - -- possibly, it should allow returning Nothing - it would indicate incorrect old version + let sessServiceId = (\THClientService {serviceId = sId} -> sId) <$> (clientService =<< thAuth thParams') + when (isJust serviceId && serviceId /= sessServiceId) $ logError "incorrect service ID in NEW response" liftIO . logServer "<--" c srv NoEntity $ B.unwords ["IDS", logSecret rcvId, logSecret sndId] shortLink <- mkShortLinkCreds thParams' qik let rq = @@ -1463,7 +1470,7 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl sndId, queueMode, shortLink, - rcvServiceAssoc = isJust serviceId, + rcvServiceAssoc = isJust serviceId && serviceId == sessServiceId, status = New, enableNtfs, clientNoticeId = Nothing, @@ -1559,6 +1566,8 @@ temporaryAgentError :: AgentErrorType -> Bool temporaryAgentError = \case BROKER _ e -> tempBrokerError e SMP _ (SMP.PROXY (SMP.BROKER e)) -> tempBrokerError e + SMP _ (SMP.STORE _) -> True + NTF _ (SMP.STORE _) -> True XFTP _ XFTP.TIMEOUT -> True PROXY _ _ (ProxyProtocolError (SMP.PROXY (SMP.BROKER e))) -> tempBrokerError e PROXY _ _ (ProxyProtocolError (SMP.PROXY SMP.NO_SESSION)) -> True @@ -1569,6 +1578,7 @@ temporaryAgentError = \case tempBrokerError = \case NETWORK _ -> True TIMEOUT -> True + TRANSPORT (TEHandshake BAD_SERVICE) -> True -- this error is considered temporary because it is DB error _ -> False temporaryOrHostError :: AgentErrorType -> Bool @@ -1715,11 +1725,16 @@ processClientNotices c@AgentClient {presetServers} tSess 
notices = do notifySub' c "" $ ERR e resubscribeClientService :: AgentClient -> SMPTransportSession -> ServiceSub -> AM ServiceSubResult -resubscribeClientService c tSess serviceSub = - withServiceClient c tSess $ \smp _ -> subscribeClientService_ c True tSess smp serviceSub - -subscribeClientService :: AgentClient -> Bool -> UserId -> SMPServer -> Int64 -> IdsHash -> AM ServiceSubResult -subscribeClientService c withEvent userId srv n idsHash = +resubscribeClientService c tSess@(userId, srv, _) serviceSub = + withServiceClient c tSess (\smp _ -> subscribeClientService_ c True tSess smp serviceSub) `catchE` \e -> do + when (clientServiceError e) $ do + qs <- withStore' c $ \db -> unassocUserServerRcvQueueSubs db userId srv + void $ lift $ subscribeUserServerQueues c userId srv qs + throwE e + +-- TODO [certs rcv] update service in the database if it has different ID and re-associate queues, and send event +subscribeClientService :: AgentClient -> Bool -> UserId -> SMPServer -> ServiceSub -> AM ServiceSubResult +subscribeClientService c withEvent userId srv (ServiceSub _ n idsHash) = withServiceClient c tSess $ \smp smpServiceId -> do let serviceSub = ServiceSub smpServiceId n idsHash atomically $ SS.setPendingServiceSub tSess serviceSub $ currentSubs c @@ -1728,14 +1743,15 @@ subscribeClientService c withEvent userId srv n idsHash = tSess = (userId, srv, Nothing) withServiceClient :: AgentClient -> SMPTransportSession -> (SMPClient -> ServiceId -> ExceptT SMPClientError IO a) -> AM a -withServiceClient c tSess action = +withServiceClient c tSess subscribe = withLogClient c NRMBackground tSess B.empty "SUBS" $ \(SMPConnectedClient smp _) -> case (\THClientService {serviceId} -> serviceId) <$> smpClientService smp of - Just smpServiceId -> action smp smpServiceId + Just smpServiceId -> subscribe smp smpServiceId Nothing -> throwE PCEServiceUnavailable +-- TODO [certs rcv] send subscription error event? 
subscribeClientService_ :: AgentClient -> Bool -> SMPTransportSession -> SMPClient -> ServiceSub -> ExceptT SMPClientError IO ServiceSubResult -subscribeClientService_ c withEvent tSess@(_, srv, _) smp expected@(ServiceSub _ n idsHash) = do +subscribeClientService_ c withEvent tSess@(userId, srv, _) smp expected@(ServiceSub _ n idsHash) = do subscribed <- subscribeService smp SMP.SRecipientService n idsHash let sessId = sessionId $ thParams smp r = serviceSubResult expected subscribed diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index a732d28d4e..0d0b2af70c 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -37,7 +37,9 @@ module Simplex.Messaging.Agent.Store.AgentStore -- * Client services createClientService, - getClientService, + getClientServiceCredentials, + getSubscriptionServices, + getSubscriptionService, getClientServiceServers, setClientServiceId, deleteClientService, @@ -52,8 +54,10 @@ module Simplex.Messaging.Agent.Store.AgentStore updateClientNotices, getSubscriptionServers, getUserServerRcvQueueSubs, + unassocUserServerRcvQueueSubs, unsetQueuesToSubscribe, setRcvServiceAssocs, + removeRcvServiceAssocs, getConnIds, getConn, getDeletedConn, @@ -419,8 +423,8 @@ createClientService db userId srv (kh, (cert, pk)) = do |] (userId, host srv, port srv, serverKeyHash_, kh, cert, pk) -getClientService :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ((C.KeyHash, TLS.Credential), Maybe ServiceId)) -getClientService db userId srv = +getClientServiceCredentials :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ((C.KeyHash, TLS.Credential), Maybe ServiceId)) +getClientServiceCredentials db userId srv = maybeFirstRow toService $ DB.query db @@ -435,21 +439,41 @@ getClientService db userId srv = where toService (kh, cert, pk, serviceId_) = ((kh, (cert, pk)), serviceId_) -getClientServiceServers :: DB.Connection -> UserId -> IO 
[(SMPServer, ServiceSub)] -getClientServiceServers db userId = - map toServer - <$> DB.query +getSubscriptionServices :: DB.Connection -> IO [(UserId, (SMPServer, ServiceSub))] +getSubscriptionServices db = map toUserService <$> DB.query_ db clientServiceQuery + where + toUserService (Only userId :. serviceRow) = (userId, toServerService serviceRow) + +getSubscriptionService :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ServiceSub) +getSubscriptionService db userId (SMPServer h p kh) = + maybeFirstRow toService $ + DB.query db [sql| - SELECT c.host, c.port, s.key_hash, c.service_id, c.service_queue_count, c.service_queue_ids_hash + SELECT c.service_id, c.service_queue_count, c.service_queue_ids_hash FROM client_services c JOIN servers s ON s.host = c.host AND s.port = c.port - WHERE c.user_id = ? + WHERE c.user_id = ? AND c.host = ? AND c.port = ? AND COALESCE(c.server_key_hash, s.key_hash) = ? |] - (Only userId) + (userId, h, p, kh) where - toServer (host, port, kh, serviceId, n, Binary idsHash) = - (SMPServer host port kh, ServiceSub serviceId n (IdsHash idsHash)) + toService (serviceId, qCnt, idsHash) = ServiceSub serviceId qCnt idsHash + +getClientServiceServers :: DB.Connection -> UserId -> IO [(SMPServer, ServiceSub)] +getClientServiceServers db userId = + map toServerService <$> DB.query db (clientServiceQuery <> " WHERE c.user_id = ?") (Only userId) + +clientServiceQuery :: Query +clientServiceQuery = + [sql| + SELECT c.host, c.port, COALESCE(c.server_key_hash, s.key_hash), c.service_id, c.service_queue_count, c.service_queue_ids_hash + FROM client_services c + JOIN servers s ON s.host = c.host AND s.port = c.port + |] + +toServerService :: (NonEmpty TransportHost, ServiceName, C.KeyHash, ServiceId, Int64, Binary ByteString) -> (ProtocolServer 'PSMP, ServiceSub) +toServerService (host, port, kh, serviceId, n, Binary idsHash) = + (SMPServer host port kh, ServiceSub serviceId n (IdsHash idsHash)) setClientServiceId :: DB.Connection -> UserId -> 
SMPServer -> ServiceId -> IO () setClientServiceId db userId srv serviceId = @@ -473,7 +497,9 @@ deleteClientService db userId srv = (userId, host srv, port srv) deleteClientServices :: DB.Connection -> UserId -> IO () -deleteClientServices db userId = DB.execute db "DELETE FROM client_services WHERE user_id = ?" (Only userId) +deleteClientServices db userId = do + DB.execute db "DELETE FROM client_services WHERE user_id = ?" (Only userId) + removeUserRcvServiceAssocs db userId createConn_ :: TVar ChaChaDRG -> @@ -2236,17 +2262,36 @@ getSubscriptionServers db onlyNeeded = toUserServer :: (UserId, NonEmpty TransportHost, ServiceName, C.KeyHash) -> (UserId, SMPServer) toUserServer (userId, host, port, keyHash) = (userId, SMPServer host port keyHash) -getUserServerRcvQueueSubs :: DB.Connection -> UserId -> SMPServer -> Bool -> IO [RcvQueueSub] -getUserServerRcvQueueSubs db userId srv onlyNeeded = +-- TODO [certs rcv] check index for getting queues with service present +getUserServerRcvQueueSubs :: DB.Connection -> UserId -> SMPServer -> Bool -> ServiceAssoc -> IO [RcvQueueSub] +getUserServerRcvQueueSubs db userId srv onlyNeeded hasService = map toRcvQueueSub <$> DB.query db - (rcvQueueSubQuery <> toSubscribe <> " c.deleted = 0 AND q.deleted = 0 AND c.user_id = ? AND q.host = ? AND q.port = ?") + (rcvQueueSubQuery <> toSubscribe <> " c.deleted = 0 AND q.deleted = 0 AND c.user_id = ? AND q.host = ? AND q.port = ?" 
<> serviceCond) (userId, host srv, port srv) where toSubscribe | onlyNeeded = " WHERE q.to_subscribe = 1 AND " | otherwise = " WHERE " + serviceCond + | hasService = " AND q.rcv_service_assoc = 0" + | otherwise = "" + +unassocUserServerRcvQueueSubs :: DB.Connection -> UserId -> SMPServer -> IO [RcvQueueSub] +unassocUserServerRcvQueueSubs db userId (SMPServer h p kh) = + map toRcvQueueSub + <$> DB.query + db + (removeRcvAssocsQuery <> " " <> returningColums) + (h, p, userId, kh) + where + returningColums = + [sql| + RETURNING c.user_id, rcv_queues.conn_id, rcv_queues.host, rcv_queues.port, COALESCE(rcv_queues.server_key_hash, s.key_hash), + rcv_queues.rcv_id, rcv_queues.rcv_private_key, rcv_queues.status, c.enable_ntfs, rcv_queues.client_notice_id, + rcv_queues.rcv_queue_id, rcv_queues.rcv_primary, rcv_queues.replace_rcv_queue_id + |] unsetQueuesToSubscribe :: DB.Connection -> IO () unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = 0 WHERE to_subscribe = 1" @@ -2259,6 +2304,36 @@ setRcvServiceAssocs db rqs = DB.executeMany db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id = ?" $ map (Only . queueId) rqs #endif +removeRcvServiceAssocs :: DB.Connection -> UserId -> SMPServer -> IO () +removeRcvServiceAssocs db userId (SMPServer h p kh) = DB.execute db removeRcvAssocsQuery (h, p, userId, kh) + +removeRcvAssocsQuery :: Query +removeRcvAssocsQuery = + [sql| + UPDATE rcv_queues + SET rcv_service_assoc = 0 + FROM connections c, servers s + WHERE rcv_queues.host = ? + AND rcv_queues.port = ? + AND c.conn_id = rcv_queues.conn_id + AND c.user_id = ? + AND s.host = rcv_queues.host + AND s.port = rcv_queues.port + AND COALESCE(rcv_queues.server_key_hash, s.key_hash) = ? + |] + +removeUserRcvServiceAssocs :: DB.Connection -> UserId -> IO () +removeUserRcvServiceAssocs db userId = + DB.execute + db + [sql| + UPDATE rcv_queues + SET rcv_service_assoc = 0 + FROM connections c + WHERE c.conn_id = rcv_queues.conn_id AND c.user_id = ? 
+ |] + (Only userId) + -- * getConn helpers getConnIds :: DB.Connection -> IO [ConnId] diff --git a/src/Simplex/Messaging/Agent/Store/SQLite.hs b/src/Simplex/Messaging/Agent/Store/SQLite.hs index d5b8f82909..a670dd3e2e 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite.hs @@ -67,10 +67,10 @@ import Simplex.Messaging.Agent.Store.Migrations (DBMigrate (..), sharedMigrateSc import qualified Simplex.Messaging.Agent.Store.SQLite.Migrations as Migrations import Simplex.Messaging.Agent.Store.SQLite.Common import qualified Simplex.Messaging.Agent.Store.SQLite.DB as DB -import Simplex.Messaging.Agent.Store.SQLite.Util (SQLiteFunc, createStaticFunction, mkSQLiteFunc) +import Simplex.Messaging.Agent.Store.SQLite.Util import Simplex.Messaging.Agent.Store.Shared (Migration (..), MigrationConfig (..), MigrationError (..)) import qualified Simplex.Messaging.Crypto as C -import Simplex.Messaging.Util (ifM, safeDecodeUtf8) +import Simplex.Messaging.Util (ifM, packZipWith, safeDecodeUtf8) import System.Directory (copyFile, createDirectoryIfMissing, doesFileExist) import System.FilePath (takeDirectory, takeFileName, ()) @@ -116,9 +116,7 @@ connectDB path functions key track = do -- _printPragmas db path pure db where - functions' = SQLiteFuncDef "simplex_xor_md5_combine" 2 True sqliteXorMd5CombinePtr : functions prepare db = do - let db' = SQL.connectionHandle $ DB.conn db unless (BA.null key) . SQLite3.exec db' $ "PRAGMA key = " <> keyString key <> ";" SQLite3.exec db' . fromQuery $ [sql| @@ -128,9 +126,14 @@ connectDB path functions key track = do PRAGMA secure_delete = ON; PRAGMA auto_vacuum = FULL; |] - forM_ functions' $ \SQLiteFuncDef {funcName, argCount, deterministic, funcPtr} -> - createStaticFunction db' funcName argCount deterministic funcPtr - >>= either (throwIO . userError . 
show) pure + mapM_ addFunction functions' + where + db' = SQL.connectionHandle $ DB.conn db + functions' = SQLiteFuncDef "simplex_xor_md5_combine" 2 (SQLiteFuncPtr True sqliteXorMd5CombinePtr) : functions + addFunction SQLiteFuncDef {funcName, argCount, funcPtrs} = + either (throwIO . userError . show) pure =<< case funcPtrs of + SQLiteFuncPtr isDet funcPtr -> createStaticFunction db' funcName argCount isDet funcPtr + SQLiteAggrPtrs stepPtr finalPtr -> createStaticAggregate db' funcName argCount stepPtr finalPtr foreign export ccall "simplex_xor_md5_combine" sqliteXorMd5Combine :: SQLiteFunc @@ -143,7 +146,8 @@ sqliteXorMd5Combine = mkSQLiteFunc $ \cxt args -> do SQLite3.funcResultBlob cxt $ xorMd5Combine idsHash rId xorMd5Combine :: ByteString -> ByteString -> ByteString -xorMd5Combine idsHash rId = B.packZipWith xor idsHash $ C.md5Hash rId +xorMd5Combine idsHash rId = packZipWith xor idsHash $ C.md5Hash rId +{-# INLINE xorMd5Combine #-} closeDBStore :: DBStore -> IO () closeDBStore st@DBStore {dbClosed} = diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs index 0634360a2b..448c885f2e 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Common.hs @@ -7,6 +7,7 @@ module Simplex.Messaging.Agent.Store.SQLite.Common ( DBStore (..), DBOpts (..), SQLiteFuncDef (..), + SQLiteFuncPtrs (..), withConnection, withConnection', withTransaction, @@ -55,14 +56,18 @@ data DBOpts = DBOpts track :: DB.TrackQueries } --- e.g. `SQLiteFuncDef "name" 2 True f` +-- e.g. 
`SQLiteFuncDef "func_name" 2 (SQLiteFuncPtr True func)` +-- or `SQLiteFuncDef "aggr_name" 3 (SQLiteAggrPtrs step final)` data SQLiteFuncDef = SQLiteFuncDef { funcName :: ByteString, argCount :: CArgCount, - deterministic :: Bool, - funcPtr :: FunPtr SQLiteFunc + funcPtrs :: SQLiteFuncPtrs } +data SQLiteFuncPtrs + = SQLiteFuncPtr {deterministic :: Bool, funcPtr :: FunPtr SQLiteFunc} + | SQLiteAggrPtrs {stepPtr :: FunPtr SQLiteFunc, finalPtr :: FunPtr SQLiteFuncFinal} + withConnectionPriority :: DBStore -> Bool -> (DB.Connection -> IO a) -> IO a withConnectionPriority DBStore {dbSem, dbConnection} priority action | priority = E.bracket_ signal release $ withMVar dbConnection action diff --git a/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs b/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs index a3c3b94ac4..2cbd7ecffd 100644 --- a/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs +++ b/src/Simplex/Messaging/Agent/Store/SQLite/Util.hs @@ -3,16 +3,20 @@ module Simplex.Messaging.Agent.Store.SQLite.Util where import Control.Exception (SomeException, catch, mask_) import Data.ByteString (ByteString) import qualified Data.ByteString as B +import Data.IORef import Database.SQLite3.Direct (Database (..), FuncArgs (..), FuncContext (..)) import Database.SQLite3.Bindings import Foreign.C.String import Foreign.Ptr import Foreign.StablePtr +import Foreign.Storable data CFuncPtrs = CFuncPtrs (FunPtr CFunc) (FunPtr CFunc) (FunPtr CFuncFinal) type SQLiteFunc = Ptr CContext -> CArgCount -> Ptr (Ptr CValue) -> IO () +type SQLiteFuncFinal = Ptr CContext -> IO () + mkSQLiteFunc :: (FuncContext -> FuncArgs -> IO ()) -> SQLiteFunc mkSQLiteFunc f cxt nArgs cvals = catchAsResultError cxt $ f (FuncContext cxt) (FuncArgs nArgs cvals) {-# INLINE mkSQLiteFunc #-} @@ -25,6 +29,50 @@ createStaticFunction (Database db) name nArgs isDet funPtr = mask_ $ do B.useAsCString name $ \namePtr -> toResult () <$> c_sqlite3_create_function_v2 db namePtr nArgs flags (castStablePtrToPtr u) funPtr 
nullFunPtr nullFunPtr nullFunPtr +mkSQLiteAggStep :: a -> (FuncContext -> FuncArgs -> a -> IO a) -> SQLiteFunc +mkSQLiteAggStep initSt xStep cxt nArgs cvals = catchAsResultError cxt $ do + -- we store the aggregate state in the buffer returned by + -- c_sqlite3_aggregate_context as a StablePtr pointing to an IORef that + -- contains the actual aggregate state + aggCtx <- getAggregateContext cxt + aggStPtr <- peek aggCtx + aggStRef <- + if castStablePtrToPtr aggStPtr /= nullPtr + then deRefStablePtr aggStPtr + else do + aggStRef <- newIORef initSt + aggStPtr' <- newStablePtr aggStRef + poke aggCtx aggStPtr' + return aggStRef + aggSt <- readIORef aggStRef + aggSt' <- xStep (FuncContext cxt) (FuncArgs nArgs cvals) aggSt + writeIORef aggStRef aggSt' + +mkSQLiteAggFinal :: a -> (FuncContext -> a -> IO ()) -> SQLiteFuncFinal +mkSQLiteAggFinal initSt xFinal cxt = do + aggCtx <- getAggregateContext cxt + aggStPtr <- peek aggCtx + if castStablePtrToPtr aggStPtr == nullPtr + then catchAsResultError cxt $ xFinal (FuncContext cxt) initSt + else do + catchAsResultError cxt $ do + aggStRef <- deRefStablePtr aggStPtr + aggSt <- readIORef aggStRef + xFinal (FuncContext cxt) aggSt + freeStablePtr aggStPtr + +getAggregateContext :: Ptr CContext -> IO (Ptr a) +getAggregateContext cxt = c_sqlite3_aggregate_context cxt stPtrSize + where + stPtrSize = fromIntegral $ sizeOf (undefined :: StablePtr ()) + +-- Based on createAggregate from Database.SQLite3.Direct, but uses static function pointers to avoid dynamic wrappers that trigger DCL. 
+createStaticAggregate :: Database -> ByteString -> CArgCount -> FunPtr SQLiteFunc -> FunPtr SQLiteFuncFinal -> IO (Either Error ()) +createStaticAggregate (Database db) name nArgs stepPtr finalPtr = mask_ $ do + u <- newStablePtr $ CFuncPtrs nullFunPtr stepPtr finalPtr + B.useAsCString name $ \namePtr -> + toResult () <$> c_sqlite3_create_function_v2 db namePtr nArgs 0 (castStablePtrToPtr u) nullFunPtr stepPtr finalPtr nullFunPtr + -- Convert a 'CError' to a 'Either Error', in the common case where -- SQLITE_OK signals success and anything else signals an error. -- diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index 81e9820a2c..ac2dc9a9d2 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -778,10 +778,10 @@ temporaryClientError = \case _ -> False {-# INLINE temporaryClientError #-} +-- it is consistent with clientServiceError smpClientServiceError :: SMPClientError -> Bool smpClientServiceError = \case PCEServiceUnavailable -> True - PCETransportError (TEHandshake BAD_SERVICE) -> True -- TODO [certs rcv] this error may be temporary, so we should possibly resubscribe. PCEProtocolError SERVICE -> True PCEProtocolError (PROXY (BROKER NO_SERVICE)) -> True -- for completeness, it cannot happen. 
_ -> False diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index a5f94960e2..6b232f12b3 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -143,6 +143,7 @@ module Simplex.Messaging.Protocol IdsHash (..), ServiceSub (..), ServiceSubResult (..), + ServiceSubError (..), serviceSubResult, queueIdsHash, queueIdHash, diff --git a/src/Simplex/Messaging/Util.hs b/src/Simplex/Messaging/Util.hs index e9f37b1ae7..83a9114527 100644 --- a/src/Simplex/Messaging/Util.hs +++ b/src/Simplex/Messaging/Util.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE MonadComprehensions #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -15,6 +16,7 @@ import qualified Data.Aeson as J import Data.Bifunctor (first, second) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B +import Data.ByteString.Internal (toForeignPtr, unsafeCreate) import qualified Data.ByteString.Lazy.Char8 as LB import Data.IORef import Data.Int (Int64) @@ -29,6 +31,9 @@ import qualified Data.Text as T import Data.Text.Encoding (decodeUtf8With, encodeUtf8) import Data.Time (NominalDiffTime) import Data.Tuple (swap) +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Storable (peekByteOff, pokeByteOff) import GHC.Conc (labelThread, myThreadId, threadDelay) import UnliftIO hiding (atomicModifyIORef') import qualified UnliftIO.Exception as UE @@ -156,6 +161,27 @@ mapAccumLM_NonEmpty mapAccumLM_NonEmpty f s (x :| xs) = [(s2, x' :| xs') | (s1, x') <- f s x, (s2, xs') <- mapAccumLM_List f s1 xs] +-- | Optimized from bytestring package for GHC 8.10.7 compatibility +packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString +packZipWith f s1 s2 = + unsafeCreate len $ \r -> + withForeignPtr fp1 $ \p1 -> + withForeignPtr fp2 $ \p2 -> zipWith_ p1 p2 r + where + zipWith_ p1 p2 r = go 0 + where + go :: Int -> IO () + go !n + | n >= len 
= pure () + | otherwise = do + x <- peekByteOff p1 (off1 + n) + y <- peekByteOff p2 (off2 + n) + pokeByteOff r n (f x y) + go (n + 1) + (fp1, off1, l1) = toForeignPtr s1 + (fp2, off2, l2) = toForeignPtr s2 + len = min l1 l2 + tryWriteTBQueue :: TBQueue a -> a -> STM Bool tryWriteTBQueue q a = do full <- isFullTBQueue q diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 2a62deb45a..31967917a0 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -66,7 +66,7 @@ import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B import Data.Either (isRight) import Data.Int (Int64) -import Data.List (find, isSuffixOf, nub) +import Data.List (find, isPrefixOf, isSuffixOf, nub) import Data.List.NonEmpty (NonEmpty) import qualified Data.Map as M import Data.Maybe (isJust, isNothing) @@ -113,7 +113,7 @@ import Simplex.Messaging.Util (bshow, diffToMicroseconds) import Simplex.Messaging.Version (VersionRange (..)) import qualified Simplex.Messaging.Version as V import Simplex.Messaging.Version.Internal (Version (..)) -import System.Directory (copyFile, renameFile) +import System.Directory (copyFile, removeFile, renameFile) import Test.Hspec hiding (fit, it) import UnliftIO import Util @@ -124,12 +124,13 @@ import Fixtures #endif #if defined(dbServerPostgres) import qualified Database.PostgreSQL.Simple as PSQL -import Simplex.Messaging.Agent.Store (Connection' (..), StoredRcvQueue (..), SomeConn' (..)) -import Simplex.Messaging.Agent.Store.AgentStore (getConn) +import qualified Simplex.Messaging.Agent.Store.Postgres as Postgres +import qualified Simplex.Messaging.Agent.Store.Postgres.Common as Postgres import Simplex.Messaging.Server.MsgStore.Journal (JournalQueue) import Simplex.Messaging.Server.MsgStore.Postgres (PostgresQueue) import Simplex.Messaging.Server.MsgStore.Types (QSType (..)) import Simplex.Messaging.Server.QueueStore.Postgres +import 
Simplex.Messaging.Server.QueueStore.Postgres.Migrations import Simplex.Messaging.Server.QueueStore.Types (QueueStoreClass (..)) #endif @@ -478,6 +479,7 @@ functionalAPITests ps = do withSmpServer ps testTwoUsers describe "Client service certificates" $ do it "should connect, subscribe and reconnect as a service" $ testClientServiceConnection ps + it "should re-subscribe when service ID changed" $ testClientServiceIDChange ps describe "Connection switch" $ do describe "should switch delivery to the new queue" $ testServerMatrix2 ps testSwitchConnection @@ -3679,26 +3681,84 @@ testClientServiceConnection ps = do subscribeConnection user sId exchangeGreetingsMsgId 4 service uId user sId pure (conns, qIdHash) - withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + (uId', sId') <- withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do - [(_, Right (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash')))] <- M.toList <$> subscribeClientServices service 1 - ("", "", SERVICE_ALL _) <- nGet service - liftIO $ qIdHash' `shouldBe` qIdHash + subscribeAllConnections service False Nothing + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash')))) -> qIdHash' == qIdHash; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] subscribeConnection user sId exchangeGreetingsMsgId 6 service uId user sId ("", "", DOWN _ [_]) <- nGet user ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 1 qIdHash')) <- nGet service qIdHash' `shouldBe` qIdHash -- TODO [certs rcv] how to integrate service counts into stats - -- r <- nGet service -- TODO [certs rcv] some event when service disconnects with count - -- print r withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do ("", "", UP _ [_]) <- nGet user - 
("", "", SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash''))) <- nGet service - ("", "", SERVICE_ALL _) <- nGet service - liftIO $ qIdHash'' `shouldBe` qIdHash - -- r <- nGet service -- TODO [certs rcv] some event when service reconnects with count + -- Nothing in ServiceSubResult confirms that both counts and IDs hash match + -- SERVICE_ALL may be deliverd before SERVICE_UP event in case there are no messages to deliver + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash'')))) -> qIdHash'' == qIdHash; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] exchangeGreetingsMsgId 8 service uId user sId + conns'@(uId', sId') <- makeConnection user service -- opposite direction + exchangeGreetings user sId' service uId' + pure conns' + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + subscribeAllConnections service False Nothing + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 2 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + -- TODO [certs rcv] test message delivery during subscription + subscribeAllConnections user False Nothing + ("", "", UP _ [_, _]) <- nGet user + exchangeGreetingsMsgId 4 user sId' service uId' + exchangeGreetingsMsgId 10 service uId user sId + +testClientServiceIDChange :: HasCallStack => (ASrvTransport, AStoreType) -> IO () +testClientServiceIDChange ps@(_, ASType qs _) = do + (sId, uId) <- withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + conns@(sId, uId) <- makeConnection service user + exchangeGreetings service uId user sId 
+ pure conns + _ :: () <- case qs of + SQSPostgres -> do +#if defined(dbServerPostgres) + st <- either (error . show) pure =<< Postgres.createDBStore testStoreDBOpts serverMigrations (MigrationConfig MCError Nothing) + void $ Postgres.withTransaction st (`PSQL.execute_` "DELETE FROM services") +#else + pure () +#endif + SQSMemory -> do + s <- readFile testStoreLogFile + removeFile testStoreLogFile + writeFile testStoreLogFile $ unlines $ filter (not . ("NEW_SERVICE" `isPrefixOf`)) $ lines s + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + subscribeAllConnections service False Nothing + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult (Just (SMP.SSErrorQueueCount 1 0)) (SMP.ServiceSub _ 0 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False, + \case ("", "", AEvt SAENone (UP _ _)) -> True; _ -> False + ] + subscribeAllConnections user False Nothing + ("", "", UP _ [_]) <- nGet user + exchangeGreetingsMsgId 4 service uId user sId + -- disable service in the client + -- The test uses True for non-existing user to make sure it's removed for user 1, + -- because if no users use services, then it won't be checking them to optimize for most clients. 
+ withAgentClientsServers2 (agentCfg, initAgentServers {useServices = M.fromList [(100, True)]}) (agentCfg, initAgentServers) $ \notService user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + subscribeAllConnections notService False Nothing + ("", "", UP _ [_]) <- nGet notService + subscribeAllConnections user False Nothing + ("", "", UP _ [_]) <- nGet user + exchangeGreetingsMsgId 6 notService uId user sId getSMPAgentClient' :: Int -> AgentConfig -> InitialAgentServers -> String -> IO AgentClient getSMPAgentClient' clientId cfg' initServers dbPath = do From f5eb735551cd36803845564fec3e75146370772f Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sun, 14 Dec 2025 12:07:29 +0000 Subject: [PATCH 06/91] servers: service stats and logging, allow services without option (removed), report errors during service message delivery, remove threads when service subscription ended (#1676) * smp server: always allow services without option * smp server: maintain IDs hash in session subscription states * smp server: service message delivery error handling * ntf server: log subscription count and hash differences * smp server: remove delivery threads when service subscription ended/client disconnected --- src/Simplex/Messaging/Agent.hs | 1 - src/Simplex/Messaging/Notifications/Server.hs | 10 +- .../Messaging/Notifications/Server/Stats.hs | 1 - src/Simplex/Messaging/Protocol.hs | 11 +++ src/Simplex/Messaging/Server.hs | 95 ++++++++++--------- src/Simplex/Messaging/Server/Env/STM.hs | 14 +-- src/Simplex/Messaging/Server/Main.hs | 15 +-- .../Messaging/Server/MsgStore/Journal.hs | 4 +- .../Messaging/Server/MsgStore/Postgres.hs | 4 +- src/Simplex/Messaging/Server/MsgStore/STM.hs | 8 +- .../Messaging/Server/MsgStore/Types.hs | 2 +- src/Simplex/Messaging/Server/Prometheus.hs | 35 ++++++- .../Messaging/Server/QueueStore/Postgres.hs | 4 +- .../Messaging/Server/QueueStore/STM.hs | 4 +- src/Simplex/Messaging/Server/Stats.hs | 16 ++++ src/Simplex/Messaging/Transport.hs | 1 - 
tests/AgentTests/FunctionalAPITests.hs | 1 - 17 files changed, 147 insertions(+), 79 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index f155ce77b4..f44708fe6a 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -1539,7 +1539,6 @@ resubscribeConnections' c connIds = do [] -> pure True rqs' -> anyM $ map (atomically . hasActiveSubscription c) rqs' --- TODO [certs rcv] compare hash. possibly, it should return both expected and returned counts subscribeClientServices' :: AgentClient -> UserId -> AM (Map SMPServer (Either AgentErrorType ServiceSubResult)) subscribeClientServices' c userId = ifM useService subscribe $ throwError $ CMD PROHIBITED "no user service allowed" diff --git a/src/Simplex/Messaging/Notifications/Server.hs b/src/Simplex/Messaging/Notifications/Server.hs index 67ed89d715..e7c1ca5f97 100644 --- a/src/Simplex/Messaging/Notifications/Server.hs +++ b/src/Simplex/Messaging/Notifications/Server.hs @@ -576,9 +576,10 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = -- TODO [certs rcv] resubscribe queues with statuses NSErr and NSService CAServiceDisconnected srv serviceSub -> logNote $ "SMP server service disconnected " <> showService srv serviceSub - CAServiceSubscribed srv serviceSub@(ServiceSub _ expected _) (ServiceSub _ n _) -- TODO [certs rcv] compare hash - | expected == n -> logNote msg - | otherwise -> logWarn $ msg <> ", confirmed subs: " <> tshow n + CAServiceSubscribed srv serviceSub@(ServiceSub _ n idsHash) (ServiceSub _ n' idsHash') + | n /= n' -> logWarn $ msg <> ", confirmed subs: " <> tshow n' + | idsHash /= idsHash' -> logWarn $ msg <> ", different IDs hash" + | otherwise -> logNote msg where msg = "SMP server service subscribed " <> showService srv serviceSub CAServiceSubError srv serviceSub e -> @@ -593,8 +594,7 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = void $ subscribeSrvSubs ca st batchSize 
(srv, srvId, Nothing) Left e -> logError $ "SMP server update and resubscription error " <> tshow e where - -- TODO [certs rcv] compare hash - showService srv (ServiceSub serviceId n _idsHash) = showServer' srv <> ", service ID " <> decodeLatin1 (strEncode serviceId) <> ", " <> tshow n <> " subs" + showService srv (ServiceSub serviceId n _) = showServer' srv <> ", service ID " <> decodeLatin1 (strEncode serviceId) <> ", " <> tshow n <> " subs" logSubErrors :: SMPServer -> NonEmpty (SMP.NotifierId, NtfSubStatus) -> Int -> IO () logSubErrors srv subs updated = forM_ (L.group $ L.sort $ L.map snd subs) $ \ss -> do diff --git a/src/Simplex/Messaging/Notifications/Server/Stats.hs b/src/Simplex/Messaging/Notifications/Server/Stats.hs index 7125ce2906..a20e41c342 100644 --- a/src/Simplex/Messaging/Notifications/Server/Stats.hs +++ b/src/Simplex/Messaging/Notifications/Server/Stats.hs @@ -17,7 +17,6 @@ import Simplex.Messaging.Server.Stats import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM --- TODO [certs rcv] track service subscriptions and count/hash diffs for own and other servers + prometheus data NtfServerStats = NtfServerStats { fromTime :: IORef UTCTime, tknCreated :: IORef Int, diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index 6b232f12b3..51128597c3 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -147,6 +147,9 @@ module Simplex.Messaging.Protocol serviceSubResult, queueIdsHash, queueIdHash, + noIdsHash, + addServiceSubs, + subtractServiceSubs, MaxMessageLen, MaxRcvMessageLen, EncRcvMsgBody (..), @@ -1526,6 +1529,14 @@ queueIdHash :: QueueId -> IdsHash queueIdHash = IdsHash . C.md5Hash . 
unEntityId {-# INLINE queueIdHash #-} +addServiceSubs :: (Int64, IdsHash) -> (Int64, IdsHash) -> (Int64, IdsHash) +addServiceSubs (n', idsHash') (n, idsHash) = (n + n', idsHash <> idsHash') + +subtractServiceSubs :: (Int64, IdsHash) -> (Int64, IdsHash) -> (Int64, IdsHash) +subtractServiceSubs (n', idsHash') (n, idsHash) + | n > n' = (n - n', idsHash <> idsHash') -- concat is a reversible xor: (x `xor` y) `xor` y == x + | otherwise = (0, noIdsHash) + data ProtocolErrorType = PECmdSyntax | PECmdUnknown | PESession | PEBlock -- | Type for protocol errors. diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 0fc15b3e33..b7bb0efaac 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -166,8 +166,8 @@ type AttachHTTP = Socket -> TLS.Context -> IO () -- actions used in serverThread to reduce STM transaction scope data ClientSubAction = CSAEndSub QueueId -- end single direct queue subscription - | CSAEndServiceSub -- end service subscription to one queue - | CSADecreaseSubs Int64 -- reduce service subscriptions when cancelling. Fixed number is used to correctly handle race conditions when service resubscribes + | CSAEndServiceSub QueueId -- end service subscription to one queue + | CSADecreaseSubs (Int64, IdsHash) -- reduce service subscriptions when cancelling. 
Fixed number is used to correctly handle race conditions when service resubscribes type PrevClientSub s = (Client s, ClientSubAction, (EntityId, BrokerMsg)) @@ -251,7 +251,7 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt Server s -> (Server s -> ServerSubscribers s) -> (Client s -> TMap QueueId sub) -> - (Client s -> TVar Int64) -> + (Client s -> TVar (Int64, IdsHash)) -> Maybe (sub -> IO ()) -> M s () serverThread label srv srvSubscribers clientSubs clientServiceSubs unsub_ = do @@ -277,7 +277,7 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt as'' <- if prevServiceId == serviceId_ then pure [] else endServiceSub prevServiceId qId END case serviceId_ of Just serviceId -> do - modifyTVar' totalServiceSubs (+ 1) -- server count for all services + modifyTVar' totalServiceSubs $ addServiceSubs (1, queueIdHash qId) -- server count and IDs hash for all services as <- endQueueSub qId END as' <- cancelServiceSubs serviceId =<< upsertSubscribedClient serviceId c serviceSubscribers pure $ as ++ as' ++ as'' @@ -289,9 +289,9 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt as <- endQueueSub qId DELD as' <- endServiceSub serviceId qId DELD pure $ as ++ as' - CSService serviceId count -> do + CSService serviceId changedSubs -> do modifyTVar' subClients $ IS.insert clntId -- add ID to server's subscribed cients - modifyTVar' totalServiceSubs (+ count) -- server count for all services + modifyTVar' totalServiceSubs $ subtractServiceSubs changedSubs -- server count and IDs hash for all services cancelServiceSubs serviceId =<< upsertSubscribedClient serviceId c serviceSubscribers updateSubDisconnected = case clntSub of -- do not insert client if it is already disconnected, but send END/DELD to any other client subscribed to this queue or service @@ -309,15 +309,15 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt endQueueSub qId msg = prevSub 
qId msg (CSAEndSub qId) =<< lookupDeleteSubscribedClient qId queueSubscribers endServiceSub :: Maybe ServiceId -> QueueId -> BrokerMsg -> STM [PrevClientSub s] endServiceSub Nothing _ _ = pure [] - endServiceSub (Just serviceId) qId msg = prevSub qId msg CSAEndServiceSub =<< lookupSubscribedClient serviceId serviceSubscribers + endServiceSub (Just serviceId) qId msg = prevSub qId msg (CSAEndServiceSub qId) =<< lookupSubscribedClient serviceId serviceSubscribers prevSub :: QueueId -> BrokerMsg -> ClientSubAction -> Maybe (Client s) -> STM [PrevClientSub s] prevSub qId msg action = checkAnotherClient $ \c -> pure [(c, action, (qId, msg))] cancelServiceSubs :: ServiceId -> Maybe (Client s) -> STM [PrevClientSub s] cancelServiceSubs serviceId = checkAnotherClient $ \c -> do - n <- swapTVar (clientServiceSubs c) 0 - pure [(c, CSADecreaseSubs n, (serviceId, ENDS n))] + changedSubs@(n, _) <- swapTVar (clientServiceSubs c) (0, noIdsHash) + pure [(c, CSADecreaseSubs changedSubs, (serviceId, ENDS n))] checkAnotherClient :: (Client s -> STM [PrevClientSub s]) -> Maybe (Client s) -> STM [PrevClientSub s] checkAnotherClient mkSub = \case Just c@Client {clientId, connected} | clntId /= clientId -> @@ -332,20 +332,21 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt where a (Just unsub) (Just s) = unsub s a _ _ = pure () - CSAEndServiceSub -> atomically $ do + CSAEndServiceSub qId -> atomically $ do modifyTVar' (clientServiceSubs c) decrease modifyTVar' totalServiceSubs decrease where - decrease n = max 0 (n - 1) - -- TODO [certs rcv] for SMP subscriptions CSADecreaseSubs should also remove all delivery threads of the passed client - CSADecreaseSubs n' -> atomically $ modifyTVar' totalServiceSubs $ \n -> max 0 (n - n') + decrease = subtractServiceSubs (1, queueIdHash qId) + CSADecreaseSubs changedSubs -> do + atomically $ modifyTVar' totalServiceSubs $ subtractServiceSubs changedSubs + forM_ unsub_ $ \unsub -> atomically (swapTVar (clientSubs c) 
M.empty) >>= mapM_ unsub where endSub :: Client s -> QueueId -> STM (Maybe sub) endSub c qId = TM.lookupDelete qId (clientSubs c) >>= (removeWhenNoSubs c $>) -- remove client from server's subscribed cients removeWhenNoSubs c = do noClientSubs <- null <$> readTVar (clientSubs c) - noServiceSubs <- (0 ==) <$> readTVar (clientServiceSubs c) + noServiceSubs <- ((0 ==) . fst) <$> readTVar (clientServiceSubs c) when (noClientSubs && noServiceSubs) $ modifyTVar' subClients $ IS.delete (clientId c) deliverNtfsThread :: Server s -> M s () @@ -1112,10 +1113,10 @@ clientDisconnected c@Client {clientId, subscriptions, ntfSubscriptions, serviceS updateSubscribers subs ServerSubscribers {queueSubscribers, subClients} = do mapM_ (\qId -> deleteSubcribedClient qId c queueSubscribers) (M.keys subs) atomically $ modifyTVar' subClients $ IS.delete clientId - updateServiceSubs :: ServiceId -> TVar Int64 -> ServerSubscribers s -> IO () + updateServiceSubs :: ServiceId -> TVar (Int64, IdsHash) -> ServerSubscribers s -> IO () updateServiceSubs serviceId subsCount ServerSubscribers {totalServiceSubs, serviceSubscribers} = do deleteSubcribedClient serviceId c serviceSubscribers - atomically . modifyTVar' totalServiceSubs . subtract =<< readTVarIO subsCount + atomically . modifyTVar' totalServiceSubs . subtractServiceSubs =<< readTVarIO subsCount cancelSub :: Sub -> IO () cancelSub s = case subThread s of @@ -1357,7 +1358,6 @@ forkClient Client {endThreads, endThreadSeq} label action = do client :: forall s. 
MsgStoreClass s => Server s -> s -> Client s -> M s () client - -- TODO [certs rcv] rcv subscriptions Server {subscribers, ntfSubscribers} ms clnt@Client {clientId, rcvQ, sndQ, msgQ, clientTHParams = thParams'@THandleParams {sessionId}, procThreads} = do @@ -1661,7 +1661,7 @@ client subscribeNewQueue :: RecipientId -> QueueRec -> M s () subscribeNewQueue rId QueueRec {rcvServiceId} = do case rcvServiceId of - Just _ -> atomically $ modifyTVar' (serviceSubsCount clnt) (+ 1) + Just _ -> atomically $ modifyTVar' (serviceSubsCount clnt) $ addServiceSubs (1, queueIdHash rId) Nothing -> do sub <- atomically $ newSubscription NoSub atomically $ TM.insert rId sub $ subscriptions clnt @@ -1741,7 +1741,7 @@ client Maybe ServiceId -> ServerSubscribers s -> (Client s -> TMap QueueId sub) -> - (Client s -> TVar Int64) -> + (Client s -> TVar (Int64, IdsHash)) -> STM sub -> (ServerStats -> ServiceStats) -> M s (Either ErrorType (Bool, Maybe sub)) @@ -1771,9 +1771,9 @@ client incSrvStat $ maybe srvAssocNew (const srvAssocUpdated) queueServiceId pure (hasSub, Nothing) where - hasServiceSub = (0 /=) <$> readTVar (clientServiceSubs clnt) + hasServiceSub = ((0 /=) . fst) <$> readTVar (clientServiceSubs clnt) -- This function is used when queue association with the service is created. 
- incServiceQueueSubs = modifyTVar' (clientServiceSubs clnt) (+ 1) -- service count + incServiceQueueSubs = modifyTVar' (clientServiceSubs clnt) $ addServiceSubs (1, queueIdHash (recipientId q)) -- service count and IDs hash Nothing -> case queueServiceId of Just _ -> runExceptT $ do ExceptT $ setQueueService (queueStore ms) q party Nothing @@ -1801,27 +1801,36 @@ client sharedSubscribeService SRecipientService serviceId expected subscribers serviceSubscribed serviceSubsCount rcvServices >>= \case Left e -> pure $ ERR e Right (hasSub, (count, idsHash)) -> do - unless hasSub $ forkClient clnt "deliverServiceMessages" $ liftIO $ deliverServiceMessages count + stats <- asks serverStats + unless hasSub $ forkClient clnt "deliverServiceMessages" $ liftIO $ deliverServiceMessages stats count pure $ SOKS count idsHash where - deliverServiceMessages expectedCnt = do - (qCnt, _msgCnt, _dupCnt, _errCnt) <- foldRcvServiceMessages ms serviceId deliverQueueMsg (0, 0, 0, 0) - atomically $ writeTBQueue msgQ [(NoCorrId, NoEntity, ALLS)] - -- TODO [certs rcv] compare with expected - logNote $ "Service subscriptions for " <> tshow serviceId <> " (" <> tshow qCnt <> " queues)" - deliverQueueMsg :: (Int, Int, Int, Int) -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO (Int, Int, Int, Int) - deliverQueueMsg (!qCnt, !msgCnt, !dupCnt, !errCnt) rId = \case - Left e -> pure (qCnt + 1, msgCnt, dupCnt, errCnt + 1) -- TODO [certs rcv] deliver subscription error + deliverServiceMessages stats expectedCnt = do + foldRcvServiceMessages ms serviceId deliverQueueMsg (0, 0, 0, [(NoCorrId, NoEntity, ALLS)]) >>= \case + Right (qCnt, msgCnt, dupCnt, evts) -> do + atomically $ writeTBQueue msgQ evts + atomicModifyIORef'_ (rcvServicesSubMsg stats) (+ msgCnt) + atomicModifyIORef'_ (rcvServicesSubDuplicate stats) (+ dupCnt) + let logMsg = "Subscribed service " <> tshow serviceId <> " (" + if qCnt == expectedCnt + then logNote $ logMsg <> tshow qCnt <> " queues)" + else logError $ logMsg 
<> "expected " <> tshow expectedCnt <> "," <> tshow qCnt <> " queues)" + Left e -> do + logError $ "Service subscription error for " <> tshow serviceId <> ": " <> tshow e + atomically $ writeTBQueue msgQ [(NoCorrId, NoEntity, ERR e)] + deliverQueueMsg :: (Int64, Int, Int, NonEmpty (Transmission BrokerMsg)) -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO (Int64, Int, Int, NonEmpty (Transmission BrokerMsg)) + deliverQueueMsg (!qCnt, !msgCnt, !dupCnt, evts) rId = \case + Left e -> pure (qCnt + 1, msgCnt, dupCnt, (NoCorrId, rId, ERR e) <| evts) Right qMsg_ -> case qMsg_ of - Nothing -> pure (qCnt + 1, msgCnt, dupCnt, errCnt) + Nothing -> pure (qCnt + 1, msgCnt, dupCnt, evts) Just (qr, msg) -> atomically (getSubscription rId) >>= \case - Nothing -> pure (qCnt + 1, msgCnt, dupCnt + 1, errCnt) + Nothing -> pure (qCnt + 1, msgCnt, dupCnt + 1, evts) Just sub -> do ts <- getSystemSeconds atomically $ setDelivered sub msg ts atomically $ writeTBQueue msgQ [(NoCorrId, rId, MSG (encryptMsg qr msg))] - pure (qCnt + 1, msgCnt + 1, dupCnt, errCnt) + pure (qCnt + 1, msgCnt + 1, dupCnt, evts) getSubscription rId = TM.lookup rId (subscriptions clnt) >>= \case -- If delivery subscription already exists, then there is no need to deliver message. @@ -1836,28 +1845,28 @@ client subscribeServiceNotifications serviceId expected = either ERR (uncurry SOKS . 
snd) <$> sharedSubscribeService SNotifierService serviceId expected ntfSubscribers ntfServiceSubscribed ntfServiceSubsCount ntfServices - sharedSubscribeService :: (PartyI p, ServiceParty p) => SParty p -> ServiceId -> (Int64, IdsHash) -> ServerSubscribers s -> (Client s -> TVar Bool) -> (Client s -> TVar Int64) -> (ServerStats -> ServiceStats) -> M s (Either ErrorType (Bool, (Int64, IdsHash))) + sharedSubscribeService :: (PartyI p, ServiceParty p) => SParty p -> ServiceId -> (Int64, IdsHash) -> ServerSubscribers s -> (Client s -> TVar Bool) -> (Client s -> TVar (Int64, IdsHash)) -> (ServerStats -> ServiceStats) -> M s (Either ErrorType (Bool, (Int64, IdsHash))) sharedSubscribeService party serviceId (count, idsHash) srvSubscribers clientServiceSubscribed clientServiceSubs servicesSel = do subscribed <- readTVarIO $ clientServiceSubscribed clnt stats <- asks serverStats liftIO $ runExceptT $ (subscribed,) <$> if subscribed - then (,mempty) <$> readTVarIO (clientServiceSubs clnt) -- TODO [certs rcv] get IDs hash + then readTVarIO $ clientServiceSubs clnt else do - (count', idsHash') <- ExceptT $ getServiceQueueCountHash @(StoreQueue s) (queueStore ms) party serviceId - incCount <- atomically $ do + subs'@(count', idsHash') <- ExceptT $ getServiceQueueCountHash @(StoreQueue s) (queueStore ms) party serviceId + subsChange <- atomically $ do writeTVar (clientServiceSubscribed clnt) True - currCount <- swapTVar (clientServiceSubs clnt) count' -- TODO [certs rcv] maintain IDs hash here? 
- pure $ count' - currCount + currSubs <- swapTVar (clientServiceSubs clnt) subs' + pure $ subtractServiceSubs currSubs subs' let incSrvStat sel n = liftIO $ atomicModifyIORef'_ (sel $ servicesSel stats) (+ n) diff = fromIntegral $ count' - count - if -- TODO [certs rcv] account for not provided counts/hashes (expected n = -1) - | diff == 0 && idsHash == idsHash' -> incSrvStat srvSubOk 1 + if -- `count == -1` only for subscriptions by old NTF servers + | count == -1 || (diff == 0 && idsHash == idsHash') -> incSrvStat srvSubOk 1 | diff > 0 -> incSrvStat srvSubMore 1 >> incSrvStat srvSubMoreTotal diff | diff < 0 -> incSrvStat srvSubFewer 1 >> incSrvStat srvSubFewerTotal (- diff) | otherwise -> incSrvStat srvSubDiff 1 - atomically $ writeTQueue (subQ srvSubscribers) (CSService serviceId incCount, clientId) + atomically $ writeTQueue (subQ srvSubscribers) (CSService serviceId subsChange, clientId) pure (count', idsHash') acknowledgeMsg :: MsgId -> StoreQueue s -> QueueRec -> M s (Transmission BrokerMsg) @@ -2133,7 +2142,7 @@ client -- we delete subscription here, so the client with no subscriptions can be disconnected.
sub <- atomically $ TM.lookupDelete entId $ subscriptions clnt liftIO $ mapM_ cancelSub sub - when (isJust rcvServiceId) $ atomically $ modifyTVar' (serviceSubsCount clnt) $ \n -> max 0 (n - 1) + when (isJust rcvServiceId) $ atomically $ modifyTVar' (serviceSubsCount clnt) $ subtractServiceSubs (1, queueIdHash (recipientId q)) atomically $ writeTQueue (subQ subscribers) (CSDeleted entId rcvServiceId, clientId) forM_ (notifier qr) $ \NtfCreds {notifierId = nId, ntfServiceId} -> do -- queue is deleted by a different client from the one subscribed to notifications, diff --git a/src/Simplex/Messaging/Server/Env/STM.hs b/src/Simplex/Messaging/Server/Env/STM.hs index 24cd6dfcca..02cf136c75 100644 --- a/src/Simplex/Messaging/Server/Env/STM.hs +++ b/src/Simplex/Messaging/Server/Env/STM.hs @@ -363,7 +363,7 @@ data ServerSubscribers s = ServerSubscribers { subQ :: TQueue (ClientSub, ClientId), queueSubscribers :: SubscribedClients s, serviceSubscribers :: SubscribedClients s, -- service clients with long-term certificates that have subscriptions - totalServiceSubs :: TVar Int64, + totalServiceSubs :: TVar (Int64, IdsHash), subClients :: TVar IntSet, -- clients with individual or service subscriptions pendingEvents :: TVar (IntMap (NonEmpty (EntityId, BrokerMsg))) } @@ -426,7 +426,7 @@ sameClient c cv = maybe False (sameClientId c) <$> readTVar cv data ClientSub = CSClient QueueId (Maybe ServiceId) (Maybe ServiceId) -- includes previous and new associated service IDs | CSDeleted QueueId (Maybe ServiceId) -- includes previously associated service IDs - | CSService ServiceId Int64 -- only send END to idividual client subs on message delivery, not of SSUB/NSSUB + | CSService ServiceId (Int64, IdsHash) -- only send END to idividual client subs on message delivery, not of SSUB/NSSUB newtype ProxyAgent = ProxyAgent { smpAgent :: SMPClientAgent 'Sender @@ -440,8 +440,8 @@ data Client s = Client ntfSubscriptions :: TMap NotifierId (), serviceSubscribed :: TVar Bool, -- set 
independently of serviceSubsCount, to track whether service subscription command was received ntfServiceSubscribed :: TVar Bool, - serviceSubsCount :: TVar Int64, -- only one service can be subscribed, based on its certificate, this is subscription count - ntfServiceSubsCount :: TVar Int64, -- only one service can be subscribed, based on its certificate, this is subscription count + serviceSubsCount :: TVar (Int64, IdsHash), -- only one service can be subscribed, based on its certificate, this is subscription count + ntfServiceSubsCount :: TVar (Int64, IdsHash), -- only one service can be subscribed, based on its certificate, this is subscription count rcvQ :: TBQueue (NonEmpty (VerifiedTransmission s)), sndQ :: TBQueue (NonEmpty (Transmission BrokerMsg), [Transmission BrokerMsg]), msgQ :: TBQueue (NonEmpty (Transmission BrokerMsg)), @@ -502,7 +502,7 @@ newServerSubscribers = do subQ <- newTQueueIO queueSubscribers <- SubscribedClients <$> TM.emptyIO serviceSubscribers <- SubscribedClients <$> TM.emptyIO - totalServiceSubs <- newTVarIO 0 + totalServiceSubs <- newTVarIO (0, noIdsHash) subClients <- newTVarIO IS.empty pendingEvents <- newTVarIO IM.empty pure ServerSubscribers {subQ, queueSubscribers, serviceSubscribers, totalServiceSubs, subClients, pendingEvents} @@ -513,8 +513,8 @@ newClient clientId qSize clientTHParams createdAt = do ntfSubscriptions <- TM.emptyIO serviceSubscribed <- newTVarIO False ntfServiceSubscribed <- newTVarIO False - serviceSubsCount <- newTVarIO 0 - ntfServiceSubsCount <- newTVarIO 0 + serviceSubsCount <- newTVarIO (0, noIdsHash) + ntfServiceSubsCount <- newTVarIO (0, noIdsHash) rcvQ <- newTBQueueIO qSize sndQ <- newTBQueueIO qSize msgQ <- newTBQueueIO qSize diff --git a/src/Simplex/Messaging/Server/Main.hs b/src/Simplex/Messaging/Server/Main.hs index 7de966c36b..86ff3d4a91 100644 --- a/src/Simplex/Messaging/Server/Main.hs +++ b/src/Simplex/Messaging/Server/Main.hs @@ -18,7 +18,7 @@ module Simplex.Messaging.Server.Main where import 
Control.Concurrent.STM -import Control.Exception (SomeException, finally, try) +import Control.Exception (finally) import Control.Logger.Simple import Control.Monad import qualified Data.Attoparsec.ByteString.Char8 as A @@ -28,10 +28,8 @@ import Data.Char (isAlpha, isAscii, toUpper) import Data.Either (fromRight) import Data.Functor (($>)) import Data.Ini (Ini, lookupValue, readIniFile) -import Data.Int (Int64) import Data.List (find, isPrefixOf) import qualified Data.List.NonEmpty as L -import qualified Data.Map.Strict as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Text (Text) import qualified Data.Text as T @@ -61,14 +59,17 @@ import Simplex.Messaging.Transport (supportedProxyClientSMPRelayVRange, alpnSupp import Simplex.Messaging.Transport.Client (TransportHost (..), defaultSocksProxy) import Simplex.Messaging.Transport.HTTP2 (httpALPN) import Simplex.Messaging.Transport.Server (ServerCredentials (..), mkTransportServerConfig) -import Simplex.Messaging.Util (eitherToMaybe, ifM, unlessM) +import Simplex.Messaging.Util (eitherToMaybe, ifM) import System.Directory (createDirectoryIfMissing, doesDirectoryExist, doesFileExist) import System.Exit (exitFailure) import System.FilePath (combine) -import System.IO (BufferMode (..), IOMode (..), hSetBuffering, stderr, stdout, withFile) +import System.IO (BufferMode (..), hSetBuffering, stderr, stdout) import Text.Read (readMaybe) #if defined(dbServerPostgres) +import Control.Exception (SomeException, try) +import Data.Int (Int64) +import qualified Data.Map.Strict as M import Data.Semigroup (Sum (..)) import Simplex.Messaging.Agent.Store.Postgres (checkSchemaExists) import Simplex.Messaging.Server.MsgStore.Journal (JournalQueue) @@ -79,7 +80,9 @@ import Simplex.Messaging.Server.QueueStore.Postgres (batchInsertQueues, batchIns import Simplex.Messaging.Server.QueueStore.STM (STMQueueStore (..)) import Simplex.Messaging.Server.QueueStore.Types import Simplex.Messaging.Server.StoreLog (closeStoreLog, 
logNewService, logCreateQueue, openWriteStoreLog) +import Simplex.Messaging.Util (unlessM) import System.Directory (renameFile) +import System.IO (IOMode (..), withFile) #endif smpServerCLI :: FilePath -> FilePath -> IO () @@ -556,7 +559,7 @@ smpServerCLI_ generateSite serveStaticFiles attachStaticFiles cfgPath logPath = mkTransportServerConfig (fromMaybe False $ iniOnOff "TRANSPORT" "log_tls_errors" ini) (Just $ alpnSupportedSMPHandshakes <> httpALPN) - (fromMaybe True $ iniOnOff "TRANSPORT" "accept_service_credentials" ini), -- TODO [certs rcv] remove this option + True, controlPort = eitherToMaybe $ T.unpack <$> lookupValue "TRANSPORT" "control_port" ini, smpAgentCfg = defaultSMPClientAgentConfig diff --git a/src/Simplex/Messaging/Server/MsgStore/Journal.hs b/src/Simplex/Messaging/Server/MsgStore/Journal.hs index 89e9f03831..c65660c93b 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Journal.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Journal.hs @@ -444,9 +444,9 @@ instance MsgStoreClass (JournalMsgStore s) where getLoadedQueue :: JournalQueue s -> IO (JournalQueue s) getLoadedQueue q = fromMaybe q <$> TM.lookupIO (recipientId q) (loadedQueues $ queueStore_ ms) - foldRcvServiceMessages :: JournalMsgStore s -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a + foldRcvServiceMessages :: JournalMsgStore s -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO (Either ErrorType a) foldRcvServiceMessages ms serviceId f acc = case queueStore_ ms of - MQStore st -> foldRcvServiceQueues st serviceId f' acc + MQStore st -> fmap Right $ foldRcvServiceQueues st serviceId f' acc where f' a (q, qr) = runExceptT (tryPeekMsg ms q) >>= f a (recipientId q) . 
((qr,) <$$>) #if defined(dbServerPostgres) diff --git a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs index f3000811b5..edf7f481cd 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs @@ -119,9 +119,9 @@ instance MsgStoreClass PostgresMsgStore where toMessageStats (expiredMsgsCount, storedMsgsCount, storedQueues) = MessageStats {expiredMsgsCount, storedMsgsCount, storedQueues} - foldRcvServiceMessages :: PostgresMsgStore -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a + foldRcvServiceMessages :: PostgresMsgStore -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO (Either ErrorType a) foldRcvServiceMessages ms serviceId f acc = - withTransaction (dbStore $ queueStore_ ms) $ \db -> + runExceptT $ withDB' "foldRcvServiceMessages" (queueStore_ ms) $ \db -> DB.fold db [sql| diff --git a/src/Simplex/Messaging/Server/MsgStore/STM.hs b/src/Simplex/Messaging/Server/MsgStore/STM.hs index 24d489accb..f118e007ca 100644 --- a/src/Simplex/Messaging/Server/MsgStore/STM.hs +++ b/src/Simplex/Messaging/Server/MsgStore/STM.hs @@ -87,10 +87,10 @@ instance MsgStoreClass STMMsgStore where expireOldMessages _tty ms now ttl = withLoadedQueues (queueStore_ ms) $ atomically . expireQueueMsgs ms now (now - ttl) - foldRcvServiceMessages :: STMMsgStore -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a - foldRcvServiceMessages ms serviceId f= - foldRcvServiceQueues (queueStore_ ms) serviceId $ \a (q, qr) -> - runExceptT (tryPeekMsg ms q) >>= f a (recipientId q) . ((qr,) <$$>) + foldRcvServiceMessages :: STMMsgStore -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO (Either ErrorType a) + foldRcvServiceMessages ms serviceId f = fmap Right . 
foldRcvServiceQueues (queueStore_ ms) serviceId f' + where + f' a (q, qr) = runExceptT (tryPeekMsg ms q) >>= f a (recipientId q) . ((qr,) <$$>) logQueueStates _ = pure () {-# INLINE logQueueStates #-} diff --git a/src/Simplex/Messaging/Server/MsgStore/Types.hs b/src/Simplex/Messaging/Server/MsgStore/Types.hs index e186da05a1..fc97bbc209 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Types.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Types.hs @@ -45,7 +45,7 @@ class (Monad (StoreMonad s), QueueStoreClass (StoreQueue s) (QueueStore s)) => M unsafeWithAllMsgQueues :: Monoid a => Bool -> s -> (StoreQueue s -> IO a) -> IO a -- tty, store, now, ttl expireOldMessages :: Bool -> s -> Int64 -> Int64 -> IO MessageStats - foldRcvServiceMessages :: s -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO a + foldRcvServiceMessages :: s -> ServiceId -> (a -> RecipientId -> Either ErrorType (Maybe (QueueRec, Message)) -> IO a) -> a -> IO (Either ErrorType a) logQueueStates :: s -> IO () logQueueState :: StoreQueue s -> StoreMonad s () queueStore :: s -> QueueStore s diff --git a/src/Simplex/Messaging/Server/Prometheus.hs b/src/Simplex/Messaging/Server/Prometheus.hs index e4d6a2774f..1e3c5132d0 100644 --- a/src/Simplex/Messaging/Server/Prometheus.hs +++ b/src/Simplex/Messaging/Server/Prometheus.hs @@ -21,7 +21,6 @@ import Simplex.Messaging.Transport (simplexMQVersion) import Simplex.Messaging.Transport.Server (SocketStats (..)) import Simplex.Messaging.Util (tshow) --- TODO [certs rcv] add service subscriptions and count/hash diffs data ServerMetrics = ServerMetrics { statsData :: ServerStatsData, activeQueueCounts :: PeriodStatCounts, @@ -118,6 +117,8 @@ prometheusMetrics sm rtm ts = _pMsgFwdsRecv, _rcvServices, _ntfServices, + _rcvServicesSubMsg, + _rcvServicesSubDuplicate, _qCount, _msgCount, _ntfCount @@ -383,6 +384,14 @@ prometheusMetrics sm rtm ts = \# HELP simplex_smp_ntf_services_queues_count The count of queues 
associated with notification services.\n\ \# TYPE simplex_smp_ntf_services_queues_count gauge\n\ \simplex_smp_ntf_services_queues_count " <> mshow (ntfServiceQueuesCount entityCounts) <> "\n# ntfServiceQueuesCount\n\ + \\n\ + \# HELP simplex_smp_rcv_services_sub_msg The count of subscribed service queues with messages.\n\ + \# TYPE simplex_smp_rcv_services_sub_msg counter\n\ + \simplex_smp_rcv_services_sub_msg " <> mshow _rcvServicesSubMsg <> "\n# rcvServicesSubMsg\n\ + \\n\ + \# HELP simplex_smp_rcv_services_sub_duplicate The count of duplicate subscribed service queues.\n\ + \# TYPE simplex_smp_rcv_services_sub_duplicate counter\n\ + \simplex_smp_rcv_services_sub_duplicate " <> mshow _rcvServicesSubDuplicate <> "\n# rcvServicesSubDuplicate\n\ \\n" <> showServices _rcvServices "rcv" "receiving" <> showServices _ntfServices "ntf" "notification" @@ -418,6 +427,30 @@ prometheusMetrics sm rtm ts = \# HELP simplex_smp_" <> pfx <> "_services_sub_end Ended subscriptions with " <> name <> " services.\n\ \# TYPE simplex_smp_" <> pfx <> "_services_sub_end gauge\n\ \simplex_smp_" <> pfx <> "_services_sub_end " <> mshow (_srvSubEnd ss) <> "\n# " <> pfx <> ".srvSubEnd\n\ + \\n\ + \# HELP simplex_smp_" <> pfx <> "_services_sub_ok Service subscriptions for " <> name <> " services.\n\ + \# TYPE simplex_smp_" <> pfx <> "_services_sub_ok gauge\n\ + \simplex_smp_" <> pfx <> "_services_sub_ok " <> mshow (_srvSubOk ss) <> "\n# " <> pfx <> ".srvSubOk\n\ + \\n\ + \# HELP simplex_smp_" <> pfx <> "_services_sub_more Service subscriptions for " <> name <> " services with more queues than in the client.\n\ + \# TYPE simplex_smp_" <> pfx <> "_services_sub_more gauge\n\ + \simplex_smp_" <> pfx <> "_services_sub_more " <> mshow (_srvSubMore ss) <> "\n# " <> pfx <> ".srvSubMore\n\ + \\n\ + \# HELP simplex_smp_" <> pfx <> "_services_sub_fewer Service subscriptions for " <> name <> " services with fewer queues than in the client.\n\ + \# TYPE simplex_smp_" <> pfx <> "_services_sub_fewer gauge\n\ 
+ \simplex_smp_" <> pfx <> "_services_sub_fewer " <> mshow (_srvSubFewer ss) <> "\n# " <> pfx <> ".srvSubFewer\n\ + \\n\ + \# HELP simplex_smp_" <> pfx <> "_services_sub_diff Service subscriptions for " <> name <> " services with different hash than in the client.\n\ + \# TYPE simplex_smp_" <> pfx <> "_services_sub_diff gauge\n\ + \simplex_smp_" <> pfx <> "_services_sub_diff " <> mshow (_srvSubDiff ss) <> "\n# " <> pfx <> ".srvSubDiff\n\ + \\n\ + \# HELP simplex_smp_" <> pfx <> "_services_sub_more_total Service subscriptions for " <> name <> " services with more queues than in the client total.\n\ + \# TYPE simplex_smp_" <> pfx <> "_services_sub_more_total gauge\n\ + \simplex_smp_" <> pfx <> "_services_sub_more_total " <> mshow (_srvSubMoreTotal ss) <> "\n# " <> pfx <> ".srvSubMoreTotal\n\ + \\n\ + \# HELP simplex_smp_" <> pfx <> "_services_sub_fewer_total Service subscriptions for " <> name <> " services with fewer queues than in the client total.\n\ + \# TYPE simplex_smp_" <> pfx <> "_services_sub_fewer_total gauge\n\ + \simplex_smp_" <> pfx <> "_services_sub_fewer_total " <> mshow (_srvSubFewerTotal ss) <> "\n# " <> pfx <> ".srvSubFewerTotal\n\ \\n" info = "# Info\n\ diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs index eb1ba3b2c0..a8c8c040aa 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs @@ -581,9 +581,9 @@ foldServiceRecs st f = DB.fold_ db "SELECT service_id, service_role, service_cert, service_cert_hash, created_at FROM services" mempty $ \ !acc -> fmap (acc <>) . f . 
rowToServiceRec -foldRcvServiceQueueRecs :: PostgresQueueStore q -> ServiceId -> (a -> (RecipientId, QueueRec) -> IO a) -> a -> IO a +foldRcvServiceQueueRecs :: PostgresQueueStore q -> ServiceId -> (a -> (RecipientId, QueueRec) -> IO a) -> a -> IO (Either ErrorType a) foldRcvServiceQueueRecs st serviceId f acc = - withTransaction (dbStore st) $ \db -> + runExceptT $ withDB' "foldRcvServiceQueueRecs" st $ \db -> DB.fold db (queueRecQuery <> " WHERE rcv_service_id = ? AND deleted_at IS NULL") (Only serviceId) acc $ \a -> f a . rowToQueueRec foldQueueRecs :: Monoid a => Bool -> Bool -> PostgresQueueStore q -> ((RecipientId, QueueRec) -> IO a) -> IO a diff --git a/src/Simplex/Messaging/Server/QueueStore/STM.hs b/src/Simplex/Messaging/Server/QueueStore/STM.hs index 8b64db55a2..3a236076c4 100644 --- a/src/Simplex/Messaging/Server/QueueStore/STM.hs +++ b/src/Simplex/Messaging/Server/QueueStore/STM.hs @@ -63,8 +63,8 @@ data STMQueueStore q = STMQueueStore data STMService = STMService { serviceRec :: ServiceRec, - serviceRcvQueues :: TVar (Set RecipientId, IdsHash), -- TODO [certs rcv] get/maintain hash - serviceNtfQueues :: TVar (Set NotifierId, IdsHash) -- TODO [certs rcv] get/maintain hash + serviceRcvQueues :: TVar (Set RecipientId, IdsHash), + serviceNtfQueues :: TVar (Set NotifierId, IdsHash) } setStoreLog :: STMQueueStore q -> StoreLog 'WriteMode -> IO () diff --git a/src/Simplex/Messaging/Server/Stats.hs b/src/Simplex/Messaging/Server/Stats.hs index 120fad7b66..613c5e8bef 100644 --- a/src/Simplex/Messaging/Server/Stats.hs +++ b/src/Simplex/Messaging/Server/Stats.hs @@ -86,6 +86,8 @@ data ServerStats = ServerStats pMsgFwdsRecv :: IORef Int, rcvServices :: ServiceStats, ntfServices :: ServiceStats, + rcvServicesSubMsg :: IORef Int, + rcvServicesSubDuplicate :: IORef Int, qCount :: IORef Int, msgCount :: IORef Int, ntfCount :: IORef Int @@ -145,6 +147,8 @@ data ServerStatsData = ServerStatsData _pMsgFwdsRecv :: Int, _ntfServices :: ServiceStatsData, _rcvServices :: 
ServiceStatsData, + _rcvServicesSubMsg :: Int, + _rcvServicesSubDuplicate :: Int, _qCount :: Int, _msgCount :: Int, _ntfCount :: Int @@ -206,6 +210,8 @@ newServerStats ts = do pMsgFwdsRecv <- newIORef 0 rcvServices <- newServiceStats ntfServices <- newServiceStats + rcvServicesSubMsg <- newIORef 0 + rcvServicesSubDuplicate <- newIORef 0 qCount <- newIORef 0 msgCount <- newIORef 0 ntfCount <- newIORef 0 @@ -264,6 +270,8 @@ newServerStats ts = do pMsgFwdsRecv, rcvServices, ntfServices, + rcvServicesSubMsg, + rcvServicesSubDuplicate, qCount, msgCount, ntfCount @@ -324,6 +332,8 @@ getServerStatsData s = do _pMsgFwdsRecv <- readIORef $ pMsgFwdsRecv s _rcvServices <- getServiceStatsData $ rcvServices s _ntfServices <- getServiceStatsData $ ntfServices s + _rcvServicesSubMsg <- readIORef $ rcvServicesSubMsg s + _rcvServicesSubDuplicate <- readIORef $ rcvServicesSubDuplicate s _qCount <- readIORef $ qCount s _msgCount <- readIORef $ msgCount s _ntfCount <- readIORef $ ntfCount s @@ -382,6 +392,8 @@ getServerStatsData s = do _pMsgFwdsRecv, _rcvServices, _ntfServices, + _rcvServicesSubMsg, + _rcvServicesSubDuplicate, _qCount, _msgCount, _ntfCount @@ -443,6 +455,8 @@ setServerStats s d = do writeIORef (pMsgFwdsRecv s) $! _pMsgFwdsRecv d setServiceStats (rcvServices s) $! _rcvServices d setServiceStats (ntfServices s) $! _ntfServices d + writeIORef (rcvServicesSubMsg s) $! _rcvServicesSubMsg d + writeIORef (rcvServicesSubDuplicate s) $! _rcvServicesSubDuplicate d writeIORef (qCount s) $! _qCount d writeIORef (msgCount s) $! _msgCount d writeIORef (ntfCount s) $! 
_ntfCount d @@ -636,6 +650,8 @@ instance StrEncoding ServerStatsData where _pMsgFwdsRecv, _rcvServices, _ntfServices, + _rcvServicesSubMsg = 0, + _rcvServicesSubDuplicate = 0, _qCount, _msgCount = 0, _ntfCount = 0 diff --git a/src/Simplex/Messaging/Transport.hs b/src/Simplex/Messaging/Transport.hs index a14118ce4d..f1eb1a8bd0 100644 --- a/src/Simplex/Messaging/Transport.hs +++ b/src/Simplex/Messaging/Transport.hs @@ -560,7 +560,6 @@ data SMPClientHandshake = SMPClientHandshake keyHash :: C.KeyHash, -- | pub key to agree shared secret for entity ID encryption, shared secret for command authorization is agreed using per-queue keys. authPubKey :: Maybe C.PublicKeyX25519, - -- TODO [certs rcv] remove proxyServer, as serviceInfo includes it as clientRole -- | Whether connecting client is a proxy server (send from SMP v12). -- This property, if True, disables additional transport encrytion inside TLS. -- (Proxy server connection already has additional encryption, so this layer is not needed there). 
diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 31967917a0..b63e4cb48a 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -3693,7 +3693,6 @@ testClientServiceConnection ps = do ("", "", DOWN _ [_]) <- nGet user ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 1 qIdHash')) <- nGet service qIdHash' `shouldBe` qIdHash - -- TODO [certs rcv] how to integrate service counts into stats withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do ("", "", UP _ [_]) <- nGet user -- Nothing in ServiceSubResult confirms that both counts and IDs hash match From a1277bf6bfb30015ef00bb0de58664ee00efe114 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 19 Dec 2025 21:10:12 +0000 Subject: [PATCH 07/91] agent: remove service queue association when service ID changed, process ENDS event, test migrating to/from service (#1677) * agent: remove service queue association when service ID changed * agent: process ENDS event * agent: send service subscription error event * agent: test migrating to/from service subscriptions, fixes * agent: always remove service when disabled, fix service subscriptions --- src/Simplex/Messaging/Agent.hs | 86 +++++---- src/Simplex/Messaging/Agent/Client.hs | 43 +++-- src/Simplex/Messaging/Agent/Protocol.hs | 3 + .../Messaging/Agent/Store/AgentStore.hs | 48 +++-- src/Simplex/Messaging/Agent/TSessionSubs.hs | 59 +++--- src/Simplex/Messaging/Protocol.hs | 32 ++-- src/Simplex/Messaging/Server.hs | 4 +- src/Simplex/Messaging/Server/Env/STM.hs | 6 +- tests/AgentTests/FunctionalAPITests.hs | 172 +++++++++++++++++- tests/CoreTests/TSessionSubs.hs | 6 +- tests/ServerTests.hs | 4 +- 11 files changed, 338 insertions(+), 125 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index f44708fe6a..e17c39a165 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -221,7 +221,9 @@ import Simplex.Messaging.Protocol 
SMPMsgMeta, SParty (..), SProtocolType (..), - ServiceSubResult, + ServiceSub (..), + ServiceSubResult (..), + ServiceSubError (..), SndPublicAuthKey, SubscriptionMode (..), UserProtocol, @@ -1040,10 +1042,10 @@ newRcvConnSrv c nm userId connId enableNtfs cMode userLinkData_ clientData pqIni createRcvQueue nonce_ qd e2eKeys = do AgentConfig {smpClientVRange = vr} <- asks config ntfServer_ <- if enableNtfs then newQueueNtfServer else pure Nothing - (rq, qUri, tSess, sessId) <- newRcvQueue_ c nm userId connId srvWithAuth vr qd (isJust ntfServer_) subMode nonce_ e2eKeys `catchAllErrors` \e -> liftIO (print e) >> throwE e + (rq, qUri, tSess, sessId, serviceId_) <- newRcvQueue_ c nm userId connId srvWithAuth vr qd (isJust ntfServer_) subMode nonce_ e2eKeys `catchAllErrors` \e -> liftIO (print e) >> throwE e atomically $ incSMPServerStat c userId srv connCreated rq' <- withStore c $ \db -> updateNewConnRcv db connId rq subMode - lift . when (subMode == SMSubscribe) $ addNewQueueSubscription c rq' tSess sessId + lift . 
when (subMode == SMSubscribe) $ addNewQueueSubscription c rq' tSess sessId serviceId_ mapM_ (newQueueNtfSubscription c rq') ntfServer_ pure (rq', qUri) createConnReq :: SMPQueueUri -> AM (ConnectionRequestUri c) @@ -1291,11 +1293,11 @@ joinConnSrvAsync _c _userId _connId _enableNtfs (CRContactUri _) _cInfo _subMode createReplyQueue :: AgentClient -> NetworkRequestMode -> ConnData -> SndQueue -> SubscriptionMode -> SMPServerWithAuth -> AM SMPQueueInfo createReplyQueue c nm ConnData {userId, connId, enableNtfs} SndQueue {smpClientVersion} subMode srv = do ntfServer_ <- if enableNtfs then newQueueNtfServer else pure Nothing - (rq, qUri, tSess, sessId) <- newRcvQueue c nm userId connId srv (versionToRange smpClientVersion) SCMInvitation (isJust ntfServer_) subMode + (rq, qUri, tSess, sessId, serviceId_) <- newRcvQueue c nm userId connId srv (versionToRange smpClientVersion) SCMInvitation (isJust ntfServer_) subMode atomically $ incSMPServerStat c userId (qServer rq) connCreated let qInfo = toVersionT qUri smpClientVersion rq' <- withStore c $ \db -> upgradeSndConnToDuplex db connId rq subMode - lift . when (subMode == SMSubscribe) $ addNewQueueSubscription c rq' tSess sessId + lift . when (subMode == SMSubscribe) $ addNewQueueSubscription c rq' tSess sessId serviceId_ mapM_ (newQueueNtfSubscription c rq') ntfServer_ pure qInfo @@ -1451,22 +1453,14 @@ subscribeAllConnections' c onlyNeeded activeUserId_ = handleErr $ do Just activeUserId -> sortOn (\(uId, _) -> if uId == activeUserId then 0 else 1 :: Int) userSrvs Nothing -> userSrvs useServices <- readTVarIO $ useClientServices c - -- These options are possible below: - -- 1) services fully disabled: - -- No service subscriptions will be attempted, and existing services and association will remain in in the database, - -- but they will be ignored because of hasService parameter set to False. - -- This approach preserves performance for all clients that do not use services. 
- -- 2) at least one user ID has services enabled: - -- Service will be loaded for all user/server combinations: - -- a) service is enabled for user ID and service record exists: subscription will be attempted, - -- b) service is disabled and record exists: service record and all associations will be removed, - -- c) service is disabled or no record: no subscription attempt. + -- Service will be loaded for all user/server combinations: + -- a) service is enabled for user ID and service record exists: subscription will be attempted, + -- b) service is disabled and record exists: service record and all associations will be removed, + -- c) service is disabled or no record: no subscription attempt. -- On successful service subscription, only unassociated queues will be subscribed. - userSrvs'' <- - if any id useServices - then lift $ mapConcurrently (subscribeService useServices) userSrvs' - else pure $ map (,False) userSrvs' - rs <- lift $ mapConcurrently (subscribeUserServer maxPending currPending) userSrvs'' + userSrvs2 <- withStore' c $ \db -> mapM (getService db useServices) userSrvs' + userSrvs3 <- lift $ mapConcurrently subscribeService userSrvs2 + rs <- lift $ mapConcurrently (subscribeUserServer maxPending currPending) userSrvs3 let (errs, oks) = partitionEithers rs logInfo $ "subscribed " <> tshow (sum oks) <> " queues" forM_ (L.nonEmpty errs) $ notifySub c . ERRS . L.map ("",) @@ -1475,16 +1469,30 @@ subscribeAllConnections' c onlyNeeded activeUserId_ = handleErr $ do resumeAllCommands c where handleErr = (`catchAllErrors` \e -> notifySub' c "" (ERR e) >> throwE e) - subscribeService :: Map UserId Bool -> (UserId, SMPServer) -> AM' ((UserId, SMPServer), ServiceAssoc) - subscribeService useServices us@(userId, srv) = fmap ((us,) . 
fromRight False) $ tryAllErrors' $ do - withStore' c (\db -> getSubscriptionService db userId srv) >>= \case + getService :: DB.Connection -> Map UserId Bool -> (UserId, SMPServer) -> IO ((UserId, SMPServer), Maybe ServiceSub) + getService db useServices us@(userId, srv) = + fmap (us,) $ getSubscriptionService db userId srv >>= \case Just serviceSub -> case M.lookup userId useServices of - Just True -> tryAllErrors (subscribeClientService c True userId srv serviceSub) >>= \case - Left e | clientServiceError e -> unassocQueues $> False + Just True -> pure $ Just serviceSub + _ -> Nothing <$ unassocUserServerRcvQueueSubs' db userId srv + _ -> pure Nothing + subscribeService :: ((UserId, SMPServer), Maybe ServiceSub) -> AM' ((UserId, SMPServer), ServiceAssoc) + subscribeService (us@(userId, srv), serviceSub_) = fmap ((us,) . fromRight False) $ tryAllErrors' $ + case serviceSub_ of + Just serviceSub -> tryAllErrors (subscribeClientService c True userId srv serviceSub) >>= \case + Right (ServiceSubResult e _) -> case e of + Just SSErrorServiceId {} -> unassocQueues + -- Possibly, we should always resubscribe all when expected is greater than subscribed + Just SSErrorQueueCount {expectedQueueCount = n, subscribedQueueCount = n'} | n > 0 && n' == 0 -> unassocQueues _ -> pure True - _ -> unassocQueues $> False + Left e -> do + atomically $ writeTBQueue (subQ c) ("", "", AEvt SAEConn $ ERR e) + if clientServiceError e + then unassocQueues + else pure True where - unassocQueues = withStore' c $ \db -> unassocUserServerRcvQueueSubs db userId srv + unassocQueues :: AM Bool + unassocQueues = False <$ withStore' c (\db -> unassocUserServerRcvQueueSubs' db userId srv) _ -> pure False subscribeUserServer :: Int -> TVar Int -> ((UserId, SMPServer), ServiceAssoc) -> AM' (Either AgentErrorType Int) subscribeUserServer maxPending currPending ((userId, srv), hasService) = do @@ -2219,10 +2227,10 @@ switchDuplexConnection c nm (DuplexConnection cData@ConnData {connId, userId} rq srv' <- 
if srv == server then getNextSMPServer c userId [server] else pure srvAuth -- TODO [notications] possible improvement would be to create ntf credentials here, to avoid creating them after rotation completes. -- The problem is that currently subscription already exists, and we do not support queues with credentials but without subscriptions. - (q, qUri, tSess, sessId) <- newRcvQueue c nm userId connId srv' clientVRange SCMInvitation False SMSubscribe + (q, qUri, tSess, sessId, serviceId_) <- newRcvQueue c nm userId connId srv' clientVRange SCMInvitation False SMSubscribe let rq' = (q :: NewRcvQueue) {primary = True, dbReplaceQueueId = Just dbQueueId} rq'' <- withStore c $ \db -> addConnRcvQueue db connId rq' SMSubscribe - lift $ addNewQueueSubscription c rq'' tSess sessId + lift $ addNewQueueSubscription c rq'' tSess sessId serviceId_ void . enqueueMessages c cData sqs SMP.noMsgFlags $ QADD [(qUri, Just (server, sndId))] rq1 <- withStore' c $ \db -> setRcvSwitchStatus db rq $ Just RSSendingQADD let rqs' = updatedQs rq1 rqs <> [rq''] @@ -2908,7 +2916,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), THandlePar processSubOk :: RcvQueue -> TVar [ConnId] -> TVar [RcvQueue] -> Maybe SMP.ServiceId -> IO () processSubOk rq@RcvQueue {connId} upConnIds serviceRQs serviceId_ = atomically . 
whenM (isPendingSub rq) $ do - SS.addActiveSub tSess sessId rq $ currentSubs c + SS.addActiveSub tSess sessId serviceId_ rq $ currentSubs c modifyTVar' upConnIds (connId :) when (isJust serviceId_ && serviceId_ == clientServiceId_) $ modifyTVar' serviceRQs (rq :) clientServiceId_ = (\THClientService {serviceId} -> serviceId) <$> (clientService =<< thAuth) @@ -3115,16 +3123,26 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), THandlePar notifyEnd removed | removed = notify END >> logServer "<--" c srv rId "END" | otherwise = logServer "<--" c srv rId "END from disconnected client - ignored" - -- TODO [certs rcv] - r@(SMP.ENDS _) -> unexpected r + SMP.ENDS n idsHash -> + atomically (ifM (activeClientSession c tSess sessId) (SS.deleteServiceSub tSess (currentSubs c) $> True) (pure False)) + >>= notifyEnd + where + notifyEnd removed + | removed = do + forM_ clientServiceId_ $ \serviceId -> + notify_ B.empty $ SERVICE_END srv $ ServiceSub serviceId n idsHash + logServer "<--" c srv rId "ENDS" + | otherwise = logServer "<--" c srv rId "ENDS from disconnected client - ignored" -- TODO [certs rcv] Possibly, we need to add some flag to connection that it was deleted SMP.DELD -> atomically (removeSubscription c tSess connId rq) >> notify DELD SMP.ERR e -> notify $ ERR $ SMP (B.unpack $ strEncode srv) e r -> unexpected r where notify :: forall e m. (AEntityI e, MonadIO m) => AEvent e -> m () - notify msg = - let t = ("", connId, AEvt (sAEntity @e) msg) + notify = notify_ connId + notify_ :: forall e m. 
(AEntityI e, MonadIO m) => ConnId -> AEvent e -> m () + notify_ connId' msg = + let t = ("", connId', AEvt (sAEntity @e) msg) in atomically $ ifM (isFullTBQueue subQ) (modifyTVar' pendingMsgs (t :)) (writeTBQueue subQ t) prohibited :: Text -> AM () diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 7acfb0b490..9bf1afd8db 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -266,7 +266,6 @@ import Simplex.Messaging.Protocol NetworkError (..), MsgFlags (..), MsgId, - IdsHash, NtfServer, NtfServerWithAuth, ProtoServer, @@ -283,6 +282,7 @@ import Simplex.Messaging.Protocol SProtocolType (..), ServiceSub (..), ServiceSubResult (..), + ServiceSubError (..), SndPublicAuthKey, SubscriptionMode (..), NewNtfCreds (..), @@ -1420,7 +1420,7 @@ getSessionMode :: AgentClient -> STM TransportSessionMode getSessionMode = fmap (sessionMode . snd) . readTVar . useNetworkConfig {-# INLINE getSessionMode #-} -newRcvQueue :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> SMPServerWithAuth -> VersionRangeSMPC -> SConnectionMode c -> Bool -> SubscriptionMode -> AM (NewRcvQueue, SMPQueueUri, SMPTransportSession, SessionId) +newRcvQueue :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> SMPServerWithAuth -> VersionRangeSMPC -> SConnectionMode c -> Bool -> SubscriptionMode -> AM (NewRcvQueue, SMPQueueUri, SMPTransportSession, SessionId, Maybe ServiceId) newRcvQueue c nm userId connId srv vRange cMode enableNtfs subMode = do let qrd = case cMode of SCMInvitation -> CQRMessaging Nothing; SCMContact -> CQRContact Nothing e2eKeys <- atomically . 
C.generateKeyPair =<< asks random @@ -1441,7 +1441,7 @@ queueReqData = \case CQRMessaging d -> QRMessaging $ srvReq <$> d CQRContact d -> QRContact $ srvReq <$> d -newRcvQueue_ :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> SMPServerWithAuth -> VersionRangeSMPC -> ClntQueueReqData -> Bool -> SubscriptionMode -> Maybe C.CbNonce -> C.KeyPairX25519 -> AM (NewRcvQueue, SMPQueueUri, SMPTransportSession, SessionId) +newRcvQueue_ :: AgentClient -> NetworkRequestMode -> UserId -> ConnId -> SMPServerWithAuth -> VersionRangeSMPC -> ClntQueueReqData -> Bool -> SubscriptionMode -> Maybe C.CbNonce -> C.KeyPairX25519 -> AM (NewRcvQueue, SMPQueueUri, SMPTransportSession, SessionId, Maybe ServiceId) newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enableNtfs subMode nonce_ (e2eDhKey, e2ePrivKey) = do C.AuthAlg a <- asks (rcvAuthAlg . config) g <- asks random @@ -1483,7 +1483,7 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl deleteErrors = 0 } qUri = SMPQueueUri vRange $ SMPQueueAddress srv sndId e2eDhKey queueMode - pure (rq, qUri, tSess, sessionId thParams') + pure (rq, qUri, tSess, sessionId thParams', sessServiceId) where mkNtfCreds :: (C.AlgorithmI a, C.AuthAlgorithm a) => C.SAlgorithm a -> TVar ChaChaDRG -> SMPClient -> IO (Maybe (C.AAuthKeyPair, C.PrivateKeyX25519), Maybe NewNtfCreds) mkNtfCreds a g smp @@ -1526,23 +1526,23 @@ newRcvQueue_ c nm userId connId (ProtoServerWithAuth srv auth) vRange cqrd enabl processSubResults :: AgentClient -> SMPTransportSession -> SessionId -> Maybe ServiceId -> NonEmpty (RcvQueueSub, Either SMPClientError (Maybe ServiceId)) -> STM ([RcvQueueSub], [(RcvQueueSub, Maybe ClientNotice)]) processSubResults c tSess@(userId, srv, _) sessId serviceId_ rs = do - pending <- SS.getPendingSubs tSess $ currentSubs c - let (failed, subscribed@(qs, sQs), notices, ignored) = foldr (partitionResults pending) (M.empty, ([], []), [], 0) rs + pendingSubs <- SS.getPendingQueueSubs tSess $ 
currentSubs c + let (failed, subscribed@(qs, sQs), notices, ignored) = foldr (partitionResults pendingSubs) (M.empty, ([], []), [], 0) rs unless (M.null failed) $ do incSMPServerStat' c userId srv connSubErrs $ M.size failed failSubscriptions c tSess failed unless (null qs && null sQs) $ do incSMPServerStat' c userId srv connSubscribed $ length qs + length sQs - SS.batchAddActiveSubs tSess sessId subscribed $ currentSubs c + SS.batchAddActiveSubs tSess sessId serviceId_ subscribed $ currentSubs c unless (ignored == 0) $ incSMPServerStat' c userId srv connSubIgnored ignored pure (sQs, notices) where partitionResults :: - (Map SMP.RecipientId RcvQueueSub, Maybe ServiceSub) -> + Map SMP.RecipientId RcvQueueSub -> (RcvQueueSub, Either SMPClientError (Maybe ServiceId)) -> (Map SMP.RecipientId SMPClientError, ([RcvQueueSub], [RcvQueueSub]), [(RcvQueueSub, Maybe ClientNotice)], Int) -> (Map SMP.RecipientId SMPClientError, ([RcvQueueSub], [RcvQueueSub]), [(RcvQueueSub, Maybe ClientNotice)], Int) - partitionResults (pendingSubs, pendingSS) (rq@RcvQueueSub {rcvId, clientNoticeId}, r) acc@(failed, subscribed@(qs, sQs), notices, ignored) = case r of + partitionResults pendingSubs (rq@RcvQueueSub {rcvId, clientNoticeId}, r) acc@(failed, subscribed@(qs, sQs), notices, ignored) = case r of Left e -> case smpErrorClientNotice e of Just notice_ -> (failed', subscribed, (rq, notice_) : notices, ignored) where @@ -1554,8 +1554,8 @@ processSubResults c tSess@(userId, srv, _) sessId serviceId_ rs = do failed' = M.insert rcvId e failed Right serviceId_' | rcvId `M.member` pendingSubs -> - let subscribed' = case (serviceId_, serviceId_', pendingSS) of - (Just sId, Just sId', Just ServiceSub {smpServiceId}) | sId == sId' && sId == smpServiceId -> (qs, rq : sQs) + let subscribed' = case (serviceId_, serviceId_') of + (Just sId, Just sId') | sId == sId' -> (qs, rq : sQs) _ -> (rq : qs, sQs) in (failed, subscribed', notices', ignored) | otherwise -> (failed, subscribed, notices', ignored + 
1) @@ -1726,11 +1726,18 @@ processClientNotices c@AgentClient {presetServers} tSess notices = do resubscribeClientService :: AgentClient -> SMPTransportSession -> ServiceSub -> AM ServiceSubResult resubscribeClientService c tSess@(userId, srv, _) serviceSub = - withServiceClient c tSess (\smp _ -> subscribeClientService_ c True tSess smp serviceSub) `catchE` \e -> do - when (clientServiceError e) $ do + tryAllErrors (withServiceClient c tSess $ \smp _ -> subscribeClientService_ c True tSess smp serviceSub) >>= \case + Right r@(ServiceSubResult e _) -> case e of + Just SSErrorServiceId {} -> unassocSubscribeQueues $> r + _ -> pure r + Left e -> do + when (clientServiceError e) $ unassocSubscribeQueues + atomically $ writeTBQueue (subQ c) ("", "", AEvt SAEConn $ ERR e) + throwE e + where + unassocSubscribeQueues = do qs <- withStore' c $ \db -> unassocUserServerRcvQueueSubs db userId srv void $ lift $ subscribeUserServerQueues c userId srv qs - throwE e -- TODO [certs rcv] update service in the database if it has different ID and re-associate queues, and send event subscribeClientService :: AgentClient -> Bool -> UserId -> SMPServer -> ServiceSub -> AM ServiceSubResult @@ -1751,7 +1758,7 @@ withServiceClient c tSess subscribe = -- TODO [certs rcv] send subscription error event? 
subscribeClientService_ :: AgentClient -> Bool -> SMPTransportSession -> SMPClient -> ServiceSub -> ExceptT SMPClientError IO ServiceSubResult -subscribeClientService_ c withEvent tSess@(userId, srv, _) smp expected@(ServiceSub _ n idsHash) = do +subscribeClientService_ c withEvent tSess@(_, srv, _) smp expected@(ServiceSub _ n idsHash) = do subscribed <- subscribeService smp SMP.SRecipientService n idsHash let sessId = sessionId $ thParams smp r = serviceSubResult expected subscribed @@ -1821,14 +1828,14 @@ getRemovedSubs AgentClient {removedSubs} k = TM.lookup k removedSubs >>= maybe n TM.insert k s removedSubs pure s -addNewQueueSubscription :: AgentClient -> RcvQueue -> SMPTransportSession -> SessionId -> AM' () -addNewQueueSubscription c rq' tSess sessId = do +addNewQueueSubscription :: AgentClient -> RcvQueue -> SMPTransportSession -> SessionId -> Maybe ServiceId -> AM' () +addNewQueueSubscription c rq' tSess sessId serviceId_ = do let rq = rcvQueueSub rq' same <- atomically $ do modifyTVar' (subscrConns c) $ S.insert $ qConnId rq active <- activeClientSession c tSess sessId if active - then SS.addActiveSub tSess sessId rq' $ currentSubs c + then SS.addActiveSub tSess sessId serviceId_ rq' $ currentSubs c else SS.addPendingSub tSess rq $ currentSubs c pure active unless same $ resubscribeSMPSession c tSess diff --git a/src/Simplex/Messaging/Agent/Protocol.hs b/src/Simplex/Messaging/Agent/Protocol.hs index d5b35611b6..ef9bc592f2 100644 --- a/src/Simplex/Messaging/Agent/Protocol.hs +++ b/src/Simplex/Messaging/Agent/Protocol.hs @@ -393,6 +393,7 @@ data AEvent (e :: AEntity) where SERVICE_ALL :: SMPServer -> AEvent AENone -- all service messages are delivered SERVICE_DOWN :: SMPServer -> ServiceSub -> AEvent AENone SERVICE_UP :: SMPServer -> ServiceSubResult -> AEvent AENone + SERVICE_END :: SMPServer -> ServiceSub -> AEvent AENone SWITCH :: QueueDirection -> SwitchPhase -> ConnectionStats -> AEvent AEConn RSYNC :: RatchetSyncState -> Maybe AgentCryptoError -> 
ConnectionStats -> AEvent AEConn SENT :: AgentMsgId -> Maybe SMPServer -> AEvent AEConn @@ -467,6 +468,7 @@ data AEventTag (e :: AEntity) where SERVICE_ALL_ :: AEventTag AENone SERVICE_DOWN_ :: AEventTag AENone SERVICE_UP_ :: AEventTag AENone + SERVICE_END_ :: AEventTag AENone SWITCH_ :: AEventTag AEConn RSYNC_ :: AEventTag AEConn SENT_ :: AEventTag AEConn @@ -525,6 +527,7 @@ aEventTag = \case SERVICE_ALL _ -> SERVICE_ALL_ SERVICE_DOWN {} -> SERVICE_DOWN_ SERVICE_UP {} -> SERVICE_UP_ + SERVICE_END {} -> SERVICE_END_ SWITCH {} -> SWITCH_ RSYNC {} -> RSYNC_ SENT {} -> SENT_ diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 9508e4499c..853a769088 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -38,7 +38,6 @@ module Simplex.Messaging.Agent.Store.AgentStore -- * Client services createClientService, getClientServiceCredentials, - getSubscriptionServices, getSubscriptionService, getClientServiceServers, setClientServiceId, @@ -55,6 +54,7 @@ module Simplex.Messaging.Agent.Store.AgentStore getSubscriptionServers, getUserServerRcvQueueSubs, unassocUserServerRcvQueueSubs, + unassocUserServerRcvQueueSubs', unsetQueuesToSubscribe, setRcvServiceAssocs, removeRcvServiceAssocs, @@ -344,7 +344,7 @@ handleSQLError err e = case constraintViolation e of handleSQLError :: StoreError -> SQLError -> StoreError handleSQLError err e | SQL.sqlError e == SQL.ErrorConstraint = err - | otherwise = SEInternal $ bshow e + | otherwise = SEInternal $ encodeUtf8 $ tshow e <> ": " <> SQL.sqlErrorDetails e <> ", " <> SQL.sqlErrorContext e #endif createUserRecord :: DB.Connection -> IO UserId @@ -439,11 +439,6 @@ getClientServiceCredentials db userId srv = where toService (kh, cert, pk, serviceId_) = ((kh, (cert, pk)), serviceId_) -getSubscriptionServices :: DB.Connection -> IO [(UserId, (SMPServer, ServiceSub))] -getSubscriptionServices db = map toUserService <$> 
DB.query_ db clientServiceQuery - where - toUserService (Only userId :. serviceRow) = (userId, toServerService serviceRow) - getSubscriptionService :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ServiceSub) getSubscriptionService db userId (SMPServer h p kh) = maybeFirstRow toService $ @@ -453,7 +448,7 @@ getSubscriptionService db userId (SMPServer h p kh) = SELECT c.service_id, c.service_queue_count, c.service_queue_ids_hash FROM client_services c JOIN servers s ON s.host = c.host AND s.port = c.port - WHERE c.user_id = ? AND c.host = ? AND c.port = ? AND COALESCE(c.server_key_hash, s.key_hash) = ? + WHERE c.user_id = ? AND c.host = ? AND c.port = ? AND COALESCE(c.server_key_hash, s.key_hash) = ? AND service_id IS NOT NULL |] (userId, h, p, kh) where @@ -461,15 +456,16 @@ getSubscriptionService db userId (SMPServer h p kh) = getClientServiceServers :: DB.Connection -> UserId -> IO [(SMPServer, ServiceSub)] getClientServiceServers db userId = - map toServerService <$> DB.query db (clientServiceQuery <> " WHERE c.user_id = ?") (Only userId) - -clientServiceQuery :: Query -clientServiceQuery = - [sql| - SELECT c.host, c.port, COALESCE(c.server_key_hash, s.key_hash), c.service_id, c.service_queue_count, c.service_queue_ids_hash - FROM client_services c - JOIN servers s ON s.host = c.host AND s.port = c.port - |] + map toServerService <$> + DB.query + db + [sql| + SELECT c.host, c.port, COALESCE(c.server_key_hash, s.key_hash), c.service_id, c.service_queue_count, c.service_queue_ids_hash + FROM client_services c + JOIN servers s ON s.host = c.host AND s.port = c.port + WHERE c.user_id = ? 
AND service_id IS NOT NULL + |] + (Only userId) toServerService :: (NonEmpty TransportHost, ServiceName, C.KeyHash, ServiceId, Int64, Binary ByteString) -> (ProtocolServer 'PSMP, ServiceSub) toServerService (host, port, kh, serviceId, n, Binary idsHash) = @@ -487,14 +483,20 @@ setClientServiceId db userId srv serviceId = (serviceId, userId, host srv, port srv) deleteClientService :: DB.Connection -> UserId -> SMPServer -> IO () -deleteClientService db userId srv = +deleteClientService db userId (SMPServer h p kh) = DB.execute db [sql| DELETE FROM client_services WHERE user_id = ? AND host = ? AND port = ? + AND EXISTS ( + SELECT 1 FROM servers s + WHERE s.host = client_services.host + AND s.port = client_services.port + AND COALESCE(client_services.server_key_hash, s.key_hash) = ? + ); |] - (userId, host srv, port srv) + (userId, h, p, Just kh) deleteClientServices :: DB.Connection -> UserId -> IO () deleteClientServices db userId = do @@ -2279,7 +2281,8 @@ getUserServerRcvQueueSubs db userId (SMPServer h p kh) onlyNeeded hasService = | otherwise = "" unassocUserServerRcvQueueSubs :: DB.Connection -> UserId -> SMPServer -> IO [RcvQueueSub] -unassocUserServerRcvQueueSubs db userId (SMPServer h p kh) = +unassocUserServerRcvQueueSubs db userId srv@(SMPServer h p kh) = do + deleteClientService db userId srv map toRcvQueueSub <$> DB.query db @@ -2293,6 +2296,11 @@ unassocUserServerRcvQueueSubs db userId (SMPServer h p kh) = rcv_queues.rcv_queue_id, rcv_queues.rcv_primary, rcv_queues.replace_rcv_queue_id |] +unassocUserServerRcvQueueSubs' :: DB.Connection -> UserId -> SMPServer -> IO () +unassocUserServerRcvQueueSubs' db userId srv@(SMPServer h p kh) = do + deleteClientService db userId srv + DB.execute db removeRcvAssocsQuery (h, p, userId, kh) + unsetQueuesToSubscribe :: DB.Connection -> IO () unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = 0 WHERE to_subscribe = 1" diff --git a/src/Simplex/Messaging/Agent/TSessionSubs.hs 
b/src/Simplex/Messaging/Agent/TSessionSubs.hs index ab15b9793c..a1db48c9ea 100644 --- a/src/Simplex/Messaging/Agent/TSessionSubs.hs +++ b/src/Simplex/Messaging/Agent/TSessionSubs.hs @@ -23,8 +23,10 @@ module Simplex.Messaging.Agent.TSessionSubs batchDeletePendingSubs, deleteSub, batchDeleteSubs, + deleteServiceSub, hasPendingSubs, getPendingSubs, + getPendingQueueSubs, getActiveSubs, setSubsPending, updateClientNotices, @@ -39,12 +41,12 @@ import Data.Int (Int64) import Data.List (foldl') import Data.Map.Strict (Map) import qualified Data.Map.Strict as M -import Data.Maybe (isJust) +import Data.Maybe (fromMaybe, isJust) import qualified Data.Set as S import Simplex.Messaging.Agent.Protocol (SMPQueue (..)) -import Simplex.Messaging.Agent.Store (RcvQueue, RcvQueueSub (..), SomeRcvQueue, StoredRcvQueue (rcvServiceAssoc), rcvQueueSub) +import Simplex.Messaging.Agent.Store (RcvQueue, RcvQueueSub (..), ServiceAssoc, SomeRcvQueue, StoredRcvQueue (rcvServiceAssoc), rcvQueueSub) import Simplex.Messaging.Client (SMPTransportSession, TransportSessionMode (..)) -import Simplex.Messaging.Protocol (RecipientId, ServiceSub (..), queueIdHash) +import Simplex.Messaging.Protocol (IdsHash, RecipientId, ServiceSub (..), queueIdHash) import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM import Simplex.Messaging.Transport @@ -119,40 +121,48 @@ setActiveServiceSub tSess sessId serviceSub ss = do writeTVar (pendingServiceSub s) Nothing else writeTVar (pendingServiceSub s) $ Just serviceSub -addActiveSub :: SMPTransportSession -> SessionId -> RcvQueue -> TSessionSubs -> STM () -addActiveSub tSess sessId rq = addActiveSub' tSess sessId (rcvQueueSub rq) (rcvServiceAssoc rq) +addActiveSub :: SMPTransportSession -> SessionId -> Maybe ServiceId -> RcvQueue -> TSessionSubs -> STM () +addActiveSub tSess sessId serviceId_ rq = addActiveSub' tSess sessId serviceId_ (rcvQueueSub rq) (rcvServiceAssoc rq) {-# INLINE addActiveSub #-} -addActiveSub' :: SMPTransportSession -> 
SessionId -> RcvQueueSub -> Bool -> TSessionSubs -> STM () -addActiveSub' tSess sessId rq serviceAssoc ss = do +addActiveSub' :: SMPTransportSession -> SessionId -> Maybe ServiceId -> RcvQueueSub -> ServiceAssoc -> TSessionSubs -> STM () +addActiveSub' tSess sessId serviceId_ rq serviceAssoc ss = do s <- getSessSubs tSess ss sessId' <- readTVar $ subsSessId s let rId = rcvId rq if Just sessId == sessId' then do - TM.insert rId rq $ activeSubs s TM.delete rId $ pendingSubs s - when serviceAssoc $ - let updateServiceSub (ServiceSub serviceId n idsHash) = ServiceSub serviceId (n + 1) (idsHash <> queueIdHash rId) - in modifyTVar' (activeServiceSub s) (updateServiceSub <$>) + case serviceId_ of + Just serviceId | serviceAssoc -> updateActiveService s serviceId 1 (queueIdHash rId) + _ -> TM.insert rId rq $ activeSubs s else TM.insert rId rq $ pendingSubs s -batchAddActiveSubs :: SMPTransportSession -> SessionId -> ([RcvQueueSub], [RcvQueueSub]) -> TSessionSubs -> STM () -batchAddActiveSubs tSess sessId (rqs, serviceRQs) ss = do +batchAddActiveSubs :: SMPTransportSession -> SessionId -> Maybe ServiceId -> ([RcvQueueSub], [RcvQueueSub]) -> TSessionSubs -> STM () +batchAddActiveSubs tSess sessId serviceId_ (rqs, serviceRQs) ss = do s <- getSessSubs tSess ss sessId' <- readTVar $ subsSessId s - let qs = M.fromList $ map (\rq -> (rcvId rq, rq)) rqs + let qs = queuesMap rqs + serviceQs = queuesMap serviceRQs if Just sessId == sessId' then do TM.union qs $ activeSubs s modifyTVar' (pendingSubs s) (`M.difference` qs) - serviceSub_ <- readTVar $ activeServiceSub s - forM_ serviceSub_ $ \(ServiceSub serviceId n idsHash) -> do - unless (null serviceRQs) $ do - let idsHash' = idsHash <> mconcat (map (queueIdHash . 
rcvId) serviceRQs) - n' = n + fromIntegral (length serviceRQs) - writeTVar (activeServiceSub s) $ Just $ ServiceSub serviceId n' idsHash' - else TM.union qs $ pendingSubs s + unless (null serviceRQs) $ forM_ serviceId_ $ \serviceId -> do + modifyTVar' (pendingSubs s) (`M.difference` serviceQs) + updateActiveService s serviceId (fromIntegral $ length serviceRQs) (mconcat $ map (queueIdHash . rcvId) serviceRQs) + else do + TM.union qs $ pendingSubs s + when (isJust serviceId_ && not (null serviceRQs)) $ TM.union serviceQs $ pendingSubs s + where + queuesMap = M.fromList . map (\rq -> (rcvId rq, rq)) + +updateActiveService :: SessSubs -> ServiceId -> Int64 -> IdsHash -> STM () +updateActiveService s serviceId addN addIdsHash = do + ServiceSub serviceId' n idsHash <- + fromMaybe (ServiceSub serviceId 0 mempty) <$> readTVar (activeServiceSub s) + when (serviceId == serviceId') $ + writeTVar (activeServiceSub s) $ Just $ ServiceSub serviceId (n + addN) (idsHash <> addIdsHash) batchAddPendingSubs :: SMPTransportSession -> [RcvQueueSub] -> TSessionSubs -> STM () batchAddPendingSubs tSess rqs ss = do @@ -176,6 +186,9 @@ batchDeleteSubs tSess rqs = lookupSubs tSess >=> mapM_ (\s -> delete (activeSubs rIds = S.fromList $ map queueId rqs delete = (`modifyTVar'` (`M.withoutKeys` rIds)) +deleteServiceSub :: SMPTransportSession -> TSessionSubs -> STM () +deleteServiceSub tSess = lookupSubs tSess >=> mapM_ (\s -> writeTVar (activeServiceSub s) Nothing >> writeTVar (pendingServiceSub s) Nothing) + hasPendingSubs :: SMPTransportSession -> TSessionSubs -> STM Bool hasPendingSubs tSess = lookupSubs tSess >=> maybe (pure False) (\s -> anyM [hasSubs s, hasServiceSub s]) where @@ -187,6 +200,10 @@ getPendingSubs tSess = lookupSubs tSess >=> maybe (pure (M.empty, Nothing)) get where get s = liftM2 (,) (readTVar $ pendingSubs s) (readTVar $ pendingServiceSub s) +getPendingQueueSubs :: SMPTransportSession -> TSessionSubs -> STM (Map RecipientId RcvQueueSub) +getPendingQueueSubs = getSubs_ 
pendingSubs +{-# INLINE getPendingQueueSubs #-} + getActiveSubs :: SMPTransportSession -> TSessionSubs -> STM (Map RecipientId RcvQueueSub) getActiveSubs = getSubs_ activeSubs {-# INLINE getActiveSubs #-} diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index 51128597c3..4993aaac81 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -147,7 +147,6 @@ module Simplex.Messaging.Protocol serviceSubResult, queueIdsHash, queueIdHash, - noIdsHash, addServiceSubs, subtractServiceSubs, MaxMessageLen, @@ -726,7 +725,7 @@ data BrokerMsg where RRES :: EncFwdResponse -> BrokerMsg -- relay to proxy PRES :: EncResponse -> BrokerMsg -- proxy to client END :: BrokerMsg - ENDS :: Int64 -> BrokerMsg + ENDS :: Int64 -> IdsHash -> BrokerMsg DELD :: BrokerMsg INFO :: QueueInfo -> BrokerMsg OK :: BrokerMsg @@ -1518,10 +1517,6 @@ instance Monoid IdsHash where xor' :: Word8 -> Word8 -> Word8 xor' x y = let !r = xor x y in r -noIdsHash ::IdsHash -noIdsHash = IdsHash B.empty -{-# INLINE noIdsHash #-} - queueIdsHash :: [QueueId] -> IdsHash queueIdsHash = mconcat . 
map queueIdHash @@ -1535,7 +1530,7 @@ addServiceSubs (n', idsHash') (n, idsHash) = (n + n', idsHash <> idsHash') subtractServiceSubs :: (Int64, IdsHash) -> (Int64, IdsHash) -> (Int64, IdsHash) subtractServiceSubs (n', idsHash') (n, idsHash) | n > n' = (n - n', idsHash <> idsHash') -- concat is a reversible xor: (x `xor` y) `xor` y == x - | otherwise = (0, noIdsHash) + | otherwise = (0, mempty) data ProtocolErrorType = PECmdSyntax | PECmdUnknown | PESession | PEBlock @@ -1883,7 +1878,7 @@ instance ProtocolEncoding SMPVersion ErrorType Cmd where QUE_ -> pure QUE CT SRecipientService SUBS_ | v >= rcvServiceSMPVersion -> Cmd SRecipientService <$> (SUBS <$> _smpP <*> smpP) - | otherwise -> pure $ Cmd SRecipientService $ SUBS (-1) noIdsHash + | otherwise -> pure $ Cmd SRecipientService $ SUBS (-1) mempty CT SSender tag -> Cmd SSender <$> case tag of SKEY_ -> SKEY <$> _smpP @@ -1902,7 +1897,7 @@ instance ProtocolEncoding SMPVersion ErrorType Cmd where CT SNotifier NSUB_ -> pure $ Cmd SNotifier NSUB CT SNotifierService NSUBS_ | v >= rcvServiceSMPVersion -> Cmd SNotifierService <$> (NSUBS <$> _smpP <*> smpP) - | otherwise -> pure $ Cmd SNotifierService $ NSUBS (-1) noIdsHash + | otherwise -> pure $ Cmd SNotifierService $ NSUBS (-1) mempty fromProtocolError = fromProtocolError @SMPVersion @ErrorType @BrokerMsg {-# INLINE fromProtocolError #-} @@ -1925,9 +1920,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where SOK serviceId_ | v >= serviceCertsSMPVersion -> e (SOK_, ' ', serviceId_) | otherwise -> e OK_ -- won't happen, the association with the service requires v >= serviceCertsSMPVersion - SOKS n idsHash - | v >= rcvServiceSMPVersion -> e (SOKS_, ' ', n, idsHash) - | otherwise -> e (SOKS_, ' ', n) + SOKS n idsHash -> serviceResp SOKS_ n idsHash MSG RcvMessage {msgId, msgBody = EncRcvMsgBody body} -> e (MSG_, ' ', msgId, Tail body) ALLS -> e ALLS_ @@ -1937,7 +1930,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where RRES (EncFwdResponse 
encBlock) -> e (RRES_, ' ', Tail encBlock) PRES (EncResponse encBlock) -> e (PRES_, ' ', Tail encBlock) END -> e END_ - ENDS n -> e (ENDS_, ' ', n) + ENDS n idsHash -> serviceResp ENDS_ n idsHash DELD | v >= deletedEventSMPVersion -> e DELD_ | otherwise -> e END_ @@ -1954,6 +1947,9 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where where e :: Encoding a => a -> ByteString e = smpEncode + serviceResp tag n idsHash + | v >= serviceCertsSMPVersion = e (tag, ' ', n, idsHash) + | otherwise = e (tag, ' ', n) protocolP v = \case MSG_ -> do @@ -1982,21 +1978,23 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where pure $ IDS QIK {rcvId, sndId, rcvPublicDhKey, queueMode, linkId, serviceId, serverNtfCreds} LNK_ -> LNK <$> _smpP <*> smpP SOK_ -> SOK <$> _smpP - SOKS_ - | v >= rcvServiceSMPVersion -> SOKS <$> _smpP <*> smpP - | otherwise -> SOKS <$> _smpP <*> pure noIdsHash + SOKS_ -> serviceRespP SOKS NID_ -> NID <$> _smpP <*> smpP NMSG_ -> NMSG <$> _smpP <*> smpP PKEY_ -> PKEY <$> _smpP <*> smpP <*> smpP RRES_ -> RRES <$> (EncFwdResponse . unTail <$> _smpP) PRES_ -> PRES <$> (EncResponse . 
unTail <$> _smpP) END_ -> pure END - ENDS_ -> ENDS <$> _smpP + ENDS_ -> serviceRespP ENDS DELD_ -> pure DELD INFO_ -> INFO <$> _smpP OK_ -> pure OK ERR_ -> ERR <$> _smpP PONG_ -> pure PONG + where + serviceRespP resp + | v >= serviceCertsSMPVersion = resp <$> _smpP <*> smpP + | otherwise = resp <$> _smpP <*> pure mempty fromProtocolError = \case PECmdSyntax -> CMD SYNTAX diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index b7bb0efaac..24247e781c 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -316,8 +316,8 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt cancelServiceSubs :: ServiceId -> Maybe (Client s) -> STM [PrevClientSub s] cancelServiceSubs serviceId = checkAnotherClient $ \c -> do - changedSubs@(n, _) <- swapTVar (clientServiceSubs c) (0, noIdsHash) - pure [(c, CSADecreaseSubs changedSubs, (serviceId, ENDS n))] + changedSubs@(n, idsHash) <- swapTVar (clientServiceSubs c) (0, mempty) + pure [(c, CSADecreaseSubs changedSubs, (serviceId, ENDS n idsHash))] checkAnotherClient :: (Client s -> STM [PrevClientSub s]) -> Maybe (Client s) -> STM [PrevClientSub s] checkAnotherClient mkSub = \case Just c@Client {clientId, connected} | clntId /= clientId -> diff --git a/src/Simplex/Messaging/Server/Env/STM.hs b/src/Simplex/Messaging/Server/Env/STM.hs index 02cf136c75..e59cd5c0bd 100644 --- a/src/Simplex/Messaging/Server/Env/STM.hs +++ b/src/Simplex/Messaging/Server/Env/STM.hs @@ -502,7 +502,7 @@ newServerSubscribers = do subQ <- newTQueueIO queueSubscribers <- SubscribedClients <$> TM.emptyIO serviceSubscribers <- SubscribedClients <$> TM.emptyIO - totalServiceSubs <- newTVarIO (0, noIdsHash) + totalServiceSubs <- newTVarIO (0, mempty) subClients <- newTVarIO IS.empty pendingEvents <- newTVarIO IM.empty pure ServerSubscribers {subQ, queueSubscribers, serviceSubscribers, totalServiceSubs, subClients, pendingEvents} @@ -513,8 +513,8 @@ newClient clientId qSize 
clientTHParams createdAt = do ntfSubscriptions <- TM.emptyIO serviceSubscribed <- newTVarIO False ntfServiceSubscribed <- newTVarIO False - serviceSubsCount <- newTVarIO (0, noIdsHash) - ntfServiceSubsCount <- newTVarIO (0, noIdsHash) + serviceSubsCount <- newTVarIO (0, mempty) + ntfServiceSubsCount <- newTVarIO (0, mempty) rcvQ <- newTBQueueIO qSize sndQ <- newTBQueueIO qSize msgQ <- newTBQueueIO qSize diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index b63e4cb48a..34448fc104 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -480,6 +480,7 @@ functionalAPITests ps = do describe "Client service certificates" $ do it "should connect, subscribe and reconnect as a service" $ testClientServiceConnection ps it "should re-subscribe when service ID changed" $ testClientServiceIDChange ps + it "migrate connections to and from service" $ testMigrateConnectionsToService ps describe "Connection switch" $ do describe "should switch delivery to the new queue" $ testServerMatrix2 ps testSwitchConnection @@ -3721,10 +3722,22 @@ testClientServiceConnection ps = do testClientServiceIDChange :: HasCallStack => (ASrvTransport, AStoreType) -> IO () testClientServiceIDChange ps@(_, ASType qs _) = do (sId, uId) <- withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do - withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + conns <- withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do conns@(sId, uId) <- makeConnection service user exchangeGreetings service uId user sId pure conns + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 1 _)) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + withSmpServerStoreLogOn ps testPort $ \_ -> do + getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) 
-> True; _ -> False + ] + ("", "", UP _ [_]) <- nGet user + pure () + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 1 _)) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + pure conns _ :: () <- case qs of SQSPostgres -> do #if defined(dbServerPostgres) @@ -3739,19 +3752,21 @@ testClientServiceIDChange ps@(_, ASType qs _) = do writeFile testStoreLogFile $ unlines $ filter (not . ("NEW_SERVICE" `isPrefixOf`)) $ lines s withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + liftIO $ threadDelay 250000 subscribeAllConnections service False Nothing liftIO $ getInAnyOrder service [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult (Just (SMP.SSErrorQueueCount 1 0)) (SMP.ServiceSub _ 0 _)))) -> True; _ -> False, \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False, - \case ("", "", AEvt SAENone (UP _ _)) -> True; _ -> False + \case ("", "", AEvt SAENone (UP _ [_])) -> True; _ -> False ] subscribeAllConnections user False Nothing ("", "", UP _ [_]) <- nGet user exchangeGreetingsMsgId 4 service uId user sId + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 1 _)) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + pure () -- disable service in the client - -- The test uses True for non-existing user to make sure it's removed for user 1, - -- because if no users use services, then it won't be checking them to optimize for most clients. 
- withAgentClientsServers2 (agentCfg, initAgentServers {useServices = M.fromList [(100, True)]}) (agentCfg, initAgentServers) $ \notService user -> do + withAgentClientsServers2 (agentCfg, initAgentServers) (agentCfg, initAgentServers) $ \notService user -> do withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do subscribeAllConnections notService False Nothing ("", "", UP _ [_]) <- nGet notService @@ -3759,6 +3774,153 @@ testClientServiceIDChange ps@(_, ASType qs _) = do ("", "", UP _ [_]) <- nGet user exchangeGreetingsMsgId 6 notService uId user sId +testMigrateConnectionsToService :: HasCallStack => (ASrvTransport, AStoreType) -> IO () +testMigrateConnectionsToService ps = do + (((sId1, uId1), (uId2, sId2)), ((sId3, uId3), (uId4, sId4)), ((sId5, uId5), (uId6, sId6))) <- + withSmpServerStoreLogOn ps testPort $ \_ -> do + -- starting without service + cs12@((sId1, uId1), (uId2, sId2)) <- + withAgentClientsServers2 (agentCfg, initAgentServers) (agentCfg, initAgentServers) $ \notService user -> + runRight $ (,) <$> makeConnection notService user <*> makeConnection user notService + -- migrating to service + cs34@((sId3, uId3), (uId4, sId4)) <- + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do + subscribeAllConnections service False Nothing + service `up` 2 + subscribeAllConnections user False Nothing + user `up` 2 + exchangeGreetingsMsgId 2 service uId1 user sId1 + exchangeGreetingsMsgId 2 service uId2 user sId2 + (,) <$> makeConnection service user <*> makeConnection user service + -- starting as service + cs56 <- + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do + subscribeAllConnections service False Nothing + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 4 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone 
(SERVICE_ALL _)) -> True; _ -> False + ] + subscribeAllConnections user False Nothing + user `up` 4 + exchangeGreetingsMsgId 4 service uId1 user sId1 + exchangeGreetingsMsgId 4 service uId2 user sId2 + exchangeGreetingsMsgId 2 service uId3 user sId3 + exchangeGreetingsMsgId 2 service uId4 user sId4 + (,) <$> makeConnection service user <*> makeConnection user service + pure (cs12, cs34, cs56) + -- server reconnecting resubscribes service + let testSendMessages6 s u n = do + exchangeGreetingsMsgId (n + 4) s uId1 u sId1 + exchangeGreetingsMsgId (n + 4) s uId2 u sId2 + exchangeGreetingsMsgId (n + 2) s uId3 u sId3 + exchangeGreetingsMsgId (n + 2) s uId4 u sId4 + exchangeGreetingsMsgId n s uId5 u sId5 + exchangeGreetingsMsgId n s uId6 u sId6 + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight_ $ do + subscribeAllConnections service False Nothing + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 6 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + subscribeAllConnections user False Nothing + user `up` 6 + testSendMessages6 service user 2 + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 6 _)) <- nGet service + user `down` 6 + withSmpServerStoreLogOn ps testPort $ \_ -> runRight_ $ do + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 6 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + user `up` 6 + testSendMessages6 service user 4 + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 6 _)) <- nGet service + user `down` 6 + -- disabling service and adding connections + ((sId7, uId7), (uId8, sId8)) <- + withAgentClientsServers2 (agentCfg, initAgentServers) (agentCfg, initAgentServers) $ \notService user -> do + 
cs78@((sId7, uId7), (uId8, sId8)) <- + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + subscribeAllConnections notService False Nothing + notService `up` 6 + subscribeAllConnections user False Nothing + user `up` 6 + testSendMessages6 notService user 6 + (,) <$> makeConnection notService user <*> makeConnection user notService + notService `down` 8 + user `down` 8 + withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + notService `up` 8 + user `up` 8 + testSendMessages6 notService user 8 + exchangeGreetingsMsgId 2 notService uId7 user sId7 + exchangeGreetingsMsgId 2 notService uId8 user sId8 + notService `down` 8 + user `down` 8 + pure cs78 + let testSendMessages8 s u n = do + testSendMessages6 s u (n + 8) + exchangeGreetingsMsgId (n + 2) s uId7 u sId7 + exchangeGreetingsMsgId (n + 2) s uId8 u sId8 + -- re-enabling service and adding connections + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight_ $ do + subscribeAllConnections service False Nothing + service `up` 8 + subscribeAllConnections user False Nothing + user `up` 8 + testSendMessages8 service user 2 + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 8 _)) <- nGet service + user `down` 8 + -- re-connect to server + withSmpServerStoreLogOn ps testPort $ \_ -> runRight_ $ do + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 8 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + user `up` 8 + testSendMessages8 service user 4 + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ _ _)) <- nGet service -- should be 8 here + user `down` 8 + -- restart agents + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + withSmpServerStoreLogOn ps testPort $ \_ -> runRight_ $ do + subscribeAllConnections service 
False Nothing + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 8 _)))) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + subscribeAllConnections user False Nothing + user `up` 8 + testSendMessages8 service user 6 + ("", "", SERVICE_DOWN _ (SMP.ServiceSub _ 8 _)) <- nGet service + user `down` 8 + runRight_ $ do + void $ sendMessage user sId7 SMP.noMsgFlags "hello 1" + void $ sendMessage user sId8 SMP.noMsgFlags "hello 2" + -- re-connect to server + withSmpServerStoreLogOn ps testPort $ \_ -> runRight_ $ do + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 8 _)))) -> True; _ -> False, + \case ("", c, AEvt SAEConn (Msg "hello 1")) -> c == uId7; _ -> False, + \case ("", c, AEvt SAEConn (Msg "hello 2")) -> c == uId8; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + liftIO $ getInAnyOrder user + [ \case ("", "", AEvt SAENone (UP _ [_, _, _, _, _, _, _, _])) -> True; _ -> False, + \case ("", c, AEvt SAEConn (SENT 10)) -> c == sId7; _ -> False, + \case ("", c, AEvt SAEConn (SENT 10)) -> c == sId8; _ -> False + ] + testSendMessages6 service user 16 + where + up c n = do + ("", "", UP _ conns) <- nGet c + liftIO $ length conns `shouldBe` n + down c n = do + ("", "", DOWN _ conns) <- nGet c + liftIO $ length conns `shouldBe` n + getSMPAgentClient' :: Int -> AgentConfig -> InitialAgentServers -> String -> IO AgentClient getSMPAgentClient' clientId cfg' initServers dbPath = do Right st <- liftIO $ createStore dbPath diff --git a/tests/CoreTests/TSessionSubs.hs b/tests/CoreTests/TSessionSubs.hs index e9038b9d95..96975e9efe 100644 --- a/tests/CoreTests/TSessionSubs.hs +++ b/tests/CoreTests/TSessionSubs.hs @@ -69,21 +69,21 @@ testSessionSubs = do atomically (SS.hasPendingSub tSess1 (rcvId q4) ss) `shouldReturn` False atomically (SS.hasActiveSub 
tSess1 (rcvId q4) ss) `shouldReturn` False -- setting active queue without setting session ID would keep it as pending - atomically $ SS.addActiveSub' tSess1 "123" q1 False ss + atomically $ SS.addActiveSub' tSess1 "123" Nothing q1 False ss atomically (SS.hasPendingSub tSess1 (rcvId q1) ss) `shouldReturn` True atomically (SS.hasActiveSub tSess1 (rcvId q1) ss) `shouldReturn` False dumpSessionSubs ss `shouldReturn` st countSubs ss `shouldReturn` (0, 3) -- setting active queues atomically $ SS.setSessionId tSess1 "123" ss - atomically $ SS.addActiveSub' tSess1 "123" q1 False ss + atomically $ SS.addActiveSub' tSess1 "123" Nothing q1 False ss atomically (SS.hasPendingSub tSess1 (rcvId q1) ss) `shouldReturn` False atomically (SS.hasActiveSub tSess1 (rcvId q1) ss) `shouldReturn` True atomically (SS.getActiveSubs tSess1 ss) `shouldReturn` M.fromList [("r1", q1)] atomically (SS.getPendingSubs tSess1 ss) `shouldReturn` (M.fromList [("r2", q2)], Nothing) countSubs ss `shouldReturn` (1, 2) atomically $ SS.setSessionId tSess2 "456" ss - atomically $ SS.addActiveSub' tSess2 "456" q4 False ss + atomically $ SS.addActiveSub' tSess2 "456" Nothing q4 False ss atomically (SS.hasPendingSub tSess2 (rcvId q4) ss) `shouldReturn` False atomically (SS.hasActiveSub tSess2 (rcvId q4) ss) `shouldReturn` True atomically (SS.hasActiveSub tSess1 (rcvId q4) ss) `shouldReturn` False -- wrong transport session diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs index 82a39af397..27a72d2ac1 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -1334,7 +1334,7 @@ testMessageServiceNotifications = Resp "4" _ (SOK (Just serviceId')) <- serviceSignSendRecv nh2 nKey servicePK ("4", nId, NSUB) serviceId' `shouldBe` serviceId -- service subscription is terminated - Resp "" serviceId2 (ENDS 1) <- tGet1 nh1 + Resp "" serviceId2 (ENDS 1 _) <- tGet1 nh1 serviceId2 `shouldBe` serviceId deliverMessage rh rId rKey sh sId sKey nh2 "hello again" dec 1000 `timeout` tGetClient @SMPVersion @ErrorType 
@BrokerMsg nh1 >>= \case @@ -1374,7 +1374,7 @@ testMessageServiceNotifications = Resp "12" serviceId5 (SOKS 2 idsHash') <- signSendRecv nh1 (C.APrivateAuthKey C.SEd25519 servicePK) ("12", serviceId, NSUBS 2 idsHash) idsHash' `shouldBe` idsHash serviceId5 `shouldBe` serviceId - Resp "" serviceId6 (ENDS 2) <- tGet1 nh2 + Resp "" serviceId6 (ENDS 2 _) <- tGet1 nh2 serviceId6 `shouldBe` serviceId deliverMessage rh rId rKey sh sId sKey nh1 "connection 1 one more" dec deliverMessage rh rId'' rKey'' sh sId'' sKey'' nh1 "connection 2 one more" dec'' From 11ae20ea20e0eca886f698ac2305387e3c08da83 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 22 Dec 2025 07:56:53 +0000 Subject: [PATCH 08/91] ntf server: use different client certs for each SMP server, remove support for store log (#1681) * ntf server: remove support for store log * ntf server: use different client certificates for each SMP server --- simplexmq.cabal | 1 - src/Simplex/FileTransfer/Client.hs | 3 +- src/Simplex/Messaging/Agent/Client.hs | 12 +- .../Messaging/Agent/Store/AgentStore.hs | 14 +- src/Simplex/Messaging/Client.hs | 14 +- src/Simplex/Messaging/Client/Agent.hs | 32 +- src/Simplex/Messaging/Notifications/Server.hs | 2 +- .../Messaging/Notifications/Server/Env.hs | 63 +-- .../Messaging/Notifications/Server/Main.hs | 96 +--- .../Notifications/Server/Store/Migrations.hs | 36 +- .../Notifications/Server/Store/Postgres.hs | 521 +++++++----------- .../Server/Store/ntf_server_schema.sql | 5 +- .../Notifications/Server/StoreLog.hs | 177 ------ src/Simplex/Messaging/Server.hs | 5 +- src/Simplex/Messaging/Server/Env/STM.hs | 2 +- .../Messaging/Transport/HTTP2/Client.hs | 9 +- tests/AgentTests/FunctionalAPITests.hs | 3 + 17 files changed, 322 insertions(+), 673 deletions(-) delete mode 100644 src/Simplex/Messaging/Notifications/Server/StoreLog.hs diff --git a/simplexmq.cabal b/simplexmq.cabal index 3f9d1f61da..13759a05a2 100644 --- a/simplexmq.cabal +++ b/simplexmq.cabal @@ -275,7 +275,6 @@ library 
Simplex.Messaging.Notifications.Server.Store.Migrations Simplex.Messaging.Notifications.Server.Store.Postgres Simplex.Messaging.Notifications.Server.Store.Types - Simplex.Messaging.Notifications.Server.StoreLog Simplex.Messaging.Server.MsgStore.Postgres Simplex.Messaging.Server.QueueStore.Postgres Simplex.Messaging.Server.QueueStore.Postgres.Migrations diff --git a/src/Simplex/FileTransfer/Client.hs b/src/Simplex/FileTransfer/Client.hs index 62f06b7d31..a425138e5f 100644 --- a/src/Simplex/FileTransfer/Client.hs +++ b/src/Simplex/FileTransfer/Client.hs @@ -11,6 +11,7 @@ module Simplex.FileTransfer.Client where +import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad import Control.Monad.Except @@ -264,7 +265,7 @@ downloadXFTPChunk g c@XFTPClient {config} rpKey fId chunkSpec@XFTPRcvChunkSpec { where errors = [ Handler $ \(e :: H.HTTP2Error) -> pure $ Left $ PCENetworkError $ NEConnectError $ displayException e, - Handler $ \(e :: IOException) -> pure $ Left $ PCEIOError e, + Handler $ \(e :: IOException) -> pure $ Left $ PCEIOError $ E.displayException e, Handler $ \(e :: SomeException) -> pure $ Left $ PCENetworkError $ toNetworkError e ] download cbState = diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 9bf1afd8db..4fd9eb1752 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -751,8 +751,8 @@ smpConnectClient c@AgentClient {smpClients, msgQ, proxySessTs, presetDomains} nm atomically $ SS.setSessionId tSess (sessionId $ thParams smp) $ currentSubs c updateClientService service smp pure SMPConnectedClient {connectedClient = smp, proxiedRelays = prs} - updateClientService service smp = case (service, smpClientService smp) of - (Just (_, serviceId_), Just THClientService {serviceId}) -> withStore' c $ \db -> do + updateClientService service smp = case (service, smpClientServiceId smp) of + (Just (_, serviceId_), Just serviceId) -> withStore' 
c $ \db -> do setClientServiceId db userId srv serviceId forM_ serviceId_ $ \sId -> when (sId /= serviceId) $ removeRcvServiceAssocs db userId srv (Just _, Nothing) -> withStore' c $ \db -> deleteClientService db userId srv -- e.g., server version downgrade @@ -1255,7 +1255,7 @@ protocolClientError protocolError_ host = \case PCETransportError e -> BROKER host $ TRANSPORT e e@PCECryptoError {} -> INTERNAL $ show e PCEServiceUnavailable {} -> BROKER host NO_SERVICE - PCEIOError e -> BROKER host $ NETWORK $ NEConnectError $ E.displayException e + PCEIOError e -> BROKER host $ NETWORK $ NEConnectError e -- it is consistent with smpClientServiceError clientServiceError :: AgentErrorType -> Bool @@ -1546,6 +1546,7 @@ processSubResults c tSess@(userId, srv, _) sessId serviceId_ rs = do Left e -> case smpErrorClientNotice e of Just notice_ -> (failed', subscribed, (rq, notice_) : notices, ignored) where + -- TODO [certs rcv] not used? notices' = if isJust notice_ || isJust clientNoticeId then (rq, notice_) : notices else notices Nothing | temporaryClientError e -> acc @@ -1678,7 +1679,7 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c (active, (serviceQs, notices)) <- atomically $ do r@(_, (_, notices)) <- ifM (activeClientSession c tSess sessId) - ((True,) <$> processSubResults c tSess sessId smpServiceId rs) + ((True,) <$> processSubResults c tSess sessId (smpClientServiceId smp) rs) ((False, ([], [])) <$ incSMPServerStat' c userId srv connSubIgnored (length rs)) unless (null notices) $ takeTMVar $ clientNoticesLock c pure r @@ -1704,7 +1705,6 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c where tSess = transportSession' smp sessId = sessionId $ thParams smp - smpServiceId = (\THClientService {serviceId} -> serviceId) <$> smpClientService smp processRcvServiceAssocs :: SMPQueue q => AgentClient -> [q] -> AM' () processRcvServiceAssocs _ [] = pure () @@ -1752,7 +1752,7 @@ subscribeClientService c 
withEvent userId srv (ServiceSub _ n idsHash) = withServiceClient :: AgentClient -> SMPTransportSession -> (SMPClient -> ServiceId -> ExceptT SMPClientError IO a) -> AM a withServiceClient c tSess subscribe = withLogClient c NRMBackground tSess B.empty "SUBS" $ \(SMPConnectedClient smp _) -> - case (\THClientService {serviceId} -> serviceId) <$> smpClientService smp of + case smpClientServiceId smp of Just smpServiceId -> subscribe smp smpServiceId Nothing -> throwE PCEServiceUnavailable diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 853a769088..2dcb763279 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -472,15 +472,21 @@ toServerService (host, port, kh, serviceId, n, Binary idsHash) = (SMPServer host port kh, ServiceSub serviceId n (IdsHash idsHash)) setClientServiceId :: DB.Connection -> UserId -> SMPServer -> ServiceId -> IO () -setClientServiceId db userId srv serviceId = +setClientServiceId db userId (SMPServer h p kh) serviceId = DB.execute db [sql| UPDATE client_services SET service_id = ? - WHERE user_id = ? AND host = ? AND port = ? + FROM servers s + WHERE client_services.user_id = ? + AND client_services.host = ? + AND client_services.port = ? + AND s.host = client_services.host + AND s.port = client_services.port + AND COALESCE(client_services.server_key_hash, s.key_hash) = ? 
|] - (serviceId, userId, host srv, port srv) + (serviceId, userId, h, p, kh) deleteClientService :: DB.Connection -> UserId -> SMPServer -> IO () deleteClientService db userId (SMPServer h p kh) = @@ -2307,7 +2313,7 @@ unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = setRcvServiceAssocs :: SMPQueue q => DB.Connection -> [q] -> IO () setRcvServiceAssocs db rqs = #if defined(dbPostgres) - DB.execute db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id IN " $ Only $ In (map queueId rqs) + DB.execute db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id IN ?" $ Only $ In (map queueId rqs) #else DB.executeMany db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id = ?" $ map (Only . queueId) rqs #endif diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index ac2dc9a9d2..ebc458c0e9 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -52,6 +52,7 @@ module Simplex.Messaging.Client subscribeSMPQueuesNtfs, subscribeService, smpClientService, + smpClientServiceId, secureSMPQueue, secureSndSMPQueue, proxySecureSndSMPQueue, @@ -128,7 +129,8 @@ import Control.Applicative ((<|>)) import Control.Concurrent (ThreadId, forkFinally, forkIO, killThread, mkWeakThreadId) import Control.Concurrent.Async import Control.Concurrent.STM -import Control.Exception +import Control.Exception (Exception, SomeException) +import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad import Control.Monad.Except @@ -565,7 +567,7 @@ getProtocolClient g nm transportSession@(_, srv, _) cfg@ProtocolClientConfig {qS case chooseTransportHost networkConfig (host srv) of Right useHost -> (getCurrentTime >>= mkProtocolClient useHost >>= runClient useTransport useHost) - `catch` \(e :: IOException) -> pure . 
Left $ PCEIOError e + `E.catch` \(e :: SomeException) -> pure $ Left $ PCEIOError $ E.displayException e Left e -> pure $ Left e where NetworkConfig {tcpConnectTimeout, tcpTimeout, smpPingInterval} = networkConfig @@ -638,7 +640,7 @@ getProtocolClient g nm transportSession@(_, srv, _) cfg@ProtocolClientConfig {qS writeTVar (connected c) True putTMVar cVar $ Right c' raceAny_ ([send c' th, process c', receive c' th] <> [monitor c' | smpPingInterval > 0]) - `finally` disconnected c' + `E.finally` disconnected c' send :: Transport c => ProtocolClient v err msg -> THandle v c 'TClient -> IO () send ProtocolClient {client_ = PClient {sndQ}} h = forever $ atomically (readTBQueue sndQ) >>= sendPending @@ -765,7 +767,7 @@ data ProtocolClientError err | -- | Error when cryptographically "signing" the command or when initializing crypto_box. PCECryptoError C.CryptoError | -- | IO Error - PCEIOError IOException + PCEIOError String deriving (Eq, Show, Exception) type SMPClientError = ProtocolClientError ErrorType @@ -926,6 +928,10 @@ smpClientService :: SMPClient -> Maybe THClientService smpClientService = thAuth . thParams >=> clientService {-# INLINE smpClientService #-} +smpClientServiceId :: SMPClient -> Maybe ServiceId +smpClientServiceId = fmap (\THClientService {serviceId} -> serviceId) . 
smpClientService +{-# INLINE smpClientServiceId #-} + enablePings :: SMPClient -> IO () enablePings ProtocolClient {client_ = PClient {sendPings}} = atomically $ writeTVar sendPings True {-# INLINE enablePings #-} diff --git a/src/Simplex/Messaging/Client/Agent.hs b/src/Simplex/Messaging/Client/Agent.hs index 45d747d210..9739c19c78 100644 --- a/src/Simplex/Messaging/Client/Agent.hs +++ b/src/Simplex/Messaging/Client/Agent.hs @@ -15,6 +15,7 @@ module Simplex.Messaging.Client.Agent ( SMPClientAgent (..), SMPClientAgentConfig (..), SMPClientAgentEvent (..), + DBService (..), OwnServer, defaultSMPClientAgentConfig, newSMPClientAgent, @@ -133,6 +134,7 @@ defaultSMPClientAgentConfig = data SMPClientAgent p = SMPClientAgent { agentCfg :: SMPClientAgentConfig, agentParty :: SParty p, + dbService :: Maybe DBService, active :: TVar Bool, startedAt :: UTCTime, msgQ :: TBQueue (ServerTransmissionBatch SMPVersion ErrorType BrokerMsg), @@ -155,8 +157,8 @@ data SMPClientAgent p = SMPClientAgent type OwnServer = Bool -newSMPClientAgent :: SParty p -> SMPClientAgentConfig -> TVar ChaChaDRG -> IO (SMPClientAgent p) -newSMPClientAgent agentParty agentCfg@SMPClientAgentConfig {msgQSize, agentQSize} randomDrg = do +newSMPClientAgent :: SParty p -> SMPClientAgentConfig -> Maybe DBService -> TVar ChaChaDRG -> IO (SMPClientAgent p) +newSMPClientAgent agentParty agentCfg@SMPClientAgentConfig {msgQSize, agentQSize} dbService randomDrg = do active <- newTVarIO True startedAt <- getCurrentTime msgQ <- newTBQueueIO msgQSize @@ -173,6 +175,7 @@ newSMPClientAgent agentParty agentCfg@SMPClientAgentConfig {msgQSize, agentQSize SMPClientAgent { agentCfg, agentParty, + dbService, active, startedAt, msgQ, @@ -188,6 +191,11 @@ newSMPClientAgent agentParty agentCfg@SMPClientAgentConfig {msgQSize, agentQSize workerSeq } +data DBService = DBService + { getCredentials :: SMPServer -> IO (Either SMPClientError ServiceCredentials), + updateServiceId :: SMPServer -> Maybe ServiceId -> IO (Either 
SMPClientError ()) + } + -- | Get or create SMP client for SMPServer getSMPServerClient' :: SMPClientAgent p -> SMPServer -> ExceptT SMPClientError IO SMPClient getSMPServerClient' ca srv = snd <$> getSMPServerClient'' ca srv @@ -218,7 +226,7 @@ getSMPServerClient'' ca@SMPClientAgent {agentCfg, smpClients, smpSessions, worke newSMPClient :: SMPClientVar -> IO (Either SMPClientError (OwnServer, SMPClient)) newSMPClient v = do - r <- connectClient ca srv v `E.catch` (pure . Left . PCEIOError) + r <- connectClient ca srv v `E.catch` \(e :: E.SomeException) -> pure $ Left $ PCEIOError $ E.displayException e case r of Right smp -> do logInfo . decodeUtf8 $ "Agent connected to " <> showServer srv @@ -227,8 +235,7 @@ getSMPServerClient'' ca@SMPClientAgent {agentCfg, smpClients, smpSessions, worke atomically $ do putTMVar (sessionVar v) (Right c) TM.insert (sessionId $ thParams smp) c smpSessions - let serviceId_ = (\THClientService {serviceId} -> serviceId) <$> smpClientService smp - notify ca $ CAConnected srv serviceId_ + notify ca $ CAConnected srv $ smpClientServiceId smp pure $ Right c Left e -> do let ei = persistErrorInterval agentCfg @@ -249,9 +256,18 @@ isOwnServer SMPClientAgent {agentCfg} ProtocolServer {host} = -- | Run an SMP client for SMPClientVar connectClient :: SMPClientAgent p -> SMPServer -> SMPClientVar -> IO (Either SMPClientError SMPClient) -connectClient ca@SMPClientAgent {agentCfg, smpClients, smpSessions, msgQ, randomDrg, startedAt} srv v = - getProtocolClient randomDrg NRMBackground (1, srv, Nothing) (smpCfg agentCfg) [] (Just msgQ) startedAt clientDisconnected +connectClient ca@SMPClientAgent {agentCfg, dbService, smpClients, smpSessions, msgQ, randomDrg, startedAt} srv v = case dbService of + Just dbs -> runExceptT $ do + creds <- ExceptT $ getCredentials dbs srv + smp <- ExceptT $ getClient cfg {serviceCredentials = Just creds} + whenM (atomically $ activeClientSession ca smp srv) $ + ExceptT $ updateServiceId dbs srv $ smpClientServiceId smp 
+ pure smp + Nothing -> getClient cfg where + cfg = smpCfg agentCfg + getClient cfg' = getProtocolClient randomDrg NRMBackground (1, srv, Nothing) cfg' [] (Just msgQ) startedAt clientDisconnected + clientDisconnected :: SMPClient -> IO () clientDisconnected smp = do removeClientAndSubs smp >>= serverDown @@ -435,7 +451,7 @@ smpSubscribeQueues ca smp srv subs = do unless (null notPending) $ removePendingSubs ca srv notPending pure acc sessId = sessionId $ thParams smp - smpServiceId = (\THClientService {serviceId} -> serviceId) <$> smpClientService smp + smpServiceId = smpClientServiceId smp groupSub :: Map QueueId C.APrivateAuthKey -> ((QueueId, C.APrivateAuthKey), Either SMPClientError (Maybe ServiceId)) -> diff --git a/src/Simplex/Messaging/Notifications/Server.hs b/src/Simplex/Messaging/Notifications/Server.hs index e7c1ca5f97..7d9e36c993 100644 --- a/src/Simplex/Messaging/Notifications/Server.hs +++ b/src/Simplex/Messaging/Notifications/Server.hs @@ -588,7 +588,7 @@ ntfSubscriber NtfSubscriber {smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = logError $ "SMP server service subscription error " <> showService srv serviceSub <> ": " <> tshow e CAServiceUnavailable srv serviceSub -> do logError $ "SMP server service unavailable: " <> showService srv serviceSub - removeServiceAssociation st srv >>= \case + removeServiceAndAssociations st srv >>= \case Right (srvId, updated) -> do logSubStatus srv "removed service association" updated updated void $ subscribeSrvSubs ca st batchSize (srv, srvId, Nothing) diff --git a/src/Simplex/Messaging/Notifications/Server/Env.hs b/src/Simplex/Messaging/Notifications/Server/Env.hs index b0eafbc630..9ac89a12d6 100644 --- a/src/Simplex/Messaging/Notifications/Server/Env.hs +++ b/src/Simplex/Messaging/Notifications/Server/Env.hs @@ -4,13 +4,14 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} module 
Simplex.Messaging.Notifications.Server.Env where import Control.Concurrent (ThreadId) -import Control.Logger.Simple -import Control.Monad +import Control.Monad.Except +import Control.Monad.Trans.Except import Crypto.Random import Data.Int (Int64) import Data.List.NonEmpty (NonEmpty) @@ -21,28 +22,26 @@ import qualified Data.X509.Validation as XV import Network.Socket import qualified Network.TLS as TLS import Numeric.Natural -import Simplex.Messaging.Client (ProtocolClientConfig (..)) +import Simplex.Messaging.Client (ProtocolClientError (..), SMPClientError) import Simplex.Messaging.Client.Agent import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Notifications.Protocol import Simplex.Messaging.Notifications.Server.Push.APNS import Simplex.Messaging.Notifications.Server.Stats -import Simplex.Messaging.Notifications.Server.Store (newNtfSTMStore) import Simplex.Messaging.Notifications.Server.Store.Postgres import Simplex.Messaging.Notifications.Server.Store.Types -import Simplex.Messaging.Notifications.Server.StoreLog (readWriteNtfSTMStore) import Simplex.Messaging.Notifications.Transport (NTFVersion, VersionRangeNTF) -import Simplex.Messaging.Protocol (BasicAuth, CorrId, Party (..), SMPServer, SParty (..), Transmission) +import Simplex.Messaging.Protocol (BasicAuth, CorrId, Party (..), SMPServer, SParty (..), ServiceId, Transmission) import Simplex.Messaging.Server.Env.STM (StartOptions (..)) import Simplex.Messaging.Server.Expiration import Simplex.Messaging.Server.QueueStore.Postgres.Config (PostgresStoreCfg (..)) -import Simplex.Messaging.Server.StoreLog (closeStoreLog) import Simplex.Messaging.Session import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM import Simplex.Messaging.Transport (ASrvTransport, SMPServiceRole (..), ServiceCredentials (..), THandleParams, TransportPeer (..)) +import Simplex.Messaging.Transport.Credentials (genCredentials, tlsCredentials) import Simplex.Messaging.Transport.Server (AddHTTP, 
ServerCredentials, TransportServerConfig, loadFingerprint, loadServerCredential) -import System.Exit (exitFailure) +import Simplex.Messaging.Util (liftEitherWith) import System.Mem.Weak (Weak) import UnliftIO.STM @@ -96,33 +95,35 @@ data NtfEnv = NtfEnv } newNtfServerEnv :: NtfServerConfig -> IO NtfEnv -newNtfServerEnv config@NtfServerConfig {pushQSize, smpAgentCfg, apnsConfig, dbStoreConfig, ntfCredentials, useServiceCreds, startOptions} = do - when (compactLog startOptions) $ compactDbStoreLog $ dbStoreLogPath dbStoreConfig +newNtfServerEnv config@NtfServerConfig {pushQSize, smpAgentCfg, apnsConfig, dbStoreConfig, ntfCredentials, useServiceCreds} = do random <- C.newRandom store <- newNtfDbStore dbStoreConfig tlsServerCreds <- loadServerCredential ntfCredentials - serviceCertHash@(XV.Fingerprint fp) <- loadFingerprint ntfCredentials - smpAgentCfg' <- - if useServiceCreds - then do - serviceSignKey <- case C.x509ToPrivate' $ snd tlsServerCreds of - Right pk -> pure pk - Left e -> putStrLn ("Server has no valid key: " <> show e) >> exitFailure - let service = ServiceCredentials {serviceRole = SRNotifier, serviceCreds = tlsServerCreds, serviceCertHash, serviceSignKey} - pure smpAgentCfg {smpCfg = (smpCfg smpAgentCfg) {serviceCredentials = Just service}} - else pure smpAgentCfg - subscriber <- newNtfSubscriber smpAgentCfg' random + XV.Fingerprint fp <- loadFingerprint ntfCredentials + let dbService = if useServiceCreds then Just $ mkDbService random store else Nothing + subscriber <- newNtfSubscriber smpAgentCfg dbService random pushServer <- newNtfPushServer pushQSize apnsConfig serverStats <- newNtfServerStats =<< getCurrentTime pure NtfEnv {config, subscriber, pushServer, store, random, tlsServerCreds, serverIdentity = C.KeyHash fp, serverStats} where - compactDbStoreLog = \case - Just f -> do - logNote $ "compacting store log " <> T.pack f - newNtfSTMStore >>= readWriteNtfSTMStore False f >>= closeStoreLog - Nothing -> do - logError "Error: `--compact-log` used 
without `enable: on` option in STORE_LOG section of INI file" - exitFailure + mkDbService g st = DBService {getCredentials, updateServiceId} + where + getCredentials :: SMPServer -> IO (Either SMPClientError ServiceCredentials) + getCredentials srv = runExceptT $ do + ExceptT (withClientDB "" st $ \db -> getNtfServiceCredentials db srv >>= mapM (mkServiceCreds db)) >>= \case + Just (C.KeyHash kh, serviceCreds) -> do + serviceSignKey <- liftEitherWith PCEIOError $ C.x509ToPrivate' $ snd serviceCreds + pure ServiceCredentials {serviceRole = SRNotifier, serviceCreds, serviceCertHash = XV.Fingerprint kh, serviceSignKey} + Nothing -> throwE PCEServiceUnavailable -- this error cannot happen, as clients never connect to unknown servers + mkServiceCreds db = \case + (_, Just tlsCreds) -> pure tlsCreds + (srvId, Nothing) -> do + cred <- genCredentials g Nothing (25, 24 * 999999) "simplex" + let tlsCreds = tlsCredentials [cred] + setNtfServiceCredentials db srvId tlsCreds + pure tlsCreds + updateServiceId :: SMPServer -> Maybe ServiceId -> IO (Either SMPClientError ()) + updateServiceId srv serviceId_ = withClientDB "" st $ \db -> updateNtfServiceId db srv serviceId_ data NtfSubscriber = NtfSubscriber { smpSubscribers :: TMap SMPServer SMPSubscriberVar, @@ -132,11 +133,11 @@ data NtfSubscriber = NtfSubscriber type SMPSubscriberVar = SessionVar SMPSubscriber -newNtfSubscriber :: SMPClientAgentConfig -> TVar ChaChaDRG -> IO NtfSubscriber -newNtfSubscriber smpAgentCfg random = do +newNtfSubscriber :: SMPClientAgentConfig -> Maybe DBService -> TVar ChaChaDRG -> IO NtfSubscriber +newNtfSubscriber smpAgentCfg dbService random = do smpSubscribers <- TM.emptyIO subscriberSeq <- newTVarIO 0 - smpAgent <- newSMPClientAgent SNotifierService smpAgentCfg random + smpAgent <- newSMPClientAgent SNotifierService smpAgentCfg dbService random pure NtfSubscriber {smpSubscribers, subscriberSeq, smpAgent} data SMPSubscriber = SMPSubscriber diff --git 
a/src/Simplex/Messaging/Notifications/Server/Main.hs b/src/Simplex/Messaging/Notifications/Server/Main.hs index de12c33f89..e855c84d47 100644 --- a/src/Simplex/Messaging/Notifications/Server/Main.hs +++ b/src/Simplex/Messaging/Notifications/Server/Main.hs @@ -17,42 +17,32 @@ import Data.Functor (($>)) import Data.Ini (lookupValue, readIniFile) import Data.Int (Int64) import Data.Maybe (fromMaybe) -import Data.Set (Set) -import qualified Data.Set as S import qualified Data.Text as T import Data.Text.Encoding (encodeUtf8) import qualified Data.Text.IO as T import Network.Socket (HostName, ServiceName) import Options.Applicative -import Simplex.Messaging.Agent.Store.Postgres (checkSchemaExists) import Simplex.Messaging.Agent.Store.Postgres.Options (DBOpts (..)) import Simplex.Messaging.Agent.Store.Shared (MigrationConfirmation (..)) import Simplex.Messaging.Client (HostMode (..), NetworkConfig (..), ProtocolClientConfig (..), SMPWebPortServers (..), SocksMode (..), defaultNetworkConfig, textToHostMode) import Simplex.Messaging.Client.Agent (SMPClientAgentConfig (..), defaultSMPClientAgentConfig) import qualified Simplex.Messaging.Crypto as C -import Simplex.Messaging.Notifications.Protocol (NtfTokenId) -import Simplex.Messaging.Notifications.Server (runNtfServer, restoreServerLastNtfs) +import Simplex.Messaging.Notifications.Server (runNtfServer) import Simplex.Messaging.Notifications.Server.Env (NtfServerConfig (..), defaultInactiveClientExpiration) import Simplex.Messaging.Notifications.Server.Push.APNS (defaultAPNSPushClientConfig) -import Simplex.Messaging.Notifications.Server.Store (newNtfSTMStore) -import Simplex.Messaging.Notifications.Server.Store.Postgres (exportNtfDbStore, importNtfSTMStore, newNtfDbStore) -import Simplex.Messaging.Notifications.Server.StoreLog (readWriteNtfSTMStore) import Simplex.Messaging.Notifications.Transport (alpnSupportedNTFHandshakes, supportedServerNTFVRange) import Simplex.Messaging.Protocol (ProtoServerWithAuth (..), pattern 
NtfServer) import Simplex.Messaging.Server.CLI import Simplex.Messaging.Server.Env.STM (StartOptions (..)) import Simplex.Messaging.Server.Expiration -import Simplex.Messaging.Server.Main (strParse) import Simplex.Messaging.Server.Main.Init (iniDbOpts) import Simplex.Messaging.Server.QueueStore.Postgres.Config (PostgresStoreCfg (..)) -import Simplex.Messaging.Server.StoreLog (closeStoreLog) import Simplex.Messaging.Transport (ASrvTransport) import Simplex.Messaging.Transport.Client (TransportHost (..)) import Simplex.Messaging.Transport.HTTP2 (httpALPN) import Simplex.Messaging.Transport.Server (AddHTTP, ServerCredentials (..), mkTransportServerConfig) -import Simplex.Messaging.Util (eitherToMaybe, ifM, tshow) -import System.Directory (createDirectoryIfMissing, doesFileExist, renameFile) -import System.Exit (exitFailure) +import Simplex.Messaging.Util (eitherToMaybe, tshow) +import System.Directory (createDirectoryIfMissing, doesFileExist) import System.FilePath (combine) import System.IO (BufferMode (..), hSetBuffering, stderr, stdout) import Text.Read (readMaybe) @@ -73,69 +63,11 @@ ntfServerCLI cfgPath logPath = deleteDirIfExists cfgPath deleteDirIfExists logPath putStrLn "Deleted configuration and log files" - Database cmd dbOpts@DBOpts {connstr, schema} -> withIniFile $ \ini -> do - schemaExists <- checkSchemaExists connstr schema - storeLogExists <- doesFileExist storeLogFilePath - lastNtfsExists <- doesFileExist defaultLastNtfsFile - case cmd of - SCImport skipTokens - | schemaExists && (storeLogExists || lastNtfsExists) -> exitConfigureNtfStore connstr schema - | schemaExists -> do - putStrLn $ "Schema " <> B.unpack schema <> " already exists in PostrgreSQL database: " <> B.unpack connstr - exitFailure - | not storeLogExists -> do - putStrLn $ storeLogFilePath <> " file does not exist." - exitFailure - | not lastNtfsExists -> do - putStrLn $ defaultLastNtfsFile <> " file does not exist." 
- exitFailure - | otherwise -> do - storeLogFile <- getRequiredStoreLogFile ini - confirmOrExit - ("WARNING: store log file " <> storeLogFile <> " will be compacted and imported to PostrgreSQL database: " <> B.unpack connstr <> ", schema: " <> B.unpack schema) - "Notification server store not imported" - stmStore <- newNtfSTMStore - sl <- readWriteNtfSTMStore True storeLogFile stmStore - closeStoreLog sl - restoreServerLastNtfs stmStore defaultLastNtfsFile - let storeCfg = PostgresStoreCfg {dbOpts = dbOpts {createSchema = True}, dbStoreLogPath = Nothing, confirmMigrations = MCConsole, deletedTTL = iniDeletedTTL ini} - ps <- newNtfDbStore storeCfg - (tCnt, sCnt, nCnt, serviceCnt) <- importNtfSTMStore ps stmStore skipTokens - renameFile storeLogFile $ storeLogFile <> ".bak" - putStrLn $ "Import completed: " <> show tCnt <> " tokens, " <> show sCnt <> " subscriptions, " <> show serviceCnt <> " service associations, " <> show nCnt <> " last token notifications." - putStrLn "Configure database options in INI file." - SCExport - | schemaExists && storeLogExists -> exitConfigureNtfStore connstr schema - | not schemaExists -> do - putStrLn $ "Schema " <> B.unpack schema <> " does not exist in PostrgreSQL database: " <> B.unpack connstr - exitFailure - | storeLogExists -> do - putStrLn $ storeLogFilePath <> " file already exists." - exitFailure - | lastNtfsExists -> do - putStrLn $ defaultLastNtfsFile <> " file already exists." 
- exitFailure - | otherwise -> do - confirmOrExit - ("WARNING: PostrgreSQL database schema " <> B.unpack schema <> " (database: " <> B.unpack connstr <> ") will be exported to store log file " <> storeLogFilePath) - "Notification server store not imported" - let storeCfg = PostgresStoreCfg {dbOpts, dbStoreLogPath = Just storeLogFilePath, confirmMigrations = MCConsole, deletedTTL = iniDeletedTTL ini} - st <- newNtfDbStore storeCfg - (tCnt, sCnt, nCnt) <- exportNtfDbStore st defaultLastNtfsFile - putStrLn $ "Export completed: " <> show tCnt <> " tokens, " <> show sCnt <> " subscriptions, " <> show nCnt <> " last token notifications." where withIniFile a = doesFileExist iniFile >>= \case True -> readIniFile iniFile >>= either exitError a _ -> exitError $ "Error: server is not initialized (" <> iniFile <> " does not exist).\nRun `" <> executableName <> " init`." - getRequiredStoreLogFile ini = do - case enableStoreLog' ini $> storeLogFilePath of - Just storeLogFile -> do - ifM - (doesFileExist storeLogFile) - (pure storeLogFile) - (putStrLn ("Store log file " <> storeLogFile <> " not found") >> exitFailure) - Nothing -> putStrLn "Store log disabled, see `[STORE_LOG] enable`" >> exitFailure iniFile = combine cfgPath "ntf-server.ini" serverVersion = "SMP notifications server v" <> simplexmqVersionCommit defaultServerPort = "443" @@ -289,11 +221,6 @@ ntfServerCLI cfgPath logPath = startOptions } iniDeletedTTL ini = readIniDefault (86400 * defaultDeletedTTL) "STORE_LOG" "db_deleted_ttl" ini - defaultLastNtfsFile = combine logPath "ntf-server-last-notifications.log" - exitConfigureNtfStore connstr schema = do - putStrLn $ "Error: both " <> storeLogFilePath <> " file and " <> B.unpack schema <> " schema are present (database: " <> B.unpack connstr <> ")." - putStrLn "Configure notification server storage." 
- exitFailure printNtfServerConfig :: [(ServiceName, ASrvTransport, AddHTTP)] -> PostgresStoreCfg -> IO () printNtfServerConfig transports PostgresStoreCfg {dbOpts = DBOpts {connstr, schema}, dbStoreLogPath} = do @@ -305,9 +232,6 @@ data CliCommand | OnlineCert CertOptions | Start StartOptions | Delete - | Database StoreCmd DBOpts - -data StoreCmd = SCImport (Set NtfTokenId) | SCExport data InitOptions = InitOptions { enableStoreLog :: Bool, @@ -338,22 +262,8 @@ cliCommandP cfgPath logPath iniFile = <> command "cert" (info (OnlineCert <$> certOptionsP) (progDesc $ "Generate new online TLS server credentials (configuration: " <> iniFile <> ")")) <> command "start" (info (Start <$> startOptionsP) (progDesc $ "Start server (configuration: " <> iniFile <> ")")) <> command "delete" (info (pure Delete) (progDesc "Delete configuration and log files")) - <> command "database" (info (Database <$> databaseCmdP <*> dbOptsP defaultNtfDBOpts) (progDesc "Import/export notifications server store to/from PostgreSQL database")) ) where - databaseCmdP = - hsubparser - ( command "import" (info (SCImport <$> skipTokensP) (progDesc $ "Import store logs into a new PostgreSQL database schema")) - <> command "export" (info (pure SCExport) (progDesc $ "Export PostgreSQL database schema to store logs")) - ) - skipTokensP :: Parser (Set NtfTokenId) - skipTokensP = - option - strParse - ( long "skip-tokens" - <> help "Skip tokens during import" - <> value S.empty - ) initP :: Parser InitOptions initP = do enableStoreLog <- diff --git a/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs b/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs index 8c0da7c07e..87e89ac8da 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store/Migrations.hs @@ -14,7 +14,8 @@ ntfServerSchemaMigrations :: [(String, Text, Maybe Text)] ntfServerSchemaMigrations = [ ("20250417_initial", m20250417_initial, Nothing), 
("20250517_service_cert", m20250517_service_cert, Just down_m20250517_service_cert), - ("20250830_queue_ids_hash", m20250830_queue_ids_hash, Just down_m20250830_queue_ids_hash) + ("20250830_queue_ids_hash", m20250830_queue_ids_hash, Just down_m20250830_queue_ids_hash), + ("20251219_service_cert_per_server", m20251219_service_cert_per_server, Just down_m20251219_service_cert_per_server) ] -- | The list of migrations in ascending order by date @@ -225,3 +226,36 @@ ALTER TABLE smp_servers DROP COLUMN smp_notifier_ids_hash; |] <> dropXorHashFuncs + +m20251219_service_cert_per_server :: Text +m20251219_service_cert_per_server = + [r| +ALTER TABLE smp_servers + ADD COLUMN ntf_service_cert BYTEA, + ADD COLUMN ntf_service_cert_hash BYTEA, + ADD COLUMN ntf_service_priv_key BYTEA; + |] + <> resetNtfServices + +down_m20251219_service_cert_per_server :: Text +down_m20251219_service_cert_per_server = + [r| +ALTER TABLE smp_servers + DROP COLUMN ntf_service_cert, + DROP COLUMN ntf_service_cert_hash, + DROP COLUMN ntf_service_priv_key; + |] + <> resetNtfServices + +resetNtfServices :: Text +resetNtfServices = + [r| +ALTER TABLE subscriptions DISABLE TRIGGER tr_subscriptions_update; +UPDATE subscriptions SET ntf_service_assoc = FALSE; +ALTER TABLE subscriptions ENABLE TRIGGER tr_subscriptions_update; + +UPDATE smp_servers +SET ntf_service_id = NULL, + smp_notifier_count = 0, + smp_notifier_ids_hash = DEFAULT; + |] diff --git a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs index 60e81a68b7..80ab45ca1c 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs @@ -18,7 +18,6 @@ module Simplex.Messaging.Notifications.Server.Store.Postgres where -import Control.Concurrent.STM import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad @@ -26,19 +25,13 @@ import Control.Monad.Except import 
Control.Monad.IO.Class import Control.Monad.Trans.Except import Data.Bitraversable (bimapM) -import qualified Data.ByteString.Base64.URL as B64 import Data.ByteString.Char8 (ByteString) -import qualified Data.ByteString.Char8 as B -import Data.Containers.ListUtils (nubOrd) import Data.Either (fromRight) import Data.Functor (($>)) import Data.Int (Int64) -import Data.List (findIndex, foldl') import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as L -import qualified Data.Map.Strict as M import Data.Maybe (fromMaybe, isJust, mapMaybe) -import qualified Data.Set as S import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding (decodeLatin1, encodeUtf8) @@ -51,31 +44,30 @@ import Database.PostgreSQL.Simple.FromField (FromField (..)) import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.ToField (ToField (..)) import Network.Socket (ServiceName) +import qualified Network.TLS as TLS import Simplex.Messaging.Agent.Store.AgentStore () import Simplex.Messaging.Agent.Store.Postgres (closeDBStore, createDBStore) import Simplex.Messaging.Agent.Store.Postgres.Common import Simplex.Messaging.Agent.Store.Postgres.DB (fromTextField_) import Simplex.Messaging.Agent.Store.Shared (MigrationConfig (..)) +import Simplex.Messaging.Client (ProtocolClientError (..), SMPClientError) import Simplex.Messaging.Encoding import Simplex.Messaging.Encoding.String import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Notifications.Protocol -import Simplex.Messaging.Notifications.Server.Store (NtfSTMStore (..), NtfSubData (..), NtfTknData (..), TokenNtfMessageRecord (..), ntfSubServer) import Simplex.Messaging.Notifications.Server.Store.Migrations import Simplex.Messaging.Notifications.Server.Store.Types -import Simplex.Messaging.Notifications.Server.StoreLog -import Simplex.Messaging.Protocol (EntityId (..), EncNMsgMeta, ErrorType (..), IdsHash (..), NotifierId, NtfPrivateAuthKey, NtfPublicAuthKey, SMPServer, 
ServiceId, ServiceSub (..), pattern SMPServer) -import Simplex.Messaging.Server.QueueStore.Postgres (handleDuplicate, withLog_) +import Simplex.Messaging.Protocol (EntityId (..), EncNMsgMeta, ErrorType (..), IdsHash (..), NotifierId, NtfPrivateAuthKey, NtfPublicAuthKey, ProtocolServer (..), SMPServer, ServiceId, ServiceSub (..), pattern SMPServer) +import Simplex.Messaging.Server.QueueStore.Postgres (handleDuplicate) import Simplex.Messaging.Server.QueueStore.Postgres.Config (PostgresStoreCfg (..)) -import Simplex.Messaging.Server.StoreLog (openWriteStoreLog) import Simplex.Messaging.SystemTime import Simplex.Messaging.Transport.Client (TransportHost) -import Simplex.Messaging.Util (anyM, firstRow, maybeFirstRow, toChunks, tshow) +import Simplex.Messaging.Util (firstRow, maybeFirstRow, tshow) import System.Exit (exitFailure) -import System.IO (IOMode (..), hFlush, stdout, withFile) import Text.Hex (decodeHex) #if !defined(dbPostgres) +import qualified Data.X509 as X import Simplex.Messaging.Agent.Store.Postgres.DB (blobFieldDecoder) import Simplex.Messaging.Parsers (parseAll) import Simplex.Messaging.Util (eitherToMaybe) @@ -83,7 +75,6 @@ import Simplex.Messaging.Util (eitherToMaybe) data NtfPostgresStore = NtfPostgresStore { dbStore :: DBStore, - dbStoreLog :: Maybe (StoreLog 'WriteMode), deletedTTL :: Int64 } @@ -99,25 +90,22 @@ data NtfEntityRec (e :: NtfEntity) where NtfSub :: NtfSubRec -> NtfEntityRec 'Subscription newNtfDbStore :: PostgresStoreCfg -> IO NtfPostgresStore -newNtfDbStore PostgresStoreCfg {dbOpts, dbStoreLogPath, confirmMigrations, deletedTTL} = do +newNtfDbStore PostgresStoreCfg {dbOpts, confirmMigrations, deletedTTL} = do dbStore <- either err pure =<< createDBStore dbOpts ntfServerMigrations (MigrationConfig confirmMigrations Nothing) - dbStoreLog <- mapM (openWriteStoreLog True) dbStoreLogPath - pure NtfPostgresStore {dbStore, dbStoreLog, deletedTTL} + pure NtfPostgresStore {dbStore, deletedTTL} where err e = do logError $ "STORE: 
newNtfStore, error opening PostgreSQL database, " <> tshow e exitFailure closeNtfDbStore :: NtfPostgresStore -> IO () -closeNtfDbStore NtfPostgresStore {dbStore, dbStoreLog} = do - closeDBStore dbStore - mapM_ closeStoreLog dbStoreLog +closeNtfDbStore NtfPostgresStore {dbStore} = closeDBStore dbStore addNtfToken :: NtfPostgresStore -> NtfTknRec -> IO (Either ErrorType ()) addNtfToken st tkn = withFastDB "addNtfToken" st $ \db -> - E.try (DB.execute db insertNtfTknQuery $ ntfTknToRow tkn) - >>= bimapM handleDuplicate (\_ -> withLog "addNtfToken" st (`logCreateToken` tkn)) + E.try (void $ DB.execute db insertNtfTknQuery $ ntfTknToRow tkn) + >>= bimapM handleDuplicate pure insertNtfTknQuery :: Query insertNtfTknQuery = @@ -128,7 +116,7 @@ insertNtfTknQuery = |] replaceNtfToken :: NtfPostgresStore -> NtfTknRec -> IO (Either ErrorType ()) -replaceNtfToken st NtfTknRec {ntfTknId, token = token@(DeviceToken pp ppToken), tknStatus, tknRegCode = code@(NtfRegCode regCode)} = +replaceNtfToken st NtfTknRec {ntfTknId, token = DeviceToken pp ppToken, tknStatus, tknRegCode = NtfRegCode regCode} = withFastDB "replaceNtfToken" st $ \db -> runExceptT $ do ExceptT $ assertUpdated <$> DB.execute @@ -139,7 +127,6 @@ replaceNtfToken st NtfTknRec {ntfTknId, token = token@(DeviceToken pp ppToken), WHERE token_id = ? 
|] (pp, Binary ppToken, tknStatus, Binary regCode, ntfTknId) - withLog "replaceNtfToken" st $ \sl -> logUpdateToken sl ntfTknId token code ntfTknToRow :: NtfTknRec -> NtfTknRow ntfTknToRow NtfTknRec {ntfTknId, token, tknStatus, tknVerifyKey, tknDhPrivKey, tknDhSecret, tknRegCode, tknCronInterval, tknUpdatedAt} = @@ -160,15 +147,14 @@ getNtfToken_ :: ToRow q => NtfPostgresStore -> Query -> q -> IO (Either ErrorTyp getNtfToken_ st cond params = withFastDB' "getNtfToken" st $ \db -> do tkn_ <- maybeFirstRow rowToNtfTkn $ DB.query db (ntfTknQuery <> cond) params - mapM_ (updateTokenDate st db) tkn_ + mapM_ (updateTokenDate db) tkn_ pure tkn_ -updateTokenDate :: NtfPostgresStore -> DB.Connection -> NtfTknRec -> IO () -updateTokenDate st db NtfTknRec {ntfTknId, tknUpdatedAt} = do +updateTokenDate :: DB.Connection -> NtfTknRec -> IO () +updateTokenDate db NtfTknRec {ntfTknId, tknUpdatedAt} = do ts <- getSystemDate when (maybe True (ts /=) tknUpdatedAt) $ do void $ DB.execute db "UPDATE tokens SET updated_at = ? WHERE token_id = ?" (ts, ntfTknId) - withLog "updateTokenDate" st $ \sl -> logUpdateTokenTime sl ntfTknId ts type NtfTknRow = (NtfTokenId, PushProvider, Binary ByteString, NtfTknStatus, NtfPublicAuthKey, C.PrivateKeyX25519, C.DhSecretX25519, Binary ByteString, Word16, Maybe SystemDate) @@ -206,7 +192,6 @@ deleteNtfToken st tknId = |] (Only tknId) liftIO $ void $ DB.execute db "DELETE FROM tokens WHERE token_id = ?" (Only tknId) - withLog "deleteNtfToken" st (`logDeleteToken` tknId) pure subs where toServerSubs :: SMPServerRow :. Only Text -> (SMPServer, [NotifierId]) @@ -235,7 +220,6 @@ updateTknCronInterval st tknId cronInt = withFastDB "updateTknCronInterval" st $ \db -> runExceptT $ do ExceptT $ assertUpdated <$> DB.execute db "UPDATE tokens SET cron_interval = ? WHERE token_id = ?" (cronInt, tknId) - withLog "updateTknCronInterval" st $ \sl -> logTokenCron sl tknId 0 -- Reads servers that have subscriptions that need subscribing. 
-- It is executed on server start, and it is supposed to crash on database error @@ -259,6 +243,73 @@ getUsedSMPServers st = let service_ = (\serviceId -> ServiceSub serviceId n idsHash) <$> serviceId_ in (SMPServer host port kh, srvId, service_) +getNtfServiceCredentials :: DB.Connection -> SMPServer -> IO (Maybe (Int64, Maybe (C.KeyHash, TLS.Credential))) +getNtfServiceCredentials db srv = + maybeFirstRow toService $ + DB.query + db + [sql| + SELECT smp_server_id, ntf_service_cert_hash, ntf_service_cert, ntf_service_priv_key + FROM smp_servers + WHERE smp_host = ? AND smp_port = ? AND smp_keyhash = ? + FOR UPDATE + |] + (host srv, port srv, keyHash srv) + where + toService (Only srvId :. creds) = (srvId, toCredentials creds) + toCredentials = \case + (Just kh, Just cert, Just pk) -> Just (kh, (cert, pk)) + _ -> Nothing + +setNtfServiceCredentials :: DB.Connection -> Int64 -> (C.KeyHash, TLS.Credential) -> IO () +setNtfServiceCredentials db srvId (kh, (cert, pk)) = + void $ DB.execute + db + [sql| + UPDATE smp_servers + SET ntf_service_cert_hash = ?, ntf_service_cert = ?, ntf_service_priv_key = ? + WHERE smp_server_id = ? + |] + (kh, cert, pk, srvId) + +updateNtfServiceId :: DB.Connection -> SMPServer -> Maybe ServiceId -> IO () +updateNtfServiceId db srv newServiceId_ = do + maybeFirstRow id (getSMPServiceForUpdate_ db srv) >>= mapM_ updateService + where + updateService (srvId, currServiceId_) = unless (currServiceId_ == newServiceId_) $ do + when (isJust currServiceId_) $ do + void $ removeServiceAssociation_ db srvId + logError $ "STORE: service ID for " <> enc (host srv) <> toServiceId <> ", removed sub associations" + void $ case newServiceId_ of + Just newServiceId -> + DB.execute + db + [sql| + UPDATE smp_servers + SET ntf_service_id = ?, + smp_notifier_count = 0, + smp_notifier_ids_hash = DEFAULT + WHERE smp_server_id = ? 
+ |] + (newServiceId, srvId) + Nothing -> + DB.execute + db + [sql| + UPDATE smp_servers + SET ntf_service_id = NULL, + ntf_service_cert = NULL, + ntf_service_cert_hash = NULL, + ntf_service_priv_key = NULL, + smp_notifier_count = 0, + smp_notifier_ids_hash = DEFAULT + WHERE smp_server_id = ? + |] + (Only srvId) + toServiceId = maybe " removed" ((" changed to " <>) . enc) newServiceId_ + enc :: StrEncoding a => a -> Text + enc = decodeLatin1 . strEncode + getServerNtfSubscriptions :: NtfPostgresStore -> Int64 -> Maybe NtfSubscriptionId -> Int -> IO (Either ErrorType [ServerNtfSub]) getServerNtfSubscriptions st srvId afterSubId_ count = withDB' "getServerNtfSubscriptions" st $ \db -> do @@ -297,7 +348,7 @@ findNtfSubscription st tknId q = withFastDB "findNtfSubscription" st $ \db -> runExceptT $ do tkn@NtfTknRec {ntfTknId, tknStatus} <- ExceptT $ getNtfToken st tknId unless (allowNtfSubCommands tknStatus) $ throwE AUTH - liftIO $ updateTokenDate st db tkn + liftIO $ updateTokenDate db tkn sub_ <- liftIO $ maybeFirstRow (rowToNtfSub q) $ DB.query @@ -330,7 +381,7 @@ getNtfSubscription st subId = WHERE s.subscription_id = ? |] (Only subId) - liftIO $ updateTokenDate st db tkn + liftIO $ updateTokenDate db tkn unless (allowNtfSubCommands tknStatus) $ throwE AUTH pure r @@ -352,36 +403,30 @@ mkNtfSubRec ntfSubId (NewNtfSub tokenId smpQueue notifierKey) = updateTknStatus :: NtfPostgresStore -> NtfTknRec -> NtfTknStatus -> IO (Either ErrorType ()) updateTknStatus st tkn status = - withFastDB' "updateTknStatus" st $ \db -> updateTknStatus_ st db tkn status + withFastDB' "updateTknStatus" st $ \db -> updateTknStatus_ db tkn status -updateTknStatus_ :: NtfPostgresStore -> DB.Connection -> NtfTknRec -> NtfTknStatus -> IO () -updateTknStatus_ st db NtfTknRec {ntfTknId} status = do - updated <- DB.execute db "UPDATE tokens SET status = ? WHERE token_id = ? AND status != ?" 
(status, ntfTknId, status) - when (updated > 0) $ withLog "updateTknStatus" st $ \sl -> logTokenStatus sl ntfTknId status +updateTknStatus_ :: DB.Connection -> NtfTknRec -> NtfTknStatus -> IO () +updateTknStatus_ db NtfTknRec {ntfTknId} status = + void $ DB.execute db "UPDATE tokens SET status = ? WHERE token_id = ? AND status != ?" (status, ntfTknId, status) -- unless it was already active setTknStatusConfirmed :: NtfPostgresStore -> NtfTknRec -> IO (Either ErrorType ()) setTknStatusConfirmed st NtfTknRec {ntfTknId} = - withFastDB' "updateTknStatus" st $ \db -> do - updated <- DB.execute db "UPDATE tokens SET status = ? WHERE token_id = ? AND status != ? AND status != ?" (NTConfirmed, ntfTknId, NTConfirmed, NTActive) - when (updated > 0) $ withLog "updateTknStatus" st $ \sl -> logTokenStatus sl ntfTknId NTConfirmed + withFastDB' "updateTknStatus" st $ \db -> + void $ DB.execute db "UPDATE tokens SET status = ? WHERE token_id = ? AND status != ? AND status != ?" (NTConfirmed, ntfTknId, NTConfirmed, NTActive) setTokenActive :: NtfPostgresStore -> NtfTknRec -> IO (Either ErrorType ()) setTokenActive st tkn@NtfTknRec {ntfTknId, token = DeviceToken pp ppToken} = withFastDB' "setTokenActive" st $ \db -> do - updateTknStatus_ st db tkn NTActive + updateTknStatus_ db tkn NTActive -- this removes other instances of the same token, e.g. because of repeated token registration attempts - tknIds <- - liftIO $ map fromOnly <$> - DB.query - db - [sql| - DELETE FROM tokens - WHERE push_provider = ? AND push_provider_token = ? AND token_id != ? - RETURNING token_id - |] - (pp, Binary ppToken, ntfTknId) - withLog "deleteNtfToken" st $ \sl -> mapM_ (logDeleteToken sl) tknIds + void $ DB.execute + db + [sql| + DELETE FROM tokens + WHERE push_provider = ? AND push_provider_token = ? AND token_id != ? 
+ |] + (pp, Binary ppToken, ntfTknId) withPeriodicNtfTokens :: NtfPostgresStore -> Int64 -> (NtfTknRec -> IO ()) -> IO Int withPeriodicNtfTokens st now notify = @@ -399,7 +444,6 @@ addNtfSubscription st sub = withFastDB "addNtfSubscription" st $ \db -> runExceptT $ do srvId :: Int64 <- ExceptT $ upsertServer db $ ntfSubServer' sub n <- liftIO $ DB.execute db insertNtfSubQuery $ ntfSubToRow srvId sub - withLog "addNtfSubscription" st (`logCreateSubscription` sub) pure (srvId, n > 0) where -- It is possible to combine these two statements into one with CTEs, @@ -442,76 +486,66 @@ ntfSubToRow srvId NtfSubRec {ntfSubId, tokenId, smpQueue = SMPQueueNtf _ nId, no deleteNtfSubscription :: NtfPostgresStore -> NtfSubscriptionId -> IO (Either ErrorType ()) deleteNtfSubscription st subId = - withFastDB "deleteNtfSubscription" st $ \db -> runExceptT $ do - ExceptT $ assertUpdated <$> + withFastDB "deleteNtfSubscription" st $ \db -> + assertUpdated <$> DB.execute db "DELETE FROM subscriptions WHERE subscription_id = ?" (Only subId) - withLog "deleteNtfSubscription" st (`logDeleteSubscription` subId) updateSubStatus :: NtfPostgresStore -> Int64 -> NotifierId -> NtfSubStatus -> IO (Either ErrorType ()) updateSubStatus st srvId nId status = withFastDB' "updateSubStatus" st $ \db -> do - sub_ :: Maybe (NtfSubscriptionId, NtfAssociatedService) <- - maybeFirstRow id $ - DB.query - db - [sql| - UPDATE subscriptions SET status = ? - WHERE smp_server_id = ? AND smp_notifier_id = ? AND status != ? - RETURNING subscription_id, ntf_service_assoc - |] - (status, srvId, nId, status) - forM_ sub_ $ \(subId, serviceAssoc) -> - withLog "updateSubStatus" st $ \sl -> logSubscriptionStatus sl (subId, status, serviceAssoc) + void $ + DB.execute + db + [sql| + UPDATE subscriptions SET status = ? + WHERE smp_server_id = ? AND smp_notifier_id = ? AND status != ? 
+ |] + (status, srvId, nId, status) updateSrvSubStatus :: NtfPostgresStore -> SMPQueueNtf -> NtfSubStatus -> IO (Either ErrorType ()) updateSrvSubStatus st q status = - withFastDB' "updateSrvSubStatus" st $ \db -> do - sub_ :: Maybe (NtfSubscriptionId, NtfAssociatedService) <- - maybeFirstRow id $ - DB.query - db - [sql| - UPDATE subscriptions s - SET status = ? - FROM smp_servers p - WHERE p.smp_server_id = s.smp_server_id - AND p.smp_host = ? AND p.smp_port = ? AND p.smp_keyhash = ? AND s.smp_notifier_id = ? - AND s.status != ? - RETURNING s.subscription_id, s.ntf_service_assoc - |] - (Only status :. smpQueueToRow q :. Only status) - forM_ sub_ $ \(subId, serviceAssoc) -> - withLog "updateSrvSubStatus" st $ \sl -> logSubscriptionStatus sl (subId, status, serviceAssoc) + withFastDB' "updateSrvSubStatus" st $ \db -> + void $ + DB.execute + db + [sql| + UPDATE subscriptions s + SET status = ? + FROM smp_servers p + WHERE p.smp_server_id = s.smp_server_id + AND p.smp_host = ? AND p.smp_port = ? AND p.smp_keyhash = ? AND s.smp_notifier_id = ? + AND s.status != ? + |] + (Only status :. smpQueueToRow q :. Only status) batchUpdateSrvSubStatus :: NtfPostgresStore -> SMPServer -> Maybe ServiceId -> NonEmpty NotifierId -> NtfSubStatus -> IO Int batchUpdateSrvSubStatus st srv newServiceId nIds status = fmap (fromRight (-1)) $ withDB "batchUpdateSrvSubStatus" st $ \db -> runExceptT $ do - (srvId :: Int64, currServiceId) <- ExceptT $ getSMPServerService db + (srvId, currServiceId) <- ExceptT $ firstRow id AUTH $ getSMPServiceForUpdate_ db srv + -- TODO [certs rcv] should this remove associations/credentials when newServiceId is Nothing or different unless (currServiceId == newServiceId) $ liftIO $ void $ DB.execute db "UPDATE smp_servers SET ntf_service_id = ? WHERE smp_server_id = ?" 
(newServiceId, srvId) let params = L.toList $ L.map (srvId,isJust newServiceId,status,) nIds liftIO $ fromIntegral <$> DB.executeMany db updateSubStatusQuery params - where - getSMPServerService db = - firstRow id AUTH $ - DB.query - db - [sql| - SELECT smp_server_id, ntf_service_id - FROM smp_servers - WHERE smp_host = ? AND smp_port = ? AND smp_keyhash = ? - FOR UPDATE - |] - (srvToRow srv) + +getSMPServiceForUpdate_ :: DB.Connection -> SMPServer -> IO [(Int64, Maybe ServiceId)] +getSMPServiceForUpdate_ db srv = + DB.query + db + [sql| + SELECT smp_server_id, ntf_service_id + FROM smp_servers + WHERE smp_host = ? AND smp_port = ? AND smp_keyhash = ? + FOR UPDATE + |] + (srvToRow srv) batchUpdateSrvSubErrors :: NtfPostgresStore -> SMPServer -> NonEmpty (NotifierId, NtfSubStatus) -> IO Int batchUpdateSrvSubErrors st srv subs = fmap (fromRight (-1)) $ withDB "batchUpdateSrvSubErrors" st $ \db -> runExceptT $ do srvId :: Int64 <- ExceptT $ getSMPServerId db let params = map (\(nId, status) -> (srvId, False, status, nId)) $ L.toList subs - subs' <- liftIO $ DB.returning db (updateSubStatusQuery <> " RETURNING s.subscription_id, s.status, s.ntf_service_assoc") params - withLog "batchUpdateStatus_" st $ forM_ subs' . logSubscriptionStatus - pure $ length subs' + liftIO $ fromIntegral <$> DB.executeMany db updateSubStatusQuery params where getSMPServerId db = firstRow fromOnly AUTH $ @@ -535,36 +569,51 @@ updateSubStatusQuery = AND (s.status != upd.status OR s.ntf_service_assoc != upd.ntf_service_assoc) |] -removeServiceAssociation :: NtfPostgresStore -> SMPServer -> IO (Either ErrorType (Int64, Int)) -removeServiceAssociation st srv = do - withDB "removeServiceAssociation" st $ \db -> runExceptT $ do - srvId <- ExceptT $ removeServerService db - subs <- - liftIO $ - DB.query - db - [sql| - UPDATE subscriptions s - SET status = ?, ntf_service_assoc = FALSE - WHERE smp_server_id = ? - AND (s.status != ? 
OR s.ntf_service_assoc != FALSE) - RETURNING s.subscription_id, s.status, s.ntf_service_assoc - |] - (NSInactive, srvId, NSInactive) - withLog "removeServiceAssociation" st $ forM_ subs . logSubscriptionStatus - pure (srvId, length subs) +removeServiceAssociation_ :: DB.Connection -> Int64 -> IO Int64 +removeServiceAssociation_ db srvId = + DB.execute + db + [sql| + UPDATE subscriptions s + SET status = ?, ntf_service_assoc = FALSE + WHERE smp_server_id = ? + AND (s.status != ? OR s.ntf_service_assoc != FALSE) + |] + (NSInactive, srvId, NSInactive) + +removeServiceAndAssociations :: NtfPostgresStore -> SMPServer -> IO (Either ErrorType (Int64, Int)) +removeServiceAndAssociations st srv = do + withDB "removeServiceAndAssociations" st $ \db -> runExceptT $ do + srvId <- ExceptT $ getServerId db + subsCount <- liftIO $ removeServiceAssociation_ db srvId + liftIO $ removeServerService db srvId + pure (srvId, fromIntegral subsCount) where - removeServerService db = + getServerId db = firstRow fromOnly AUTH $ DB.query db [sql| - UPDATE smp_servers - SET ntf_service_id = NULL + SELECT smp_server_id + FROM smp_servers WHERE smp_host = ? AND smp_port = ? AND smp_keyhash = ? - RETURNING smp_server_id + FOR UPDATE |] (srvToRow srv) + removeServerService db srvId = + DB.execute + db + [sql| + UPDATE smp_servers + SET ntf_service_id = NULL, + ntf_service_cert = NULL, + ntf_service_cert_hash = NULL, + ntf_service_priv_key = NULL, + smp_notifier_count = 0, + smp_notifier_ids_hash = DEFAULT + WHERE smp_server_id = ? 
+ |] + (Only srvId) addTokenLastNtf :: NtfPostgresStore -> PNMessageData -> IO (Either ErrorType (NtfTknRec, NonEmpty PNMessageData)) addTokenLastNtf st newNtf = @@ -646,216 +695,6 @@ getEntityCounts st = count (Only n : _) = n count [] = 0 -importNtfSTMStore :: NtfPostgresStore -> NtfSTMStore -> S.Set NtfTokenId -> IO (Int64, Int64, Int64, Int64) -importNtfSTMStore NtfPostgresStore {dbStore = s} stmStore skipTokens = do - (tIds, tCnt) <- importTokens - subLookup <- readTVarIO $ subscriptionLookup stmStore - sCnt <- importSubscriptions tIds subLookup - nCnt <- importLastNtfs tIds subLookup - serviceCnt <- importNtfServiceIds - pure (tCnt, sCnt, nCnt, serviceCnt) - where - importTokens = do - allTokens <- M.elems <$> readTVarIO (tokens stmStore) - tokens <- filterTokens allTokens - let skipped = length allTokens - length tokens - when (skipped /= 0) $ putStrLn $ "Total skipped tokens " <> show skipped - -- uncomment this line instead of the next two to import tokens one by one. - -- tCnt <- withConnection s $ \db -> foldM (importTkn db) 0 tokens - -- token interval is reset to 0 to only send notifications to devices with periodic mode, - -- and before clients are upgraded - to all active devices. - tRows <- mapM (fmap (ntfTknToRow . (\t -> t {tknCronInterval = 0} :: NtfTknRec)) . mkTknRec) tokens - tCnt <- withConnection s $ \db -> DB.executeMany db insertNtfTknQuery tRows - let tokenIds = S.fromList $ map (\NtfTknData {ntfTknId} -> ntfTknId) tokens - (tokenIds,) <$> checkCount "token" (length tokens) tCnt - where - filterTokens tokens = do - let deviceTokens = foldl' (\m t -> M.alter (Just . (t :) . 
fromMaybe []) (tokenKey t) m) M.empty tokens - tokenSubs <- readTVarIO (tokenSubscriptions stmStore) - filterM (keepTokenRegistration deviceTokens tokenSubs) tokens - tokenKey NtfTknData {token, tknVerifyKey} = strEncode token <> ":" <> C.toPubKey C.pubKeyBytes tknVerifyKey - keepTokenRegistration deviceTokens tokenSubs tkn@NtfTknData {ntfTknId, tknStatus} = - case M.lookup (tokenKey tkn) deviceTokens of - Just ts - | length ts < 2 -> pure True - | ntfTknId `S.member` skipTokens -> False <$ putStrLn ("Skipped token " <> enc ntfTknId <> " from --skip-tokens") - | otherwise -> - readTVarIO tknStatus >>= \case - NTConfirmed -> do - hasSubs <- maybe (pure False) (\v -> not . S.null <$> readTVarIO v) $ M.lookup ntfTknId tokenSubs - if hasSubs - then pure True - else do - anyBetterToken <- anyM $ map (\NtfTknData {tknStatus = tknStatus'} -> activeOrInvalid <$> readTVarIO tknStatus') ts - if anyBetterToken - then False <$ putStrLn ("Skipped duplicate inactive token " <> enc ntfTknId) - else case findIndex (\NtfTknData {ntfTknId = tId} -> tId == ntfTknId) ts of - Just 0 -> pure True -- keeping the first token - Just _ -> False <$ putStrLn ("Skipped duplicate inactive token " <> enc ntfTknId <> " (no active token)") - Nothing -> True <$ putStrLn "Error: no device token in the list" - _ -> pure True - Nothing -> True <$ putStrLn "Error: no device token in lookup map" - activeOrInvalid = \case - NTActive -> True - NTInvalid _ -> True - _ -> False - -- importTkn db !n tkn@NtfTknData {ntfTknId} = do - -- tknRow <- ntfTknToRow <$> mkTknRec tkn - -- (DB.execute db insertNtfTknQuery tknRow >>= pure . (n + )) `E.catch` \(e :: E.SomeException) -> - -- putStrLn ("Error inserting token " <> enc ntfTknId <> " " <> show e) $> n - importSubscriptions :: S.Set NtfTokenId -> M.Map SMPQueueNtf NtfSubscriptionId -> IO Int64 - importSubscriptions tIds subLookup = do - subs <- filterSubs . 
M.elems =<< readTVarIO (subscriptions stmStore) - srvIds <- importServers subs - putStrLn $ "Importing " <> show (length subs) <> " subscriptions..." - -- uncomment this line instead of the next to import subs one by one. - -- (sCnt, errTkns) <- withConnection s $ \db -> foldM (importSub db srvIds) (0, M.empty) subs - sCnt <- foldM (importSubs srvIds) 0 $ toChunks 500000 subs - checkCount "subscription" (length subs) sCnt - where - filterSubs allSubs = do - let subs = filter (\NtfSubData {tokenId} -> S.member tokenId tIds) allSubs - skipped = length allSubs - length subs - when (skipped /= 0) $ putStrLn $ "Skipped " <> show skipped <> " subscriptions of missing tokens" - let (removedSubTokens, removeSubs, dupQueues) = foldl' addSubToken (S.empty, S.empty, S.empty) subs - unless (null removeSubs) $ putStrLn $ "Skipped " <> show (S.size removeSubs) <> " duplicate subscriptions of " <> show (S.size removedSubTokens) <> " tokens for " <> show (S.size dupQueues) <> " queues" - pure $ filter (\NtfSubData {ntfSubId} -> S.notMember ntfSubId removeSubs) subs - where - addSubToken acc@(!stIds, !sIds, !qs) NtfSubData {ntfSubId, smpQueue, tokenId} = - case M.lookup smpQueue subLookup of - Just sId | sId /= ntfSubId -> - (S.insert tokenId stIds, S.insert ntfSubId sIds, S.insert smpQueue qs) - _ -> acc - importSubs srvIds !n subs = do - rows <- mapM (ntfSubRow srvIds) subs - cnt <- withConnection s $ \db -> DB.executeMany db insertNtfSubQuery $ L.toList rows - let n' = n + cnt - putStr $ "Imported " <> show n' <> " subscriptions" <> "\r" - hFlush stdout - pure n' - -- importSub db srvIds (!n, !errTkns) sub@NtfSubData {ntfSubId = sId, tokenId} = do - -- subRow <- ntfSubRow srvIds sub - -- E.try (DB.execute db insertNtfSubQuery subRow) >>= \case - -- Right i -> do - -- let n' = n + i - -- when (n' `mod` 100000 == 0) $ do - -- putStr $ "Imported " <> show n' <> " subscriptions" <> "\r" - -- hFlush stdout - -- pure (n', errTkns) - -- Left (e :: E.SomeException) -> do - -- when (n 
`mod` 100000 == 0) $ putStrLn "" - -- putStrLn $ "Error inserting subscription " <> enc sId <> " for token " <> enc tokenId <> " " <> show e - -- pure (n, M.alter (Just . maybe [sId] (sId :)) tokenId errTkns) - ntfSubRow srvIds sub = case M.lookup srv srvIds of - Just sId -> ntfSubToRow sId <$> mkSubRec sub - Nothing -> E.throwIO $ userError $ "no matching server ID for server " <> show srv - where - srv = ntfSubServer sub - importServers subs = do - sIds <- withConnection s $ \db -> map fromOnly <$> DB.returning db srvQuery (map srvToRow srvs) - void $ checkCount "server" (length srvs) (length sIds) - pure $ M.fromList $ zip srvs sIds - where - srvQuery = "INSERT INTO smp_servers (smp_host, smp_port, smp_keyhash) VALUES (?, ?, ?) RETURNING smp_server_id" - srvs = nubOrd $ map ntfSubServer subs - importLastNtfs :: S.Set NtfTokenId -> M.Map SMPQueueNtf NtfSubscriptionId -> IO Int64 - importLastNtfs tIds subLookup = do - ntfs <- readTVarIO (tokenLastNtfs stmStore) - ntfRows <- filterLastNtfRows ntfs - nCnt <- withConnection s $ \db -> DB.executeMany db lastNtfQuery ntfRows - checkCount "last notification" (length ntfRows) nCnt - where - lastNtfQuery = "INSERT INTO last_notifications(token_id, subscription_id, sent_at, nmsg_nonce, nmsg_data) VALUES (?,?,?,?,?)" - filterLastNtfRows ntfs = do - (skippedTkns, ntfCnt, (skippedQueues, ntfRows)) <- foldM lastNtfRows (S.empty, 0, (S.empty, [])) $ M.assocs ntfs - let skipped = ntfCnt - length ntfRows - when (skipped /= 0) $ putStrLn $ "Skipped last notifications " <> show skipped <> " for " <> show (S.size skippedTkns) <> " missing tokens and " <> show (S.size skippedQueues) <> " missing subscriptions with token present" - pure ntfRows - lastNtfRows (!stIds, !cnt, !acc) (tId, ntfVar) = do - ntfs <- L.toList <$> readTVarIO ntfVar - let cnt' = cnt + length ntfs - pure $ - if S.member tId tIds - then (stIds, cnt', foldl' ntfRow acc ntfs) - else (S.insert tId stIds, cnt', acc) - where - ntfRow (!qs, !rows) PNMessageData 
{smpQueue, ntfTs, nmsgNonce, encNMsgMeta} = case M.lookup smpQueue subLookup of - Just ntfSubId -> - let row = (tId, ntfSubId, systemToUTCTime ntfTs, nmsgNonce, Binary encNMsgMeta) - in (qs, row : rows) - Nothing -> (S.insert smpQueue qs, rows) - importNtfServiceIds = do - ss <- M.assocs <$> readTVarIO (ntfServices stmStore) - withConnection s $ \db -> DB.executeMany db serviceQuery $ map serviceToRow ss - where - serviceQuery = - [sql| - INSERT INTO smp_servers (smp_host, smp_port, smp_keyhash, ntf_service_id) - VALUES (?, ?, ?, ?) - ON CONFLICT (smp_host, smp_port, smp_keyhash) - DO UPDATE SET ntf_service_id = EXCLUDED.ntf_service_id - |] - serviceToRow (srv, serviceId) = srvToRow srv :. Only serviceId - checkCount name expected inserted - | fromIntegral expected == inserted = do - putStrLn $ "Imported " <> show inserted <> " " <> name <> "s." - pure inserted - | otherwise = do - putStrLn $ "Incorrect " <> name <> " count: expected " <> show expected <> ", imported " <> show inserted - putStrLn "Import aborted, fix data and repeat" - exitFailure - enc = B.unpack . B64.encode . 
unEntityId - -exportNtfDbStore :: NtfPostgresStore -> FilePath -> IO (Int, Int, Int) -exportNtfDbStore NtfPostgresStore {dbStoreLog = Nothing} _ = - putStrLn "Internal error: export requires store log" >> exitFailure -exportNtfDbStore NtfPostgresStore {dbStore = s, dbStoreLog = Just sl} lastNtfsFile = - (,,) <$> exportTokens <*> exportSubscriptions <*> exportLastNtfs - where - exportTokens = do - tCnt <- withConnection s $ \db -> DB.fold_ db ntfTknQuery 0 $ \ !i tkn -> - logCreateToken sl (rowToNtfTkn tkn) $> (i + 1) - putStrLn $ "Exported " <> show tCnt <> " tokens" - pure tCnt - exportSubscriptions = do - sCnt <- withConnection s $ \db -> DB.fold_ db ntfSubQuery 0 $ \ !i sub -> do - let i' = i + 1 - logCreateSubscription sl (toNtfSub sub) - when (i' `mod` 500000 == 0) $ do - putStr $ "Exported " <> show i' <> " subscriptions" <> "\r" - hFlush stdout - pure i' - putStrLn $ "Exported " <> show sCnt <> " subscriptions" - pure sCnt - where - ntfSubQuery = - [sql| - SELECT s.token_id, s.subscription_id, s.smp_notifier_key, s.status, s.ntf_service_assoc, - p.smp_host, p.smp_port, p.smp_keyhash, s.smp_notifier_id - FROM subscriptions s - JOIN smp_servers p ON p.smp_server_id = s.smp_server_id - |] - toNtfSub :: Only NtfTokenId :. NtfSubRow :. SMPQueueNtfRow -> NtfSubRec - toNtfSub (Only tokenId :. (ntfSubId, notifierKey, subStatus, ntfServiceAssoc) :. qRow) = - let smpQueue = rowToSMPQueue qRow - in NtfSubRec {ntfSubId, tokenId, smpQueue, notifierKey, subStatus, ntfServiceAssoc} - exportLastNtfs = - withFile lastNtfsFile WriteMode $ \h -> - withConnection s $ \db -> DB.fold_ db lastNtfsQuery 0 $ \ !i (Only tknId :. 
ntfRow) -> - B.hPutStr h (encodeLastNtf tknId $ toLastNtf ntfRow) $> (i + 1) - where - -- Note that the order here is ascending, to be compatible with how it is imported - lastNtfsQuery = - [sql| - SELECT s.token_id, p.smp_host, p.smp_port, p.smp_keyhash, s.smp_notifier_id, - n.sent_at, n.nmsg_nonce, n.nmsg_data - FROM last_notifications n - JOIN subscriptions s ON s.subscription_id = n.subscription_id - JOIN smp_servers p ON p.smp_server_id = s.smp_server_id - ORDER BY token_ntf_id ASC - |] - encodeLastNtf tknId ntf = strEncode (TNMRv1 tknId ntf) `B.snoc` '\n' - withFastDB' :: Text -> NtfPostgresStore -> (DB.Connection -> IO a) -> IO (Either ErrorType a) withFastDB' op st action = withFastDB op st $ fmap Right . action {-# INLINE withFastDB' #-} @@ -881,9 +720,12 @@ withDB_ op st priority action = where err = op <> ", withDB, " <> tshow e -withLog :: MonadIO m => Text -> NtfPostgresStore -> (StoreLog 'WriteMode -> IO ()) -> m () -withLog op NtfPostgresStore {dbStoreLog} = withLog_ op dbStoreLog -{-# INLINE withLog #-} +withClientDB :: Text -> NtfPostgresStore -> (DB.Connection -> IO a) -> IO (Either SMPClientError a) +withClientDB op st action = + E.uninterruptibleMask_ $ E.try (withTransaction (dbStore st) action) >>= bimapM logErr pure + where + logErr :: E.SomeException -> IO SMPClientError + logErr e = logError ("STORE: " <> op <> ", withDB, " <> tshow e) $> PCEIOError (E.displayException e) assertUpdated :: Int64 -> Either ErrorType () assertUpdated 0 = Left AUTH @@ -921,4 +763,9 @@ instance ToField C.KeyHash where toField = toField . Binary . strEncode instance FromField C.CbNonce where fromField = blobFieldDecoder $ parseAll smpP instance ToField C.CbNonce where toField = toField . Binary . smpEncode + +instance ToField X.PrivKey where toField = toField . Binary . 
C.encodeASNObj + +instance FromField X.PrivKey where + fromField = blobFieldDecoder $ C.decodeASNKey >=> \case (pk, []) -> Right pk; r -> C.asnKeyError r #endif diff --git a/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql b/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql index b739956840..801208aaa8 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql +++ b/src/Simplex/Messaging/Notifications/Server/Store/ntf_server_schema.sql @@ -172,7 +172,10 @@ CREATE TABLE ntf_server.smp_servers ( smp_keyhash bytea NOT NULL, ntf_service_id bytea, smp_notifier_count bigint DEFAULT 0 NOT NULL, - smp_notifier_ids_hash bytea DEFAULT '\x00000000000000000000000000000000'::bytea NOT NULL + smp_notifier_ids_hash bytea DEFAULT '\x00000000000000000000000000000000'::bytea NOT NULL, + ntf_service_cert bytea, + ntf_service_cert_hash bytea, + ntf_service_priv_key bytea ); diff --git a/src/Simplex/Messaging/Notifications/Server/StoreLog.hs b/src/Simplex/Messaging/Notifications/Server/StoreLog.hs deleted file mode 100644 index 7c71ddb086..0000000000 --- a/src/Simplex/Messaging/Notifications/Server/StoreLog.hs +++ /dev/null @@ -1,177 +0,0 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE StrictData #-} -{-# OPTIONS_GHC -fno-warn-ambiguous-fields #-} - -module Simplex.Messaging.Notifications.Server.StoreLog - ( StoreLog, - NtfStoreLogRecord (..), - readWriteNtfSTMStore, - logCreateToken, - logTokenStatus, - logUpdateToken, - logTokenCron, - logDeleteToken, - logUpdateTokenTime, - logCreateSubscription, - logSubscriptionStatus, - logDeleteSubscription, - closeStoreLog, - ) -where - -import Control.Applicative (optional, (<|>)) -import Control.Concurrent.STM -import Control.Monad -import qualified Data.Attoparsec.ByteString.Char8 as A -import qualified 
Data.ByteString.Base64.URL as B64 -import qualified Data.ByteString.Char8 as B -import Data.Functor (($>)) -import qualified Data.Map.Strict as M -import Data.Maybe (fromMaybe) -import Data.Word (Word16) -import Simplex.Messaging.Encoding.String -import Simplex.Messaging.Notifications.Protocol -import Simplex.Messaging.Notifications.Server.Store -import Simplex.Messaging.Notifications.Server.Store.Types -import Simplex.Messaging.Protocol (EntityId (..), SMPServer, ServiceId) -import Simplex.Messaging.Server.StoreLog -import Simplex.Messaging.SystemTime -import System.IO - -data NtfStoreLogRecord - = CreateToken NtfTknRec - | TokenStatus NtfTokenId NtfTknStatus - | UpdateToken NtfTokenId DeviceToken NtfRegCode - | TokenCron NtfTokenId Word16 - | DeleteToken NtfTokenId - | UpdateTokenTime NtfTokenId SystemDate - | CreateSubscription NtfSubRec - | SubscriptionStatus NtfSubscriptionId NtfSubStatus NtfAssociatedService - | DeleteSubscription NtfSubscriptionId - | SetNtfService SMPServer (Maybe ServiceId) - deriving (Show) - -instance StrEncoding NtfStoreLogRecord where - strEncode = \case - CreateToken tknRec -> strEncode (Str "TCREATE", tknRec) - TokenStatus tknId tknStatus -> strEncode (Str "TSTATUS", tknId, tknStatus) - UpdateToken tknId token regCode -> strEncode (Str "TUPDATE", tknId, token, regCode) - TokenCron tknId cronInt -> strEncode (Str "TCRON", tknId, cronInt) - DeleteToken tknId -> strEncode (Str "TDELETE", tknId) - UpdateTokenTime tknId ts -> strEncode (Str "TTIME", tknId, ts) - CreateSubscription subRec -> strEncode (Str "SCREATE", subRec) - SubscriptionStatus subId subStatus serviceAssoc -> strEncode (Str "SSTATUS", subId, subStatus) <> serviceStr - where - serviceStr = if serviceAssoc then " service=" <> strEncode True else "" - DeleteSubscription subId -> strEncode (Str "SDELETE", subId) - SetNtfService srv serviceId -> strEncode (Str "SERVICE", srv) <> " service=" <> maybe "off" strEncode serviceId - strP = - A.choice - [ "TCREATE " *> (CreateToken 
<$> strP), - "TSTATUS " *> (TokenStatus <$> strP_ <*> strP), - "TUPDATE " *> (UpdateToken <$> strP_ <*> strP_ <*> strP), - "TCRON " *> (TokenCron <$> strP_ <*> strP), - "TDELETE " *> (DeleteToken <$> strP), - "TTIME " *> (UpdateTokenTime <$> strP_ <*> strP), - "SCREATE " *> (CreateSubscription <$> strP), - "SSTATUS " *> (SubscriptionStatus <$> strP_ <*> strP <*> (fromMaybe False <$> optional (" service=" *> strP))), - "SDELETE " *> (DeleteSubscription <$> strP), - "SERVICE " *> (SetNtfService <$> strP <* " service=" <*> ("off" $> Nothing <|> strP)) - ] - -logNtfStoreRecord :: StoreLog 'WriteMode -> NtfStoreLogRecord -> IO () -logNtfStoreRecord = writeStoreLogRecord -{-# INLINE logNtfStoreRecord #-} - -logCreateToken :: StoreLog 'WriteMode -> NtfTknRec -> IO () -logCreateToken s = logNtfStoreRecord s . CreateToken - -logTokenStatus :: StoreLog 'WriteMode -> NtfTokenId -> NtfTknStatus -> IO () -logTokenStatus s tknId tknStatus = logNtfStoreRecord s $ TokenStatus tknId tknStatus - -logUpdateToken :: StoreLog 'WriteMode -> NtfTokenId -> DeviceToken -> NtfRegCode -> IO () -logUpdateToken s tknId token regCode = logNtfStoreRecord s $ UpdateToken tknId token regCode - -logTokenCron :: StoreLog 'WriteMode -> NtfTokenId -> Word16 -> IO () -logTokenCron s tknId cronInt = logNtfStoreRecord s $ TokenCron tknId cronInt - -logDeleteToken :: StoreLog 'WriteMode -> NtfTokenId -> IO () -logDeleteToken s tknId = logNtfStoreRecord s $ DeleteToken tknId - -logUpdateTokenTime :: StoreLog 'WriteMode -> NtfTokenId -> SystemDate -> IO () -logUpdateTokenTime s tknId t = logNtfStoreRecord s $ UpdateTokenTime tknId t - -logCreateSubscription :: StoreLog 'WriteMode -> NtfSubRec -> IO () -logCreateSubscription s = logNtfStoreRecord s . 
CreateSubscription - -logSubscriptionStatus :: StoreLog 'WriteMode -> (NtfSubscriptionId, NtfSubStatus, NtfAssociatedService) -> IO () -logSubscriptionStatus s (subId, subStatus, serviceAssoc) = logNtfStoreRecord s $ SubscriptionStatus subId subStatus serviceAssoc - -logDeleteSubscription :: StoreLog 'WriteMode -> NtfSubscriptionId -> IO () -logDeleteSubscription s subId = logNtfStoreRecord s $ DeleteSubscription subId - -logSetNtfService :: StoreLog 'WriteMode -> SMPServer -> Maybe ServiceId -> IO () -logSetNtfService s srv serviceId = logNtfStoreRecord s $ SetNtfService srv serviceId - -readWriteNtfSTMStore :: Bool -> FilePath -> NtfSTMStore -> IO (StoreLog 'WriteMode) -readWriteNtfSTMStore tty = readWriteStoreLog (readNtfStore tty) writeNtfStore - -readNtfStore :: Bool -> FilePath -> NtfSTMStore -> IO () -readNtfStore tty f st = readLogLines tty f $ \_ -> processLine - where - processLine s = either printError procNtfLogRecord (strDecode s) - where - printError e = B.putStrLn $ "Error parsing log: " <> B.pack e <> " - " <> B.take 100 s - procNtfLogRecord = \case - CreateToken r@NtfTknRec {ntfTknId} -> do - tkn <- mkTknData r - atomically $ stmAddNtfToken st ntfTknId tkn - TokenStatus tknId status -> do - tkn_ <- stmGetNtfTokenIO st tknId - forM_ tkn_ $ \tkn@NtfTknData {tknStatus} -> do - atomically $ writeTVar tknStatus status - when (status == NTActive) $ void $ atomically $ stmRemoveInactiveTokenRegistrations st tkn - UpdateToken tknId token' tknRegCode -> do - stmGetNtfTokenIO st tknId - >>= mapM_ - ( \tkn@NtfTknData {tknStatus} -> do - atomically $ stmRemoveTokenRegistration st tkn - atomically $ writeTVar tknStatus NTRegistered - atomically $ stmAddNtfToken st tknId tkn {token = token', tknRegCode} - ) - TokenCron tknId cronInt -> - stmGetNtfTokenIO st tknId - >>= mapM_ (\NtfTknData {tknCronInterval} -> atomically $ writeTVar tknCronInterval cronInt) - DeleteToken tknId -> - atomically $ void $ stmDeleteNtfToken st tknId - UpdateTokenTime tknId t -> - 
stmGetNtfTokenIO st tknId - >>= mapM_ (\NtfTknData {tknUpdatedAt} -> atomically $ writeTVar tknUpdatedAt $ Just t) - CreateSubscription r@NtfSubRec {tokenId, ntfSubId} -> do - sub <- mkSubData r - atomically (stmAddNtfSubscription st ntfSubId sub) >>= \case - Just () -> pure () - Nothing -> B.putStrLn $ "Warning: no token " <> enc tokenId <> ", subscription " <> enc ntfSubId - where - enc = B64.encode . unEntityId - SubscriptionStatus subId status serviceAssoc -> do - stmGetNtfSubscriptionIO st subId >>= mapM_ update - where - update NtfSubData {subStatus, ntfServiceAssoc} = atomically $ do - writeTVar subStatus status - writeTVar ntfServiceAssoc serviceAssoc - DeleteSubscription subId -> - atomically $ stmDeleteNtfSubscription st subId - SetNtfService srv serviceId -> - atomically $ stmSetNtfService st srv serviceId - -writeNtfStore :: StoreLog 'WriteMode -> NtfSTMStore -> IO () -writeNtfStore s NtfSTMStore {tokens, subscriptions, ntfServices} = do - mapM_ (logCreateToken s <=< mkTknRec) =<< readTVarIO tokens - mapM_ (logCreateSubscription s <=< mkSubRec) =<< readTVarIO subscriptions - mapM_ (\(srv, serviceId) -> logSetNtfService s srv $ Just serviceId) . M.assocs =<< readTVarIO ntfServices diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 24247e781c..21b03f3cfd 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -46,6 +46,7 @@ module Simplex.Messaging.Server where import Control.Concurrent.STM (throwSTM) +import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad import Control.Monad.Except @@ -1385,7 +1386,7 @@ client Just r -> Just <$> proxyServerResponse a r Nothing -> forkProxiedCmd $ - liftIO (runExceptT (getSMPServerClient'' a srv) `catch` (pure . Left . 
PCEIOError)) + liftIO (runExceptT (getSMPServerClient'' a srv) `E.catch` (\(e :: SomeException) -> pure $ Left $ PCEIOError $ E.displayException e)) >>= proxyServerResponse a proxyServerResponse :: SMPClientAgent 'Sender -> Either SMPClientError (OwnServer, SMPClient) -> M s BrokerMsg proxyServerResponse a smp_ = do @@ -1422,7 +1423,7 @@ client inc own pRequests if v >= sendingProxySMPVersion then forkProxiedCmd $ do - liftIO (runExceptT (forwardSMPTransmission smp corrId fwdV pubKey encBlock) `catch` (pure . Left . PCEIOError)) >>= \case + liftIO (runExceptT (forwardSMPTransmission smp corrId fwdV pubKey encBlock) `E.catch` (\(e :: SomeException) -> pure $ Left $ PCEIOError $ E.displayException e)) >>= \case Right r -> PRES r <$ inc own pSuccesses Left e -> ERR (smpProxyError e) <$ case e of PCEProtocolError {} -> inc own pSuccesses diff --git a/src/Simplex/Messaging/Server/Env/STM.hs b/src/Simplex/Messaging/Server/Env/STM.hs index e59cd5c0bd..574111c15e 100644 --- a/src/Simplex/Messaging/Server/Env/STM.hs +++ b/src/Simplex/Messaging/Server/Env/STM.hs @@ -706,7 +706,7 @@ mkJournalStoreConfig queueStoreCfg storePath msgQueueQuota maxJournalMsgCount ma newSMPProxyAgent :: SMPClientAgentConfig -> TVar ChaChaDRG -> IO ProxyAgent newSMPProxyAgent smpAgentCfg random = do - smpAgent <- newSMPClientAgent SSender smpAgentCfg random + smpAgent <- newSMPClientAgent SSender smpAgentCfg Nothing random pure ProxyAgent {smpAgent} readWriteQueueStore :: forall q. 
StoreQueueClass q => Bool -> (RecipientId -> QueueRec -> IO q) -> FilePath -> STMQueueStore q -> IO (StoreLog 'WriteMode) diff --git a/src/Simplex/Messaging/Transport/HTTP2/Client.hs b/src/Simplex/Messaging/Transport/HTTP2/Client.hs index 91a8bf0e53..e805fa86c4 100644 --- a/src/Simplex/Messaging/Transport/HTTP2/Client.hs +++ b/src/Simplex/Messaging/Transport/HTTP2/Client.hs @@ -11,7 +11,6 @@ module Simplex.Messaging.Transport.HTTP2.Client where import Control.Concurrent.Async -import Control.Exception (IOException, try) import qualified Control.Exception as E import Control.Monad import Data.Functor (($>)) @@ -90,7 +89,7 @@ defaultHTTP2ClientConfig = suportedTLSParams = http2TLSParams } -data HTTP2ClientError = HCResponseTimeout | HCNetworkError NetworkError | HCIOError IOException +data HTTP2ClientError = HCResponseTimeout | HCNetworkError NetworkError | HCIOError String deriving (Show) getHTTP2Client :: HostName -> ServiceName -> Maybe XS.CertificateStore -> HTTP2ClientConfig -> IO () -> IO (Either HTTP2ClientError HTTP2Client) @@ -111,7 +110,7 @@ attachHTTP2Client config host port disconnected bufferSize tls = getVerifiedHTTP getVerifiedHTTP2ClientWith :: forall p. TransportPeerI p => HTTP2ClientConfig -> TransportHost -> ServiceName -> IO () -> ((TLS p -> H.Client HTTP2Response) -> IO HTTP2Response) -> IO (Either HTTP2ClientError HTTP2Client) getVerifiedHTTP2ClientWith config host port disconnected setup = (mkHTTPS2Client >>= runClient) - `E.catch` \(e :: IOException) -> pure . 
Left $ HCIOError e + `E.catch` \(e :: E.SomeException) -> pure $ Left $ HCIOError $ E.displayException e where mkHTTPS2Client :: IO HClient mkHTTPS2Client = do @@ -177,9 +176,9 @@ sendRequest HTTP2Client {client_ = HClient {config, reqQ}} req reqTimeout_ = do sendRequestDirect :: HTTP2Client -> Request -> Maybe Int -> IO (Either HTTP2ClientError HTTP2Response) sendRequestDirect HTTP2Client {client_ = HClient {config, disconnected}, sendReq} req reqTimeout_ = do let reqTimeout = http2RequestTimeout config reqTimeout_ - reqTimeout `timeout` try (sendReq req process) >>= \case + reqTimeout `timeout` E.try (sendReq req process) >>= \case Just (Right r) -> pure $ Right r - Just (Left e) -> disconnected $> Left (HCIOError e) + Just (Left (e :: E.SomeException)) -> disconnected $> Left (HCIOError $ E.displayException e) Nothing -> pure $ Left HCResponseTimeout where process r = do diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 34448fc104..18cdfd1fa7 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -3677,6 +3677,7 @@ testClientServiceConnection ps = do exchangeGreetings service uId user sId pure conns withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> runRight $ do + liftIO $ threadDelay 250000 [(_, Right (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash)))] <- M.toList <$> subscribeClientServices service 1 ("", "", SERVICE_ALL _) <- nGet service subscribeConnection user sId @@ -3684,6 +3685,7 @@ testClientServiceConnection ps = do pure (conns, qIdHash) (uId', sId') <- withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + liftIO $ threadDelay 250000 subscribeAllConnections service False Nothing liftIO $ getInAnyOrder service [ \case ("", "", AEvt SAENone (SERVICE_UP _ 
(SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 1 qIdHash')))) -> qIdHash' == qIdHash; _ -> False, @@ -3708,6 +3710,7 @@ testClientServiceConnection ps = do pure conns' withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + liftIO $ threadDelay 250000 subscribeAllConnections service False Nothing liftIO $ getInAnyOrder service [ \case ("", "", AEvt SAENone (SERVICE_UP _ (SMP.ServiceSubResult Nothing (SMP.ServiceSub _ 2 _)))) -> True; _ -> False, From bafdbc1dec778021eacbf621f1467ca78287d2a4 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 25 Dec 2025 13:00:29 +0000 Subject: [PATCH 09/91] smp protocol: fix encoding for SOKS/ENDS responses (#1683) --- src/Simplex/Messaging/Protocol.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index 4993aaac81..25b8ce357f 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -1948,7 +1948,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where e :: Encoding a => a -> ByteString e = smpEncode serviceResp tag n idsHash - | v >= serviceCertsSMPVersion = e (tag, ' ', n, idsHash) + | v >= rcvServiceSMPVersion = e (tag, ' ', n, idsHash) | otherwise = e (tag, ' ', n) protocolP v = \case @@ -1993,7 +1993,7 @@ instance ProtocolEncoding SMPVersion ErrorType BrokerMsg where PONG_ -> pure PONG where serviceRespP resp - | v >= serviceCertsSMPVersion = resp <$> _smpP <*> smpP + | v >= rcvServiceSMPVersion = resp <$> _smpP <*> smpP | otherwise = resp <$> _smpP <*> pure mempty fromProtocolError = \case From db4b27e88a95af5b295d393b4c4483ffd220fafb Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sat, 27 Dec 2025 09:12:22 +0000 Subject: [PATCH 10/91] agent: create user with option to enable client service (#1684) * agent: create user with option to enable client service * handle HTTP2 errors 
* do not catch async exceptions --- src/Simplex/FileTransfer/Client.hs | 16 ++++++------- src/Simplex/Messaging/Agent.hs | 23 +++++++++++++------ src/Simplex/Messaging/Client.hs | 12 ++++++++-- src/Simplex/Messaging/Client/Agent.hs | 6 ++--- .../Notifications/Server/Store/Postgres.hs | 2 +- src/Simplex/Messaging/Server.hs | 6 ++--- .../Messaging/Transport/HTTP2/Client.hs | 14 ++++++++--- tests/AgentTests/FunctionalAPITests.hs | 10 ++++---- 8 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/Simplex/FileTransfer/Client.hs b/src/Simplex/FileTransfer/Client.hs index a425138e5f..d8ed04bc86 100644 --- a/src/Simplex/FileTransfer/Client.hs +++ b/src/Simplex/FileTransfer/Client.hs @@ -47,6 +47,7 @@ import Simplex.Messaging.Client transportClientConfig, clientSocksCredentials, unexpectedResponse, + clientHandlers, useWebPort, ) import qualified Simplex.Messaging.Crypto as C @@ -61,7 +62,6 @@ import Simplex.Messaging.Protocol SenderId, pattern NoEntity, NetworkError (..), - toNetworkError, ) import Simplex.Messaging.Transport (ALPN, CertChainPubKey (..), HandshakeError (..), THandleAuth (..), THandleParams (..), TransportError (..), TransportPeer (..), defaultSupportedParams) import Simplex.Messaging.Transport.Client (TransportClientConfig (..), TransportHost) @@ -70,8 +70,10 @@ import Simplex.Messaging.Transport.HTTP2.Client import Simplex.Messaging.Transport.HTTP2.File import Simplex.Messaging.Util (liftEitherWith, liftError', tshow, whenM) import Simplex.Messaging.Version -import UnliftIO +import System.IO (IOMode (..), SeekMode (..), hSeek, withFile) +import System.Timeout (timeout) import UnliftIO.Directory +import UnliftIO.STM data XFTPClient = XFTPClient { http2Client :: HTTP2Client, @@ -261,13 +263,11 @@ downloadXFTPChunk g c@XFTPClient {config} rpKey fId chunkSpec@XFTPRcvChunkSpec { let dhSecret = C.dh' sDhKey rpDhKey cbState <- liftEither . 
first PCECryptoError $ LC.cbInit dhSecret cbNonce let t = chunkTimeout config chunkSize - ExceptT (sequence <$> (t `timeout` (download cbState `catches` errors))) >>= maybe (throwE PCEResponseTimeout) pure + ExceptT (sequence <$> (t `timeout` (download cbState `E.catches` handlers))) >>= maybe (throwE PCEResponseTimeout) pure where - errors = - [ Handler $ \(e :: H.HTTP2Error) -> pure $ Left $ PCENetworkError $ NEConnectError $ displayException e, - Handler $ \(e :: IOException) -> pure $ Left $ PCEIOError $ E.displayException e, - Handler $ \(e :: SomeException) -> pure $ Left $ PCENetworkError $ toNetworkError e - ] + handlers = + E.Handler (\(e :: H.HTTP2Error) -> pure $ Left $ PCENetworkError $ NEConnectError $ E.displayException e) + : clientHandlers download cbState = runExceptT . withExceptT PCEResponseError $ receiveEncFile chunkPart cbState chunkSpec `catchError` \e -> diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index e17c39a165..4acf880dd2 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -337,8 +337,8 @@ resumeAgentClient :: AgentClient -> IO () resumeAgentClient c = atomically $ writeTVar (active c) True {-# INLINE resumeAgentClient #-} -createUser :: AgentClient -> NonEmpty (ServerCfg 'PSMP) -> NonEmpty (ServerCfg 'PXFTP) -> AE UserId -createUser c = withAgentEnv c .: createUser' c +createUser :: AgentClient -> Bool -> NonEmpty (ServerCfg 'PSMP) -> NonEmpty (ServerCfg 'PXFTP) -> AE UserId +createUser c = withAgentEnv c .:. 
createUser' c {-# INLINE createUser #-} -- | Delete user record optionally deleting all user's connections on SMP servers @@ -754,14 +754,23 @@ logConnection c connected = let event = if connected then "connected to" else "disconnected from" in logInfo $ T.unwords ["client", tshow (clientId c), event, "Agent"] -createUser' :: AgentClient -> NonEmpty (ServerCfg 'PSMP) -> NonEmpty (ServerCfg 'PXFTP) -> AM UserId -createUser' c smp xftp = do +createUser' :: AgentClient -> Bool -> NonEmpty (ServerCfg 'PSMP) -> NonEmpty (ServerCfg 'PXFTP) -> AM UserId +createUser' c useService smp xftp = do liftIO $ checkUserServers "createUser SMP" smp liftIO $ checkUserServers "createUser XFTP" xftp userId <- withStore' c createUserRecord - atomically $ TM.insert userId (mkUserServers smp) $ smpServers c - atomically $ TM.insert userId (mkUserServers xftp) $ xftpServers c - atomically $ TM.insert userId False $ useClientServices c + ok <- atomically $ do + (cfg, _) <- readTVar $ useNetworkConfig c + if useService && sessionMode cfg == TSMEntity + then pure False + else do + TM.insert userId (mkUserServers smp) $ smpServers c + TM.insert userId (mkUserServers xftp) $ xftpServers c + TM.insert userId useService $ useClientServices c + pure True + unless ok $ do + withStore c (`deleteUserRecord` userId) + throwE $ CMD PROHIBITED "createUser'" pure userId deleteUser' :: AgentClient -> UserId -> Bool -> AM () diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index ebc458c0e9..bfd45f3a19 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -107,6 +107,7 @@ module Simplex.Messaging.Client smpProxyError, smpErrorClientNotice, textToHostMode, + clientHandlers, ServerTransmissionBatch, ServerTransmission (..), ClientCommand, @@ -129,7 +130,7 @@ import Control.Applicative ((<|>)) import Control.Concurrent (ThreadId, forkFinally, forkIO, killThread, mkWeakThreadId) import Control.Concurrent.Async import Control.Concurrent.STM -import 
Control.Exception (Exception, SomeException) +import Control.Exception (Exception, Handler (..), IOException, SomeAsyncException, SomeException) import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad @@ -567,7 +568,7 @@ getProtocolClient g nm transportSession@(_, srv, _) cfg@ProtocolClientConfig {qS case chooseTransportHost networkConfig (host srv) of Right useHost -> (getCurrentTime >>= mkProtocolClient useHost >>= runClient useTransport useHost) - `E.catch` \(e :: SomeException) -> pure $ Left $ PCEIOError $ E.displayException e + `E.catches` clientHandlers Left e -> pure $ Left e where NetworkConfig {tcpConnectTimeout, tcpTimeout, smpPingInterval} = networkConfig @@ -719,6 +720,13 @@ getProtocolClient g nm transportSession@(_, srv, _) cfg@ProtocolClientConfig {qS Left e -> logError $ "SMP client error: " <> tshow e Right _ -> logWarn "SMP client unprocessed event" +clientHandlers :: [Handler (Either (ProtocolClientError e) a)] +clientHandlers = + [ Handler $ \(e :: IOException) -> pure $ Left $ PCEIOError $ E.displayException e, + Handler $ \(e :: SomeAsyncException) -> E.throwIO e, + Handler $ \(e :: SomeException) -> pure $ Left $ PCENetworkError $ toNetworkError e + ] + useWebPort :: NetworkConfig -> [HostName] -> ProtocolServer p -> Bool useWebPort cfg presetDomains ProtocolServer {host = h :| _} = case smpWebPortServers cfg of SWPAll -> True diff --git a/src/Simplex/Messaging/Client/Agent.hs b/src/Simplex/Messaging/Client/Agent.hs index 9739c19c78..d302ba2371 100644 --- a/src/Simplex/Messaging/Client/Agent.hs +++ b/src/Simplex/Messaging/Client/Agent.hs @@ -37,6 +37,7 @@ where import Control.Concurrent (forkIO) import Control.Concurrent.Async (Async, uninterruptibleCancel) import Control.Concurrent.STM (retry) +import qualified Control.Exception as E import Control.Logger.Simple import Control.Monad import Control.Monad.Except @@ -83,7 +84,6 @@ import Simplex.Messaging.Transport import Simplex.Messaging.Util (catchAll_, 
ifM, safeDecodeUtf8, toChunks, tshow, whenM, ($>>=), (<$$>)) import System.Timeout (timeout) import UnliftIO (async) -import qualified UnliftIO.Exception as E import UnliftIO.STM type SMPClientVar = SessionVar (Either (SMPClientError, Maybe UTCTime) (OwnServer, SMPClient)) @@ -226,7 +226,7 @@ getSMPServerClient'' ca@SMPClientAgent {agentCfg, smpClients, smpSessions, worke newSMPClient :: SMPClientVar -> IO (Either SMPClientError (OwnServer, SMPClient)) newSMPClient v = do - r <- connectClient ca srv v `E.catch` \(e :: E.SomeException) -> pure $ Left $ PCEIOError $ E.displayException e + r <- connectClient ca srv v `E.catches` clientHandlers case r of Right smp -> do logInfo . decodeUtf8 $ "Agent connected to " <> showServer srv @@ -324,7 +324,7 @@ reconnectClient ca@SMPClientAgent {active, agentCfg, smpSubWorkers, workerSeq} s (Just <$> getSessVar workerSeq srv smpSubWorkers ts) newSubWorker :: SessionVar (Async ()) -> IO () newSubWorker v = do - a <- async $ void (E.tryAny runSubWorker) >> atomically (cleanup v) + a <- async $ void (E.try @E.SomeException runSubWorker) >> atomically (cleanup v) atomically $ putTMVar (sessionVar v) a runSubWorker = withRetryInterval (reconnectInterval agentCfg) $ \_ loop -> do diff --git a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs index 80ab45ca1c..54668d45cc 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs @@ -586,7 +586,7 @@ removeServiceAndAssociations st srv = do withDB "removeServiceAndAssociations" st $ \db -> runExceptT $ do srvId <- ExceptT $ getServerId db subsCount <- liftIO $ removeServiceAssociation_ db srvId - liftIO $ removeServerService db srvId + liftIO $ void $ removeServerService db srvId pure (srvId, fromIntegral subsCount) where getServerId db = diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 
21b03f3cfd..3d977dc8c4 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -97,7 +97,7 @@ import Network.Socket (ServiceName, Socket, socketToHandle) import qualified Network.TLS as TLS import Numeric.Natural (Natural) import Simplex.Messaging.Agent.Lock -import Simplex.Messaging.Client (ProtocolClient (thParams), ProtocolClientError (..), SMPClient, SMPClientError, forwardSMPTransmission, smpProxyError, temporaryClientError) +import Simplex.Messaging.Client (ProtocolClient (thParams), ProtocolClientError (..), SMPClient, SMPClientError, clientHandlers, forwardSMPTransmission, smpProxyError, temporaryClientError) import Simplex.Messaging.Client.Agent (OwnServer, SMPClientAgent (..), SMPClientAgentEvent (..), closeSMPClientAgent, getSMPServerClient'', isOwnServer, lookupSMPServerClient, getConnectedSMPServerClient) import qualified Simplex.Messaging.Crypto as C import Simplex.Messaging.Encoding @@ -1386,7 +1386,7 @@ client Just r -> Just <$> proxyServerResponse a r Nothing -> forkProxiedCmd $ - liftIO (runExceptT (getSMPServerClient'' a srv) `E.catch` (\(e :: SomeException) -> pure $ Left $ PCEIOError $ E.displayException e)) + liftIO (runExceptT (getSMPServerClient'' a srv) `E.catches` clientHandlers) >>= proxyServerResponse a proxyServerResponse :: SMPClientAgent 'Sender -> Either SMPClientError (OwnServer, SMPClient) -> M s BrokerMsg proxyServerResponse a smp_ = do @@ -1423,7 +1423,7 @@ client inc own pRequests if v >= sendingProxySMPVersion then forkProxiedCmd $ do - liftIO (runExceptT (forwardSMPTransmission smp corrId fwdV pubKey encBlock) `E.catch` (\(e :: SomeException) -> pure $ Left $ PCEIOError $ E.displayException e)) >>= \case + liftIO (runExceptT (forwardSMPTransmission smp corrId fwdV pubKey encBlock) `E.catches` clientHandlers) >>= \case Right r -> PRES r <$ inc own pSuccesses Left e -> ERR (smpProxyError e) <$ case e of PCEProtocolError {} -> inc own pSuccesses diff --git 
a/src/Simplex/Messaging/Transport/HTTP2/Client.hs b/src/Simplex/Messaging/Transport/HTTP2/Client.hs index e805fa86c4..09a1089ea2 100644 --- a/src/Simplex/Messaging/Transport/HTTP2/Client.hs +++ b/src/Simplex/Messaging/Transport/HTTP2/Client.hs @@ -11,6 +11,7 @@ module Simplex.Messaging.Transport.HTTP2.Client where import Control.Concurrent.Async +import Control.Exception (Handler (..), IOException, SomeAsyncException, SomeException) import qualified Control.Exception as E import Control.Monad import Data.Functor (($>)) @@ -92,6 +93,13 @@ defaultHTTP2ClientConfig = data HTTP2ClientError = HCResponseTimeout | HCNetworkError NetworkError | HCIOError String deriving (Show) +httpClientHandlers :: [Handler (Either HTTP2ClientError a)] +httpClientHandlers = + [ Handler $ \(e :: IOException) -> pure $ Left $ HCIOError $ E.displayException e, + Handler $ \(e :: SomeAsyncException) -> E.throwIO e, + Handler $ \(e :: SomeException) -> pure $ Left $ HCNetworkError $ toNetworkError e + ] + getHTTP2Client :: HostName -> ServiceName -> Maybe XS.CertificateStore -> HTTP2ClientConfig -> IO () -> IO (Either HTTP2ClientError HTTP2Client) getHTTP2Client host port = getVerifiedHTTP2Client Nothing (THDomainName host) port Nothing @@ -110,7 +118,7 @@ attachHTTP2Client config host port disconnected bufferSize tls = getVerifiedHTTP getVerifiedHTTP2ClientWith :: forall p. 
TransportPeerI p => HTTP2ClientConfig -> TransportHost -> ServiceName -> IO () -> ((TLS p -> H.Client HTTP2Response) -> IO HTTP2Response) -> IO (Either HTTP2ClientError HTTP2Client) getVerifiedHTTP2ClientWith config host port disconnected setup = (mkHTTPS2Client >>= runClient) - `E.catch` \(e :: E.SomeException) -> pure $ Left $ HCIOError $ E.displayException e + `E.catches` httpClientHandlers where mkHTTPS2Client :: IO HClient mkHTTPS2Client = do @@ -176,9 +184,9 @@ sendRequest HTTP2Client {client_ = HClient {config, reqQ}} req reqTimeout_ = do sendRequestDirect :: HTTP2Client -> Request -> Maybe Int -> IO (Either HTTP2ClientError HTTP2Response) sendRequestDirect HTTP2Client {client_ = HClient {config, disconnected}, sendReq} req reqTimeout_ = do let reqTimeout = http2RequestTimeout config reqTimeout_ - reqTimeout `timeout` E.try (sendReq req process) >>= \case + reqTimeout `timeout` ((Right <$> sendReq req process) `E.catches` httpClientHandlers) >>= \case Just (Right r) -> pure $ Right r - Just (Left (e :: E.SomeException)) -> disconnected $> Left (HCIOError $ E.displayException e) + Just (Left e) -> disconnected $> Left e Nothing -> pure $ Left HCResponseTimeout where process r = do diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 18cdfd1fa7..62f0facd3f 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -1018,7 +1018,7 @@ testUpdateConnectionUserId :: HasCallStack => IO () testUpdateConnectionUserId = withAgentClients2 $ \alice bob -> runRight_ $ do (connId, qInfo) <- createConnection alice 1 True SCMInvitation Nothing SMSubscribe - newUserId <- createUser alice [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] + newUserId <- createUser alice False [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] _ <- changeConnectionUser alice 1 connId newUserId aliceId <- A.prepareConnectionToJoin bob 1 True qInfo PQSupportOn sqSecured' <- A.joinConnection bob 
NRMInteractive 1 aliceId True qInfo "bob's connInfo" PQSupportOn SMSubscribe @@ -3001,7 +3001,7 @@ testUsers = withAgentClients2 $ \a b -> runRight_ $ do (aId, bId) <- makeConnection a b exchangeGreetings a bId b aId - auId <- createUser a [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] + auId <- createUser a False [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] (aId', bId') <- makeConnectionForUsers a auId b 1 exchangeGreetings a bId' b aId' deleteUser a auId True @@ -3016,7 +3016,7 @@ testDeleteUserQuietly = withAgentClients2 $ \a b -> runRight_ $ do (aId, bId) <- makeConnection a b exchangeGreetings a bId b aId - auId <- createUser a [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] + auId <- createUser a False [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] (aId', bId') <- makeConnectionForUsers a auId b 1 exchangeGreetings a bId' b aId' deleteUser a auId False @@ -3028,7 +3028,7 @@ testUsersNoServer ps = withAgentClientsCfg2 aCfg agentCfg $ \a b -> do (aId, bId, auId, _aId', bId') <- withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do (aId, bId) <- makeConnection a b exchangeGreetings a bId b aId - auId <- createUser a [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] + auId <- createUser a False [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] (aId', bId') <- makeConnectionForUsers a auId b 1 exchangeGreetings a bId' b aId' pure (aId, bId, auId, aId', bId') @@ -3628,7 +3628,7 @@ testTwoUsers = withAgentClients2 $ \a b -> do ("", "", UP _ _) <- nGet a a `hasClients` 1 - aUserId2 <- createUser a [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] + aUserId2 <- createUser a False [noAuthSrvCfg testSMPServer] [noAuthSrvCfg testXFTPServer] (aId2, bId2) <- makeConnectionForUsers a aUserId2 b 1 exchangeGreetings a bId2 b aId2 (aId2', bId2') <- makeConnectionForUsers a aUserId2 b 1 From 502d92381729d5f42ec88fe07d54d0913b50b7da Mon Sep 17 00:00:00 2001 From: Evgeny Poberezkin Date: Sat, 17 Jan 
2026 10:21:25 +0000 Subject: [PATCH 11/91] agent: minor fixes --- src/Simplex/Messaging/Agent/Client.hs | 3 +-- tests/AgentTests/FunctionalAPITests.hs | 11 ++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index c42c0fa346..d8df98d1b2 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -1545,9 +1545,8 @@ processSubResults c tSess@(userId, srv, _) sessId serviceId_ rs = do (Map SMP.RecipientId SMPClientError, ([RcvQueueSub], [RcvQueueSub]), [(RcvQueueSub, Maybe ClientNotice)], Int) partitionResults pendingSubs (rq@RcvQueueSub {rcvId, clientNoticeId}, r) acc@(failed, subscribed@(qs, sQs), notices, ignored) = case r of Left e -> case smpErrorClientNotice e of - Just notice_ -> (failed', subscribed, (rq, notice_) : notices, ignored) + Just notice_ -> (failed', subscribed, notices', ignored) where - -- TODO [certs rcv] not used? notices' = if isJust notice_ || isJust clientNoticeId then (rq, notice_) : notices else notices Nothing | temporaryClientError e -> acc diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index 2aa5c0aca5..11548c9e9d 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -122,6 +122,8 @@ import XFTPClient (testXFTPServer) #if defined(dbPostgres) import Fixtures +import Simplex.Messaging.Agent.Store (RcvQueue, RcvQueueSub (..), ServiceAssoc) +import Simplex.Messaging.Agent.Store.AgentStore (deleteClientService, getSubscriptionService, getUserServerRcvQueueSubs, removeRcvServiceAssocs, setRcvServiceAssocs) #endif #if defined(dbServerPostgres) import qualified Database.PostgreSQL.Simple as PSQL @@ -786,7 +788,7 @@ runAgentClientStressTestOneWay n pqSupport sqSecured viaProxy alice bob baseId = msgId = subtract baseId . 
fst runAgentClientStressTestConc :: HasCallStack => Int64 -> PQSupport -> SndQueueSecured -> Bool -> AgentClient -> AgentClient -> AgentMsgId -> IO () -runAgentClientStressTestConc n pqSupport sqSecured viaProxy alice bob baseId = runRight_ $ do +runAgentClientStressTestConc n pqSupport sqSecured viaProxy alice bob _baseId = runRight_ $ do (aliceId, bobId) <- makeConnection_ pqSupport sqSecured alice bob amId <- newTVarIO 0 bmId <- newTVarIO 0 @@ -803,7 +805,6 @@ runAgentClientStressTestConc n pqSupport sqSecured viaProxy alice bob baseId = r liftIO $ noMessagesIngoreQCONT alice "nothing else should be delivered to alice" liftIO $ noMessagesIngoreQCONT bob "nothing else should be delivered to bob" where - msgId = subtract baseId . fst pqEnc = PQEncryption $ supportPQ pqSupport proxySrv = if viaProxy then Just testSMPServer else Nothing message i = "message " <> bshow i @@ -816,11 +817,11 @@ runAgentClientStressTestConc n pqSupport sqSecured viaProxy alice bob baseId = r timeout 100000 (get a) >>= mapM_ (\case ("", _, QCONT) -> drain; r -> expectationFailure $ "unexpected: " <> show r) loop (0, 0, 0, 0) = pure () - loop acc@(!s, !m, !r, !o) = + loop acc@(s, !m, !r, !o) = timeout 3000000 (get a) >>= \case Nothing -> error $ "timeout " <> show acc Just evt -> case evt of - ("", c, A.SENT mId srv) -> do + ("", c, A.SENT _mId srv) -> do liftIO $ c == bId && srv == proxySrv `shouldBe` True unless (s > 0) $ error "unexpected SENT" loop (s - 1, m, r, o) @@ -834,7 +835,7 @@ runAgentClientStressTestConc n pqSupport sqSecured viaProxy alice bob baseId = r ackMessageAsync a "123" bId mId (Just "") unless (m > 0) $ error "unexpected MSG" loop (s, m - 1, r, o) - ("", c, Rcvd' mId rcvdMsgId) -> do + ("", c, Rcvd' mId _rcvdMsgId) -> do liftIO $ (mId >) <$> atomically (swapTVar mIdVar mId) `shouldReturn` True liftIO $ c == bId `shouldBe` True ackMessageAsync a "123" bId mId Nothing From 84e8b72ca3fa55bc3e8c9535c8fda3500bdaac22 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 27 Jan 
2026 21:21:54 +0000 Subject: [PATCH 12/91] docs: update protocol (#1705) --- protocol/agent-protocol.md | 172 +++++++++++++++-- protocol/simplex-messaging.md | 351 ++++++++++++++++++++++++++++++---- protocol/xftp.md | 54 +++++- 3 files changed, 522 insertions(+), 55 deletions(-) diff --git a/protocol/agent-protocol.md b/protocol/agent-protocol.md index d8744f1286..f7da11c008 100644 --- a/protocol/agent-protocol.md +++ b/protocol/agent-protocol.md @@ -1,4 +1,4 @@ -Version 5, 2024-06-22 +Version 7, 2025-01-24 # SMP agent protocol - duplex communication over SMP protocol @@ -9,6 +9,7 @@ Version 5, 2024-06-22 - [SMP servers management](#smp-servers-management) - [SMP agent protocol scope](#smp-agent-protocol-scope) - [Duplex connection procedure](#duplex-connection-procedure) +- [Fast duplex connection procedure](#fast-duplex-connection-procedure) - [Contact addresses](#contact-addresses) - [Communication between SMP agents](#communication-between-smp-agents) - [Message syntax](#messages-between-smp-agents) @@ -20,6 +21,14 @@ Version 5, 2024-06-22 - [Rotating messaging queue](#rotating-messaging-queue) - [End-to-end encryption](#end-to-end-encryption) - [Connection link: 1-time invitation and contact address](#connection-link-1-time-invitation-and-contact-address) + - [Full connection link syntax](#full-connection-link-syntax) + - [Short connection link syntax](#short-connection-link-syntax) +- [Short links](#short-links) + - [Short link structure](#short-link-structure) + - [Link key derivation](#link-key-derivation) + - [Link data encryption](#link-data-encryption) + - [Short link resolution](#short-link-resolution) + - [Link data management](#link-data-management) - [Appendix A: SMP agent API](#smp-agent-api) - [API functions](#api-functions) - [API events](#api-events) @@ -37,6 +46,16 @@ It provides: SMP agent API provides no security between the agent and the client - it is assumed that the agent is executed in the trusted and secure environment, via the agent 
library, when the agent logic is included directly into the client application - [SimpleX Chat for terminal](https://github.com/simplex-chat/simplex-chat) uses this approach. +This document describes SMP agent protocol version 7. The version history: + +- v1: initial version +- v2: duplex handshake - allows including reply queue(s) in the initial confirmation +- v3: ratchet sync - supports re-negotiating double ratchet encryption +- v4: delivery receipts - supports acknowledging message delivery to the sender +- v5: post-quantum - supports post-quantum key exchange in double ratchet (PQDR) +- v6: sender auth key - supports sender authentication key in confirmations +- v7: ratchet on confirmation - initializes double ratchet during confirmation + ## SMP agent SMP agents communicate with each other via SMP servers using [simplex messaging protocol (SMP)](./simplex-messaging.md) according to the API calls used by the client applications. This protocol is a middle layer in SimpleX protocols (above SMP protocol but below any application level protocol) - it is intended to be used by client-side applications that need secure asynchronous bi-directional communication channels ("connections"). @@ -152,7 +171,7 @@ These messages are encrypted with per-queue shared secret using NaCL crypto_box - `agentConfirmation` - used when confirming SMP queues, contains connection information encrypted with double ratchet. This envelope can only contain `agentConnInfo` or `agentConnInfoReply` encrypted with double ratchet. - `agentMsgEnvelope` - contains different agent messages encrypted with double ratchet, as defined in `agentMessage`. - `agentInvitation` - sent to SMP queue that is used as contact address, does not use double ratchet. -- `agentRatchetKey` - used to re-negotiate double ratchet encryption - can contain additional information in `agentRatchetKey`. 
+- `agentRatchetKey` - used to re-negotiate double ratchet encryption - can contain additional information in `agentRatchetInfo`. ```abnf decryptedSMPClientMessage = agentConfirmation / agentMsgEnvelope / agentInvitation / agentRatchetKey @@ -182,7 +201,7 @@ Decrypted SMP message client body can be one of 4 types: - `agentMessage` - all other agent messages. `agentMessage` contains these parts: -- `agentMsgHeader` - agent message header that contains sequential agent message ID for a particular SMP queue, agent timestamp (ISO8601) and the hash of the previous message. +- `agentMsgHeader` - agent message header that contains sequential agent message ID for a particular SMP queue and the hash of the previous message. - `aMessage` - a command/message to the other SMP agent: - to confirm the connection (`HELLO`). - to send and to confirm reception of user messages (`A_MSG`, `A_RCVD`). @@ -200,7 +219,9 @@ decryptedAgentMessage = agentConnInfo / agentConnInfoReply / agentRatchetInfo / agentConnInfo = %s"I" connInfo connInfo = *OCTET agentConnInfoReply = %s"D" smpQueues connInfo +smpQueues = length 1*newQueueInfo ; NonEmpty list of reply queues agentRatchetInfo = %s"R" ratchetInfo +ratchetInfo = *OCTET agentMessage = %s"M" agentMsgHeader aMessage msgPadding agentMsgHeader = agentMsgId prevMsgHash @@ -215,8 +236,10 @@ HELLO = %s"H" A_MSG = %s"M" userMsgBody userMsgBody = *OCTET -A_RCVD = %s"V" msgReceipt +A_RCVD = %s"V" msgReceipts +msgReceipts = length 1*msgReceipt ; NonEmpty list msgReceipt = agentMsgId msgHash rcptLength rcptInfo +msgHash = shortString EREADY = %s"E" agentMsgId @@ -224,14 +247,14 @@ A_QCONT = %s"QC" sndQueueAddr QADD = %s"QA" sndQueues sndQueues = length 1*(newQueueUri replacedSndQueue) -newQueueUri = clientVRange smpServer senderId dhPublicKey [sndSecure] +newQueueUri = clientVRange smpServer senderId dhPublicKey [queueMode] dhPublicKey = length x509encoded -sndSecure = "T" +queueMode = %s"M" / %s"C" ; M - messaging (sender can secure), C - contact 
replacedSndQueue = "0" / "1" sndQueueAddr QKEY = %s"QK" sndQueueKeys sndQueueKeys = length 1*(newQueueInfo senderKey) -newQueueInfo = version smpServer senderId dhPublicKey [sndSecure] +newQueueInfo = version smpServer senderId dhPublicKey [queueMode] senderKey = length x509encoded QUSE = %s"QU" sndQueuesReady @@ -270,7 +293,7 @@ This is the agent envelope used to send client messages once the connection is e #### A_RCVD message -This message is sent to confirm the client message reception. It includes received message number and message hash. +This message is sent to confirm the client message reception. It includes a list of message receipts, each containing the received message number, message hash and receipt info. #### EREADY message @@ -345,24 +368,22 @@ To summarize, the upgrade to DH+KEM secret happens in a sent message that has PQ Connection links are generated by SMP agent in response to `createConnection` api call, used by another party user with `joinConnection` api, and then another connection link is sent by the agent in `agentConnInfoReply` and used by the first party agent to connect to the reply queue (the second part of the process is invisible to the users). -Connection link syntax: +### Full connection link syntax ``` -connectionLink = connectionScheme "/" connLinkType "#/?smp=" smpQueues "&e2e=" e2eEncryption +connectionLink = connectionScheme "/" connLinkType "#/?v=" versionRange "&smp=" smpQueues ["&e2e=" e2eEncryption] ["&data=" clientData] connLinkType = %s"invitation" / %s"contact" connectionScheme = (%s"https://" clientAppServer) | %s"simplex:" clientAppServer = hostname [ ":" port ] ; client app server, e.g. simplex.chat -e2eEncryption = encryptionScheme ":" publicKey -encryptionScheme = %s"rsa" ; end-to-end encryption and key exchange protocols, - ; the current hybrid encryption scheme (RSA-OAEP/AES-256-GCM-SHA256) - ; will be replaced with double ratchet protocol and DH key exchange. 
-publicKey = -smpQueues = smpQueue [ "," 1*smpQueue ] ; SMP queues for the connection +versionRange = 1*DIGIT / 1*DIGIT "-" 1*DIGIT ; agent version range +e2eEncryption = +smpQueues = smpQueue *(";" smpQueue) ; SMP queues for the connection (semicolon-separated) smpQueue = +clientData = ``` -All parameters are passed via URI hash to avoid sending them to the server (in case "https" scheme is used) - they can be used by the client-side code and processed by the client application. Parameters `smp` and `e2e` can be present in any order, any unknown additional parameters SHOULD be ignored. +All parameters are passed via URI hash to avoid sending them to the server (in case "https" scheme is used) - they can be used by the client-side code and processed by the client application. Parameters can be present in any order, any unknown additional parameters SHOULD be ignored. `clientAppServer` is not an SMP server - it is a server that shows the instruction on how to download the client app that will connect using this connection link. This server can also host a mobile or desktop app manifest so that this link is opened directly in the app if it is installed on the device. @@ -370,6 +391,123 @@ All parameters are passed via URI hash to avoid sending them to the server (in c See SMP protocol [out-of-band messages](./simplex-messaging.md#out-of-band-messages) for syntax of `queueURI`. +### Short connection link syntax + +Short links provide a more compact representation by storing connection data on the server: + +``` +shortLink = shortLinkScheme "/" linkType "#" [linkId "/"] linkKey ["?" 
shortLinkParams] +shortLinkScheme = %s"simplex:" / (%s"https://" serverHost) +linkType = %s"i" / contactType ; i - invitation, or contact type +contactType = %s"a" / %s"c" / %s"g" / %s"r" ; a - contact, c - channel, g - group, r - relay +linkId = base64url ; only for invitation links +linkKey = base64url ; SHA3-256 hash of fixed data, used to decrypt link data +shortLinkParams = hostParam ["&" portParam] ["&" keyHashParam] +hostParam = %s"h=" hostList +hostList = host *("," host) +portParam = %s"p=" port +keyHashParam = %s"c=" base64url ; server certificate fingerprint +``` + +Contact types: +- `a` (CCTContact) - direct contact connection +- `c` (CCTChannel) - channel connection +- `g` (CCTGroup) - group connection +- `r` (CCTRelay) - relay connection + +Short links can use either the `simplex:` scheme or `https://` with a server hostname. When using the simplex scheme, server information is included in query parameters. + +## Short links + +Short links provide a compact representation of connection links by storing encrypted connection data on the SMP server. The link key in the URI fragment (after `#`) is never sent to the server, ensuring the server cannot decrypt the stored connection data. 
+ +### Link key derivation + +The link key is derived from the fixed link data using SHA3-256 hash function: + +``` +linkKey = SHA3-256(fixedLinkData) +``` + +The fixed link data includes: +- Agent version range +- Root public key (Ed25519) for signing +- SMP queue connection request (server, queue IDs, encryption keys) +- Optional link entity ID + +For contact links, the link ID and encryption key are derived from the link key using HKDF: + +``` +(linkId, encryptionKey) = HKDF(info="SimpleXContactLink", key=linkKey, outputLen=56) +; linkId = first 24 bytes, encryptionKey = remaining 32 bytes +``` + +For invitation links, the link ID is stored separately (usually included in the URI), and only the encryption key is derived: + +``` +encryptionKey = HKDF(info="SimpleXInvLink", key=linkKey, outputLen=32) +``` + +### Link data encryption + +Link data stored on the server consists of two encrypted parts: fixed data and user data. Both are encrypted using NaCl secret_box (XSalsa20-Poly1305) with the derived encryption key: + +```abnf +queueLinkData = encFixedData encUserData +encFixedData = largeString ; encrypted padded(signedFixedData, 2008) +encUserData = largeString ; encrypted padded(signedUserData, 13784) + +signedFixedData = signature fixedData +signedUserData = signature userData +signature = length 64*64 OCTET ; Ed25519 signature + +fixedData = agentVersionRange rootKey linkConnReq [linkEntityId] +agentVersionRange = version version ; min and max agent protocol version +version = 2*2 OCTET +rootKey = length x509encoded ; Ed25519 public key +linkEntityId = shortString +userData = invitationLinkData / contactLinkData +invitationLinkData = %s"I" agentVersionRange connInfo +contactLinkData = %s"C" agentVersionRange userContactData +largeString = 2*2 OCTET *OCTET ; Word16 length prefix +length = 1*1 OCTET +shortString = length *OCTET +``` + +The fixed data is signed with the root key and its hash becomes the link key. 
The user data is signed either with the root key (for invitations) or with an owner key (for contact addresses). + +### Short link resolution + +When a user receives a short link, the agent resolves it as follows: + +1. Extract the link key from the URI fragment +2. Send `LGET` command to the SMP server with the link ID (taken from the URI for invitation links, derived from the link key for contact links) +3. Receive encrypted link data from the server +4. Decrypt the link data using the encryption key derived from the link key +5. Extract the full connection information (SMP queue URI, encryption keys, profile) +6. Proceed with the standard connection procedure using `joinConnection` + +For invitation links, the `LKEY` command is used to set the sender key when getting link data. Repeated `LKEY` would require using the same key. + +### Link data management + +The recipient who created the queue can manage the short link data: + +- **LSET** - Set or update the link data associated with a queue. This is used when creating a short link or updating the user data (e.g., profile changes). +- **LDEL** - Delete the link data from the server. This effectively invalidates the short link. + +Short links support different connection modes: +- **invitation** - One-time invitation links that can only be used once +- **contact** - Reusable contact address links that can be used multiple times + +For contact addresses, the link data includes additional information about the contact type: +- **contact** - Direct contact connection +- **channel** - Channel connection +- **group** - Group connection +- **relay** - Relay connection + +The agent maintains the link data and updates it when connection parameters change, ensuring short links remain valid and reflect current connection information. + ## Appendix A: SMP agent API The exact specification of agent library API and of the events that the agent sends to the client application is out of scope of the protocol specification.
diff --git a/protocol/simplex-messaging.md b/protocol/simplex-messaging.md index 16e4e66069..5a077eff20 100644 --- a/protocol/simplex-messaging.md +++ b/protocol/simplex-messaging.md @@ -1,4 +1,4 @@ -Version 9, 2024-06-22 +Version 19, 2025-01-24 # Simplex Messaging Protocol (SMP) @@ -18,6 +18,10 @@ Version 9, 2024-06-22 - [Simplex queue IDs](#simplex-queue-ids) - [Server security requirements](#server-security-requirements) - [Message delivery notifications](#message-delivery-notifications) +- [Client services](#client-services) + - [Service roles](#service-roles) + - [Service certificates](#service-certificates) + - [Service subscriptions](#service-subscriptions) - [SMP Transmission and transport block structure](#smp-transmission-and-transport-block-structure) - [SMP commands](#smp-commands) - [Correlating responses with commands](#correlating-responses-with-commands) @@ -26,7 +30,11 @@ Version 9, 2024-06-22 - [Recipient commands](#recipient-commands) - [Create queue command](#create-queue-command) - [Subscribe to queue](#subscribe-to-queue) + - [Subscribe to multiple queues](#subscribe-to-multiple-queues) - [Secure queue by recipient](#secure-queue-by-recipient) + - [Set queue recipient keys](#set-queue-recipient-keys) + - [Set short link](#set-short-link) + - [Delete short link](#delete-short-link) - [Enable notifications command](#enable-notifications-command) - [Disable notifications command](#disable-notifications-command) - [Get message command](#get-message-command) @@ -41,12 +49,22 @@ Version 9, 2024-06-22 - [Request proxied session](#request-proxied-session) - [Send command via proxy](#send-command-via-proxy) - [Forward command to destination server](#forward-command-to-destination-server) + - [Short link commands](#short-link-commands) + - [Set link key](#set-link-key) + - [Get link data](#get-link-data) - [Notifier commands](#notifier-commands) - [Subscribe to queue notifications](#subscribe-to-queue-notifications) + - [Subscribe to multiple queue 
notifications](#subscribe-to-multiple-queue-notifications) - [Server messages](#server-messages) + - [Link response](#link-response) + - [Queue subscription response](#queue-subscription-response) + - [Service subscription response](#service-subscription-response) + - [All service messages received](#all-service-messages-received) - [Deliver queue message](#deliver-queue-message) - [Deliver message notification](#deliver-message-notification) - [Subscription END notification](#subscription-end-notification) + - [Service subscription END notification](#service-subscription-end-notification) + - [Queue deleted notification](#queue-deleted-notification) - [Error responses](#error-responses) - [OK response](#ok-response) - [Transport connection with the SMP server](#transport-connection-with-the-SMP-server) @@ -65,7 +83,26 @@ It's designed with the focus on communication security and integrity, under the It is designed as a low level protocol for other application protocols to solve the problem of secure and private message transmission, making [MITM attack][1] very difficult at any part of the message transmission system. -This document describes SMP protocol versions 6 and 7, the previous versions are discontinued. +This document describes SMP protocol version 19. Versions 1-5 are discontinued. 
The version history: + +- v1: binary protocol encoding +- v2: message flags (used to control notifications) +- v3: encrypt message timestamp and flags together with the body when delivered to recipient +- v4: support command batching +- v5: basic auth for SMP servers +- v6: allow creating queues without subscribing (current minimum version) +- v7: support authenticated encryption to verify senders' commands +- v8: SMP proxy for sender commands (PRXY, PFWD, RFWD, PKEY, PRES, RRES) +- v9: faster handshake with SKEY command for sender to secure queue +- v10: DELD event to subscriber when queue is deleted via another connection +- v11: additional encryption of transport blocks with forward secrecy +- v12: BLOCKED error for blocked queues +- v14: proxyServer handshake property to disable transport encryption between server and proxy +- v15: short links with associated data passed in NEW or LSET command +- v16: service certificates +- v17: create notification credentials with NEW command +- v18: support client notices in BLOCKED error +- v19: service subscriptions to messages (SUBS, SOKS, ENDS commands) ## Introduction @@ -395,13 +432,52 @@ To protect the privacy of the recipients, there are several commands in SMP prot The clients can optionally instruct a dedicated push notification server to subscribe to notifications and deliver push notifications to the device, which can then retrieve the messages in the background and send local notifications to the user - this is out of scope of SMP protocol. The commands that SMP protocol provides to allow it: -- `enableNotifications` (`"NKEY"`) with `notifierId` (`"NID"`) response - see [Enable notifications command](#enable-notifications-command). +- `enableNotifications` (`"NKEY"`) with `notifierIdResp` (`"NID"`) response - see [Enable notifications command](#enable-notifications-command). - `disableNotifications` (`"NDEL"`) - see [Disable notifications command](#disable-notifications-command). 
- `subscribeNotifications` (`"NSUB"`) - see [Subscribe to queue notifications](#subscribe-to-queue-notifications). - `messageNotification` (`"NMSG"`) - see [Deliver message notification](#deliver-message-notification). [`SEND` command](#send-message) includes the notification flag to instruct SMP server whether to send the notification - this flag is forwarded to the recipient inside encrypted envelope, together with the timestamp and the message body, so even if TLS is compromised this flag cannot be used for traffic correlation. +## Client services + +SMP protocol supports client services - high capacity clients that act as services. Client services allow scalable message and notification delivery services. + +### Service roles + +A client service can have one of two roles: + +- **Messaging** - Message receiver service that subscribes to and receives messages from multiple SMP queues with a single command. + +- **Notifications** - Notification service that subscribes to queue notifications and delivers push notifications to user devices. + +Service role is identified in the transport handshake and determines what commands the service is authorized to send. + +### Service certificates + +To send service commands, services should authenticate themselves to SMP servers using service certificates. This provides: + +- **Service identity** - The server assigns a unique service ID based on the service certificate, allowing associating multiple SMP queues with a service. +- **Subscription management** - Services can efficiently manage subscriptions across reconnections without re-subscribing to individual queues. +- **Rate limiting** - Servers can apply rate limits per service identity rather than per connection. + +Service certificates are included in the client handshake and verified by the server. The service receives a service ID in the handshake response, which is then used as entity ID in service transmissions. 
+ +```abnf +clientHandshakeService = serviceRole serviceCertKey +serviceRole = %s"M" / %s"N" ; Messaging / Notifier +serviceCertKey = certChainPubKey +``` + +### Service subscriptions + +Services use batch subscription commands to subscribe to multiple queues: + +- **SUBS** - Subscribe to messages from all associated SMP queues at once. The service provides a count and hash of queue IDs, and receives `SOKS` response with the service ID. +- **NSUBS** - Subscribe to notifications from all associated SMP queues. Similar to SUBS. +- **SOKS** - Server response confirming batch subscription success. +- **ENDS** - Server notification when batch subscriptions are terminated (e.g., when another instance of service connects). + ## SMP Transmission and transport block structure Each transport block has a fixed size of 16384 bytes for traffic uniformity. @@ -455,15 +531,19 @@ Commands syntax below is provided using [ABNF][8] with [case-sensitive strings e ```abnf smpCommand = ping / recipientCmd / senderCommand / - proxyCommand / subscribeNotifications / serverMsg -recipientCmd = create / subscribe / rcvSecure / + proxyCommand / notifierCommand / linkCommand / serverMsg +recipientCmd = create / subscribe / subscribeMultiple / rcvSecure / recipientKeys / enableNotifications / disableNotifications / getMessage - acknowledge / suspend / delete / getQueueInfo + acknowledge / suspend / delete / getQueueInfo / setShortLink / deleteShortLink senderCommand = send / sndSecure -proxyCommand = proxySession / proxyCommand / relayCommand -serverMsg = queueIds / message / notifierId / messageNotification / - proxySessionKey / proxyResponse / relayResponse - unsubscribed / queueInfo/ ok / error +linkCommand = setLinkKey / getLinkData +proxyCommand = proxySession / proxyForward / relayForward +notifierCommand = subscribeNotifications / subscribeNotificationsMultiple +serverMsg = queueIds / linkResponse / serviceOk / serviceOkMultiple / + message / allReceived / notifierIdResp / 
messageNotification / + proxySessionKey / proxyResponse / relayResponse / + unsubscribed / serviceUnsubscribed / deleted / + queueInfo / ok / error / pong ``` The syntax of specific commands and responses is defined below. @@ -480,13 +560,14 @@ SMP servers must verify all transmissions (excluding `ping` and initial `send` c ### Keep-alive command -To keep the transport connection alive and to generate noise traffic the clients should use `ping` command to which the server responds with `ok` response. This command should be sent unsigned and without queue ID. +To keep the transport connection alive and to generate noise traffic the clients should use `ping` command to which the server responds with `pong` response. This command should be sent unsigned and without queue ID. ```abnf ping = %s"PING" +pong = %s"PONG" ``` -This command is always send unsigned. +This command is always sent unsigned. ### Recipient commands @@ -501,30 +582,54 @@ Servers SHOULD support basic auth with this command, to allow only server owners The syntax is: ```abnf -create = %s"NEW " recipientAuthPublicKey recipientDhPublicKey basicAuth subscribe sndSecure +create = %s"NEW " recipientAuthPublicKey recipientDhPublicKey optBasicAuth subscribeMode optQueueReqData optNtfCreds recipientAuthPublicKey = length x509encoded ; the recipient's Ed25519 or X25519 public key to verify commands for this queue recipientDhPublicKey = length x509encoded ; the recipient's Curve25519 key for DH exchange to derive the secret ; that the server will use to encrypt delivered message bodies ; using [NaCl crypto_box][16] encryption scheme (curve25519xsalsa20poly1305). 
-basicAuth = "0" / "1" shortString ; server password +optBasicAuth = %s"0" / (%s"1" shortString) ; optional server password subscribeMode = %s"S" / %s"C" ; S - create and subscribe, C - only create -sndSecure = %s"T" / %s"F" ; T - sender can secure the queue, from v9 +optQueueReqData = %s"0" / (%s"1" queueReqData) ; optional queue request data +queueReqData = queueReqMessaging / queueReqContact +queueReqMessaging = %s"M" optMessagingLinkData +queueReqContact = %s"C" optContactLinkData +optMessagingLinkData = %s"0" / (%s"1" senderId encFixedData encUserData) +optContactLinkData = %s"0" / (%s"1" linkId senderId encFixedData encUserData) +senderId = shortString ; first 24 bytes of SHA3-384(corrId) +linkId = shortString +encFixedData = largeString ; encrypted fixed link data +encUserData = largeString ; encrypted user data +optNtfCreds = %s"0" / (%s"1" ntfKey ntfDhKey) ; optional notification credentials +ntfKey = length x509encoded +ntfDhKey = length x509encoded x509encoded = +shortString = length *OCTET +largeString = length2 *OCTET length = 1*1 OCTET +length2 = 2*2 OCTET ; Word16, network byte order ``` If the queue is created successfully, the server must send `queueIds` response with the recipient's and sender's queue IDs and public key to encrypt delivered message bodies: ```abnf -queueIds = %s"IDS " recipientId senderId srvDhPublicKey sndSecure -serverDhPublicKey = length x509encoded +queueIds = %s"IDS " recipientId senderId srvDhPublicKey optQueueMode optLinkId optServiceId optServerNtfCreds +srvDhPublicKey = length x509encoded ; the server's Curve25519 key for DH exchange to derive the secret ; that the server will use to encrypt delivered message bodies to the recipient recipientId = shortString ; 16-24 bytes senderId = shortString ; 16-24 bytes +optQueueMode = %s"0" / (%s"1" queueMode) +queueMode = %s"M" / %s"C" ; M - messaging (sender can secure), C - contact +optLinkId = %s"0" / (%s"1" linkId) +linkId = shortString +optServiceId = %s"0" / (%s"1" serviceId) 
+serviceId = shortString +optServerNtfCreds = %s"0" / (%s"1" srvNtfId srvNtfDhKey) +srvNtfId = shortString +srvNtfDhKey = length x509encoded ``` Once the queue is created, depending on `subscribeMode` parameter of `NEW` command the recipient gets automatically subscribed to receive the messages from that queue, until the transport connection is closed. To start receiving the messages from the existing queue when the new transport connection is opened the client must use `subscribe` command. @@ -541,12 +646,24 @@ When the simplex queue was not created in the current transport connection, the subscribe = %s"SUB" ``` -If subscription is successful the server must respond with the first available message or with `ok` response if no messages are available. The recipient will continue receiving the messages from this queue until the transport connection is closed or until another transport connection subscribes to the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. +If subscription is successful the server must respond with the first available message or with [queue subscription response](#queue-subscription-response) (`SOK`) if no messages are available. The recipient will continue receiving the messages from this queue until the transport connection is closed or until another transport connection subscribes to the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. The first message will be delivered either immediately or as soon as it is available; to receive the following message the recipient must acknowledge the reception of the message (see [Acknowledge message delivery](#acknowledge-message-delivery)). This transmission and its response MUST be signed. 
+#### Subscribe to multiple queues + +This command is used by recipient services to subscribe to multiple queues at once: + +```abnf +subscribeMultiple = %s"SUBS " count idsHash +count = 8*8 OCTET ; Int64, network byte order (big-endian) +idsHash = 16*16 OCTET ; XOR of MD5 hashes of all queue IDs +``` + +The count and idsHash allow the server to detect subscription drift. The server responds with `serviceOkMultiple` (`SOKS`) response. + #### Secure queue by recipient This command is only used until v8 of SMP protocol. V9 uses [SKEY](#secure-queue-by-sender). @@ -565,6 +682,44 @@ Once the queue is secured only authorized messages can be sent to it. This command MUST be used in transmission with recipient queue ID. +#### Set queue recipient keys + +This command is used to set additional recipient keys to support shared management of the queue: + +```abnf +recipientKeys = %s"RKEY " recipientKeysList +recipientKeysList = count 1*recipientKey ; non-empty list +count = 1*1 OCTET ; number of keys (1-255) +recipientKey = length x509encoded +``` + +This command was added to allow multiple group owners to manage the data of the same queue link. + +#### Set short link + +This command is used to associate a short link with the queue: + +```abnf +setShortLink = %s"LSET " linkId encFixedData encUserData +linkId = shortString +encFixedData = largeString ; encrypted fixed link data +encUserData = largeString ; encrypted user data (e.g., profile) +largeString = length2 *OCTET +length2 = 2*2 OCTET ; Word16, network byte order (big-endian) +``` + +The server responds with `OK` response if successful. 
+ +#### Delete short link + +This command is used to remove a short link association from the queue: + +```abnf +deleteShortLink = %s"LDEL" +``` + +The server responds with `OK` or `ERR` response. + #### Enable notifications command This command is sent by the recipient to the server to add notifier's key to the queue, to allow push notifications server to receive notifications when the message arrives, via a separate queue ID, without receiving message content. @@ -580,10 +735,10 @@ recipientNotificationDhPublicKey = length x509encoded ; using [NaCl crypto_box][16] encryption scheme (curve25519xsalsa20poly1305). ``` -The server will respond with `notifierId` response if notifications were enabled and the notifier's key was successfully added to the queue: +The server will respond with `NID` response if notifications were enabled and the notifier's key was successfully added to the queue: ```abnf -notifierId = %s"NID " notifierId srvNotificationDhPublicKey +notifierIdResp = %s"NID " notifierId srvNotificationDhPublicKey notifierId = shortString ; 16-24 bytes srvNotificationDhPublicKey = length x509encoded ; the server's Curve25519 key for DH exchange to derive the secret @@ -1001,6 +1156,35 @@ The shared secret for encrypting transmission bodies between proxy server and de relayResponse = %s"RRES" SP ``` +### Short link commands + +These commands are used by senders to access queues via short links (added in v15). + +#### Set link key + +This command is used to set the sender key and to get link data associated with a "messaging" queue: + +```abnf +setLinkKey = %s"LKEY " senderAuthPublicKey +senderAuthPublicKey = length x509encoded +``` + +The server secures the queue with the provided key and responds with `LNK` response containing the sender ID and encrypted link data. + +Once this command is used, the queue is secured, and the command can only be repeated with the same key. 
+ +#### Get link data + +This command is used to retrieve the link data associated with a "contact" queue: + +```abnf +getLinkData = %s"LGET" +``` + +The server responds with `LNK` response containing the sender ID and encrypted link data. + +This command may be repeated multiple times. + ### Notifier commands #### Subscribe to queue notifications @@ -1011,16 +1195,69 @@ The push notifications server (notifier) must use this command to start receivin subscribeNotifications = %s"NSUB" ``` -If subscription is successful the server must respond with `ok` response if no messages are available. The notifier will be receiving the message notifications from this queue until the transport connection is closed or until another transport connection subscribes to notifications from the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. +If subscription is successful the server must respond with [queue subscription response](#queue-subscription-response) (`SOK`). The notifier will be receiving the message notifications from this queue until the transport connection is closed or until another transport connection subscribes to notifications from the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. The first message notification will be delivered either immediately or as soon as the message is available. +#### Subscribe to multiple queue notifications + +This command is used by notifier services to subscribe to multiple queues at once: + +```abnf +subscribeNotificationsMultiple = %s"NSUBS " count idsHash +count = 8*8 OCTET ; Int64, network byte order (big-endian) +idsHash = 16*16 OCTET ; XOR of MD5 hashes of all queue IDs +``` + +The server responds with `serviceOkMultiple` (`SOKS`) response. 
+ ### Server messages This section includes server events and generic command responses used for several commands. The syntax for command-specific responses is shown together with the commands. +#### Link response + +Sent in response to `LKEY` and `LGET` commands: + +```abnf +linkResponse = %s"LNK " senderId encFixedData encUserData +senderId = shortString ; the sender ID for the queue +encFixedData = largeString ; encrypted fixed link data +encUserData = largeString ; encrypted user data +``` + +#### Queue subscription response + +Sent in response to `SUB` and `NSUB` commands: + +```abnf +serviceOk = %s"SOK " optServiceId +optServiceId = %s"0" / (%s"1" serviceId) +serviceId = shortString +``` + +If the response contains `serviceId`, it means that the queue is associated with the service. + +#### Service subscription response + +Sent in response to `SUBS` or `NSUBS` commands: + +```abnf +serviceOkMultiple = %s"SOKS " count idsHash +count = 8*8 OCTET ; Int64, network byte order (big-endian) +idsHash = 16*16 OCTET ; XOR of MD5 hashes of all subscribed queue IDs +``` + +#### All service messages received + +Sent to indicate all messages have been delivered from all queues associated with the service: + +```abnf +allReceived = %s"ALLS" +``` + #### Deliver queue message When server delivers the messages to the recipient, message body should be encrypted with the secret derived from DH exchange using the keys passed during the queue creation and returned with `queueIds` response. @@ -1077,6 +1314,24 @@ unsubscribed = %s"END" No further messages should be delivered to unsubscribed transport connection. 
+#### Service subscription END notification + +Sent when service subscription is terminated (can be sent when service re-connects): + +```abnf +serviceUnsubscribed = %s"ENDS " count idsHash +count = 8*8 OCTET ; Int64, network byte order (big-endian) +idsHash = 16*16 OCTET ; XOR of MD5 hashes of terminated queue IDs +``` + +#### Queue deleted notification + +Sent when a queue has been deleted via another connection: + +```abnf +deleted = %s"DELD" +``` + #### Error responses - incorrect block format, encoding or authorization size (`BLOCK`). @@ -1100,6 +1355,7 @@ No further messages should be delivered to unsubscribed transport connection. - `NETWORK` - network error. - `TIMEOUT` - command response timeout. - `HOST` - no compatible server host (e.g. onion when public is required, or vice versa) + - `NO_SERVICE` - service unavailable client-side. - `TRANSPORT` - handshake or other transport error: - `BLOCK` - error parsing transport block. - `VERSION` - incompatible client or server version. @@ -1111,25 +1367,42 @@ No further messages should be delivered to unsubscribed transport connection. - `IDENTITY` - incorrect server identity (certificate fingerprint does not match server address). - `BAD_AUTH` - incorrect or missing server credentials in handshake. - authentication error (`AUTH`) - incorrect authorization, unknown (or suspended) queue, sender's ID is used in place of recipient's and vice versa, and some other cases (see [Send message](#send-message) command). +- blocked entity error (`BLOCKED`) - the entity (queue or message) was blocked due to policy violation (added in v12). Contains blocking information: + - `reason` - blocking reason (`spam` or `content`). + - `notice` - optional client notice with additional information. +- service error (`SERVICE`) - service-related error. +- crypto error (`CRYPTO`) - cryptographic operation failed. - message queue quota exceeded error (`QUOTA`) - too many messages were sent to the message queue. 
Further messages can only be sent after the recipient retrieves the messages. +- store error (`STORE`) - server storage error with error message. +- message expired (`EXPIRED`) - message has expired. +- no message (`NO_MSG`) - no message available or message ID mismatch. - sent message is too large (> 16064) to be delivered (`LARGE_MSG`). - internal server error (`INTERNAL`). +- duplicate error (`DUPLICATE_`) - internal duplicate detection error (not returned by server). The syntax for error responses: ```abnf error = %s"ERR " errorType -errorType = %s"BLOCK" / %s"SESSION" / %s"CMD" SP cmdError / %s"PROXY" proxyError / - %s"AUTH" / %s"QUOTA" / %s"LARGE_MSG" / %s"INTERNAL" -cmdError = %s"SYNTAX" / %s"PROHIBITED" / %s"NO_AUTH" / %s"HAS_AUTH" / %s"NO_ENTITY" +errorType = %s"BLOCK" / %s"SESSION" / %s"CMD" SP cmdError / %s"PROXY" SP proxyError / + %s"AUTH" / %s"BLOCKED" SP blockingInfo / %s"SERVICE" / %s"CRYPTO" / + %s"QUOTA" / %s"STORE" SP storeError / %s"EXPIRED" / %s"NO_MSG" / + %s"LARGE_MSG" / %s"INTERNAL" / %s"DUPLICATE_" +cmdError = %s"UNKNOWN" / %s"SYNTAX" / %s"PROHIBITED" / %s"NO_AUTH" / %s"HAS_AUTH" / %s"NO_ENTITY" proxyError = %s"PROTOCOL" SP errorType / %s"BROKER" SP brokerError / %s"BASIC_AUTH" / %s"NO_SESSION" brokerError = %s"RESPONSE" SP shortString / %s"UNEXPECTED" SP shortString / - %s"NETWORK" / %s"TIMEOUT" / %s"HOST" / + %s"NETWORK" [SP networkError] / %s"TIMEOUT" / %s"HOST" / %s"NO_SERVICE" / %s"TRANSPORT" SP transportError +networkError = %s"CONNECT" SP shortString / %s"TLS" SP shortString / + %s"UNKNOWNCA" / %s"FAILED" / %s"TIMEOUT" / %s"SUBSCRIBE" SP shortString transportError = %s"BLOCK" / %s"VERSION" / %s"LARGE_MSG" / %s"SESSION" / %s"NO_AUTH" / %s"HANDSHAKE" SP handshakeError -handshakeError = %s"PARSE" / %s"IDENTITY" / %s"BAD_AUTH" +handshakeError = %s"PARSE" / %s"IDENTITY" / %s"BAD_AUTH" / %s"BAD_SERVICE" +blockingInfo = %s"reason=" blockingReason ["," %s"notice=" jsonNotice] +blockingReason = %s"spam" / %s"content" +jsonNotice = *OCTET ; JSON-encoded notice object +storeError 
= *OCTET ``` Server implementations must aim to respond within the same time for each command in all cases when `"ERR AUTH"` response is required to prevent timing attacks (e.g., the server should verify authorization even when the queue does not exist on the server or the authorization of different type is sent, using any dummy key compatible with the used authorization). @@ -1218,7 +1491,7 @@ The first block sent by the server should be `paddedServerHello` and the client ```abnf paddedServerHello = -serverHello = smpVersionRange sessionIdentifier [serverCert signedServerKey] ignoredPart +serverHello = smpVersionRange sessionIdentifier [serverCertKey] ignoredPart smpVersionRange = minSmpVersion maxSmpVersion minSmpVersion = smpVersion maxSmpVersion = smpVersion @@ -1226,25 +1499,39 @@ sessionIdentifier = shortString ; unique session identifier derived from transport connection handshake ; it should be included in authorized part of all SMP transmissions sent in this transport connection, ; but it must not be sent as part of the transmission in the current protocol version. 
-serverCert = originalLength x509encoded -signedServerKey = originalLength x509encoded ; signed by server certificate +serverCertKey = certChain signedServerKey +certChain = count 1*cert ; 2-4 certificates +cert = originalLength x509encoded +signedServerKey = originalLength x509encoded ; X25519 key signed by server certificate paddedClientHello = -clientHello = smpVersion [clientKey] ignoredPart +clientHello = smpVersion keyHash [clientKey] proxyServer optClientService ignoredPart ; chosen SMP protocol version - it must be the maximum supported version ; within the range offered by the server -clientKey = length x509encoded +keyHash = shortString ; server identity - CA certificate fingerprint +clientKey = length x509encoded ; X25519 public key for session encryption - only present if needed +proxyServer = %s"T" / %s"F" ; true if connecting client is a proxy server +optClientService = %s"0" / (%s"1" clientService) ; optional service client credentials +clientService = serviceRole serviceCertKey +serviceRole = %s"M" / %s"N" ; Messaging / Notifier +serviceCertKey = certChain signedServiceKey +signedServiceKey = originalLength x509encoded ; Ed25519 key signed by service certificate smpVersion = 2*2OCTET ; Word16 version number originalLength = 2*2OCTET +count = 1*1OCTET ignoredPart = *OCTET pad = *OCTET ``` -`signedServerKey` is used to compute a shared secret to authorize client transmission - it is combined with the per-queue key that was used when the queue was created. +`signedServerKey` is used to compute a shared secret to authorize client transmissions - it is combined with the per-queue key that was used when the queue was created. `clientKey` is used only by SMP proxy server when it connects to the destination server to agree shared secret for the additional encryption layer, end user clients do not use this key. 
+`proxyServer` flag (v14+) disables additional transport encryption inside TLS for proxy connections, since proxy server connection already has additional encryption. + +`clientService` (v16+) provides long-term service client certificate for high-volume services using SMP server (chat relays, notification servers, high traffic bots). The server responds with a third handshake message containing the assigned service ID. + `ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version - the client and servers must ignore any extra bytes within the original block length. For TLS transport client should assert that `sessionIdentifier` is equal to `tls-unique` channel binding defined in [RFC 5929][14] (TLS Finished message struct); we pass it in `serverHello` block to allow communication over some other transport protocol (possibly, with another channel binding). diff --git a/protocol/xftp.md b/protocol/xftp.md index bd1d9e6645..1a17524d71 100644 --- a/protocol/xftp.md +++ b/protocol/xftp.md @@ -1,4 +1,4 @@ -Version 2, 2024-06-22 +Version 3, 2025-01-24 # SimpleX File Transfer Protocol @@ -33,6 +33,7 @@ Version 2, 2024-06-22 - [File recipient commands](#file-recipient-commands) - [Download file chunk](#download-file-chunk) - [Acknowledge file chunk download](#acknowledge-file-chunk-download) + - [Error responses](#error-responses) - [Threat model](#threat-model) ## Abstract @@ -49,6 +50,12 @@ The objective of SimpleX File Transfer Protocol (XFTP) is to facilitate the secu XFTP is implemented as an application level protocol on top of HTTP2 and TLS. +This document describes XFTP protocol version 3. 
The version history: + +- v1: initial version +- v2: authenticated commands - added basic auth support for commands +- v3: blocked files - added BLOCKED error type for policy violations + The protocol describes the set of commands that senders and recipients can send to XFTP servers to create, upload, download and delete file chunks of several pre-defined sizes. XFTP servers SHOULD support chunks of 4 sizes: 64KB, 256KB, 1MB and 4MB (1KB = 1024 bytes, 1MB = 1024KB). The protocol is designed with the focus on meta-data privacy and security. While using TLS, the protocol does not rely on TLS security by using additional encryption to achieve that there are no identifiers or ciphertext in common in received and sent server traffic, frustrating traffic correlation even if TLS is compromised. @@ -283,7 +290,7 @@ XFTP server implementations MUST NOT create, store or send to any other servers: - binary-encoded commands sent as fixed-size padded block in the body of HTTP2 POST request, similar to SMP and notifications server protocol transmission encodings. - HTTP2 POST with a fixed size padded block body for file upload and download. -Block size - 4096 bytes (it would fit ~120 Ed25519 recipient keys). +Block size - 16384 bytes (it would fit ~350 Ed25519 recipient keys). 
The reasons to use HTTP2: @@ -320,12 +327,13 @@ Once TLS handshake is complete, client and server will exchange blocks of fixed ```abnf paddedServerHello = -serverHello = xftpVersionRange sessionIdentifier serverCert signedServerKey ignoredPart +serverHello = xftpVersionRange sessionIdentifier serverCerts signedServerKey ignoredPart xftpVersionRange = minXftpVersion maxXftpVersion minXftpVersion = xftpVersion maxXftpVersion = xftpVersion sessionIdentifier = shortString ; unique session identifier derived from transport connection handshake +serverCerts = length 1*serverCert ; NonEmpty list of certificates in chain serverCert = originalLength signedServerKey = originalLength ; signed by server certificate @@ -382,7 +390,7 @@ Commands syntax below is provided using ABNF with case-sensitive strings extensi xftpCommand = ping / senderCommand / recipientCmd / serverMsg senderCommand = register / add / put / delete recipientCmd = get / ack -serverMsg = pong / sndIds / rcvIds / ok / file +serverMsg = pong / sndIds / rcvIds / ok / file / error ``` The syntax of specific commands and responses is defined below. @@ -427,7 +435,7 @@ The syntax is: register = %s"FNEW " fileInfo rcvPublicAuthKeys basicAuth fileInfo = sndKey size digest sndKey = length x509encoded -size = 1*DIGIT +size = 4*4 OCTET ; Word32 big-endian digest = length *OCTET rcvPublicAuthKeys = length 1*rcvPublicAuthKey rcvPublicAuthKey = length x509encoded @@ -509,7 +517,7 @@ If requested file is successfully located, the server must send `file` response. ```abnf file = %s"FILE " sDhKey cbNonce sDhKey = length x509encoded -cbNonce = +cbNonce = 24*24 OCTET ; NaCl crypto_box nonce ``` Chunk is additionally encrypted on the way from the server to the recipient using a key agreed via ephemeral DH keys `rDhKey` and `sDhKey`, so there is no ciphertext in common between sent and received traffic inside TLS connection, in order to complicate traffic correlation attacks, if TLS is compromised. 
@@ -526,6 +534,40 @@ If file recipient ID is successfully deleted, the server must send `ok` response In current implementation of XFTP protocol in SimpleX Chat clients don't use FACK command. Files are automatically expired on servers after configured time interval. +### Error responses + +The server responds with `ERR` followed by the error type: + +```abnf +error = %s"ERR " errorType +errorType = %s"BLOCK" / %s"SESSION" / %s"HANDSHAKE" / + %s"CMD" SP cmdError / %s"AUTH" / %s"BLOCKED" SP blockingInfo / + %s"SIZE" / %s"QUOTA" / %s"DIGEST" / %s"CRYPTO" / + %s"NO_FILE" / %s"HAS_FILE" / %s"FILE_IO" / + %s"TIMEOUT" / %s"INTERNAL" +cmdError = %s"UNKNOWN" / %s"SYNTAX" / %s"PROHIBITED" / %s"NO_AUTH" / %s"HAS_AUTH" / %s"NO_ENTITY" +blockingInfo = %s"reason=" blockingReason ["," %s"notice=" jsonNotice] +blockingReason = %s"spam" / %s"content" +jsonNotice = *OCTET ; JSON-encoded notice object +``` + +Error types: +- `BLOCK` - incorrect block format, encoding or signature size. +- `SESSION` - incorrect session ID (TLS Finished message / tls-unique binding). +- `HANDSHAKE` - incorrect handshake command. +- `CMD` - command syntax errors (UNKNOWN, SYNTAX, PROHIBITED, NO_AUTH, HAS_AUTH, NO_ENTITY). +- `AUTH` - command authorization error - bad signature or non-existing file chunk. +- `BLOCKED` - file chunk was blocked due to policy violation (added in v3). Contains blocking reason and optional notice. +- `SIZE` - incorrect file size. +- `QUOTA` - storage quota exceeded. +- `DIGEST` - incorrect file digest. +- `CRYPTO` - file encryption/decryption failed. +- `NO_FILE` - no expected file body in request/response or no file on the server. +- `HAS_FILE` - unexpected file body. +- `FILE_IO` - file IO error. +- `TIMEOUT` - file sending or receiving timeout. +- `INTERNAL` - internal server error. 
+ ## Threat model #### Global Assumptions From 8518f6087bc0a1143d817d683b3c399dcb8a678a Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 7 Mar 2026 15:47:46 +0000 Subject: [PATCH 13/91] docs: agent threat model --- protocol/agent-protocol.md | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/protocol/agent-protocol.md b/protocol/agent-protocol.md index f7da11c008..f43d195e40 100644 --- a/protocol/agent-protocol.md +++ b/protocol/agent-protocol.md @@ -608,6 +608,58 @@ Agent API uses these events dispatch to notify client application about events r This list of events is not exhaustive and provided for information only. Please consult the source code for more information. +## Threat model + +This threat model complements SimpleX Messaging Protocol [threat model](./overview-tjr.md#threat-model) with agent-level concerns: duplex connections, end-to-end encryption with [post-quantum double ratchet](./pqdr.md), message integrity, connection establishment and queue rotation. Only additional properties not covered in the SMP threat model are listed below. + +This section uses network architecture terminology: SMP servers are referred to as routers, SMP queues as streams, and transport messages as packets — see [SimpleX Network architecture](../docs/network-architecture-v2.md). + +#### Additional global assumptions + + - The connection link is shared via a trusted out-of-band channel. + - Both agents support post-quantum double ratchet (PQDR). + +#### A passive adversary + +*cannot:* + - learn the contents of packets, which are additionally encrypted with the double ratchet independently from per-stream encryption. + +#### Destination router (chosen by the receiving client application) + +*can:* + - correlate streams belonging to the same duplex connection when queue rotation creates a new stream on the same router. 
+ - when both peers of a connection chose the same router, correlate the two directions of the duplex connection. + +*cannot:* + - compromise end-to-end encryption even with full access to the per-stream NaCl DH secret. + - correlate streams belonging to the same connection after queue rotation to a different router. + +#### An attacker who obtained a client application's (decrypted) database + +*can:* + - learn the full communication graph: all communication peers, associated router addresses, and stream identifiers. + +*cannot:* + - decrypt future messages once the client application resumes communication and the double ratchet completes a new ratchet step, provided PQDR is active. + +#### A communication peer + +*can:* + - send malformed agent messages that may affect the client application processing them. + - skip message IDs, causing the recipient to generate and store excessive intermediate ratchet keys. + - prevent double ratchet advancement by not sending messages, delaying break-in recovery. + +*cannot:* + - disrupt packet delivery in other streams. + +#### An attacker who obtained a connection link + +*can:* + - learn the initiating party's chosen router address and public keys. + +*cannot:* + - use the link after the intended recipient has completed the connection. 
+ [1]: https://en.wikipedia.org/wiki/End-to-end_encryption [2]: https://en.wikipedia.org/wiki/Man-in-the-middle_attack [3]: https://tools.ietf.org/html/rfc5234 From 7eaff88b4237ebe71a4ae9a09202036b23283809 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Mon, 2 Mar 2026 17:39:24 +0000 Subject: [PATCH 14/91] product and engineering specifications for simplexmq --- CODE.md | 210 ++++++++++++++++++++++++++++ product/README.md | 22 +++ product/components/agent.md | 9 ++ product/components/notifications.md | 7 + product/components/servers.md | 9 ++ product/components/smp.md | 9 ++ product/components/xftp.md | 9 ++ product/components/xrcp.md | 7 + product/concepts.md | 5 + product/glossary.md | 5 + product/goals.md | 5 + product/rules.md | 5 + product/threat-model.md | 22 +++ spec/README.md | 64 +++++++++ spec/agent-protocol.md | 13 ++ spec/agent.md | 13 ++ spec/compression.md | 7 + spec/crypto-ratchet.md | 13 ++ spec/crypto-tls.md | 11 ++ spec/crypto.md | 19 +++ spec/encoding.md | 11 ++ spec/ntf-protocol.md | 15 ++ spec/ntf-server.md | 11 ++ spec/remote-control.md | 11 ++ spec/security-invariants.md | 19 +++ spec/smp-client.md | 11 ++ spec/smp-protocol.md | 13 ++ spec/smp-server.md | 13 ++ spec/storage-agent.md | 11 ++ spec/storage-server.md | 9 ++ spec/transport-http2.md | 13 ++ spec/transport-websocket.md | 7 + spec/transport.md | 11 ++ spec/version.md | 9 ++ spec/xftp-client.md | 11 ++ spec/xftp-protocol.md | 13 ++ spec/xftp-server.md | 11 ++ spec/xrcp-protocol.md | 13 ++ 38 files changed, 676 insertions(+) create mode 100644 CODE.md create mode 100644 product/README.md create mode 100644 product/components/agent.md create mode 100644 product/components/notifications.md create mode 100644 product/components/servers.md create mode 100644 product/components/smp.md create mode 100644 product/components/xftp.md create mode 100644 product/components/xrcp.md create mode 100644 product/concepts.md create mode 
100644 product/glossary.md create mode 100644 product/goals.md create mode 100644 product/rules.md create mode 100644 product/threat-model.md create mode 100644 spec/README.md create mode 100644 spec/agent-protocol.md create mode 100644 spec/agent.md create mode 100644 spec/compression.md create mode 100644 spec/crypto-ratchet.md create mode 100644 spec/crypto-tls.md create mode 100644 spec/crypto.md create mode 100644 spec/encoding.md create mode 100644 spec/ntf-protocol.md create mode 100644 spec/ntf-server.md create mode 100644 spec/remote-control.md create mode 100644 spec/security-invariants.md create mode 100644 spec/smp-client.md create mode 100644 spec/smp-protocol.md create mode 100644 spec/smp-server.md create mode 100644 spec/storage-agent.md create mode 100644 spec/storage-server.md create mode 100644 spec/transport-http2.md create mode 100644 spec/transport-websocket.md create mode 100644 spec/transport.md create mode 100644 spec/version.md create mode 100644 spec/xftp-client.md create mode 100644 spec/xftp-protocol.md create mode 100644 spec/xftp-server.md create mode 100644 spec/xrcp-protocol.md diff --git a/CODE.md b/CODE.md new file mode 100644 index 0000000000..2b99fff9ef --- /dev/null +++ b/CODE.md @@ -0,0 +1,210 @@ +# simplexmq — LLM Navigation Guide + +This file is the entry point for LLMs working on simplexmq. Read it before making any code changes. + +## Three-Layer Architecture + +simplexmq maintains three documentation layers alongside source code: + +| Layer | Directory | Answers | Audience | +|-------|-----------|---------|----------| +| **Product** | `product/` | What does this do? Who uses it? What must never break? | Anyone reasoning about behavior, privacy, security | +| **Spec** | `spec/` | How does the code work? What does each function do? What are the security invariants? | LLMs and developers modifying code | +| **Protocol** | `protocol/` | What is the wire protocol? What are the message formats and state machines? 
| Protocol implementors, formal verification | + +Additionally: +- `rfcs/` — Protocol evolution: each RFC describes a delta to a protocol spec +- `product/threat-model.md` — Comprehensive threat model across all protocols +- `spec/security-invariants.md` — Every security invariant with enforcement and test coverage + +## Navigation Workflow + +When modifying code, follow this sequence: + +1. **Identify scope** — Find the relevant component in `product/concepts.md` +2. **Load product context** — Read the component file in `product/components/` to understand what users depend on +3. **Load spec context** — Read the relevant `spec/` file(s) for implementation details and call graphs +4. **Check security** — Read `spec/security-invariants.md` for any invariants enforced by the code you're changing +5. **Load source** — Read the actual source files referenced in spec/ +6. **Identify impact** — Trace the call graph to understand what your change affects +7. **Implement** — Make the change +8. **Update all layers** — Update spec/, product/, and protocol/ (if wire protocol changed) to stay coherent + +## Protocol Specifications + +Consolidated protocol specs live in `protocol/`. These describe the wire protocols as originally specified. Code has advanced beyond these versions — Phase 2 of this project will synchronize them. 
+ +| File | Protocol | Spec version | Code version | +|------|----------|-------------|--------------| +| `simplex-messaging.md` | SMP (simplex messaging) | v9 | SMP relay v18, SMP client v4 | +| `agent-protocol.md` | Agent (duplex connections) | v5 | Agent v7 | +| `xftp.md` | XFTP (file transfer) | v2 | XFTP v3 | +| `xrcp.md` | XRCP (remote control) | v1 | RCP v1 | +| `push-notifications.md` | Push notifications | v2 | NTF v3 | +| `pqdr.md` | PQDR (post-quantum double ratchet) | v1 | E2E v3 | +| `overview-tjr.md` | Cross-protocol overview | — | — | + +Note: SMP has multiple version axes — `VersionSMP` (relay/transport, currently 18), `VersionSMPC` (client protocol, currently 4), and `VersionSMPA` (agent, currently 7). These are negotiated independently. + +Protocol specs are amended in place when implementation changes. RFCs in `rfcs/` track the evolution history. + +## Source Structure + +``` +src/Simplex/ + Messaging/ + Protocol.hs, Protocol/Types.hs — SMP wire protocol types + encoding + Client.hs — SMP client (protocol operations, proxy relay) + Client/Agent.hs — Low-level async SMP agent + Server.hs — SMP server request handling + Server/Env/STM.hs — Server environment + STM state + Server/Main.hs, Server/Main/Init.hs — Server CLI + initialization + Server/QueueStore/ — Queue storage (STM, Postgres) + Server/MsgStore/ — Message storage (STM, Journal, Postgres) + Server/MsgStore/Journal.hs — Journal message store (1000 lines) + Server/StoreLog/ — Store log (append-only write, read-compact-rewrite restore) + Server/NtfStore.hs — Message notification store + Server/Control.hs, Server/CLI.hs — Control protocol + CLI utilities + Server/Stats.hs, Server/Prometheus.hs — Metrics + Server/Information.hs — Server public information / metadata + Agent.hs — SMP agent: duplex connections, queue rotation + Agent/Client.hs — Agent's SMP/XFTP/NTF client management + Agent/Protocol.hs — Agent wire protocol types + encoding (2200 lines) + Agent/Store.hs — Agent storage types 
(queues, connections, messages) + Agent/Store/AgentStore.hs — Agent storage implementation (3500 lines) + Agent/Store/ — Agent storage backends (SQLite, Postgres) + Agent/Env/SQLite.hs — Agent environment + configuration + Agent/NtfSubSupervisor.hs — Notification subscription management + Agent/TSessionSubs.hs — Transport session subscriptions + Agent/Stats.hs — Agent statistics + Agent/RetryInterval.hs — Retry interval logic + Agent/Lock.hs — Named locks + Agent/QueryString.hs — Query string parsing + Transport.hs — TLS transport abstraction + handshake + Transport/Client.hs, Transport/Server.hs — TLS client + server + Transport/HTTP2.hs — HTTP/2 transport setup + Transport/HTTP2/Client.hs — HTTP/2 client + Transport/HTTP2/Server.hs — HTTP/2 server + Transport/HTTP2/File.hs — HTTP/2 file streaming + Transport/WebSockets.hs — WebSocket adapter + Transport/Buffer.hs — Transport buffering + Transport/KeepAlive.hs — TCP keepalive + Transport/Shared.hs — Certificate chain validation + Transport/Credentials.hs — TLS credential generation + Crypto.hs — All cryptographic primitives + Crypto/File.hs — File encryption (NaCl secret box + lazy) + Crypto/Lazy.hs — Lazy hashing + encryption + Crypto/Ratchet.hs — Double ratchet + PQDR + Crypto/ShortLink.hs — Short link key derivation + Crypto/SNTRUP761.hs — Post-quantum KEM hybrid secret + Crypto/SNTRUP761/Bindings.hs — sntrup761 C FFI bindings + Notifications/Protocol.hs — NTF wire protocol types + encoding + Notifications/Types.hs — NTF agent types (tokens, subscriptions) + Notifications/Transport.hs — NTF transport handshake + Notifications/Client.hs — NTF client operations + Notifications/Server.hs — NTF server + Notifications/Server/Env.hs — NTF server environment + config + Notifications/Server/Store.hs — NTF server storage (STM) + Notifications/Server/Store/Postgres.hs — NTF server storage (Postgres) + Notifications/Server/Push/APNS.hs — Apple push notification integration + Notifications/Server/Push/APNS/Internal.hs — 
APNS HTTP/2 client + Notifications/Server/Main.hs — NTF server CLI + Notifications/Server/Stats.hs — NTF server metrics + Notifications/Server/Prometheus.hs — NTF Prometheus metrics + Notifications/Server/Control.hs — NTF server control + Encoding.hs, Encoding/String.hs — Binary + string encoding + Version.hs, Version/Internal.hs — Version ranges + negotiation + Util.hs — Utilities (error handling, STM, grouping) + Parsers.hs — Attoparsec parser combinators + TMap.hs — Transactional map (STM) + Compression.hs — Zstd compression + ServiceScheme.hs — Service scheme + server location types + Session.hs — Session variables (TVar-based) + SystemTime.hs — Rounded system time types + FileTransfer/ + Protocol.hs — XFTP wire protocol types + encoding + Client.hs — XFTP client operations + Client/Agent.hs — XFTP client agent (connection pooling) + Client/Main.hs — XFTP CLI client implementation + Client/Presets.hs — Default XFTP servers + Server.hs — XFTP server request handling + Server/Env.hs — XFTP server environment + config + Server/Store.hs — XFTP server storage + Server/StoreLog.hs — XFTP server store log + Server/Main.hs — XFTP server CLI + Server/Stats.hs — XFTP server metrics + Server/Prometheus.hs — XFTP Prometheus metrics + Server/Control.hs — XFTP server control + Agent.hs — XFTP agent operations + Description.hs — File description format + Transport.hs — XFTP transport + Crypto.hs — File encryption for transfer + Types.hs — File transfer types + Chunks.hs — Chunk sizing + RemoteControl/ + Client.hs — XRCP client (ctrl device) + Invitation.hs — XRCP invitation handling + Discovery.hs — Local network discovery + Discovery/Multicast.hsc — Multicast discovery (C FFI) + Types.hs — XRCP types + version + +apps/ + smp-server/Main.hs — SMP server executable + smp-server/web/Static.hs — SMP server web static files + xftp-server/Main.hs — XFTP server executable + xftp/Main.hs — XFTP CLI executable + ntf-server/Main.hs — Notification server executable + smp-agent/Main.hs 
— SMP agent (experimental, not in cabal) +``` + +## Linking Conventions + +### spec → src +Fully qualified exported function names inline in prose: `Simplex.Messaging.Client.connectSMPProxiedRelay`. Use Grep/Glob to locate in source. For app targets: `xftp/Main.main`. + +### src → spec +Comment above function: +```haskell +-- spec/crypto-tls.md#certificate-chain-validation +-- Validates relay certificate chain to prevent proxy MITM (SI-XX) +connectSMPProxiedRelay :: ... +``` + +### spec ↔ spec +Named markdown heading anchors: `spec/crypto.md#ed25519-signing` + +### spec ↔ product +Cross-references: `product/rules.md#pr-05`, `spec/security-invariants.md#si-01` + +### protocol/ references +`protocol/simplex-messaging.md` with section name + +## Build Flags + +simplexmq builds with several flag combinations: + +| Flag | Effect | +|------|--------| +| (none) | Default: SQLite storage, all executables | +| `-fserver_postgres` | Postgres backend for SMP server | +| `-fclient_postgres` | Postgres backend for agent storage | +| `-fclient_library` | Library-only build (no server executables) | +| `-fswift` | Swift JSON format for mobile bindings | +| `-fuse_crypton` | Use crypton in cryptostore | + +All flag combinations must compile with `--enable-tests`. Verify with: +``` +cabal build all --ghc-options="-O0" [-flags] [--enable-tests] +``` + +## Change Protocol + +Every code change must maintain coherence across all three layers: + +1. **Code change** — Implement in src/ +2. **Spec update** — Update the relevant spec/ file(s): types, call graphs, security notes +3. **Product update** — If user-visible behavior changed, update product/ files +4. **Protocol update** — If wire protocol changed, amend protocol/ spec (requires user approval) +5. **Security check** — If the change touches a trust boundary, update spec/security-invariants.md + +Protocol spec amendments require explicit user approval before committing. 
diff --git a/product/README.md b/product/README.md new file mode 100644 index 0000000000..a3466bc662 --- /dev/null +++ b/product/README.md @@ -0,0 +1,22 @@ +# SimpleX Network — Product Layer + +> What does this do? Who uses it? What must never break? + +## Vision + + + +## Components + +| Component | Description | Spec | Protocol | +|-----------|-------------|------|----------| +| SMP | Simplex messaging queues | spec/smp-protocol.md | protocol/simplex-messaging.md | +| Agent | Duplex connections over simplex queues | spec/agent.md | protocol/agent-protocol.md | +| XFTP | File transfer via encrypted chunks | spec/xftp-protocol.md | protocol/xftp.md | +| XRCP | Remote control of mobile clients | spec/remote-control.md | protocol/xrcp.md | +| NTF | Push notifications with privacy | spec/ntf-protocol.md | protocol/push-notifications.md | +| Servers | SMP, XFTP, NTF server operation | spec/smp-server.md | — | + +## Capability Map + + diff --git a/product/components/agent.md b/product/components/agent.md new file mode 100644 index 0000000000..f09b878a1c --- /dev/null +++ b/product/components/agent.md @@ -0,0 +1,9 @@ +# Agent — Duplex Connections + +> Bidirectional connections built over pairs of simplex queues. + +## Users + +## Connection Lifecycle + +## Guarantees diff --git a/product/components/notifications.md b/product/components/notifications.md new file mode 100644 index 0000000000..4ad41f4630 --- /dev/null +++ b/product/components/notifications.md @@ -0,0 +1,7 @@ +# Push Notifications + +> Push notifications with metadata privacy. + +## Users + +## Privacy Trade-offs diff --git a/product/components/servers.md b/product/components/servers.md new file mode 100644 index 0000000000..9e6ea5300a --- /dev/null +++ b/product/components/servers.md @@ -0,0 +1,9 @@ +# Server Operation + +> SMP, XFTP, and NTF server deployment and operation. 
+ +## Deployment + +## Configuration + +## Monitoring diff --git a/product/components/smp.md b/product/components/smp.md new file mode 100644 index 0000000000..a7fcadc98c --- /dev/null +++ b/product/components/smp.md @@ -0,0 +1,9 @@ +# SMP — Simplex Messaging Protocol + +> Unidirectional messaging queues with sender/receiver separation. + +## Users + +## Guarantees + +## Privacy Properties diff --git a/product/components/xftp.md b/product/components/xftp.md new file mode 100644 index 0000000000..104a5f9b48 --- /dev/null +++ b/product/components/xftp.md @@ -0,0 +1,9 @@ +# XFTP — File Transfer + +> Encrypted file transfer via content-addressed chunks. + +## Users + +## Guarantees + +## Privacy Properties diff --git a/product/components/xrcp.md b/product/components/xrcp.md new file mode 100644 index 0000000000..bd6b015763 --- /dev/null +++ b/product/components/xrcp.md @@ -0,0 +1,7 @@ +# XRCP — Remote Control + +> Remote control of mobile clients from desktop. + +## Users + +## Trust Model diff --git a/product/concepts.md b/product/concepts.md new file mode 100644 index 0000000000..c67705a8f7 --- /dev/null +++ b/product/concepts.md @@ -0,0 +1,5 @@ +# Concepts & Entity Index + +> Domain concepts with cross-references to spec/ and src/. + + diff --git a/product/glossary.md b/product/glossary.md new file mode 100644 index 0000000000..87ad50fbf5 --- /dev/null +++ b/product/glossary.md @@ -0,0 +1,5 @@ +# Glossary + +> Domain terminology used across simplexmq. + + diff --git a/product/goals.md b/product/goals.md new file mode 100644 index 0000000000..26f0fcf43c --- /dev/null +++ b/product/goals.md @@ -0,0 +1,5 @@ +# Design Goals + +> Verified against protocol specs and code. + + diff --git a/product/rules.md b/product/rules.md new file mode 100644 index 0000000000..c363de59f6 --- /dev/null +++ b/product/rules.md @@ -0,0 +1,5 @@ +# Invariant Rules + +> Invariants users depend on: privacy, delivery, ordering, security. 
+ + diff --git a/product/threat-model.md b/product/threat-model.md new file mode 100644 index 0000000000..511cdebcf6 --- /dev/null +++ b/product/threat-model.md @@ -0,0 +1,22 @@ +# Threat Model + +> Comprehensive threat model across all protocols. + +Consistent with threat models in: +- `protocol/overview-tjr.md` (cross-protocol) +- `protocol/simplex-messaging.md` (SMP) +- `protocol/xftp.md` (XFTP) +- `protocol/xrcp.md` (XRCP) +- `protocol/push-notifications.md` (notifications) + +## Actors + + + +## Trust Boundaries + + + +## Security Properties + + diff --git a/spec/README.md b/spec/README.md new file mode 100644 index 0000000000..83ce5097c9 --- /dev/null +++ b/spec/README.md @@ -0,0 +1,64 @@ +# Spec Layer + +> How does the code work? What does each function do? What are the security invariants? + +## Conventions + +Each spec file documents: +1. **Purpose** — What this component does +2. **Protocol reference** — Link to `protocol/` file (where applicable) +3. **Types** — Key data types with field descriptions +4. **Functions** — Every exported function with call graph +5. **Security notes** — Trust assumptions, validation requirements + +Function documentation format: +``` +### Module.functionName +**Purpose**: ... +**Calls**: Module.a, Module.b +**Called by**: Module.c +**Invariant**: SI-XX +**Security**: ... 
+``` + +## Index + +### Protocol Implementation +- [smp-protocol.md](smp-protocol.md) — SMP commands, types, encoding +- [xftp-protocol.md](xftp-protocol.md) — XFTP commands, chunk operations +- [ntf-protocol.md](ntf-protocol.md) — NTF commands, token/subscription lifecycle +- [xrcp-protocol.md](xrcp-protocol.md) — XRCP session handshake, commands +- [agent-protocol.md](agent-protocol.md) — Agent connection procedures, queue rotation + +### Cryptography +- [crypto.md](crypto.md) — All primitives: Ed25519, X25519, NaCl, AES-GCM, SHA, HKDF +- [crypto-ratchet.md](crypto-ratchet.md) — Double ratchet + PQDR +- [crypto-tls.md](crypto-tls.md) — TLS setup, certificate chains, validation + +### Transport +- [transport.md](transport.md) — Transport abstraction, handshake, block padding +- [transport-http2.md](transport-http2.md) — HTTP/2 framing, file streaming +- [transport-websocket.md](transport-websocket.md) — WebSocket adapter + +### Server Implementations +- [smp-server.md](smp-server.md) — SMP server +- [xftp-server.md](xftp-server.md) — XFTP server +- [ntf-server.md](ntf-server.md) — Notification server + +### Client Implementations +- [smp-client.md](smp-client.md) — SMP client, proxy relay +- [xftp-client.md](xftp-client.md) — XFTP client +- [agent.md](agent.md) — SMP agent, duplex connections + +### Storage +- [storage-server.md](storage-server.md) — Server storage backends +- [storage-agent.md](storage-agent.md) — Agent storage backends + +### Auxiliary +- [encoding.md](encoding.md) — Binary and string encoding +- [version.md](version.md) — Version ranges and negotiation +- [remote-control.md](remote-control.md) — XRCP implementation +- [compression.md](compression.md) — Zstd compression + +### Security +- [security-invariants.md](security-invariants.md) — All security invariants diff --git a/spec/agent-protocol.md b/spec/agent-protocol.md new file mode 100644 index 0000000000..b84ffb9ceb --- /dev/null +++ b/spec/agent-protocol.md @@ -0,0 +1,13 @@ +# Agent 
Protocol Implementation + +> Implements agent connection procedures, queue rotation, and duplex messaging. + +**Protocol reference**: [`protocol/agent-protocol.md`](../protocol/agent-protocol.md) + +## Types + +## Connection Procedures + +## Queue Rotation + +## Functions diff --git a/spec/agent.md b/spec/agent.md new file mode 100644 index 0000000000..250bf22534 --- /dev/null +++ b/spec/agent.md @@ -0,0 +1,13 @@ +# SMP Agent + +> SMP agent implementation: duplex connections, queue rotation, ratchet sync, and notification subscriptions. + +## Duplex Connections + +## Queue Rotation + +## Ratchet Sync + +## Notification Subscriptions + +## Functions diff --git a/spec/compression.md b/spec/compression.md new file mode 100644 index 0000000000..7e457438b9 --- /dev/null +++ b/spec/compression.md @@ -0,0 +1,7 @@ +# Compression + +> Compression support for SimpleX protocols. + +## Zstd + +## Functions diff --git a/spec/crypto-ratchet.md b/spec/crypto-ratchet.md new file mode 100644 index 0000000000..de5af38a28 --- /dev/null +++ b/spec/crypto-ratchet.md @@ -0,0 +1,13 @@ +# Double Ratchet & PQDR + +> Implements the double ratchet algorithm with post-quantum extensions (PQDR). + +**Protocol reference**: [`protocol/pqdr.md`](../protocol/pqdr.md) + +## State + +## Transitions + +## Key Derivation + +## Functions diff --git a/spec/crypto-tls.md b/spec/crypto-tls.md new file mode 100644 index 0000000000..9327ae69a7 --- /dev/null +++ b/spec/crypto-tls.md @@ -0,0 +1,11 @@ +# TLS & Certificate Chains + +> TLS session setup, certificate chain construction, and server identity validation. + +## TLS Setup + +## Certificate Validation + +## Trust Anchoring + +## Functions diff --git a/spec/crypto.md b/spec/crypto.md new file mode 100644 index 0000000000..ec8fb0a497 --- /dev/null +++ b/spec/crypto.md @@ -0,0 +1,19 @@ +# Cryptographic Primitives + +> All cryptographic primitives used across SimpleX protocols. 
+ +## Ed25519 + +## X25519 + +## NaCl + +## AES-GCM + +## SHA + +## HKDF + +## Key Generation + +## Functions diff --git a/spec/encoding.md b/spec/encoding.md new file mode 100644 index 0000000000..2b8dded012 --- /dev/null +++ b/spec/encoding.md @@ -0,0 +1,11 @@ +# Encoding + +> Binary and string encoding used across all SimpleX protocols. + +## Binary Encoding + +## String Encoding + +## Parsers + +## Functions diff --git a/spec/ntf-protocol.md b/spec/ntf-protocol.md new file mode 100644 index 0000000000..c826e7e722 --- /dev/null +++ b/spec/ntf-protocol.md @@ -0,0 +1,15 @@ +# NTF Protocol Implementation + +> Implements NTF commands, token registration, and subscription lifecycle for push notifications. + +**Protocol reference**: [`protocol/push-notifications.md`](../protocol/push-notifications.md) + +## Types + +## Commands + +## Token Lifecycle + +## Subscription Lifecycle + +## Functions diff --git a/spec/ntf-server.md b/spec/ntf-server.md new file mode 100644 index 0000000000..4a39957e3b --- /dev/null +++ b/spec/ntf-server.md @@ -0,0 +1,11 @@ +# Notification Server + +> Notification server implementation: token management, subscriptions, and APNS integration. + +## Token Management + +## Subscription Management + +## APNS Integration + +## Functions diff --git a/spec/remote-control.md b/spec/remote-control.md new file mode 100644 index 0000000000..5a064437c8 --- /dev/null +++ b/spec/remote-control.md @@ -0,0 +1,11 @@ +# Remote Control (XRCP) + +> XRCP implementation: discovery, invitation, and session management. + +## Discovery + +## Invitation + +## Session Management + +## Functions diff --git a/spec/security-invariants.md b/spec/security-invariants.md new file mode 100644 index 0000000000..fc1323665d --- /dev/null +++ b/spec/security-invariants.md @@ -0,0 +1,19 @@ +# Security Invariants + +> Every security invariant with enforcement and test coverage. 
+ +## Format + +``` +### SI-XX: [Name] +**Statement**: [Precise invariant] +**Threat**: [What attack this prevents] +**Actors**: [Which threat model actors are relevant] +**Enforced by**: [Qualified function names] — [how] +**Tested by**: [test module.function] or [MISSING TEST] +**Product rule**: PR-XX +``` + +## Invariants + + diff --git a/spec/smp-client.md b/spec/smp-client.md new file mode 100644 index 0000000000..39ae87f9ae --- /dev/null +++ b/spec/smp-client.md @@ -0,0 +1,11 @@ +# SMP Client + +> SMP client implementation: protocol operations, proxy relay, and reconnection logic. + +## Protocol Operations + +## Proxy Relay + +## Reconnection + +## Functions diff --git a/spec/smp-protocol.md b/spec/smp-protocol.md new file mode 100644 index 0000000000..0def979418 --- /dev/null +++ b/spec/smp-protocol.md @@ -0,0 +1,13 @@ +# SMP Protocol Implementation + +> Implements SMP commands, types, and binary encoding for the SimpleX Messaging Protocol. + +**Protocol reference**: [`protocol/simplex-messaging.md`](../protocol/simplex-messaging.md) + +## Types + +## Commands + +## Encoding + +## Functions diff --git a/spec/smp-server.md b/spec/smp-server.md new file mode 100644 index 0000000000..696d190673 --- /dev/null +++ b/spec/smp-server.md @@ -0,0 +1,13 @@ +# SMP Server + +> SMP server implementation: connection handling, queue operations, proxying, and control port. + +## Connection Handling + +## Queue Operations + +## Proxying + +## Control + +## Functions diff --git a/spec/storage-agent.md b/spec/storage-agent.md new file mode 100644 index 0000000000..4ba4c414cb --- /dev/null +++ b/spec/storage-agent.md @@ -0,0 +1,11 @@ +# Agent Storage + +> Agent storage backends: SQLite, Postgres, and migration framework. 
+ +## SQLite Backend + +## Postgres Backend + +## Migration Framework + +## Functions diff --git a/spec/storage-server.md b/spec/storage-server.md new file mode 100644 index 0000000000..b2dec18425 --- /dev/null +++ b/spec/storage-server.md @@ -0,0 +1,9 @@ +# Server Storage + +> Server storage backends: STM queues and message stores (STM, Journal, Postgres). + +## STM Queues + +## Message Stores (STM, Journal, Postgres) + +## Functions diff --git a/spec/transport-http2.md b/spec/transport-http2.md new file mode 100644 index 0000000000..2594b84311 --- /dev/null +++ b/spec/transport-http2.md @@ -0,0 +1,13 @@ +# HTTP/2 Transport + +> HTTP/2 framing, client and server sessions, and file streaming for XFTP. + +## Framing + +## Client Sessions + +## Server Sessions + +## File Streaming + +## Functions diff --git a/spec/transport-websocket.md b/spec/transport-websocket.md new file mode 100644 index 0000000000..182a43c47c --- /dev/null +++ b/spec/transport-websocket.md @@ -0,0 +1,7 @@ +# WebSocket Transport + +> WebSocket adapter for browser-based SimpleX clients. + +## Adapter + +## Functions diff --git a/spec/transport.md b/spec/transport.md new file mode 100644 index 0000000000..0e50a67d94 --- /dev/null +++ b/spec/transport.md @@ -0,0 +1,11 @@ +# Transport Layer + +> Transport abstraction, handshake protocol, and block padding for metadata privacy. + +## Abstraction + +## Handshake Protocol + +## Block Padding + +## Functions diff --git a/spec/version.md b/spec/version.md new file mode 100644 index 0000000000..f5b954534e --- /dev/null +++ b/spec/version.md @@ -0,0 +1,9 @@ +# Version Negotiation + +> Version ranges and compatibility checking for protocol evolution. 
+ +## Version Ranges + +## Compatibility + +## Functions diff --git a/spec/xftp-client.md b/spec/xftp-client.md new file mode 100644 index 0000000000..99306bb73e --- /dev/null +++ b/spec/xftp-client.md @@ -0,0 +1,11 @@ +# XFTP Client + +> XFTP client implementation: file operations, CLI interface, and agent integration. + +## File Operations + +## CLI + +## Agent + +## Functions diff --git a/spec/xftp-protocol.md b/spec/xftp-protocol.md new file mode 100644 index 0000000000..26eb950bee --- /dev/null +++ b/spec/xftp-protocol.md @@ -0,0 +1,13 @@ +# XFTP Protocol Implementation + +> Implements XFTP commands, types, and chunk operations for the SimpleX File Transfer Protocol. + +**Protocol reference**: [`protocol/xftp.md`](../protocol/xftp.md) + +## Types + +## Commands + +## Chunk Operations + +## Functions diff --git a/spec/xftp-server.md b/spec/xftp-server.md new file mode 100644 index 0000000000..bdcbbb9aad --- /dev/null +++ b/spec/xftp-server.md @@ -0,0 +1,11 @@ +# XFTP Server + +> XFTP server implementation: chunk storage, recipient management, and control port. + +## Chunk Storage + +## Recipient Management + +## Control + +## Functions diff --git a/spec/xrcp-protocol.md b/spec/xrcp-protocol.md new file mode 100644 index 0000000000..8f084f7ca8 --- /dev/null +++ b/spec/xrcp-protocol.md @@ -0,0 +1,13 @@ +# XRCP Protocol Implementation + +> Implements XRCP session handshake and commands for remote control of SimpleX clients. 
+ +**Protocol reference**: [`protocol/xrcp.md`](../protocol/xrcp.md) + +## Types + +## Session Handshake + +## Commands + +## Functions From c7ff63743796ae554cc42190a49e9c3dab18eb8f Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Mon, 9 Mar 2026 10:29:12 +0000 Subject: [PATCH 15/91] update CODE.md --- contributing/CODE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/contributing/CODE.md b/contributing/CODE.md index ab5d7efccf..eefe68f6cc 100644 --- a/contributing/CODE.md +++ b/contributing/CODE.md @@ -92,6 +92,7 @@ cabal list-bin exe:smp-server ### Cabal Flags - `swift`: Enable Swift JSON format +- `use_crypton`: Use crypton in cryptostore (default: enabled) - `client_library`: Build without server code - `client_postgres`: Use PostgreSQL instead of SQLite for agent persistence - `server_postgres`: PostgreSQL support for server queue/notification store From 40875e319985b6dcec43a4a646aaaff7775ea751 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Mon, 9 Mar 2026 12:27:02 +0000 Subject: [PATCH 16/91] update --- contributing/PROJECT.md | 2 +- product/threat-model.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contributing/PROJECT.md b/contributing/PROJECT.md index cda71597d9..5e8f4c6b58 100644 --- a/contributing/PROJECT.md +++ b/contributing/PROJECT.md @@ -13,7 +13,7 @@ Key components: - **SMP Client**: Functional API with STM-based message delivery ([code](../src/Simplex/Messaging/Client.hs)). - **SMP Agent**: High-level duplex connections via multiple simplex queues with E2E encryption ([code](../src/Simplex/Messaging/Agent.hs)). Implements Agent-to-agent protocol ([code](../src/Simplex/Messaging/Agent/Protocol.hs), [spec](../protocol/agent-protocol.md)) via intermediary agent client ([code](../src/Simplex/Messaging/Agent/Client.hs)). 
- **XFTP**: SimpleX File Transfer Protocol, server and CLI client ([code](../src/Simplex/FileTransfer/), [spec](../protocol/xftp.md)). -- **XRCP**: SimpleX Remote Control Protocol ([code](`../src/Simplex/RemoteControl/`), [spec](../protocol/xrcp.md)). +- **XRCP**: SimpleX Remote Control Protocol ([code](../src/Simplex/RemoteControl/), [spec](../protocol/xrcp.md)). - **Notifications**: Push notifications server requires PostgreSQL ([code](../src/Simplex/Messaging/Notifications), [executable](../apps/ntf-server/)). Client protocol is used for clients to communicate with the server ([code](../src/Simplex/Messaging/Notifications/Protocol.hs), [spec](../protocol/push-notifications.md)). For subscribing to SMP notifications the server uses [lightweight SMP client](../src/Simplex/Messaging/Client/Agent.hs). ## Architecture diff --git a/product/threat-model.md b/product/threat-model.md index 511cdebcf6..efc2effd03 100644 --- a/product/threat-model.md +++ b/product/threat-model.md @@ -5,6 +5,7 @@ Consistent with threat models in: - `protocol/overview-tjr.md` (cross-protocol) - `protocol/simplex-messaging.md` (SMP) +- `protocol/agent-protocol.md` (Agent: duplex connections, ratchet, queue rotation) - `protocol/xftp.md` (XFTP) - `protocol/xrcp.md` (XRCP) - `protocol/push-notifications.md` (notifications) From 3c5752383d450b4f310b16ed84a90ad3a2fa1bcb Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Mon, 9 Mar 2026 16:29:05 +0000 Subject: [PATCH 17/91] update protocol docs --- protocol/agent-protocol.md | 46 ++++++++++++++++---- protocol/push-notifications.md | 28 +++++++++++-- protocol/simplex-messaging.md | 77 ++++++++++++++++++++-------------- protocol/xrcp.md | 4 +- 4 files changed, 110 insertions(+), 45 deletions(-) diff --git a/protocol/agent-protocol.md b/protocol/agent-protocol.md index f43d195e40..83898d9b9e 100644 --- a/protocol/agent-protocol.md +++ b/protocol/agent-protocol.md @@ -177,7 +177,7 @@ These 
messages are encrypted with per-queue shared secret using NaCL crypto_box decryptedSMPClientMessage = agentConfirmation / agentMsgEnvelope / agentInvitation / agentRatchetKey agentConfirmation = agentVersion %s"C" ("0" / "1" sndE2EEncryptionParams) encConnInfo agentVersion = 2*2 OCTET -sndE2EEncryptionParams = TODO +sndE2EEncryptionParams = encConnInfo = doubleRatchetEncryptedMessage agentMsgEnvelope = agentVersion %s"M" encAgentMessage @@ -187,11 +187,20 @@ agentInvitation = agentVersion %s"I" connReqLength connReq connInfo connReqLength = 2*2 OCTET ; Word16 agentRatchetKey = agentVersion %s"R" rcvE2EEncryptionParams agentRatchetInfo -rcvE2EEncryptionParams = TODO +rcvE2EEncryptionParams = -doubleRatchetEncryptedMessage = TODO +doubleRatchetEncryptedMessage = ``` +The maximum sizes of the encrypted connection info and agent message depend on whether post-quantum key exchange is used: + +| Constant | PQ on | PQ off | +|----------|-------|--------| +| `e2eEncConnInfoLength` | 11106 | 14832 | +| `e2eEncAgentMsgLength` | 13618 | 15840 | + +The PQ-on sizes are smaller because the ratchet header and reply link include larger PQ keys (SNTRUP761). + This syntax of decrypted SMP client message body is defined by `decryptedAgentMessage` below.
Decrypted SMP message client body can be one of 4 types: @@ -465,10 +474,20 @@ fixedData = agentVersionRange rootKey linkConnReq [linkEntityId] agentVersionRange = version version ; min and max agent protocol version version = 2*2 OCTET rootKey = length x509encoded ; Ed25519 public key +linkConnReq = connectionRequestUri ; see full connection link syntax above linkEntityId = shortString userData = invitationLinkData / contactLinkData -invitationLinkData = %s"I" agentVersionRange connInfo +invitationLinkData = %s"I" agentVersionRange userLinkData contactLinkData = %s"C" agentVersionRange userContactData +userLinkData = *OCTET ; opaque application data (e.g., user profile) +userContactData = direct ownersList relaysList userLinkData +direct = %s"T" / %s"F" ; whether direct connection via connReq is allowed +ownersList = length *ownerAuth +ownerAuth = shortString ; length-prefixed encoding of (ownerId ownerKey authOwnerSig) +ownerId = shortString ; application-specific owner ID (e.g., MemberId) +ownerKey = length x509encoded ; Ed25519 public key +authOwnerSig = 64*64 OCTET ; Ed25519 signature of (ownerId || ownerKey) by previous owner +relaysList = length *connShortLink ; alternative relay short links largeString = 2*2 OCTET *OCTET ; Word16 length prefix length = 1*1 OCTET shortString = length *OCTET @@ -564,6 +583,14 @@ This api is also used to acknowledge message delivery to the sending party - tha `getNotificationMessage` is used by push notification subsystem of the client application to receive the message from a specific messaging queue mentioned in the notification. The client application would receive `MSG` and any other events from the agent, and then `MSGNTF` event once the message related to this notification is received. +#### Set short link data + +`setConnectionLink` api (`LSET` command) is used to set or update short link data associated with a contact address queue. Returns `LINK` event with the short link URI. 
+ +#### Get short link data + +`getConnectionLink` api (`LGET` command) is used to retrieve and decrypt the short link data from the server. Returns `LDATA` event with the decrypted link data. + #### Rotate message queue to another server `switchConnection` api is used to rotate connection queues to another messaging server. @@ -574,7 +601,7 @@ This api is also used to acknowledge message delivery to the sending party - tha #### Delete connection -`deleteConnection` api is used to delete connection. In case of asynchronous call, the connection deletion will be confirmed with `DEL_RCVQ` and `DEL_CONN` events. +`deleteConnection` api is used to delete connection. In case of asynchronous call, the connection deletion will be confirmed with `DEL_RCVQS` and `DEL_CONNS` events. #### Suspend connection @@ -601,8 +628,13 @@ Agent API uses these events dispatch to notify client application about events r - `MSGNTF` - sent after agent received and processed the message referenced in the push notification. - `RCVD` - notification confirming message receipt by another party. - `QCONT` - notification that the agent continued sending messages after queue capacity was exceeded and recipient received all messages. -- `DEL_RCVQ` - confirmation that message queue was deleted. -- `DEL_CONN` - confirmation that connection was deleted. +- `LINK` - short link URI created or updated for a contact address. +- `LDATA` - decrypted short link data received from the server. +- `DELD` - notification that the connection was deleted. +- `JOINED` - notification that a member joined via a contact address. +- `STAT` - connection statistics event. +- `DEL_RCVQS` - confirmation that receiver message queues were deleted. +- `DEL_CONNS` - confirmation that connections were deleted. - `OK` - confirmation that asynchronous api call was successful. - `ERR` - error of asynchronous api call or some other error event. 
diff --git a/protocol/push-notifications.md b/protocol/push-notifications.md index 6d5e1dea07..c5b2ce1548 100644 --- a/protocol/push-notifications.md +++ b/protocol/push-notifications.md @@ -1,7 +1,12 @@ -Version 2, 2024-06-22 +Version 3, 2025-01-24 # Overview of push notifications for SimpleX Messaging Servers +This document describes Notification Server protocol version 3. Version history: +- v1: initial version +- v2: authenticated commands, command batching +- v3: detailed invalid token reason + ## Table of contents - [Introduction](#introduction) @@ -91,13 +96,15 @@ To manage notification subscriptions to SMP servers, SimpleX Notification Server This protocol sends requests and responses in a fixed size blocks of 512 bytes over TLS, uses the same [syntax of protocol transmissions](./simplex-messaging.md#smp-transmission-and-transport-block-structure) as SMP protocol, and has the same transport [handshake syntax](./simplex-messaging.md#transport-handshake) (except the server certificate is not included in the handshake). +The client and server use the ALPN extension with the `ntf/1` protocol name to agree on the handshake version.
+ Protocol commands have this syntax: ``` ntfServerTransmission = ntfServerCmd = newTokenCmd / verifyTokenCmd / checkTokenCmd / replaceTokenCmd / deleteTokenCmd / cronCmd / - newSubCmd / checkSubCmd / deleteSubCmd + newSubCmd / checkSubCmd / deleteSubCmd / pingCmd ``` ### Register new notification token @@ -159,7 +166,9 @@ The response to this command: ```abnf tokenStatusResp = %s"TKN" SP tokenStatus -tokenStatus = %s"NEW" / %s"REGISTERED" / %s"INVALID" / %s"CONFIRMED" / %s"ACTIVE" / %s"EXPIRED" +tokenStatus = %s"NEW" / %s"REGISTERED" / tokenInvalid / %s"CONFIRMED" / %s"ACTIVE" / %s"EXPIRED" +tokenInvalid = %s"INVALID" ["," invalidReason] ; optional reason added in v3 +invalidReason = %s"BAD" / %s"TOPIC" / %s"EXPIRED" / %s"UNREGISTERED" ``` ### Replace notification token @@ -249,7 +258,7 @@ The response: subStatusResp = %s"SUB" SP subStatus subStatus = %s"NEW" / %s"PENDING" / ; e.g., after SMP server disconnect/timeout while ntf server is retrying to connect %s"ACTIVE" / %s"INACTIVE" / %s"END" / ; if another server subscribed to notifications - %s"AUTH" / subErrStatus + %s"AUTH" / %s"DELETED" / %s"SERVICE" / subErrStatus subErrStatus = %s"ERR" SP shortString ``` @@ -265,6 +274,17 @@ The response to this command is `okResp` or `errorResp`. After this command no more message notifications will be sent from this queue. +### Keep-alive command + +To keep the transport connection alive the clients should use `PING` command: + +```abnf +pingCmd = %s"PING" +pongResp = %s"PONG" +``` + +This command is sent unsigned and without entity ID. + ### Error responses All commands can return error response: diff --git a/protocol/simplex-messaging.md b/protocol/simplex-messaging.md index 5a077eff20..758d4cead8 100644 --- a/protocol/simplex-messaging.md +++ b/protocol/simplex-messaging.md @@ -102,7 +102,7 @@ This document describes SMP protocol version 19. Versions 1-5 are discontinued. 
- v16: service certificates - v17: create notification credentials with NEW command - v18: support client notices in BLOCKED error -- v19: service subscriptions to messages (SUBS, SOKS, ENDS commands) +- v19: service subscriptions to messages (SUBS, NSUBS, SOKS, ENDS, ALLS commands) ## Introduction @@ -445,11 +445,13 @@ SMP protocol supports client services - high capacity clients that act as servic ### Service roles -A client service can have one of two roles: +A client service can have one of three roles: -- **Messaging** - Message receiver service that subscribes to and receives messages from multiple SMP queues with a single command. +- **Messaging** (`"M"`) - Message receiver service that subscribes to and receives messages from multiple SMP queues with a single command. -- **Notifications** - Notification service that subscribes to queue notifications and delivers push notifications to user devices. +- **Notifications** (`"N"`) - Notification service that subscribes to queue notifications and delivers push notifications to user devices. + +- **Proxy** (`"P"`) - Proxy service that forwards sender commands to destination servers. Service role is identified in the transport handshake and determines what commands the service is authorized to send. 
@@ -465,7 +467,7 @@ Service certificates are included in the client handshake and verified by the se ```abnf clientHandshakeService = serviceRole serviceCertKey -serviceRole = %s"M" / %s"N" ; Messaging / Notifier +serviceRole = %s"M" / %s"N" / %s"P" ; Messaging / Notifier / Proxy serviceCertKey = certChainPubKey ``` @@ -506,7 +508,7 @@ transmissionCount = 1*1 OCTET ; equal or greater than 1 transmissions = transmissionLength transmission [transmissions] transmissionLength = 2*2 OCTET ; word16 encoded in network byte order -transmission = authorization authorized +transmission = authorization [serviceSig] authorized authorized = sessionIdentifier corrId entityId smpCommand corrId = %x18 24*24 OCTET / %x0 "" ; corrId is required in client commands and server responses, @@ -517,6 +519,8 @@ entityId = shortString ; queueId or proxySessionId authorization = shortString ; signature or authenticator ; empty authorization can be used with "send" before the queue is secured with secure command ; authorization is always empty with "ping" and server responses +serviceSig = shortString ; optional Ed25519 service signature (v16+) + ; present only in service sessions when authorization is non-empty sessionIdentifier = "" ; sessionIdentifierForAuth = shortString ; sessionIdentifierForAuth MUST be included in authorized transmission body. 
@@ -881,17 +885,17 @@ This command is sent to the server by the sender both to confirm the queue after send = %s"SEND " msgFlags SP smpEncMessage msgFlags = notificationFlag reserved notificationFlag = %s"T" / %s"F" -smpEncMessage = smpEncClientMessage / smpEncConfirmation ; message up to 16064 bytes +smpEncMessage = smpEncClientMessage / smpEncConfirmation ; message up to 16048 bytes (v11+) -smpEncClientMessage = smpPubHeaderNoKey msgNonce sentClientMsgBody ; message up to 16064 bytes +smpEncClientMessage = smpPubHeaderNoKey msgNonce sentClientMsgBody ; message up to maxMessageLength bytes smpPubHeaderNoKey = smpClientVersion "0" -sentClientMsgBody = 16016*16016 OCTET +sentClientMsgBody = 16000*16000 OCTET ; = maxMessageLength(v11+) - 48 = 16048 - 48 smpEncConfirmation = smpPubHeaderWithKey msgNonce sentConfirmationBody smpPubHeaderWithKey = smpClientVersion "1" senderPublicDhKey ; sender's Curve25519 public key to agree DH secret for E2E encryption in this queue ; it is only sent in confirmation message -sentConfirmationBody = 15920*15920 OCTET ; E2E-encrypted smpClientMessage padded to 16016 bytes before encryption +sentConfirmationBody = 15904*15904 OCTET ; E2E-encrypted smpClientMessage padded to e2eEncMessageLength before encryption senderPublicDhKey = length x509encoded smpClientVersion = word16 @@ -917,16 +921,18 @@ Until the queue is secured, the server should accept any number of unsigned mess The body should be encrypted with the shared secret based on recipient's "public" key (`EK`); once decrypted it must have this format: ```abnf -sentClientMsgBody = +sentClientMsgBody = + ; e2eEncMessageLength = 16000 smpClientMessage = emptyHeader clientMsgBody emptyHeader = "_" -clientMsgBody = *OCTET ; up to 16016 - 2 +clientMsgBody = *OCTET ; up to e2eEncMessageLength - 2 -sentConfirmationBody = +sentConfirmationBody = + ; e2eEncConfirmationLength = 15904 smpConfirmation = smpConfirmationHeader confirmationBody smpConfirmationHeader = emptyHeader / %s"K" 
senderKey ; emptyHeader is used when queue is already secured by sender -confirmationBody = *OCTET ; up to 15920 - 2 +confirmationBody = *OCTET ; up to e2eEncConfirmationLength - 2 senderKey = length x509encoded ; the sender's Ed25519 or X25519 public key to authorize SEND commands for this queue ``` @@ -940,15 +946,15 @@ SMP transmission structure for directly sent messages: 1 | transmission count (= 1) 2 | originalLength 299- | authorization sessionId corrId queueId %s"SEND" SP (1+114 + 1+32? + 1+24 + 1+24 + 4+1 = 203) - ....... smpEncMessage (= 16064 bytes = 16384 - 320 bytes) + ....... smpEncMessage (= 16048 bytes for v11+, within 16384 - 320 bytes) 8- | smpPubHeader (for messages it is only version and '0' to mean "no DH key" = 3 bytes) 24 | nonce for smpClientMessage 16 | auth tag for smpClientMessage - ------- smpClientMessage (E2E encrypted, = 16016 bytes = 16064 - 48) + ------- smpClientMessage (E2E encrypted, = 16000 bytes = 16048 - 48, for v11+) 2 | originalLength 2- | smpPrivHeader ....... - | clientMsgBody (<= 16012 bytes = 16016 - 4) + | clientMsgBody (<= 15996 bytes = 16000 - 4) ....... 0+ | smpClientMessage pad ------- smpClientMessage end @@ -971,17 +977,17 @@ SMP transmission structure for received messages: 2 | originalLength 8 | timestamp 8- | message flags - ....... smpEncMessage (= 16064 bytes = 16082 - 18 bytes) + ....... smpEncMessage (= 16048 bytes for v11+, padded within 16082 - 18 = 16064 bytes) 8- | smpPubHeader (empty header for the message) 24 | nonce for smpClientMessage 16 | auth tag for smpClientMessage - ------- smpClientMessage (E2E encrypted, = 16016 bytes = 16064 - 48 bytes) + ------- smpClientMessage (E2E encrypted, = 16000 bytes = 16048 - 48 bytes, for v11+) 2 | originalLength 2- | smpPrivHeader (empty header for the message) - ....... clientMsgBody (<= 16012 bytes = 16016 - 4) + ....... 
clientMsgBody (<= 15996 bytes = 16000 - 4) -- TODO move internal structure (below) to agent protocol 20- | agentPublicHeader (the size is for user messages post handshake, without E2E X3DH keys - it is version and 'M' for the messages - 3 bytes in total) - ....... E2E double-ratchet encrypted (<= 15996 bytes = 16016 - 20) + ....... E2E double-ratchet encrypted (<= 15980 bytes = 16000 - 20) 1 | encoded double ratchet header length (it is 123 now) 123 | encoded double ratchet header, including: 2 | version @@ -989,12 +995,12 @@ SMP transmission structure for received messages: 16 | double-ratchet header auth tag 1+88 | double-ratchet header (actual size is 69 bytes, the rest is reserved) 16 | message auth tag (IV generated from chain ratchet) - ------- encrypted agent message (= 15856 bytes = 15996 - 140) + ------- encrypted agent message (= 15840 bytes = 15980 - 140) 2 | originalLength 64- | agentHeader (the actual size is 41 = 8 + 1+32) 2 | %s"MM" ....... - | application message (<= 15788 bytes = 15856 - 68) + | application message (<= 15772 bytes = 15840 - 68) ....... 0+ | encrypted agent message pad ------- encrypted agent message end @@ -1117,7 +1123,7 @@ Transmission sent to proxy server should use session ID as entity ID and use a r Encrypted transmission should use the received session ID from the connection between proxy server and destination server in the authorized body. ```abnf -proxyCommand = %s"PFWD" SP smpVersion commandKey +proxyCommand = %s"PFWD" SP smpVersion commandKey smpVersion = 2*2 OCTET commandKey = length x509encoded ``` @@ -1127,7 +1133,7 @@ The proxy server will forward the encrypted transmission in `RFWD` command (see Having received the `RRES` response from the destination server, proxy server will forward `PRES` response to the client. `PRES` response should use the same correlation ID as `PFWD` command. The destination server will use this correlation ID increased by 1 as a nonce for encryption of the response. 
```abnf -proxyResponse = %s"PRES" SP +proxyResponse = %s"PRES" SP ``` #### Forward command to destination server @@ -1158,7 +1164,7 @@ relayResponse = %s"RRES" SP ### Short link commands -These commands are used by senders to access queues via short links (added in v8). +These commands are used by senders to access queues via short links (added in v15). #### Set link key @@ -1270,7 +1276,7 @@ The server must deliver messages to all subscribed simplex queues on the current message = %s"MSG" SP msgId encryptedRcvMsgBody encryptedRcvMsgBody = ; server-encrypted padded sent msgBody - ; maxMessageLength = 16064 + ; maxMessageLength = 16048 (v11+) rcvMsgBody = timestamp msgFlags SP sentMsgBody / msgQuotaExceeded msgQuotaExceeded = %s"QUOTA" SP timestamp msgId = length 24*24OCTET @@ -1367,16 +1373,16 @@ deleted = %s"DELD" - `IDENTITY` - incorrect server identity (certificate fingerprint does not match server address). - `BAD_AUTH` - incorrect or missing server credentials in handshake. - authentication error (`AUTH`) - incorrect authorization, unknown (or suspended) queue, sender's ID is used in place of recipient's and vice versa, and some other cases (see [Send message](#send-message) command). -- blocked entity error (`BLOCKED`) - the entity (queue or message) was blocked due to policy violation (added in v17). Contains blocking information: +- blocked entity error (`BLOCKED`) - the entity (queue or message) was blocked due to policy violation (added in v12). Contains blocking information: - `reason` - blocking reason (`spam` or `content`). - `notice` - optional client notice with additional information. - service error (`SERVICE`) - service-related error. - crypto error (`CRYPTO`) - cryptographic operation failed. - message queue quota exceeded error (`QUOTA`) - too many messages were sent to the message queue. Further messages can only be sent after the recipient retrieves the messages. - store error (`STORE`) - server storage error with error message. 
-- message expired (`EXPIRED`) - message has expired. +- relay public key expired (`EXPIRED`) - relay public key has expired. - no message (`NO_MSG`) - no message available or message ID mismatch. -- sent message is too large (> 16064) to be delivered (`LARGE_MSG`). +- sent message is too large (> maxMessageLength) to be delivered (`LARGE_MSG`). - internal server error (`INTERNAL`). - duplicate error (`DUPLICATE_`) - internal duplicate detection error (not returned by server). @@ -1513,7 +1519,7 @@ clientKey = length x509encoded ; X25519 public key for session encryption - only proxyServer = %s"T" / %s"F" ; true if connecting client is a proxy server optClientService = %s"0" / (%s"1" clientService) ; optional service client credentials clientService = serviceRole serviceCertKey -serviceRole = %s"M" / %s"N" ; Messaging / Notifier +serviceRole = %s"M" / %s"N" / %s"P" ; Messaging / Notifier / Proxy serviceCertKey = certChain signedServiceKey signedServiceKey = originalLength x509encoded ; Ed25519 key signed by service certificate @@ -1530,7 +1536,14 @@ pad = *OCTET `proxyServer` flag (v14+) disables additional transport encryption inside TLS for proxy connections, since proxy server connection already has additional encryption. -`clientService` (v16+) provides long-term service client certificate for high-volume services using SMP server (chat relays, notification servers, high traffic bots). The server responds with a third handshake message containing the assigned service ID. +`clientService` (v16+) provides long-term service client certificate for high-volume services using SMP server (chat relays, notification servers, high traffic bots). 
The server responds with a third handshake message containing the assigned service ID: + +```abnf +paddedServerHandshakeResponse = +serverHandshakeResponse = %s"R" serviceId / %s"E" handshakeError +serviceId = shortString +handshakeError = transportError +``` `ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version - the client and servers must ignore any extra bytes within the original block length. diff --git a/protocol/xrcp.md b/protocol/xrcp.md index c8042f8583..b3a304572b 100644 --- a/protocol/xrcp.md +++ b/protocol/xrcp.md @@ -143,7 +143,7 @@ hostHello = %s"HELLO " dhPubKey nonce encrypted(unpaddedSize hostHelloJSON hello unpaddedSize = largeLength dhPubKey = length x509encoded pad = -helloPad = +helloPad = largeLength = 2*2 OCTET ``` @@ -190,7 +190,7 @@ ctrlHello = %s"HELLO " kemCiphertext encrypted(unpaddedSize ctrlHelloJSON helloP unpaddedSize = largeLength kemCiphertext = largeLength *OCTET pad = -helloPad = +helloPad = largeLength = 2*2 OCTET ctrlError = %s"ERROR " nonce encrypted(unpaddedSize ctrlErrorMessage helloPad) pad From 583f4e059dd21f1d95e8d293ef20a46df1853c39 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 9 Mar 2026 23:35:41 +0000 Subject: [PATCH 18/91] update RFCs (#1730) * update RFCs * update * update overview * update terminology * original language in threat model --------- Co-authored-by: Evgeny @ SimpleX Chat <259188159+evgeny-simplex@users.noreply.github.com> --- protocol/agent-protocol.md | 122 +++-- protocol/overview-tjr.md | 313 ++++------- protocol/push-notifications.md | 90 ++-- protocol/security.md | 215 ++++++++ protocol/simplex-messaging.md | 486 +++++++++--------- protocol/xftp.md | 314 +++++------ rfcs/2022-04-20-smp-conf-timeout-recovery.md | 4 +- rfcs/2024-07-06-ios-notifications.md | 4 +- rfcs/2024-09-05-queue-storage.md | 24 +- rfcs/2024-09-10-private-rendezvous.md | 16 +- rfcs/2024-09-25-ios-notifications-2.md | 18 +- rfcs/2024-11-25-queue-blobs-2.md | 2 +- 
rfcs/2025-03-30-ios-notifications-3.md | 42 +- rfcs/2025-04-04-short-links-for-groups.md | 18 +- rfcs/2025-07-15-multi-device.md | 28 +- rfcs/2025-08-20-service-subs-drift.md | 36 +- rfcs/README.md | 8 +- rfcs/{ => done}/2024-02-12-encryption.md | 6 +- rfcs/{ => done}/2024-03-20-server-metadata.md | 26 +- rfcs/{ => done}/2024-06-01-agent-protocol.md | 0 rfcs/{ => done}/2024-06-21-short-links.md | 28 +- .../2024-09-01-smp-message-storage.md | 22 +- rfcs/{ => done}/2024-09-09-smp-blobs.md | 12 +- rfcs/{ => done}/2024-09-15-shared-port.md | 26 +- .../2024-11-25-journal-expiration.md | 2 +- rfcs/{ => done}/2025-03-16-smp-queues.md | 72 +-- .../2025-05-05-client-certificates.md | 38 +- rfcs/{ => done}/2026-01-30-send-file-page.md | 233 ++++----- .../2026-01-31-xftp-web-server-changes.md | 22 +- .../2026-02-02-xftp-web-handshake.md | 8 +- .../2026-02-03-xftp-web-browser-tests.md | 0 .../2026-02-04-xftp-web-browser-transport.md | 0 .../2026-02-04-xftp-web-page.md | 6 +- ...6-02-04-xftp-web-persistent-connections.md | 0 .../2026-02-05-xftp-web-e2e-tests.md | 10 +- .../2026-02-08-xftp-web-hello-header.md | 20 +- .../2026-02-11-xftp-web-error-handling.md | 186 +++---- .../2026-02-12-xftp-cli-web-link-compat.md | 46 +- .../2026-02-17-fix-subq-deadlock.md | 0 .../2026-03-09-access-via-tor.md} | 7 + .../2026-03-09-crypto.md} | 9 + .../2026-03-09-db-sync.md} | 7 + .../2026-03-09-db-sync.mmd} | 7 + .../2026-03-09-delivery-receipts.md} | 7 + .../2026-03-09-deniability.md} | 7 + .../2026-03-09-fast-connection.md} | 7 + .../2026-03-09-file-links.md} | 7 + .../2026-03-09-logging.md} | 9 + .../2026-03-09-messages.md} | 9 + .../2026-03-09-nofication-server.md} | 7 + .../2026-03-09-open-connection.md} | 7 + .../2026-03-09-pqdr-version.md} | 9 + .../2026-03-09-pqdr.md} | 7 + .../2026-03-09-queue-quota.md} | 7 + .../2026-03-09-queue-rotation.md} | 7 + .../2026-03-09-remote-control.md} | 7 + .../2026-03-09-resync-ratchets-design.md} | 9 + .../2026-03-09-resync-ratchets.md} | 7 + 
.../2026-03-09-second-relays.md} | 7 + .../2026-03-09-simplex-file-transfer.md} | 7 + .../2026-03-09-smp-basic-auth.md} | 7 + .../2026-03-09-smp-delivery-proxy.md} | 9 + .../2026-03-09-smp-notifications.md} | 7 + .../2026-03-09-xftp-version.md} | 7 + 64 files changed, 1476 insertions(+), 1214 deletions(-) create mode 100644 protocol/security.md rename rfcs/{ => done}/2024-02-12-encryption.md (91%) rename rfcs/{ => done}/2024-03-20-server-metadata.md (87%) rename rfcs/{ => done}/2024-06-01-agent-protocol.md (100%) rename rfcs/{ => done}/2024-06-21-short-links.md (84%) rename rfcs/{ => done}/2024-09-01-smp-message-storage.md (93%) rename rfcs/{ => done}/2024-09-09-smp-blobs.md (91%) rename rfcs/{ => done}/2024-09-15-shared-port.md (86%) rename rfcs/{ => done}/2024-11-25-journal-expiration.md (94%) rename rfcs/{ => done}/2025-03-16-smp-queues.md (85%) rename rfcs/{ => done}/2025-05-05-client-certificates.md (79%) rename rfcs/{ => done}/2026-01-30-send-file-page.md (88%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md (89%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md (97%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-03-xftp-web-browser-tests.md (100%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md (100%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md (99%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-04-xftp-web-persistent-connections.md (100%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md (98%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md (94%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md (81%) rename rfcs/{ => done}/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md (87%) rename rfcs/{ => done}/2026-02-17-fix-subq-deadlock.md (100%) rename 
rfcs/{done/2022-07-22-access-via-tor.md => standard/2026-03-09-access-via-tor.md} (94%) rename rfcs/{done/2021-01-26-crypto.md => standard/2026-03-09-crypto.md} (96%) rename rfcs/{done/2022-06-13-db-sync.md => standard/2026-03-09-db-sync.md} (98%) rename rfcs/{done/2022-06-13-db-sync.mmd => standard/2026-03-09-db-sync.mmd} (95%) rename rfcs/{done/2023-05-03-delivery-receipts.md => standard/2026-03-09-delivery-receipts.md} (99%) rename rfcs/{done/2024-02-03-deniability.md => standard/2026-03-09-deniability.md} (98%) rename rfcs/{done/2024-06-14-fast-connection.md => standard/2026-03-09-fast-connection.md} (96%) rename rfcs/{done/2024-01-26-file-links.md => standard/2026-03-09-file-links.md} (98%) rename rfcs/{done/2021-01-20-logging.md => standard/2026-03-09-logging.md} (85%) rename rfcs/{done/2021-01-26-messages.md => standard/2026-03-09-messages.md} (89%) rename rfcs/{done/2022-03-22-nofication-server.md => standard/2026-03-09-nofication-server.md} (96%) rename rfcs/{done/2021-05-17-open-connection.md => standard/2026-03-09-open-connection.md} (95%) rename rfcs/{done/2024-03-03-pqdr-version.md => standard/2026-03-09-pqdr-version.md} (94%) rename rfcs/{done/2023-12-29-pqdr.md => standard/2026-03-09-pqdr.md} (96%) rename rfcs/{done/2022-12-27-queue-quota.md => standard/2026-03-09-queue-quota.md} (94%) rename rfcs/{done/2022-08-14-queue-rotation.md => standard/2026-03-09-queue-rotation.md} (96%) rename rfcs/{done/2023-10-25-remote-control.md => standard/2026-03-09-remote-control.md} (99%) rename rfcs/{done/2023-05-02-resync-ratchets.md => standard/2026-03-09-resync-ratchets-design.md} (92%) rename rfcs/{done/2023-06-08-resync-ratchets.md => standard/2026-03-09-resync-ratchets.md} (99%) rename rfcs/{done/2023-09-12-second-relays.md => standard/2026-03-09-second-relays.md} (99%) rename rfcs/{done/2022-12-26-simplex-file-transfer.md => standard/2026-03-09-simplex-file-transfer.md} (98%) rename rfcs/{done/2022-11-11-smp-basic-auth.md => 
standard/2026-03-09-smp-basic-auth.md} (95%) rename rfcs/{done/2023-05-24-smp-delivery-proxy.md => standard/2026-03-09-smp-delivery-proxy.md} (86%) rename rfcs/{done/2022-06-05-smp-notifications.md => standard/2026-03-09-smp-notifications.md} (97%) rename rfcs/{done/2024-03-28-xftp-version.md => standard/2026-03-09-xftp-version.md} (98%) diff --git a/protocol/agent-protocol.md b/protocol/agent-protocol.md index 83898d9b9e..a105ce795c 100644 --- a/protocol/agent-protocol.md +++ b/protocol/agent-protocol.md @@ -6,7 +6,7 @@ Version 7, 2025-01-24 - [Abstract](#abstract) - [SMP agent](#smp-agent) -- [SMP servers management](#smp-servers-management) +- [SMP routers management](#smp-routers-management) - [SMP agent protocol scope](#smp-agent-protocol-scope) - [Duplex connection procedure](#duplex-connection-procedure) - [Fast duplex connection procedure](#fast-duplex-connection-procedure) @@ -35,13 +35,13 @@ Version 7, 2025-01-24 ## Abstract -The purpose of SMP agent protocol is to define the syntax and the semantics of communications between the client and the agent that connects to [SMP](./simplex-messaging.md) servers. +The purpose of SMP agent protocol is to define the syntax and the semantics of communications between the client and the agent that connects to [SMP](./simplex-messaging.md) routers. It provides: -- API to create and manage bi-directional (duplex) connections between the users of SMP agents consisting of two (or more) separate unidirectional (simplex) SMP queues, abstracting away multiple steps required to establish bi-directional connections and any information about the servers location from the users of the agent protocol. 
+- API to create and manage bi-directional (duplex) connections between the users of SMP agents consisting of two (or more) separate unidirectional (simplex) SMP queues, abstracting away multiple steps required to establish bi-directional connections and any information about the routers location from the users of the agent protocol. - management of E2E encryption between SMP agents, generating ephemeral asymmetric keys for each connection. -- SMP command authentication on SMP servers, generating ephemeral keys for each SMP queue. -- TCP/TLS transport handshake with SMP servers. +- SMP command authentication on SMP routers, generating ephemeral keys for each SMP queue. +- TCP/TLS transport handshake with SMP routers. - validation of message integrity. SMP agent API provides no security between the agent and the client - it is assumed that the agent is executed in the trusted and secure environment, via the agent library, when the agent logic is included directly into the client application - [SimpleX Chat for terminal](https://github.com/simplex-chat/simplex-chat) uses this approach. @@ -58,22 +58,22 @@ This document describes SMP agent protocol version 7. The version history: ## SMP agent -SMP agents communicate with each other via SMP servers using [simplex messaging protocol (SMP)](./simplex-messaging.md) according to the API calls used by the client applications. This protocol is a middle layer in SimpleX protocols (above SMP protocol but below any application level protocol) - it is intended to be used by client-side applications that need secure asynchronous bi-directional communication channels ("connections"). +SMP agents communicate with each other via SMP routers using [simplex messaging protocol (SMP)](./simplex-messaging.md) according to the API calls used by the client applications. 
This protocol is a middle layer in SimpleX protocols (above SMP protocol but below any application level protocol) - it is intended to be used by client-side applications that need secure asynchronous bi-directional communication channels ("connections"). The agent must have a persistent storage to manage the states of known connections and of the client-side information of SMP queues that each connection consists of, and also the buffer of the most recent sent and received messages. The number of the messages that should be stored is implementation specific, depending on the error management approach that the agent implements; at the very least the agent must store the hashes and IDs of the last received and sent messages. -## SMP servers management +## SMP routers management -SMP agent API does not use the addresses of the SMP servers that the agent will use to create and use the connections (excluding the server address in queue URIs used in JOIN command). The list of the servers is a part of the agent configuration and can be dynamically changed by the agent implementation: +SMP agent API does not use the addresses of the SMP routers that the agent will use to create and use the connections (excluding the router address in queue URIs used in JOIN command). The list of the routers is a part of the agent configuration and can be dynamically changed by the agent implementation: - by the client applications via any API that is outside of scope of this protocol. -- by the agents themselves based on availability and latency of the configured servers. +- by the agents themselves based on availability and latency of the configured routers. ## SMP agent protocol scope SMP agent protocol has 2 main parts: - the messages that SMP agents exchange with each other in order to: - - negotiate establishing unidirectional (simplex) encrypted queues on SMP servers. + - negotiate establishing unidirectional (simplex) encrypted queues on SMP routers. 
- exchange client messages and delivery notifications, providing sequential message IDs and message integrity (by including the hash of the previous message). - re-negotiate messaging queues to use and connection e2e encryption. - the messages that the clients of SMP agents should send out-of-band (as pre-shared "invitation" including queue URIs) to protect [E2E encryption][1] from active attacks ([MITM attacks][2]). @@ -86,19 +86,19 @@ SMP agent protocol has 2 main parts: ![Duplex connection procedure](./diagrams/duplex-messaging/duplex-creating.svg) -The procedure of establishing a duplex connection is explained on the example of Alice and Bob creating a bi-directional connection consisting of two unidirectional (simplex) queues, using SMP agents (A and B) to facilitate it, and two different SMP servers (which could be the same server). It is shown on the diagram above and has these steps: +The procedure of establishing a duplex connection is explained on the example of Alice and Bob creating a bi-directional connection consisting of two unidirectional (simplex) queues, using SMP agents (A and B) to facilitate it, and two different SMP routers (which could be the same router). It is shown on the diagram above and has these steps: 1. Alice requests the new connection from the SMP agent A using agent `createConnection` api function. -2. Agent A creates an SMP queue on the server (using [SMP protocol](./simplex-messaging.md) `NEW` command) and responds to Alice with the invitation that contains queue information and the encryption keys Bob's agent B should use. The invitation format is described in [Connection link](connection-link-1-time-invitation-and-contact-address). +2. Agent A creates an SMP queue on the router (using [SMP protocol](./simplex-messaging.md) `NEW` command) and responds to Alice with the invitation that contains queue information and the encryption keys Bob's agent B should use. 
The invitation format is described in [Connection link](#connection-link-1-time-invitation-and-contact-address). 3. Alice sends the [connection link](#connection-link-1-time-invitation-and-contact-address) to Bob via any secure channel (out-of-band message) - as a link or as a QR code. 4. Bob uses agent `joinConnection` api function with the connection link as a parameter to agent B to accept the connection. -5. Agent B creates Bob's SMP reply queue with SMP server `NEW` command. -6. Agent B confirms the connection: sends an "SMP confirmation" with SMP server `SEND` command to the SMP queue specified in the connection link - SMP confirmation is an unauthenticated message with an ephemeral key that will be used to authenticate Bob's commands to the queue, as described in SMP protocol, and Bob's info (profile, public key for E2E encryption, and the connection link to this 2nd queue to Agent A - this connection link SHOULD use "simplex" URI scheme). This message is encrypted using key passed in the connection link (or with the derived shared secret, in which case public key for key derivation should be sent in clear text). +5. Agent B creates Bob's SMP reply queue with SMP router `NEW` command. +6. Agent B confirms the connection: sends an "SMP confirmation" with SMP router `SEND` command to the SMP queue specified in the connection link - SMP confirmation is an unauthenticated message with an ephemeral key that will be used to authenticate Bob's commands to the queue, as described in SMP protocol, and Bob's info (profile, public key for E2E encryption, and the connection link to this 2nd queue to Agent A - this connection link SHOULD use "simplex" URI scheme). This message is encrypted using key passed in the connection link (or with the derived shared secret, in which case public key for key derivation should be sent in clear text). 6.
Alice confirms and continues the connection: - - Agent A receives the SMP confirmation containing Bob's key, reply queue and info as SMP server `MSG`. + - Agent A receives the SMP confirmation containing Bob's key, reply queue and info as SMP router `MSG`. - Agent A notifies Alice sending `CONF` notification with Bob's info. - Alice allows connection to continue with agent `allowConnection` api function. - - Agent A secures the queue with SMP server `KEY` command. + - Agent A secures the queue with SMP router `KEY` command. - Agent A sends SMP confirmation with ephemeral sender key, ephemeral public encryption key and profile (but without reply queue). 7. Agent B confirms the connection: - receives the confirmation. @@ -114,7 +114,7 @@ The procedure of establishing a duplex connection is explained on the example of At this point the duplex connection between Alice and Bob is established, they can use `SEND` command to send messages. The diagram also shows how the connection status changes for both parties, where the first part is the status of the SMP queue to receive messages, and the second part - the status of the queue to send messages. -The most communication happens between the agents and servers, from the point of view of Alice and Bob there are 4 steps (not including notifications): +Most of the communication happens between the agents and routers; from the point of view of Alice and Bob there are 4 steps (not including notifications): 1. Alice requests a new connection with `createConnection` agent API function and receives the connection link. 2. Alice passes connection link out-of-band to Bob. @@ -137,14 +137,14 @@ Faster duplex connection process is possible with the `SKEY` command added in v9 ![Fast duplex connection procedure](./diagrams/duplex-messaging/duplex-creating-fast.svg) 1. Alice requests the new connection from the SMP agent A using agent `createConnection` api function -2.
Agent A creates an SMP queue on the server (using [SMP protocol](./simplex-messaging.md) `NEW` command with the flag allowing the sender to secure the queue) and responds to Alice with the invitation that contains queue information and the encryption keys Bob's agent B should use. The invitation format is described in [Connection link](connection-link-1-time-invitation-and-contact-address). +2. Agent A creates an SMP queue on the router (using [SMP protocol](./simplex-messaging.md) `NEW` command with the flag allowing the sender to secure the queue) and responds to Alice with the invitation that contains queue information and the encryption keys Bob's agent B should use. The invitation format is described in [Connection link](#connection-link-1-time-invitation-and-contact-address). 3. Alice sends the [connection link](connection-link-1-time-invitation-and-contact-address) to Bob via any secure channel (out-of-band message) - as a link or as a QR code. This link contains the flag that the queue can be secured by the sender. 4. Bob uses agent `joinConnection` api function with the connection link as a parameter to agent B to accept the connection. 5. Agent B secures Alice's queue with SMP command `SKEY` - this command can be proxied. -6. Agent B creates Bob's SMP reply queue with SMP server `NEW` command (with the flag allowing the sender to secure the queue). -7. Agent B confirms the connection: sends an "SMP confirmation" with SMP server `SEND` command to the SMP queue specified in the connection link - SMP confirmation is an unauthenticated message with an ephemeral key that will be used to authenticate Bob's commands to the queue, as described in SMP protocol, and Bob's info (profile, public key for E2E encryption, and the connection link to this 2nd queue to Agent A - this connection link SHOULD use "simplex" URI scheme).
This message is encrypted using key passed in the connection link (or with the derived shared secret, in which case public key for key derivation should be sent in clear text). +6. Agent B creates Bob's SMP reply queue with SMP router `NEW` command (with the flag allowing the sender to secure the queue). +7. Agent B confirms the connection: sends an "SMP confirmation" with SMP router `SEND` command to the SMP queue specified in the connection link - SMP confirmation is an unauthenticated message with an ephemeral key that will be used to authenticate Bob's commands to the queue, as described in SMP protocol, and Bob's info (profile, public key for E2E encryption, and the connection link to this 2nd queue to Agent A - this connection link SHOULD use "simplex" URI scheme). This message is encrypted using key passed in the connection link (or with the derived shared secret, in which case public key for key derivation should be sent in clear text). 8. Alice confirms the connection: - - Agent A receives the SMP confirmation containing Bob's key, reply queue and info as SMP server `MSG`. + - Agent A receives the SMP confirmation containing Bob's key, reply queue and info as SMP router `MSG`. - Agent A notifies Alice sending `CONF` notification with Bob's info (that indicates that Agent B already secured the queue). - Alice allows connection to continue with agent `allowConnection` api function. - Agent A secures Bob's queue with SMP command `SKEY`. @@ -159,11 +159,11 @@ Faster duplex connection process is possible with the `SKEY` command added in v9 SMP agents support creating a special type of connection - a contact address - that allows to connect to multiple network users who can send connection requests by sending 1-time connection links to the message queue. -This connection address uses a messaging queue on SMP server to receive invitations to connect - see `agentInvitation` message below. 
Once connection request is accepted, a new connection is created and the address itself is no longer used to send the messages - deleting this address does not disrupt the connections that were created via it. +This connection address uses a messaging queue on SMP router to receive invitations to connect - see `agentInvitation` message below. Once connection request is accepted, a new connection is created and the address itself is no longer used to send the messages - deleting this address does not disrupt the connections that were created via it. ## Communication between SMP agents -To establish duplex connections and to send messages on behalf of their clients, SMP agents communicate via SMP servers. +To establish duplex connections and to send messages on behalf of their clients, SMP agents communicate via SMP routers. Agents use SMP message client body (the part of the SMP message after header - see [SMP protocol](./simplex-messaging.md)) to transmit agent client messages and exchange messages between each other. @@ -217,7 +217,7 @@ Decrypted SMP message client body can be one of 4 types: - to confirm that the new double ratchet encryption is agreed (`EREADY`). - to notify another party that it can continue sending messages after queue capacity was exceeded (`A_QCONT`). - to manage SMP queue rotation (`QADD`, `QKEY`, `QUSE`, `QTEST`). -- `msgPadding` - an optional message padding to make all SMP messages have constant size, to prevent servers from observing the actual message size. The only case the message padding can be absent is when the message has exactly the maximum size, in all other cases the message MUST be padded to a fixed size. +- `msgPadding` - an optional message padding to make all SMP messages have constant size, to prevent routers from observing the actual message size. The only case the message padding can be absent is when the message has exactly the maximum size, in all other cases the message MUST be padded to a fixed size. 
### Messages between SMP agents @@ -256,14 +256,14 @@ A_QCONT = %s"QC" sndQueueAddr QADD = %s"QA" sndQueues sndQueues = length 1*(newQueueUri replacedSndQueue) -newQueueUri = clientVRange smpServer senderId dhPublicKey [queueMode] +newQueueUri = clientVRange smpRouter senderId dhPublicKey [queueMode] dhPublicKey = length x509encoded queueMode = %s"M" / %s"C" ; M - messaging (sender can secure), C - contact replacedSndQueue = "0" / "1" sndQueueAddr QKEY = %s"QK" sndQueueKeys sndQueueKeys = length 1*(newQueueInfo senderKey) -newQueueInfo = version smpServer senderId dhPublicKey [queueMode] +newQueueInfo = version smpRouter senderId dhPublicKey [queueMode] senderKey = length x509encoded QUSE = %s"QU" sndQueuesReady @@ -273,8 +273,8 @@ primary = %s"T" / %s"F" QTEST = %s"QT" sndQueueAddrs sndQueueAddrs = length 1*sndQueueAddr -sndQueueAddr = smpServer senderId -smpServer = hosts port keyHash +sndQueueAddr = smpRouter senderId +smpRouter = hosts port keyHash hosts = length 1*host host = shortString port = shortString @@ -298,7 +298,7 @@ This message is not used with [fast duplex connection](#fast-duplex-connection-p #### A_MSG message -This is the agent envelope used to send client messages once the connection is established. This is different from the MSG sent by SMP server to the agent and MSG event from SMP agent to the client that are sent in different contexts. +This is the agent envelope used to send client messages once the connection is established. This is different from the MSG sent by SMP router to the agent and MSG event from SMP agent to the client that are sent in different contexts. 
#### A_RCVD message @@ -314,7 +314,7 @@ This message is sent to notify the sender client that it can continue sending th ### Rotating messaging queue -SMP agents SHOULD support 4 messages to rotate message reception to another messaging server: +SMP agents SHOULD support 4 messages to rotate message reception to another messaging router: `QADD`: add the new queue address(es) to the connection - sent by the client that initiates rotation. `QKEY`: pass sender's key via existing connection (SMP confirmation message will not be used, to avoid the same "race" of the initial key exchange that would create the risk of intercepting the queue for the attacker) - sent by the client accepting the rotation `QUSE`: instruct the sender to use the new queue with sender's queue ID as parameter. From this point some messages can be sent to both the new queue and the old queue. @@ -384,7 +384,7 @@ connectionLink = connectionScheme "/" connLinkType "#/?v=" versionRange "&smp=" connLinkType = %s"invitation" / %s"contact" connectionScheme = (%s"https://" clientAppServer) | %s"simplex:" clientAppServer = hostname [ ":" port ] -; client app server, e.g. simplex.chat +; client app router, e.g. simplex.chat versionRange = 1*DIGIT / 1*DIGIT "-" 1*DIGIT ; agent version range e2eEncryption = smpQueues = smpQueue *(";" smpQueue) ; SMP queues for the connection (semicolon-separated) @@ -392,17 +392,17 @@ smpQueue = clientData = ``` -All parameters are passed via URI hash to avoid sending them to the server (in case "https" scheme is used) - they can be used by the client-side code and processed by the client application. Parameters can be present in any order, any unknown additional parameters SHOULD be ignored. +All parameters are passed via URI hash to avoid sending them to the router (in case "https" scheme is used) - they can be used by the client-side code and processed by the client application. Parameters can be present in any order, any unknown additional parameters SHOULD be ignored. 
-`clientAppServer` is not an SMP server - it is a server that shows the instruction on how to download the client app that will connect using this connection link. This server can also host a mobile or desktop app manifest so that this link is opened directly in the app if it is installed on the device. +`clientAppServer` is not an SMP router - it is a server that shows the instruction on how to download the client app that will connect using this connection link. This server can also host a mobile or desktop app manifest so that this link is opened directly in the app if it is installed on the device. -"simplex" URI scheme in `connectionProtocol` can be used instead of client app server, to connect without creating any web traffic. Client apps MUST support this URI scheme. +"simplex" URI scheme in `connectionProtocol` can be used instead of client app router, to connect without creating any web traffic. Client apps MUST support this URI scheme. See SMP protocol [out-of-band messages](./simplex-messaging.md#out-of-band-messages) for syntax of `queueURI`. ### Short connection link syntax -Short links provide a more compact representation by storing connection data on the server: +Short links provide a more compact representation by storing connection data on the router: ``` shortLink = shortLinkScheme "/" linkType "#" [linkId "/"] linkKey ["?" shortLinkParams] @@ -415,7 +415,7 @@ shortLinkParams = hostParam ["&" portParam] ["&" keyHashParam] hostParam = %s"h=" hostList hostList = host *("," host) portParam = %s"p=" port -keyHashParam = %s"c=" base64url ; server certificate fingerprint +keyHashParam = %s"c=" base64url ; router certificate fingerprint ``` Contact types: @@ -424,11 +424,11 @@ Contact types: - `g` (CCTGroup) - group connection - `r` (CCTRelay) - relay connection -Short links can use either the `simplex:` scheme or `https://` with a server hostname. When using the simplex scheme, server information is included in query parameters. 
+Short links can use either the `simplex:` scheme or `https://` with a router hostname. When using the simplex scheme, router information is included in query parameters. ## Short links -Short links provide a compact representation of connection links by storing encrypted connection data on the SMP server. The link key in the URI fragment (after `#`) is never sent to the server, ensuring the server cannot decrypt the stored connection data. +Short links provide a compact representation of connection links by storing encrypted connection data on the SMP router. The link key in the URI fragment (after `#`) is never sent to the router, ensuring the router cannot decrypt the stored connection data. ### Link key derivation @@ -441,7 +441,7 @@ linkKey = SHA3-256(fixedLinkData) The fixed link data includes: - Agent version range - Root public key (Ed25519) for signing -- SMP queue connection request (server, queue IDs, encryption keys) +- SMP queue connection request (router, queue IDs, encryption keys) - Optional link entity ID For contact links, the link ID and encryption key are derived from the link key using HKDF: @@ -459,7 +459,7 @@ encryptionKey = HKDF(info="SimpleXInvLink", key=linkKey, outputLen=32) ### Link data encryption -Link data stored on the server consists of two encrypted parts: fixed data and user data. Both are encrypted using NaCl secret_box (XSalsa20-Poly1305) with the derived encryption key: +Link data stored on the router consists of two encrypted parts: fixed data and user data. Both are encrypted using NaCl secret_box (XSalsa20-Poly1305) with the derived encryption key: ```abnf queueLinkData = encFixedData encUserData @@ -500,8 +500,8 @@ The fixed data is signed with the root key and its hash becomes the link key. Th When a user receives a short link, the agent resolves it as follows: 1. Extract the link key from the URI fragment -2. Send `LGET` command to the SMP server with the link ID -3. Receive encrypted link data from the server +2. 
Send `LGET` command to the SMP router with the link ID +3. Receive encrypted link data from the router 4. Decrypt the link data using the link key 5. Extract the full connection information (SMP queue URI, encryption keys, profile) 6. Proceed with the standard connection procedure using `joinConnection` @@ -513,7 +513,7 @@ For invitation links, the `LKEY` command is used to set the sender key when gett The recipient who created the queue can manage the short link data: - **LSET** - Set or update the link data associated with a queue. This is used when creating a short link or updating the user data (e.g., profile changes). -- **LDEL** - Delete the link data from the server. This effectively invalidates the short link. +- **LDEL** - Delete the link data from the router. This effectively invalidates the short link. Short links support different connection modes: - **invitation** - One-time invitation links that can only be used once @@ -565,13 +565,13 @@ Client can `acceptContact` and `rejectContact`, with `OK` and `ERR` events in ca #### Send message -`sendMessage` api is always asynchronous. The api call returns message ID, `SENT` event once the message is sent to the server, `MWARN` event in case of temporary delivery failure that can be resolved by the user (e.g., by connecting via Tor or by upgrading the client) and `MERR` in case of permanent delivery failure. +`sendMessage` api is always asynchronous. The api call returns message ID, `SENT` event once the message is sent to the router, `MWARN` event in case of temporary delivery failure that can be resolved by the user (e.g., by connecting via Tor or by upgrading the client) and `MERR` in case of permanent delivery failure. #### Acknowledge received message Messages are delivered to the client application via `MSG` event. 
-Client application must always `ackMessage` to receive the next one - failure to call it in reference implementation will prevent the delivery of subsequent messages until the client reconnects to the server. +Client application must always `ackMessage` to receive the next one - failure to call it in reference implementation will prevent the delivery of subsequent messages until the client reconnects to the router. This api is also used to acknowledge message delivery to the sending party - that party client application will receive `RCVD` event. @@ -589,11 +589,11 @@ This api is also used to acknowledge message delivery to the sending party - tha #### Get short link data -`getConnectionLink` api (`LGET` command) is used to retrieve and decrypt the short link data from the server. Returns `LDATA` event with the decrypted link data. +`getConnectionLink` api (`LGET` command) is used to retrieve and decrypt the short link data from the router. Returns `LDATA` event with the decrypted link data. -#### Rotate message queue to another server +#### Rotate message queue to another router -`switchConnection` api is used to rotate connection queues to another messaging server. +`switchConnection` api is used to rotate connection queues to another messaging router. #### Renegotiate e2e encryption @@ -616,20 +616,20 @@ Agent API uses these events dispatch to notify client application about events r - `INFO` - information from the party that initiated the connection with `createConnection` sent to the party accepting the connection with `joinConnection`. - `CON` - notification that connection is established sent to both parties of the connection. - `END` - notification that connection subscription is terminated when another client subscribed to the same messaging queue. -- `DOWN` - notification that connection server is temporarily unavailable. -- `UP` - notification that the subscriptions made in the current client session are resumed after the server became available. 
+- `DOWN` - notification that connection router is temporarily unavailable. +- `UP` - notification that the subscriptions made in the current client session are resumed after the router became available. - `SWITCH` - notification about queue rotation process. - `RSYNC` - notification about e2e encryption re-negotiation process. -- `SENT` - notification to confirm that the message was delivered to at least one of SMP servers. This notification contains the same message ID as returned to `sendMessage` api. `SENT` notification, depending on network availability, can be sent at any time later, potentially in the next client session. +- `SENT` - notification to confirm that the message was delivered to at least one of SMP routers. This notification contains the same message ID as returned to `sendMessage` api. `SENT` notification, depending on network availability, can be sent at any time later, potentially in the next client session. - `MWARN` - temporary delivery failure that can be resolved by the user (e.g., by connecting via Tor or by upgrading the client). - `MERR` - notification about permanent message delivery failure. - `MERRS` - notification about permanent message delivery failure for multiple messages (e.g., when multiple messages expire). -- `MSG` - sent when agent receives the message from the SMP server. +- `MSG` - sent when agent receives the message from the SMP router. - `MSGNTF` - sent after agent received and processed the message referenced in the push notification. - `RCVD` - notification confirming message receipt by another party. - `QCONT` - notification that the agent continued sending messages after queue capacity was exceeded and recipient received all messages. - `LINK` - short link URI created or updated for a contact address. -- `LDATA` - decrypted short link data received from the server. +- `LDATA` - decrypted short link data received from the router. - `DELD` - notification that the connection was deleted. 
- `JOINED` - notification that a member joined via a contact address. - `STAT` - connection statistics event. @@ -642,9 +642,7 @@ This list of events is not exhaustive and provided for information only. Please ## Threat model -This threat model complements SimpleX Messaging Protocol [threat model](./overview-tjr.md#threat-model) with agent-level concerns: duplex connections, end-to-end encryption with [post-quantum double ratchet](./pqdr.md), message integrity, connection establishment and queue rotation. Only additional properties not covered in the SMP threat model are listed below. - -This section uses network architecture terminology: SMP servers are referred to as routers, SMP queues as streams, and transport messages as packets — see [SimpleX Network architecture](../docs/network-architecture-v2.md). +This threat model complements SimpleX Messaging Protocol [threat model](./security.md#threat-model) with agent-level concerns: duplex connections, end-to-end encryption with [post-quantum double ratchet](./pqdr.md), message integrity, connection establishment and queue rotation. Only additional properties not covered in the SMP threat model are listed below. #### Additional global assumptions @@ -654,22 +652,22 @@ This section uses network architecture terminology: SMP servers are referred to #### A passive adversary *cannot:* - - learn the contents of packets, which are additionally encrypted with the double ratchet independently from per-stream encryption. + - learn the contents of packets, which are additionally encrypted with the double ratchet independently from per-queue encryption. #### Destination router (chosen by the receiving client application) *can:* - - correlate streams belonging to the same duplex connection when queue rotation creates a new stream on the same router. + - correlate queues belonging to the same duplex connection when queue rotation creates a new queue on the same router. 
- when both peers of a connection chose the same router, correlate the two directions of the duplex connection. *cannot:* - - compromise end-to-end encryption even with full access to the per-stream NaCl DH secret. - - correlate streams belonging to the same connection after queue rotation to a different router. + - compromise end-to-end encryption even with full access to the per-queue NaCl DH secret. + - correlate queues belonging to the same connection after queue rotation to a different router. #### An attacker who obtained a client application's (decrypted) database *can:* - - learn the full communication graph: all communication peers, associated router addresses, and stream identifiers. + - learn the full communication graph: all communication peers, associated router addresses, and queue identifiers. *cannot:* - decrypt future messages once the client application resumes communication and the double ratchet completes a new ratchet step, provided PQDR is active. @@ -682,7 +680,7 @@ This section uses network architecture terminology: SMP servers are referred to - prevent double ratchet advancement by not sending messages, delaying break-in recovery. *cannot:* - - disrupt packet delivery in other streams. + - disrupt packet delivery in other queues. 
#### An attacker who obtained a connection link diff --git a/protocol/overview-tjr.md b/protocol/overview-tjr.md index d30f552357..2fa8f717e7 100644 --- a/protocol/overview-tjr.md +++ b/protocol/overview-tjr.md @@ -1,4 +1,4 @@ -Revision 2, 2024-06-22 +Revision 4, 2026-03-09 Evgeny Poberezkin @@ -8,16 +8,17 @@ Evgeny Poberezkin - [Introduction](#introduction) - [What is SimpleX](#what-is-simplex) + - [Network model](#network-model) + - [Applications](#applications) - [SimpleX objectives](#simplex-objectives) - [In Comparison](#in-comparison) - [Technical Details](#technical-details) - - [Trust in Servers](#trust-in-servers) - - [Client -> Server Communication](#client---server-communication) + - [Trust in Routers](#trust-in-routers) + - [Client -> Router Communication](#client---router-communication) - [2-hop Onion Message Routing](#2-hop-onion-message-routing) - [SimpleX Messaging Protocol](#simplex-messaging-protocol) - [SimpleX Agents](#simplex-agents) - - [Encryption Primitives Used](#encryption-primitives-used) -- [Threat model](#threat-model) +- [Security](#security) - [Acknowledgements](#acknowledgements) @@ -27,27 +28,27 @@ Evgeny Poberezkin SimpleX as a whole is a platform upon which applications can be built. [SimpleX Chat](https://github.com/simplex-chat/simplex-chat) is one such application that also serves as an example and reference application. - - [SimpleX Messaging Protocol](./simplex-messaging.md) (SMP) is a protocol to send messages in one direction to a recipient, relying on a server in-between. The messages are delivered via uni-directional queues created by recipients. - - - SMP protocol allows to send message via a SMP server playing proxy role using 2-hop onion routing (referred to as "private routing" in messaging clients) to protect transport information of the sender (IP address and session) from the server chosen (and possibly controlled) by the recipient. 
+ - [SimpleX Messaging Protocol](./simplex-messaging.md) (SMP) is a protocol to send messages in one direction to a recipient, relying on a router in-between. The messages are delivered via uni-directional queues created by recipients. + + - SMP protocol allows sending messages via an SMP router playing a proxy role using 2-hop onion routing (referred to as "private routing" in messaging clients) to protect transport information of the sender (IP address and session) from the router chosen (and possibly controlled) by the recipient. - SMP runs over a transport protocol (shown below as TLS) that provides integrity, server authentication, confidentiality, and transport channel binding. - - A SimpleX Server is one of those servers. + - A SimpleX router is one of those routers. - - The SimpleX Network is the term used for the collective of SimpleX Servers that facilitate SMP. + - The SimpleX Network is the term used for the collective of SimpleX routers that facilitate SMP. - - SimpleX Client libraries speak SMP to SimpleX Servers and provide a low-level API not generally intended to be used by applications. + - SimpleX Client libraries speak SMP to SimpleX routers and provide a low-level API not generally intended to be used by applications. - SimpleX Agents interface with SimpleX Clients to provide a more high-level API intended to be used by applications. Typically they are embedded as libraries, but can also be abstracted into local services. - SimpleX Agents communicate with other agents inside e2e encrypted envelopes provided by SMP protocol - the syntax and semantics of the messages exchanged by the agent are defined by [SMP agent protocol](./agent-protocol.md) -*Diagram showing the SimpleX Chat app, with logical layers of the chat application interfacing with a SimpleX Agent library, which in turn interfaces with a SimpleX Client library.
The Client library in turn speaks the Messaging Protocol to a SimpleX Server.* +*Diagram showing the SimpleX Chat app, with logical layers of the chat application interfacing with a SimpleX Agent library, which in turn interfaces with a SimpleX Client library. The Client library in turn speaks the Messaging Protocol to a SimpleX router.* ``` - User's Computer Internet Third-Party Server + User's Computer Internet Third-Party Router ------------------ | ---------------------- | ------------------------- | | SimpleX Chat | | @@ -57,11 +58,43 @@ SimpleX as a whole is a platform upon which applications can be built. [SimpleX +----------------+ | | | SimpleX Agent | | | +----------------+ -------------- TLS ---------------- +----------------+ -| SimpleX Client | ------ SimpleX Messaging Protocol ------> | SimpleX Server | +| SimpleX Client | ------ SimpleX Messaging Protocol ------> | SimpleX router | +----------------+ ----------------------------------- +----------------+ | | ``` +#### Network model + +SimpleX is a general-purpose packet routing network built on top of the Internet. Network endpoints — end-user devices, automated services, AI-enabled applications, IoT devices — exchange data packets through SimpleX network nodes (SMP routers), which accept, buffer, and deliver packets. Each router operates independently and can be operated by any party on standard computing hardware. + +SimpleX routers use resource-based addressing: each address identifies a resource on a router, similar to how the World Wide Web addresses resources via URLs. Internet routers, by comparison, use endpoint-based addressing, where IP addresses identify destination devices. Because of this design, SimpleX network participants do not need globally unique addresses to communicate. + +SimpleX network has two resource-based addressing schemes: + +- *Messaging queues* ([SMP](./simplex-messaging.md)). A queue is a unidirectional, ordered sequence of fixed-size data packets (16,384 bytes each). 
Each queue has a resource address on a specific router, gated by cryptographic credentials that separately authorize sending and receiving. + +- *Data packets* ([XFTP](./xftp.md)). A data packet is an individually addressed block in one of the standard sizes. Each packet has a unique resource address on a specific router, gated by cryptographic credentials. Data packet addressing is more efficient for delivery of larger payloads than queues. + +Packet delivery follows a two-router path. The sending endpoint submits a packet to a first router, which forwards it to a second router, where the receiving endpoint retrieves it. The sending endpoint's IP address is known only to the first router; the receiving endpoint's IP address is known only to the second router. See [2-hop Onion Message Routing](#2-hop-onion-message-routing) for details. + +Routers buffer packets between submission and retrieval — from seconds to days, enabling asynchronous delivery when endpoints are online at different times. Packets are removed after delivery or after a configured expiration period. + + +#### Applications + +Applications currently using SimpleX network: + +- **SimpleX Chat** — a peer-to-peer messenger using SimpleX network as a transport layer, in the same way that communication applications use WebRTC, Tor, i2p, or Nym. All communication logic — contacts, conversations, groups, message formats, end-to-end encryption — runs on endpoint devices. + +- **IoT devices** — using the SimpleX queue protocol directly for sensor data collection and device control. + +- **AI-based services** — automated services built on the SimpleX Chat application core. + +- **Secure monitoring and control systems** — applications for equipment monitoring and control, including robotics, using the network for command delivery and telemetry collection. 
+ +[SimpleGo](https://simplego.dev), developed by an independent organization, is a microcontroller-based device running a SimpleX Chat-compatible messenger directly on a microcontroller without a general-purpose operating system. Running over 20 days on a single battery charge, it demonstrates the energy efficiency of resource-based addressing: the device receives packets without continuous polling. A microcontroller-based router implementation that functions simultaneously as a WiFi router is also in development. + + #### SimpleX objectives 1. Provide messaging infrastructure for distributed applications. This infrastructure needs to have the following qualities: @@ -70,7 +103,7 @@ SimpleX as a whole is a platform upon which applications can be built. [SimpleX - Privacy: protect against traffic correlation attacks to determine the contacts that the users communicate with. - - Reliability: the messages should be delivered even if some participating network servers or receiving clients fail, with “at least once” delivery guarantee. + - Reliability: the messages should be delivered even if some participating network routers or receiving clients fail, with "at least once" delivery guarantee. - Integrity: the messages sent in one direction are ordered in a way that sender and recipient agree on; the recipient can detect when a message was removed or changed. @@ -78,63 +111,63 @@ SimpleX as a whole is a platform upon which applications can be built. [SimpleX - Low latency: the delay introduced by the network should not be higher than 100ms-1s in addition to the underlying TCP network latency. -2. Provide better communication security and privacy than the alternative instant messaging solutions. In particular SimpleX provides better privacy of metadata (who talks to whom and when) and better security against active network attackers and malicious servers. +2. Provide better communication security and privacy than the alternative instant messaging solutions. 
In particular SimpleX provides better privacy of metadata (who talks to whom and when) and better security against active network attackers and malicious routers. 3. Balance user experience with privacy requirements, prioritizing experience of mobile device users. #### In Comparison -SimpleX network has a design similar to P2P networks, but unlike most P2P networks it consists of clients and servers without depending on any centralized component. +SimpleX network has a design similar to P2P networks, but unlike most P2P networks it consists of clients and routers without depending on any centralized component. In comparison to more traditional messaging applications (e.g. WhatsApp, Signal, Telegram) the key differences of SimpleX network are: - participants do not need to have globally unique addresses to communicate, instead they use redundant unidirectional (simplex) messaging queues, with a separate set of queues for each contact. - connection requests are passed out-of-band, non-optionally protecting key exchange against man-in-the-middle attack. -- simple message queues provided by network servers are used by the clients to create more complex communication scenarios, such as duplex one-to-one communication, transmitting files, group communication without central servers, and content/communication channels. +- simple message queues provided by network routers are used by the clients to create more complex communication scenarios, such as duplex one-to-one communication, transmitting files, group communication without central routers, and content/communication channels. -- servers do not store any user information (no user profiles or contacts, or messages once they are delivered), and primarily use in-memory persistence. +- routers do not store any user information (no user profiles or contacts, or messages once they are delivered), and primarily use in-memory persistence. 
-- users can change servers with minimal disruption - even after an in-use server disappears, simply by changing the configuration on which servers the new queues are created. +- users can change routers with minimal disruption - even after an in-use router disappears, simply by changing the configuration on which routers the new queues are created. ## Technical Details -#### Trust in Servers +#### Trust in Routers -Clients communicate directly with servers (but not with other clients) using SimpleX Messaging Protocol (SMP) running over some transport protocol that provides integrity, server authentication, confidentiality, and transport channel binding. By default, we assume this transport protocol is TLS. +Clients communicate directly with routers (but not with other clients) using SimpleX Messaging Protocol (SMP) running over some transport protocol that provides integrity, server authentication, confidentiality, and transport channel binding. By default, we assume this transport protocol is TLS. -Users use multiple servers, and choose where to receive their messages. Accordingly, they send messages to their communication partners' chosen servers either directly, if this is a known/trusted server, or via another SMP server providing proxy functionality to protect IP address and session of the sender. +Users use multiple routers, and choose where to receive their messages. Accordingly, they send messages to their communication partners' chosen routers either directly, if this is a known/trusted router, or via another SMP router providing proxy functionality to protect IP address and session of the sender. -Although end-to-end encryption is always present, users place a degree of trust in servers they connect to. This trust decision is very similar to a user's choice of email provider; however the trust placed in a SimpleX server is significantly less. Notably, there is no re-used identifier or credential between queues on the same (or different) servers. 
While a user *may* re-use a transport connection to fetch messages from multiple queues, or connect to a server from the same IP address, both are choices a user may opt into to break the promise of un-correlatable queues. +Although end-to-end encryption is always present, users place a degree of trust in routers they connect to. This trust decision is very similar to a user's choice of email provider; however the trust placed in a SimpleX router is significantly less. Notably, there is no re-used identifier or credential between queues on the same (or different) routers. While a user *may* re-use a transport connection to fetch messages from multiple queues, or connect to a router from the same IP address, both are choices a user may opt into to break the promise of un-correlatable queues. -Users may trust a server because: +Users may trust a router because: -- They deploy and control the servers themselves from the available open-source code. This has the trade-offs of strong trust in the server but limited metadata obfuscation to a passive network observer. Techniques such as noise traffic, traffic mixing (incurring latency), and using an onion routing transport protocol can mitigate that. +- They deploy and control the routers themselves from the available open-source code. This has the trade-offs of strong trust in the router but limited metadata obfuscation to a passive network observer. Techniques such as noise traffic, traffic mixing (incurring latency), and using an onion routing transport protocol can mitigate that. -- They use servers from a trusted commercial provider. The more clients the provider has, the less metadata about the communication times is leaked to the network observers. +- They use routers from a trusted commercial provider. The more clients the provider has, the less metadata about the communication times is leaked to the network observers. 
-By default, servers do not retain access logs, and permanently delete messages and queues when requested. Messages persist only in memory until they cross a threshold of time, typically on the order of days.[0] There is still a risk that a server maliciously records all queues and messages (even though encrypted) sent via the same transport connection to gain a partial knowledge of the user’s communications graph and other meta-data. +By default, routers do not retain access logs, and permanently delete messages and queues when requested. Messages persist in memory or in a database until they cross a threshold of time, typically on the order of days.[0] There is still a risk that a router maliciously records all queues and messages (even though encrypted) sent via the same transport connection to gain a partial knowledge of the user's communications graph and other meta-data. -SimpleX supports measures (managed transparently to the user at the agent level) to mitigate the trust placed in servers. These include rotating the queues in use between users, noise traffic, supporting overlay networks such as Tor, and isolating traffic to different queues to different transport connections (and Tor circuits, if Tor is used). +SimpleX supports measures (managed transparently to the user at the agent level) to mitigate the trust placed in routers. These include rotating the queues in use between users, noise traffic, supporting overlay networks such as Tor, and isolating traffic to different queues to different transport connections (and Tor circuits, if Tor is used). -[0] While configurable by servers, a minimum value is enforced by the default software. SimpleX Agents can provide redundant routing over queues to mitigate against message loss. +[0] While configurable by routers, a minimum value is enforced by the default software. SimpleX Agents can provide redundant routing over queues to mitigate against message loss. 
-#### Client -> Server Communication +#### Client -> Router Communication Utilizing TLS grants the SimpleX Messaging Protocol (SMP) server authentication and metadata protection to a passive network observer. But SMP does not rely on the transport protocol for message confidentiality or client authentication. The SMP protocol itself provides end-to-end confidentiality, authentication, and integrity of messages between communicating parties. -Servers have long-lived, self-signed, offline certificates whose hash is pre-shared with clients over secure channels - either provided with the client library or provided in the secure introduction between clients, as part of the server address. The offline certificate signs an online certificate used in the transport protocol handshake. [0] +Routers have long-lived, self-signed, offline certificates whose hash is pre-shared with clients over secure channels - either provided with the client library or provided in the secure introduction between clients, as part of the router address. The offline certificate signs an online certificate used in the transport protocol handshake. [0] -If the transport protocol's confidentiality is broken, incoming and outgoing messages to the server cannot be correlated by message contents. Additionally, because of encryption at the SMP layer, impersonating the server is not sufficient to pass (and therefore correlate) a message from a sender to recipient - the only attack possible is to drop the messages. Only by additionally *compromising* the server can one pass and correlate messages. +If the transport protocol's confidentiality is broken, incoming and outgoing messages to the router cannot be correlated by message contents. Additionally, because of encryption at the SMP layer, impersonating the router is not sufficient to pass (and therefore correlate) a message from a sender to recipient - the only attack possible is to drop the messages. 
Only by additionally *compromising* the router can one pass and correlate messages. -It's important to note that the SMP protocol does not do server authentication. Instead we rely upon the fact that an attacker who tricks the transport protocol into authenticating the server incorrectly cannot do anything with the SMP messages except drop them. +It's important to note that the SMP protocol does not do server authentication. Instead we rely upon the fact that an attacker who tricks the transport protocol into authenticating the router incorrectly cannot do anything with the SMP messages except drop them. -After the connection is established, the client sends blocks of a fixed size 16KB, and the server replies with the blocks of the same size to reduce metadata observable to a network adversary. The protocol has been designed to make traffic correlation attacks difficult, adapting ideas from Tor, remailers, and more general onion and mix networks. It does not try to replace Tor though - SimpleX servers can be deployed as onion services and SimpleX clients can communicate with servers over Tor to further improve participants privacy. +After the connection is established, the client sends blocks of a fixed size 16KB, and the router replies with the blocks of the same size to reduce metadata observable to a network adversary. The protocol has been designed to make traffic correlation attacks difficult, adapting ideas from Tor, remailers, and more general onion and mix networks. It does not try to replace Tor though - SimpleX routers can be deployed as onion services and SimpleX clients can communicate with routers over Tor to further improve participants' privacy. -By using fixed-size blocks, oversized for the expected content, the vast majority of traffic is uniform in nature. When enough traffic is transiting a server simultaneously, the server acts as a low-latency mix node.
We can't rely on this behavior to make a security claim, but we have engineered to take advantage of it when we can. As mentioned, this holds true even if the transport connection is compromised. +By using fixed-size blocks, oversized for the expected content, the vast majority of traffic is uniform in nature. When enough traffic is transiting a router simultaneously, the router acts as a low-latency mix node. We can't rely on this behavior to make a security claim, but we have engineered to take advantage of it when we can. As mentioned, this holds true even if the transport connection is compromised. The protocol does not protect against attacks targeted at particular users with known identities - e.g., if the attacker wants to prove that two known users are communicating, they can achieve it by observing their local traffic. At the same time, it substantially complicates large-scale traffic correlation, making determining the real user identities much less effective. @@ -143,39 +176,39 @@ The protocol does not protect against attacks targeted at particular users with #### 2-hop Onion Message Routing -As SimpleX Messaging Protocol servers providing messaging queues are chosen by the recipients, in case senders connect to these servers directly the server owners (who potentially can be the recipients themselves) can learn senders' IP addresses (if Tor is not used) and which other queues on the same server are accessed by the user in the same transport connection (even if Tor is used). +As SimpleX Messaging Protocol routers providing messaging queues are chosen by the recipients, in case senders connect to these routers directly the router owners (who potentially can be the recipients themselves) can learn senders' IP addresses (if Tor is not used) and which other queues on the same router are accessed by the user in the same transport connection (even if Tor is used). 
While the clients support isolating the messages sent to different queues into different transport connections (and Tor circuits), this is not practical, as it consumes additional traffic and system resources. -To mitigate this problem SimpleX Messaging Protocol servers support 2-hop onion message routing when the SMP server chosen by the sender forwards the messages to the servers chosen by the recipients, thus protecting both the senders IP addresses and sessions, even if connection isolation and Tor are not used. +To mitigate this problem SimpleX Messaging Protocol routers support 2-hop onion message routing when the SMP router chosen by the sender forwards the messages to the routers chosen by the recipients, thus protecting both the senders' IP addresses and sessions, even if connection isolation and Tor are not used. The design of 2-hop onion message routing prevents these potential attacks: -- MITM by proxy (SMP server that forwards the messages). +- MITM by proxy (SMP router that forwards the messages). -- Identification by the proxy which and how many queues the sender sends messages to (as messages are additionally e2e encrypted between the sender and the destination SMP server). +- Identification by the proxy which and how many queues the sender sends messages to (as messages are additionally e2e encrypted between the sender and the destination SMP router). - Correlation of messages sent to different queues via the same user session (as random correlation IDs and keys are used for each message).
See more details about 2-hop onion message routing design in [SimpleX Messaging Protocol](./simplex-messaging.md#proxying-sender-commands) -Also see [Threat model](#threat-model) +Also see [Security](./security.md) #### SimpleX Messaging Protocol -SMP is initialized with an in-person or out-of-band introduction message, where Alice provides Bob with details of a server (including IP address or host name, port, and hash of the long-lived offline certificate), a queue ID, and Alice's public keys to agree e2e encryption. These introductions are similar to the PANDA key-exchange, in that if observed, the adversary can race to establish the communication channel instead of the intended participant. [0] +SMP is initialized with an in-person or out-of-band introduction message, where Alice provides Bob with details of a router (including IP address or host name, port, and hash of the long-lived offline certificate), a queue ID, and Alice's public keys to agree e2e encryption. These introductions are similar to the PANDA key-exchange, in that if observed, the adversary can race to establish the communication channel instead of the intended participant. [0] Because queues are uni-directional, Bob provides an identically-formatted introduction message to Alice over Alice's now-established receiving queue. -When setting up a queue, the server will create separate sender and recipient queue IDs (provided to Alice during set-up and Bob during initial connection). Additionally, during set-up Alice will perform a DH exchange with the server to agree upon a shared secret. This secret will be used to re-encrypt Bob's incoming message before Alice receives it, creating the anti-correlation property earlier-described should the transport encryption be compromised. +When setting up a queue, the router will create separate sender and recipient queue IDs (provided to Alice during set-up and Bob during initial connection). 
Additionally, during set-up Alice will perform a DH exchange with the router to agree upon a shared secret. This secret will be used to re-encrypt Bob's incoming message before Alice receives it, creating the anti-correlation property earlier-described should the transport encryption be compromised. -[0] Users can additionally create public 'contact queues' that are only used to receive connection requests. +[0] Users can additionally create public 'contact queues' that are only used to receive connection requests. #### SimpleX Agents -SimpleX agents provide higher-level operations compared to SimpleX Clients, who are primarily concerned with creating queues and communicating with servers using SMP. Agent operations include: +SimpleX agents provide higher-level operations compared to SimpleX Clients, who are primarily concerned with creating queues and communicating with routers using SMP. Agent operations include: - Managing sets of bi-directional, redundant queues for communication partners @@ -186,195 +219,21 @@ SimpleX agents provide higher-level operations compared to SimpleX Clients, who - Noise traffic -#### Encryption Primitives Used - -- Ed25519 or Curve25519 to authorize/verify commands to SMP servers (authorization algorithm is set via client/server configuration). -- Curve25519 for DH exchange to agree: - - the shared secret between server and recipient (to encrypt message bodies - it avoids shared cipher-text in sender and recipient traffic) - - the shared secret between sender and recipient (to encrypt messages end-to-end in each queue - it avoids shared cipher-text in redundant queues). -- [NaCl crypto_box](https://nacl.cr.yp.to/box.html) encryption scheme (curve25519xsalsa20poly1305) for message body encryption between server and recipient and for E2E per-queue encryption. -- SHA256 to validate server offline certificates. 
-- [double ratchet](https://signal.org/docs/specifications/doubleratchet/) protocol for end-to-end message encryption between the agents: - - Curve448 keys to agree shared secrets required for double ratchet initialization (using [X3DH](https://signal.org/docs/specifications/x3dh/) key agreement with 2 ephemeral keys for each side), - - AES-GCM AEAD cipher, - - SHA512-based HKDF for key derivation. - - -## Threat Model - -#### Global Assumptions - - - A user protects their local database and key material. - - The user's application is authentic, and no local malware is running. - - The cryptographic primitives in use are not broken. - - A user's choice of servers is not directly tied to their identity or otherwise represents distinguishing information about the user. - - The user's client uses 2-hop onion message routing. - -#### A passive adversary able to monitor the traffic of one user - -*can:* - - - identify that and when a user is using SimpleX. - - - determine which servers the user receives the messages from. - - - observe how much traffic is being sent, and make guesses as to its purpose. - -*cannot:* - - - see who sends messages to the user and who the user sends the messages to. - - - determine the servers used by users' contacts. - -#### A passive adversary able to monitor a set of senders and recipients - - *can:* - - - identify who and when is using SimpleX. - - - learn which SimpleX Messaging Protocol servers are used as receive queues for which users. - - - learn when messages are sent and received. - - - perform traffic correlation attacks against senders and recipients and correlate senders and recipients within the monitored set, frustrated by the number of users on the servers. 
- - - observe how much traffic is being sent, and make guesses as to its purpose - -*cannot, even in case of a compromised transport protocol:* - - - perform traffic correlation attacks with any increase in efficiency over a non-compromised transport protocol - -#### SimpleX Messaging Protocol server - -*can:* - -- learn when a queue recipient is online - -- know how many messages are sent via the queue (although some may be noise or not content messages). - -- learn which messages would trigger notifications even if a user does not use [push notifications](./push-notifications.md). - -- perform the correlation of the queue used to receive messages (matching multiple queues to a single user) via either a re-used transport connection, user's IP Address, or connection timing regularities. - -- learn a recipient's IP address, track them through other IP addresses they use to access the same queue, and infer information (e.g. employer) based on the IP addresses, as long as Tor is not used. - -- drop all future messages inserted into a queue, detectable only over other, redundant queues. - -- lie about the state of a queue to the recipient and/or to the sender (e.g. suspended or deleted when it is not). - -- spam a user with invalid messages. - -*cannot:* - -- undetectably add, duplicate, or corrupt individual messages. - -- undetectably drop individual messages, so long as a subsequent message is delivered. - -- learn the contents or type of messages. - -- distinguish noise messages from regular messages except via timing regularities. - -- compromise the users' end-to-end encryption with an active attack. - -- learn a sender's IP address, track them through other IP addresses they use to access the same queue, and infer information (e.g. employer) based on the IP addresses, even if Tor is not used (provided messages are sent via proxy SMP server). 
- -- perform senders' queue correlation (matching multiple queues to a single sender) via either a re-used transport connection, user's IP Address, or connection timing regularities, unless it has additional information from the proxy SMP server (provided messages are sent via proxy SMP server). - -#### SimpleX Messaging Protocol server that proxies the messages to another SMP server - -*can:* - -- learn a sender's IP address, as long as Tor is not used. - -- learn when a sender with a given IP address is online. - -- know how many messages are sent from a given IP address and to a given destination SMP server. - -- drop all messages from a given IP address or to a given destination server. - -- unless destination SMP server detects repeated public DH keys of senders, replay messages to a destination server within a single session, causing either duplicate message delivery (which will be detected and ignored by the receiving clients), or, when receiving client is not connected to SMP server, exhausting capacity of destination queues used within the session. - -*cannot:* - -- perform queue correlation (matching multiple queues to a single user), unless it has additional information from the destination SMP server. - -- undetectably add, duplicate, or corrupt individual messages. - -- undetectably drop individual messages, so long as a subsequent message is delivered. - -- learn the contents or type of messages. - -- learn which messages would trigger notifications. - -- learn the destination queues of messages. - -- distinguish noise messages from regular messages except via timing regularities. - -- compromise the user's end-to-end encryption with another user via an active attack. - -- compromise the user's end-to-end encryption with the destination SMP servers via an active attack. - -#### An attacker who obtained Alice's (decrypted) chat database - -*can:* - -- see the history of all messages exchanged by Alice with her communication partners. 
- -- see shared profiles of contacts and groups. - -- surreptitiously receive new messages sent to Alice via existing queues; until communication queues are rotated or the Double-Ratchet advances forward. - -- prevent Alice from receiving all new messages sent to her - either surreptitiously by emptying the queues regularly or overtly by deleting them. - -- send messages from the user to their contacts; recipients will detect it as soon as the user sends the next message, because the previous message hash won’t match (and potentially won’t be able to decrypt them in case they don’t keep the previous ratchet keys). - -*cannot:* - -- impersonate a sender and send messages to the user whose database was stolen. Doing so requires also compromising the server (to place the message in the queue, that is possible until the Double-Ratchet advances forward) or the user's device at a subsequent time (to place the message in the database). - -- undetectably communicate at the same time as Alice with her contacts. Doing so would result in the contact getting different messages with repeated IDs. - -- undetectably monitor message queues in realtime without alerting the user they are doing so, as a second subscription request unsubscribes the first and notifies the second. - -#### A user’s contact - -*can:* - -- spam the user with messages. - -- forever retain messages from the user. - -*cannot:* - -- cryptographically prove to a third-party that a message came from a user (assuming the user’s device is not seized). - -- prove that two contacts they have is the same user. - -- cannot collaborate with another of the user's contacts to confirm they are communicating with the same user. - -#### An attacker who observes Alice showing an introduction message to Bob - -*can:* - - - Impersonate Bob to Alice. - -*cannot:* - - - Impersonate Alice to Bob. 
+## Security -#### An attacker with Internet access +For encryption primitives, threat model, and detailed security analysis, see [Security](./security.md). -*can:* +SimpleX provides these security properties: -- Denial of Service SimpleX messaging servers. +- **End-to-end encryption** with forward secrecy via double ratchet protocol, with optional post-quantum protection. -- spam a user's public “contact queue” with connection requests. +- **No shared identifiers** across connections — contacts cannot prove they communicate with the same user. -*cannot:* +- **Sender deniability** — neither routers nor recipients can cryptographically prove message origin. -- send messages to a user who they are not connected with. +- **Transport metadata protection** — fixed-size blocks, 2-hop onion routing, and connection isolation frustrate traffic correlation. -- enumerate queues on a SimpleX server. +- **Out-of-band key exchange** — connection requests passed outside the network protect against MITM attacks. ## Acknowledgements diff --git a/protocol/push-notifications.md b/protocol/push-notifications.md index c5b2ce1548..4a68a5236b 100644 --- a/protocol/push-notifications.md +++ b/protocol/push-notifications.md @@ -1,8 +1,8 @@ Version 3, 2025-01-24 -# Overview of push notifications for SimpleX Messaging Servers +# Overview of push notifications for SimpleX Messaging Routers -This document describes Notification Server protocol version 3. Version history: +This document describes Notification Router protocol version 3. Version history: - v1: initial version - v2: authenticated commands, command batching - v3: detailed invalid token reason @@ -10,10 +10,10 @@ This document describes Notification Server protocol version 3. 
Version history: ## Table of contents - [Introduction](#introduction) -- [Participating servers](#participating-servers) +- [Participating routers](#participating-routers) - [Register device token to receive push notifications](#register-device-token-to-receive-push-notifications) - [Subscribe to connection notifications](#subscribe-to-connection-notifications) -- [SimpleX Notification Server protocol](#simplex-notification-server-protocol) +- [SimpleX Notification Router protocol](#simplex-notification-router-protocol) - [Register new notification token](#register-new-notification-token) - [Verify notification token](#verify-notification-token) - [Check notification token status](#check-notification-token-status) @@ -28,35 +28,35 @@ This document describes Notification Server protocol version 3. Version history: ## Introduction -SimpleX Messaging servers already operate as push servers and deliver the messages to subscribed clients as soon as they are sent to the servers. +SimpleX Messaging routers already operate as push routers and deliver the messages to subscribed clients as soon as they are sent to the routers. The reason for push notifications is to support instant message notifications on iOS that does not allow background services. -## Participating servers +## Participating routers -The diagram below shows which servers participate in message notification delivery. +The diagram below shows which routers participate in message notification delivery. -While push provider (e.g., APN) can learn how many notifications are delivered to the user, it cannot access message content, even encrypted, or any message metadata - the notifications are e2e encrypted between SimpleX Notification Server and the user's device. 
+While push provider (e.g., APN) can learn how many notifications are delivered to the user, it cannot access message content, even encrypted, or any message metadata - the notifications are e2e encrypted between SimpleX Notification Router and the user's device. ``` - User's iOS device Internet Servers + User's iOS device Internet Routers --------------------- . ------------------------ . ----------------------------- . . . . can be self-hosted now +--------------+ . . +----------------+ | SimpleX Chat | -------------- TLS --------------- | SimpleX | | client |------> SimpleX Messaging Protocol (SMP) ------> | Messaging | -+--------------+ ---------------------------------- | Server | ++--------------+ ---------------------------------- | Router | ^ | . . +----------------+ | | . . . . . | . . . | | . . | V | | | . . |SMP| TLS | | . . | | | SimpleX - | | . . . . . V . . . NTF Server + | | . . . . . V . . . NTF Router | | . . +----------------------------------+ | | . . | +---------------+ | | | -------------- TLS --------------- | | SimpleX | can be | - | |-----------> Notification Server Protocol -----> | | Notifications | self-hosted | + | |-----------> Notification Router Protocol -----> | | Notifications | self-hosted | | ---------------------------------- | | Subscriber | in the future | | . . | +---------------+ | | . . | | | @@ -64,7 +64,7 @@ While push provider (e.g., APN) can learn how many notifications are delivered t | . . | +---------------+ | | . . | | SimpleX | | | . . | | Push | | - | . . | | Server | | + | . . | | Router | | | . . | +---------------+ | | . . +----------------------------------+ | . . . . . | . . . 
@@ -90,27 +90,27 @@ This diagram shows the process of subscription to notifications, notification de ![Subscribe to notifications](./diagrams/notifications/subscription.svg) -## SimpleX Notification Server protocol +## SimpleX Notification Router protocol -To manage notification subscriptions to SMP servers, SimpleX Notification Server provides an RPC protocol with a similar design to SimpleX Messaging Protocol server. +To manage notification subscriptions to SMP routers, SimpleX Notification Router provides an RPC protocol with a similar design to SimpleX Messaging Protocol router. -This protocol sends requests and responses in a fixed size blocks of 512 bytes over TLS, uses the same [syntax of protocol transmissions](./simplex-messaging.md#smp-transmission-and-transport-block-structure) as SMP protocol, and has the same transport [handshake syntax](./simplex-messaging.md#transport-handshake) (except the server certificate is not included in the handshake). +This protocol sends requests and responses in fixed-size blocks of 512 bytes over TLS, uses the same [syntax of protocol transmissions](./simplex-messaging.md#smp-transmission-and-transport-block-structure) as SMP protocol, and has the same transport [handshake syntax](./simplex-messaging.md#transport-handshake) (except the router certificate is not included in the handshake). -The client and server use ALPN extension with `ntf/1` protocol name to agree handshake version. +The client and router use ALPN extension with `ntf/1` protocol name to agree handshake version. 
Protocol commands have this syntax: ``` -ntfServerTransmission = -ntfServerCmd = newTokenCmd / verifyTokenCmd / checkTokenCmd / +ntfRouterTransmission = +ntfRouterCmd = newTokenCmd / verifyTokenCmd / checkTokenCmd / replaceTokenCmd / deleteTokenCmd / cronCmd / newSubCmd / checkSubCmd / deleteSubCmd / pingCmd ``` ### Register new notification token -This command should be used after the client app obtains a token from push notifications provider to register the token with the server. +This command should be used after the client app obtains a token from push notifications provider to register the token with the router. -Having received this command the server will deliver a test notification via the push provider to validate that the client has this token. +Having received this command the router will deliver a test notification via the push provider to validate that the client has this token. The command syntax: @@ -121,20 +121,20 @@ deviceToken = pushProvider tokenString pushProvider = apnsDev / apnsProd / apnsNull apnsDev = "AD" ; APNS token for development environment apnsProd = "AP" ; APNS token for production environment -apnsNull = "AN" ; token that does not trigger any notification delivery - used for server testing +apnsNull = "AN" ; token that does not trigger any notification delivery - used for router testing tokenString = shortString authPubKey = length x509encoded ; Ed25519 key used to verify clients commands -clientDhPubKey = length x509encoded ; X25519 key to agree e2e encryption between the server and client +clientDhPubKey = length x509encoded ; X25519 key to agree e2e encryption between the router and client shortString = length *OCTET length = 1*1 OCTET ``` -The server response syntax: +The router response syntax: ```abnf -tokenIdResp = %s"IDTKN" SP entityId serverDhPubKey +tokenIdResp = %s"IDTKN" SP entityId routerDhPubKey entityId = shortString -serverDhPubKey = length x509encoded ; X25519 key to agree e2e encryption between the server and 
client +routerDhPubKey = length x509encoded ; X25519 key to agree e2e encryption between the router and client ``` ### Verify notification token @@ -209,8 +209,8 @@ After this command all message notification subscriptions will be removed and no This command enables or disables periodic notifications sent to the client device irrespective of message notifications. This is useful for two reasons: -- it provides better privacy from notification server, as while the server learns the device token, it doesn't learn anything else about user communications. -- it allows to receive messages when notifications were dropped by push provider, e.g. while the device was offline, or lost by notification server, e.g. while it was restarting. +- it provides better privacy from the notification router: while the router learns the device token, it doesn't learn anything else about user communications. +- it allows the client to receive messages when notifications were dropped by the push provider, e.g. while the device was offline, or lost by the notification router, e.g. while it was restarting. 
The command syntax: @@ -223,18 +223,18 @@ The interval for periodic notifications is set in minutes, with the minimum of 2 ### Create SMP message notification subscription -This command makes notification server subscribe to message notifications from SMP server and to deliver them to push provider: +This command makes notification router subscribe to message notifications from SMP router and to deliver them to push provider: ```abnf newSubCmd = %s"SNEW" newSub -newSub = %s "S" tokenId smpServer notifierId notifierKey +newSub = %s"S" tokenId smpRouter notifierId notifierKey tokenId = shortString ; returned in response to `TNEW` command -smpServer = smpServer = hosts port fingerprint +smpRouter = hosts port fingerprint hosts = length 1*host host = shortString port = shortString fingerprint = shortString -notifierId = shortString ; returned by SMP server in response to `NKEY` SMP command +notifierId = shortString ; returned by SMP router in response to `NKEY` SMP command notifierKey = length x509encoded ; private key used to authorize requests to subscribe to message notifications ``` @@ -256,8 +256,8 @@ The response: ```abnf subStatusResp = %s"SUB" SP subStatus -subStatus = %s"NEW" / %s"PENDING" / ; e.g., after SMP server disconnect/timeout while ntf server is retrying to connect - %s"ACTIVE" / %s"INACTIVE" / %s"END" / ; if another server subscribed to notifications +subStatus = %s"NEW" / %s"PENDING" / ; e.g., after SMP router disconnect/timeout while ntf router is retrying to connect + %s"ACTIVE" / %s"INACTIVE" / %s"END" / ; if another router subscribed to notifications %s"AUTH" / %s"DELETED" / %s"SERVICE" / subErrStatus subErrStatus = %s"ERR" SP shortString ``` @@ -297,7 +297,7 @@ Where `errorType` has the same syntax as in [SimpleX Messaging Protocol](./simpl ## Threat Model -This threat model compliments SimpleX Messaging Protocol [threat model](./overview-tjr.md#threat-model) +This threat model complements SimpleX Messaging Protocol [threat
model](./security.md#threat-model) #### A passive adversary able to monitor the traffic of one user @@ -307,21 +307,21 @@ This threat model compliments SimpleX Messaging Protocol [threat model](./overvi *cannot:* - - determine which servers a user subscribed to the notifications from. + - determine which routers a user subscribed to the notifications from. #### A passive adversary able to monitor a set of senders and recipients *can:* - - perform more efficient traffic correlation attacks against senders and recipients and correlate senders and recipients within the monitored set, frustrated by the number of users on the servers. + - perform more efficient traffic correlation attacks against senders and recipients and correlate senders and recipients within the monitored set, frustrated by the number of users on the routers. -#### SimpleX Messaging Protocol server +#### SimpleX Messaging Protocol router *can:* - learn which messages trigger push notifications. -- learn IP address of SimpleX notification servers used by the user. +- learn IP address of SimpleX notification routers used by the user. - drop message notifications. @@ -333,13 +333,13 @@ This threat model compliments SimpleX Messaging Protocol [threat model](./overvi - learn which queues belong to the same users with any additional efficiency compared with not using push notifications. -#### SimpleX Notification Server subscribed to message notifications +#### SimpleX Notification Router subscribed to message notifications *can:* - learn a user device token. -- learn how many messaging queues and servers a user receives messages from. +- learn how many messaging queues and routers a user receives messages from. - learn how many message notifications are delivered to the user from each queue. @@ -359,7 +359,7 @@ This threat model compliments SimpleX Messaging Protocol [threat model](./overvi - add, duplicate, or corrupt individual messages that will be shown to the user. 
-#### SimpleX Notification Server subscribed ONLY to periodic notifications +#### SimpleX Notification Router subscribed ONLY to periodic notifications *can:* @@ -371,7 +371,7 @@ This threat model compliments SimpleX Messaging Protocol [threat model](./overvi *cannot:* -- learn how many messaging queues and servers a user receives messages from. +- learn how many messaging queues and routers a user receives messages from. - learn how many message notifications are delivered to the user from each queue. @@ -403,7 +403,7 @@ This threat model compliments SimpleX Messaging Protocol [threat model](./overvi *cannot:* -- learn which SimpleX Messaging Protocol servers are used by a user (notifications are e2e encrypted). +- learn which SimpleX Messaging Protocol routers are used by a user (notifications are e2e encrypted). - learn which or how many messaging queues a user receives notifications from. @@ -415,4 +415,4 @@ This threat model compliments SimpleX Messaging Protocol [threat model](./overvi - register notification token not present on attacker's device. -- enumerate tokens or subscriptions on a SimpleX Notification Server. +- enumerate tokens or subscriptions on a SimpleX Notification Router. diff --git a/protocol/security.md b/protocol/security.md new file mode 100644 index 0000000000..3c84fa5ca1 --- /dev/null +++ b/protocol/security.md @@ -0,0 +1,215 @@ +Revision 1, 2026-03-09 + +# SimpleX Network: Security + +This document describes the cryptographic primitives and threat model for the SimpleX network. For a general introduction, see [SimpleX: messaging and application platform](./overview-tjr.md). 
+ +## Table of contents + +- [Encryption primitives](#encryption-primitives) +- [Threat model](#threat-model) + - [Global Assumptions](#global-assumptions) + - [A passive adversary able to monitor the traffic of one user](#a-passive-adversary-able-to-monitor-the-traffic-of-one-user) + - [A passive adversary able to monitor a set of senders and recipients](#a-passive-adversary-able-to-monitor-a-set-of-senders-and-recipients) + - [SimpleX Messaging Protocol router](#simplex-messaging-protocol-router) + - [SimpleX Messaging Protocol router that proxies the messages to another SMP router](#simplex-messaging-protocol-router-that-proxies-the-messages-to-another-smp-router) + - [An attacker who obtained Alice's (decrypted) chat database](#an-attacker-who-obtained-alices-decrypted-chat-database) + - [A user's contact](#a-users-contact) + - [An attacker who observes Alice showing an introduction message to Bob](#an-attacker-who-observes-alice-showing-an-introduction-message-to-bob) + - [An attacker with Internet access](#an-attacker-with-internet-access) + + +## Encryption primitives + +- **Router command authorization**: X25519 DH-based authenticated encryption (SMP v7+), providing sender deniability. Ed25519 signatures used for recipient commands and notifier commands. + +- **Per-queue key agreement**: Curve25519 DH exchange to agree: + - the shared secret between router and recipient (to encrypt message bodies — avoids shared ciphertext in sender and recipient traffic), + - the shared secret between sender and recipient (to encrypt messages end-to-end in each queue — avoids shared ciphertext in redundant queues). + +- **SMP-layer encryption**: [NaCl crypto_box](https://nacl.cr.yp.to/box.html) (curve25519xsalsa20poly1305) for message body encryption between router and recipient, and for e2e per-queue encryption. + +- **Certificate validation**: SHA256 to validate router offline certificates. 
+ +- **End-to-end encryption**: [Double ratchet](https://signal.org/docs/specifications/doubleratchet/) protocol: + - Curve448 keys for shared secret agreement via [X3DH](https://signal.org/docs/specifications/x3dh/) with 2 ephemeral keys per side, + - optional [SNTRUP761](https://ntruprime.cr.yp.to/) post-quantum KEM running in parallel with the DH ratchet (see [PQDR](./pqdr.md)), providing post-quantum forward secrecy, + - AES-GCM AEAD cipher, + - SHA512-based HKDF for key derivation. + + +## Threat Model + +### Global Assumptions + +- A user protects their local database and key material. +- The user's application is authentic, and no local malware is running. +- The cryptographic primitives in use are not broken. +- A user's choice of routers is not directly tied to their identity or otherwise represents distinguishing information about the user. +- The user's client uses 2-hop onion message routing. + +### A passive adversary able to monitor the traffic of one user + +*can:* + +- identify that and when a user is using SimpleX. + +- determine which routers the user receives messages from. + +- observe how much traffic is being sent, and make guesses as to its purpose. + +*cannot:* + +- see who sends messages to the user and who the user sends messages to. + +- determine the routers used by users' contacts. + +### A passive adversary able to monitor a set of senders and recipients + +*can:* + +- identify who and when is using SimpleX. + +- learn which SimpleX Messaging Protocol routers are used as receive queues for which users. + +- learn when messages are sent and received. + +- perform traffic correlation attacks against senders and recipients and correlate senders and recipients within the monitored set, frustrated by the number of users on the routers. + +- observe how much traffic is being sent, and make guesses as to its purpose. 
+ +*cannot, even in case of a compromised transport protocol:* + +- perform traffic correlation attacks with any increase in efficiency over a non-compromised transport protocol. + +### SimpleX Messaging Protocol router + +*can:* + +- learn when a queue recipient is online. + +- know how many messages are sent via the queue (although some may be noise or not content messages). + +- learn which messages would trigger notifications even if a user does not use [push notifications](./push-notifications.md). + +- perform the correlation of the queue used to receive messages (matching multiple queues to a single user) via either a re-used transport connection, user's IP Address, or connection timing regularities. + +- learn a recipient's IP address, track them through other IP addresses they use to access the same queue, and infer information (e.g. employer) based on the IP addresses, as long as Tor is not used. + +- drop all future messages inserted into a queue, detectable only over other, redundant queues. + +- lie about the state of a queue to the recipient and/or to the sender (e.g. suspended or deleted when it is not). + +- spam a user with invalid messages. + +*cannot:* + +- undetectably add, duplicate, or corrupt individual messages. + +- undetectably drop individual messages, so long as a subsequent message is delivered. + +- learn the contents or type of messages. + +- distinguish noise messages from regular messages except via timing regularities. + +- compromise the users' end-to-end encryption with an active attack. + +- learn a sender's IP address, track them through other IP addresses they use to access the same queue, and infer information (e.g. employer) based on the IP addresses, even if Tor is not used (provided messages are sent via proxy SMP router). 
+ +- perform senders' queue correlation (matching multiple queues to a single sender) via either a re-used transport connection, user's IP Address, or connection timing regularities, unless it has additional information from the proxy SMP router (provided messages are sent via proxy SMP router). + +### SimpleX Messaging Protocol router that proxies the messages to another SMP router + +*can:* + +- learn a sender's IP address, as long as Tor is not used. + +- learn when a sender with a given IP address is online. + +- know how many messages are sent from a given IP address and to a given destination SMP router. + +- drop all messages from a given IP address or to a given destination router. + +- unless destination SMP router detects repeated public DH keys of senders, replay messages to a destination router within a single session, causing either duplicate message delivery (which will be detected and ignored by the receiving clients), or, when receiving client is not connected to SMP router, exhausting capacity of destination queues used within the session. + +*cannot:* + +- perform queue correlation (matching multiple queues to a single user), unless it has additional information from the destination SMP router. + +- undetectably add, duplicate, or corrupt individual messages. + +- undetectably drop individual messages, so long as a subsequent message is delivered. + +- learn the contents or type of messages. + +- learn which messages would trigger notifications. + +- learn the destination queues of messages. + +- distinguish noise messages from regular messages except via timing regularities. + +- compromise the user's end-to-end encryption with another user via an active attack. + +- compromise the user's end-to-end encryption with the destination SMP routers via an active attack. + +### An attacker who obtained Alice's (decrypted) chat database + +*can:* + +- see the history of all messages exchanged by Alice with her communication partners. 
+ +- see shared profiles of contacts and groups. + +- surreptitiously receive new messages sent to Alice via existing queues, until communication queues are rotated or the Double-Ratchet advances forward. + +- prevent Alice from receiving all new messages sent to her - either surreptitiously by emptying the queues regularly or overtly by deleting them. + +- send messages from the user to their contacts; recipients will detect it as soon as the user sends the next message, because the previous message hash won't match (and potentially won't be able to decrypt them in case they don't keep the previous ratchet keys). + +*cannot:* + +- impersonate a sender and send messages to the user whose database was stolen. Doing so requires also compromising the router (to place the message in the queue, that is possible until the Double-Ratchet advances forward) or the user's device at a subsequent time (to place the message in the database). + +- undetectably communicate at the same time as Alice with her contacts. Doing so would result in the contact getting different messages with repeated IDs. + +- undetectably monitor message queues in realtime without alerting the user they are doing so, as a second subscription request unsubscribes the first and notifies the second. + +### A user's contact + +*can:* + +- spam the user with messages. + +- forever retain messages from the user. + +*cannot:* + +- cryptographically prove to a third-party that a message came from a user (assuming the user's device is not seized). + +- prove that two contacts they have are the same user. + +- collaborate with another of the user's contacts to confirm they are communicating with the same user. + +### An attacker who observes Alice showing an introduction message to Bob + +*can:* + +- Impersonate Bob to Alice. + +*cannot:* + +- Impersonate Alice to Bob. + +### An attacker with Internet access + +*can:* + +- mount Denial of Service attacks against SimpleX messaging routers. 
+ +- spam a user's public "contact queue" with connection requests. + +*cannot:* + +- send messages to a user who they are not connected with. + +- enumerate queues on a SimpleX router. diff --git a/protocol/simplex-messaging.md b/protocol/simplex-messaging.md index 758d4cead8..4d2d8e141c 100644 --- a/protocol/simplex-messaging.md +++ b/protocol/simplex-messaging.md @@ -16,7 +16,7 @@ Version 19, 2025-01-24 - [Cryptographic algorithms](#cryptographic-algorithms) - [Deniable client authentication scheme](#deniable-client-authentication-scheme) - [Simplex queue IDs](#simplex-queue-ids) -- [Server security requirements](#server-security-requirements) +- [Router security requirements](#router-security-requirements) - [Message delivery notifications](#message-delivery-notifications) - [Client services](#client-services) - [Service roles](#service-roles) @@ -48,14 +48,14 @@ Version 19, 2025-01-24 - [Proxying sender commands](#proxying-sender-commands) - [Request proxied session](#request-proxied-session) - [Send command via proxy](#send-command-via-proxy) - - [Forward command to destination server](#forward-command-to-destination-server) + - [Forward command to destination router](#forward-command-to-destination-router) - [Short link commands](#short-link-commands) - [Set link key](#set-link-key) - [Get link data](#get-link-data) - [Notifier commands](#notifier-commands) - [Subscribe to queue notifications](#subscribe-to-queue-notifications) - [Subscribe to multiple queue notifications](#subscribe-to-multiple-queue-notifications) - - [Server messages](#server-messages) + - [Router messages](#router-messages) - [Link response](#link-response) - [Queue subscription response](#queue-subscription-response) - [Service subscription response](#service-subscription-response) @@ -67,17 +67,17 @@ Version 19, 2025-01-24 - [Queue deleted notification](#queue-deleted-notification) - [Error responses](#error-responses) - [OK response](#ok-response) -- [Transport connection with the SMP 
server](#transport-connection-with-the-SMP-server) +- [Transport connection with the SMP router](#transport-connection-with-the-smp-router) - [General transport protocol considerations](#general-transport-protocol-considerations) - [TLS transport encryption](#tls-transport-encryption) - - [Server certificate](#server-certificate) + - [Router certificate](#router-certificate) - [ALPN to agree handshake version](#alpn-to-agree-handshake-version) - [Transport handshake](#transport-handshake) - [Additional transport privacy](#additional-transport-privacy) ## Abstract -Simplex Messaging Protocol is a transport agnostic client-server protocol for asynchronous distributed secure unidirectional message transmission via persistent simplex message queues. +Simplex Messaging Protocol is a transport-agnostic client-router protocol for asynchronous distributed secure unidirectional message transmission via persistent simplex message queues. It's designed with the focus on communication security and integrity, under the assumption that any part of the message transmission network can be compromised. @@ -89,7 +89,7 @@ This document describes SMP protocol version 19. Versions 1-5 are discontinued. - v2: message flags (used to control notifications) - v3: encrypt message timestamp and flags together with the body when delivered to recipient - v4: support command batching -- v5: basic auth for SMP servers +- v5: basic auth for SMP routers - v6: allow creating queues without subscribing (current minimum version) - v7: support authenticated encryption to verify senders' commands - v8: SMP proxy for sender commands (PRXY, PFWD, RFWD, PKEY, PRES, RRES) @@ -97,7 +97,7 @@ This document describes SMP protocol version 19. Versions 1-5 are discontinued. 
- v10: DELD event to subscriber when queue is deleted via another connection - v11: additional encryption of transport blocks with forward secrecy - v12: BLOCKED error for blocked queues -- v14: proxyServer handshake property to disable transport encryption between server and proxy +- v14: proxyRouter handshake property to disable transport encryption between router and proxy - v15: short links with associated data passed in NEW or LSET command - v16: service certificates - v17: create notification credentials with NEW command @@ -106,31 +106,31 @@ This document describes SMP protocol version 19. Versions 1-5 are discontinued. ## Introduction -The objective of Simplex Messaging Protocol (SMP) is to facilitate the secure and private unidirectional transfer of messages from senders to recipients via persistent simplex queues managed by the message brokers (servers). +The objective of Simplex Messaging Protocol (SMP) is to facilitate the secure and private unidirectional transfer of messages from senders to recipients via persistent simplex queues managed by the message routers. SMP is independent of any particular transmission system and requires only a reliable ordered data stream channel. While this document describes transport over TCP, other transports are also possible. -The protocol describes the set of commands that recipients and senders can exchange with SMP servers to create and to operate unidirectional "queues" (a data abstraction identifying one of many communication channels managed by the server) and to send messages from the sender to the recipient via the SMP server. +The protocol describes the set of commands that recipients and senders can exchange with SMP routers to create and to operate unidirectional "queues" (a data abstraction identifying one of many communication channels managed by the router) and to send messages from the sender to the recipient via the SMP router. 
More complex communication scenarios can be designed using multiple queues - for example, a duplex communication channel can be made of 2 simplex queues. -The protocol is designed with the focus on privacy and security, to some extent deprioritizing reliability by requiring that SMP servers only store messages until they are acknowledged by the recipients and, in any case, for a limited period of time. For communication scenarios requiring more reliable transmission the users should use several SMP servers to pass each message and implement some additional protocol to ensure that messages are not removed, inserted or changed - this is out of scope of this document. +The protocol is designed with the focus on privacy and security, to some extent deprioritizing reliability by requiring that SMP routers only store messages until they are acknowledged by the recipients and, in any case, for a limited period of time. For communication scenarios requiring more reliable transmission the users should use several SMP routers to pass each message and implement some additional protocol to ensure that messages are not removed, inserted or changed - this is out of scope of this document. SMP does not use any form of participants' identities and provides [E2EE][2] without the possibility of [MITM attack][1] relying on two pre-requisites: -- the users can establish a secure encrypted transport connection with the SMP server. [Transport connection](#transport-connection-with-the-smp-server) section describes SMP transport protocol of such connection over TCP, but any other transport connection protocol can be used. +- the users can establish a secure encrypted transport connection with the SMP router. [Transport connection](#transport-connection-with-the-smp-router) section describes SMP transport protocol of such connection over TCP, but any other transport connection protocol can be used. 
-- the recipient can pass a single message to the sender via a pre-existing secure and private communication channel (out-of-band message) - the information in this message is used to encrypt messages and to establish connection with SMP server. +- the recipient can pass a single message to the sender via a pre-existing secure and private communication channel (out-of-band message) - the information in this message is used to encrypt messages and to establish connection with SMP router. ## SMP Model -The SMP model has three communication participants: the recipient, the message broker (SMP server) that is chosen and, possibly, controlled by the recipient, and the sender. +The SMP model has three communication participants: the recipient, the message router (SMP router) that is chosen and, possibly, controlled by the recipient, and the sender. -SMP server manages multiple "simplex queues" - data records on the server that identify communication channels from the senders to the recipients. The same communicating party that is the sender in one queue, can be the recipient in another - without exposing this fact to the server. +SMP router manages multiple "simplex queues" - data records on the router that identify communication channels from the senders to the recipients. The same communicating party that is the sender in one queue, can be the recipient in another - without exposing this fact to the router. -The queue record consists of 2 unique random IDs generated by the server, one for the recipient and another for the sender, and 2 keys to verify the recipient's and the sender's commands, provided by the clients. The users of SMP protocol must use a unique ephemeral keys for each queue, to prevent aggregating their queues by keys in case SMP server is compromised. 
+The queue record consists of 2 unique random IDs generated by the router, one for the recipient and another for the sender, and 2 keys to verify the recipient's and the sender's commands, provided by the clients. The users of SMP protocol must use unique ephemeral keys for each queue, to prevent aggregating their queues by keys in case SMP router is compromised. -Creating and using the queue requires sending commands to the SMP server from the recipient and the sender - they are described in detail in [SMP commands](#smp-commands) section. +Creating and using the queue requires sending commands to the SMP router from the recipient and the sender - they are described in detail in [SMP commands](#smp-commands) section. ## Out-of-band messages @@ -142,17 +142,17 @@ The approach to out-of-band message passing and their syntax should be defined i The simplex queue is the main unit of SMP protocol. It is used by: -- Sender of the queue (who received out-of-band message) to send messages to the server using sender's queue ID, authorized by sender's key. +- Sender of the queue (who received out-of-band message) to send messages to the router using sender's queue ID, authorized by sender's key. -- Recipient of the queue (who created the queue and sent out-of-band message) will use it to retrieve messages from the server, authorizing the commands by the recipient key. Recipient decrypts the messages with the key negotiated during the creation of the queue. +- Recipient of the queue (who created the queue and sent out-of-band message) will use it to retrieve messages from the router, authorizing the commands by the recipient key. Recipient decrypts the messages with the key negotiated during the creation of the queue. -- Participant identities are not shared with the server - new unique keys and queue IDs are used for each queue. +- Participant identities are not shared with the router - new unique keys and queue IDs are used for each queue.
-This simplex queue can serve as a building block for more complex communication network. For example, two (or more, for redundancy) simplex queues can be used to create a duplex communication channel. Higher level primitives that are only known to system participants in their client applications can be created as well - e.g., contacts, conversations, groups and broadcasts. Simplex messaging servers only have the information about the low-level simplex queues. In this way a high level of privacy and security of the communication is provided. Application level primitives are not in scope of this protocol. +This simplex queue can serve as a building block for more complex communication network. For example, two (or more, for redundancy) simplex queues can be used to create a duplex communication channel. Higher level primitives that are only known to system participants in their client applications can be created as well - e.g., contacts, conversations, groups and broadcasts. Simplex messaging routers only have the information about the low-level simplex queues. In this way a high level of privacy and security of the communication is provided. Application level primitives are not in scope of this protocol. This approach is based on the concept of [unidirectional networks][4] that are used for applications with high level of information security. -Access to each queue is controlled with unique (not shared with other queues) asymmetric key pairs, separate for the sender and the recipient. The sender and the receiver have private keys, and the server has associated public keys to authenticate participants' commands by verifying cryptographic authorizations. +Access to each queue is controlled with unique (not shared with other queues) asymmetric key pairs, separate for the sender and the recipient. The sender and the receiver have private keys, and the router has associated public keys to authenticate participants' commands by verifying cryptographic authorizations. 
The messages sent over the queue are end-to-end encrypted using the DH secret agreed via out-of-band message and SMP confirmation. @@ -160,22 +160,22 @@ The messages sent over the queue are end-to-end encrypted using the DH secret ag ![Simplex queue](./diagrams/simplex-messaging/simplex.svg) -Queue is defined by recipient ID `RID` and sender ID `SID`, unique for the server. Sender key (`SK`) is used by the server to verify sender's commands (identified by `SID`) to send messages. Recipient key (`RK`) is used by the server to verify recipient's commands (identified by `RID`) to retrieve messages. +Queue is defined by recipient ID `RID` and sender ID `SID`, unique for the router. Sender key (`SK`) is used by the router to verify sender's commands (identified by `SID`) to send messages. Recipient key (`RK`) is used by the router to verify recipient's commands (identified by `RID`) to retrieve messages. -The protocol uses different IDs for sender and recipient in order to provide an additional privacy by preventing the correlation of senders and recipients commands sent over the network - in case the encrypted transport is compromised, it would still be difficult to correlate senders and recipients without access to the queue records on the server. +The protocol uses different IDs for sender and recipient in order to provide an additional privacy by preventing the correlation of senders and recipients commands sent over the network - in case the encrypted transport is compromised, it would still be difficult to correlate senders and recipients without access to the queue records on the router. 
## SMP queue URI -The SMP queue URIs MUST include server identity, queue hostname, an optional port, sender queue ID, and the recipient's public key to agree shared secret for e2e encryption, and an optional query string parameter `k=s` to indicate that the queue can be secured by the sender using `SKEY` command (see [Fast SMP procedure](#fast-smp-procedure) and [Secure queue by sender](#secure-queue-by-sender)). Server identity is used to establish secure connection protected from MITM attack with SMP server (see [Transport connection](#transport-connection-with-the-smp-server) for SMP transport protocol). +The SMP queue URIs MUST include router identity, queue hostname, an optional port, sender queue ID, and the recipient's public key to agree shared secret for e2e encryption, and an optional query string parameter `k=s` to indicate that the queue can be secured by the sender using `SKEY` command (see [Fast SMP procedure](#fast-smp-procedure) and [Secure queue by sender](#secure-queue-by-sender)). Router identity is used to establish secure connection protected from MITM attack with SMP router (see [Transport connection](#transport-connection-with-the-smp-router) for SMP transport protocol). The [ABNF][8] syntax of the queue URI is: ```abnf -queueURI = %s"smp://" smpServer "/" queueId "#/?" versionParam keyParam [sndSecureParam] -smpServer = serverIdentity "@" srvHosts [":" port] +queueURI = %s"smp://" smpRouter "/" queueId "#/?" versionParam keyParam [sndSecureParam] +smpRouter = routerIdentity "@" srvHosts [":" port] srvHosts = ["," srvHosts] ; RFC1123, RFC5891 port = 1*DIGIT -serverIdentity = base64url +routerIdentity = base64url queueId = base64url versionParam = %s"v=" versionRange versionRange = 1*DIGIT / 1*DIGIT "-" 1*DIGIT @@ -194,49 +194,49 @@ x509UrlEncoded = `port` is optional, the default TCP port for SMP protocol is 5223. 
-`serverIdentity` is a required hash of the server certificate SPKI block (without line breaks, header and footer) used by the client to validate server certificate during transport handshake (see [Transport connection](#transport-connection-with-the-smp-server)) +`routerIdentity` is a required hash of the router certificate SPKI block (without line breaks, header and footer) used by the client to validate router certificate during transport handshake (see [Transport connection](#transport-connection-with-the-smp-router)) ## SMP procedure -The SMP procedure of creating a simplex queue on SMP server is explained using participants Alice (the recipient) who wants to receive messages from Bob (the sender). +The SMP procedure of creating a simplex queue on SMP router is explained using participants Alice (the recipient) who wants to receive messages from Bob (the sender). To create and start using a simplex queue Alice and Bob follow these steps: -1. Alice creates a simplex queue on the server: +1. Alice creates a simplex queue on the router: - 1. Decides which SMP server to use (can be the same or different server that Alice uses for other queues) and opens secure encrypted transport connection to the chosen SMP server (see [Transport connection](#transport-connection-with-the-smp-server)). + 1. Decides which SMP router to use (can be the same or different router that Alice uses for other queues) and opens secure encrypted transport connection to the chosen SMP router (see [Transport connection](#transport-connection-with-the-smp-router)). 2. Generates a new random public/private key pair (encryption key - `EK`) that she did not use before to agree a shared secret with Bob to encrypt the messages. - 3. Generates another new random public/private key pair (recipient key - `RK`) that she did not use before for her to authorize commands to the server. + 3. 
Generates another new random public/private key pair (recipient key - `RK`) that she did not use before for her to authorize commands to the router. - 4. Generates one more random key pair (recipient DH key - `RDHK`) to negotiate symmetric key that will be used by the server to encrypt message bodies delivered to Alice (to avoid shared cipher-text inside transport connection). + 4. Generates one more random key pair (recipient DH key - `RDHK`) to negotiate symmetric key that will be used by the router to encrypt message bodies delivered to Alice (to avoid shared cipher-text inside transport connection). - 5. Sends `"NEW"` command to the server to create a simplex queue (see `create` in [Create queue command](#create-queue-command)). This command contains previously generated unique "public" keys `RK` and `RDHK`. `RK` will be used by the server to verify the subsequent commands related to the same queue authorized by its private counterpart, for example to subscribe to the messages received to this queue or to update the queue, e.g. by setting the key required to send the messages (initially Alice creates the queue that accepts unauthorized messages, so anybody could send the message via this queue if they knew the queue sender's ID and server address). + 5. Sends `"NEW"` command to the router to create a simplex queue (see `create` in [Create queue command](#create-queue-command)). This command contains previously generated unique "public" keys `RK` and `RDHK`. `RK` will be used by the router to verify the subsequent commands related to the same queue authorized by its private counterpart, for example to subscribe to the messages received to this queue or to update the queue, e.g. by setting the key required to send the messages (initially Alice creates the queue that accepts unauthorized messages, so anybody could send the message via this queue if they knew the queue sender's ID and router address). - 6. 
The server sends `IDS` response with queue IDs (`queueIds`): + 6. The router sends `IDS` response with queue IDs (`queueIds`): - Recipient ID `RID` for Alice to manage the queue and to receive the messages. - Sender ID `SID` for Bob to send messages to the queue. - - Server public DH key (`SDHK`) to negotiate a shared secret for message body encryption, that Alice uses to derive a shared secret with the server `SS`. + - Router public DH key (`SDHK`) to negotiate a shared secret for message body encryption, that Alice uses to derive a shared secret with the router `SS`. 2. Alice sends an out-of-band message to Bob via the alternative channel that both Alice and Bob trust (see [protocol abstract](#simplex-messaging-protocol-abstract)). The message must include [SMP queue URI](#smp-queue-uri) with: - Unique "public" key (`EK`) that Bob must use to agree a shared secret for E2E encryption. - - SMP server hostname and information to open secure encrypted transport connection (see [Transport connection](#transport-connection-with-the-smp-server)). + - SMP router hostname and information to open secure encrypted transport connection (see [Transport connection](#transport-connection-with-the-smp-router)). - Sender queue ID `SID` for Bob to use. 3. Bob, having received the out-of-band message from Alice, connects to the queue: - 1. Generates a new random public/private key pair (sender key - `SK`) that he did not use before for him to authorize messages sent to Alice's server and another key pair for e2e encryption agreement. + 1. Generates a new random public/private key pair (sender key - `SK`) that he did not use before for him to authorize messages sent to Alice's router and another key pair for e2e encryption agreement. 2. Prepares the confirmation message for Alice to secure the queue. This message includes: - - Previously generated "public" key `SK` that will be used by Alice's server to verify Bob's messages, once the queue is secured. 
+ - Previously generated "public" key `SK` that will be used by Alice's router to verify Bob's messages, once the queue is secured. - Public key to agree a shared secret with Alice for e2e encryption. @@ -244,9 +244,9 @@ To create and start using a simplex queue Alice and Bob follow these steps: 3. Encrypts the confirmation body with the shared secret agreed using public key `EK` (that Alice provided via the out-of-band message). - 4. Sends the encrypted message to the server with queue ID `SID` (see `send` in [Send message](#send-message)). This initial message to the queue must not be authorized - authorized messages will be rejected until Alice secures the queue (below). + 4. Sends the encrypted message to the router with queue ID `SID` (see `send` in [Send message](#send-message)). This initial message to the queue must not be authorized - authorized messages will be rejected until Alice secures the queue (below). -4. Alice receives Bob's message from the server using recipient queue ID `RID` (possibly, via the same transport connection she already has opened - see `message` in [Deliver queue message](#deliver-queue-message)): +4. Alice receives Bob's message from the router using recipient queue ID `RID` (possibly, via the same transport connection she already has opened - see `message` in [Deliver queue message](#deliver-queue-message)): 1. She decrypts received message body using the secret `SS`. @@ -258,7 +258,7 @@ To create and start using a simplex queue Alice and Bob follow these steps: 1. She sends the `KEY` command with `RID` signed with "private" key `RK` to update the queue to only accept requests authorized by "private" key `SK` provided by Bob. This command contains unique "public" key `SK` previously generated by Bob. - 2. From this moment the server will accept only authorized commands to `SID`, so only Bob will be able to send messages to the queue `SID` (corresponding to `RID` that Alice has). + 2. 
From this moment the router will accept only authorized commands to `SID`, so only Bob will be able to send messages to the queue `SID` (corresponding to `RID` that Alice has). 3. Once queue is secured, Alice deletes `SID` and `SK` - even if Alice's client is compromised in the future, the attacker would not be able to send messages pretending to be Bob. @@ -276,19 +276,19 @@ Bob now can securely send messages to Alice: 1. He encrypts the message to Alice with the agreed shared secret (using "public" key `EK` provided by Alice, only known to Bob, used only for one simplex queue). - 2. He authorizes `"SEND"` command to the server queue `SID` using the "private" key `SK` (that only he knows, used only for this queue). + 2. He authorizes `"SEND"` command to the router queue `SID` using the "private" key `SK` (that only he knows, used only for this queue). - 3. He sends the command to the server (see `send` in [Send message](#send-message)), that the server will verify using the "public" key `SK` (that Alice earlier received from Bob and provided to the server via `"KEY"` command). + 3. He sends the command to the router (see `send` in [Send message](#send-message)), that the router will verify using the "public" key `SK` (that Alice earlier received from Bob and provided to the router via `"KEY"` command). 2. Alice receives the message(s): - 1. She authorizes `"SUB"` command to the server to subscribe to the queue `RID` with the "private" key `RK` (see `subscribe` in [Subscribe to queue](#subscribe-to-queue)). + 1. She authorizes `"SUB"` command to the router to subscribe to the queue `RID` with the "private" key `RK` (see `subscribe` in [Subscribe to queue](#subscribe-to-queue)). - 2. The server, having verified Alice's command with the "public" key `RK` that she provided, delivers Bob's message(s) (see `message` in [Deliver queue message](#deliver-queue-message)). + 2. 
The router, having verified Alice's command with the "public" key `RK` that she provided, delivers Bob's message(s) (see `message` in [Deliver queue message](#deliver-queue-message)). 3. She decrypts Bob's message(s) with the shared secret agreed using "private" key `EK`. - 4. She acknowledges the message reception to the server with `"ACK"` so that the server can delete the message and deliver the next messages. + 4. She acknowledges the message reception to the router with `"ACK"` so that the router can delete the message and deliver the next messages. This flow is show on sequence diagram below. @@ -300,11 +300,11 @@ This flow is show on sequence diagram below. ![Simplex queue operations](./diagrams/simplex-messaging/simplex-op.svg) -Sequence diagram does not show E2E encryption - server knows nothing about encryption between the sender and the receiver. +Sequence diagram does not show E2E encryption - router knows nothing about encryption between the sender and the receiver. A higher level application protocol should define the semantics that allow to use two simplex queues (or two sets of queues for redundancy) for the bi-directional or any other communication scenarios. -The SMP is intentionally unidirectional - it provides no answer to how Bob will know that the transmission succeeded, and whether Alice received any messages. There may be a scenario when Alice wants to securely receive the messages from Bob, but she does not want Bob to have any proof that she received any messages - this low-level protocol can be used in this scenario, as all Bob knows as a fact is that he was able to send one unsigned message to the server that Alice provided, and now he can only send messages signed with the key `SK` that he sent to the server - it does not prove that any message was received by Alice. +The SMP is intentionally unidirectional - it provides no answer to how Bob will know that the transmission succeeded, and whether Alice received any messages. 
There may be a scenario when Alice wants to securely receive the messages from Bob, but she does not want Bob to have any proof that she received any messages - this low-level protocol can be used in this scenario, as all Bob knows as a fact is that he was able to send one unsigned message to the router that Alice provided, and now he can only send messages signed with the key `SK` that he sent to the router - it does not prove that any message was received by Alice. For bi-directional conversation, now that Bob can securely send encrypted messages to Alice, Bob can create the second simplex queue that will allow Alice to send messages to Bob in the same way, sending the second queue details via the first queue. If both Alice and Bob have their respective unique "public" keys (Alice's and Bob's `EK`s of two separate queues), or pass additional keys to sign the messages, the conversation can be both encrypted and signed. @@ -330,37 +330,37 @@ Simplex Messaging Protocol: - Defines only message-passing protocol: - - Transport agnostic - the protocol does not define how clients connect to the servers. It can be implemented over any ordered data stream channel: TCP connection, HTTP with long polling, websockets, etc. + - Transport agnostic - the protocol does not define how clients connect to the routers. It can be implemented over any ordered data stream channel: TCP connection, HTTP with long polling, websockets, etc. - - Not semantic - the protocol does not assign any meaning to queues and messages. While on the application level the queues and messages can have different meaning (e.g., for messages: text or image chat message, message acknowledgement, participant profile information, status updates, changing "public" key to encrypt messages, changing servers, etc.), on SMP protocol level all the messages are binary and their meaning can only be interpreted by client applications and not by the servers - this interpretation is out of scope of this protocol. 
+ - Not semantic - the protocol does not assign any meaning to queues and messages. While on the application level the queues and messages can have different meaning (e.g., for messages: text or image chat message, message acknowledgement, participant profile information, status updates, changing "public" key to encrypt messages, changing routers, etc.), on SMP protocol level all the messages are binary and their meaning can only be interpreted by client applications and not by the routers - this interpretation is out of scope of this protocol. -- Client-server architecture: +- Client-router architecture: - - Multiple servers, that can be deployed by the system users, can be used to send and retrieve messages. + - Multiple routers, that can be deployed by the system users, can be used to send and retrieve messages. - - Servers do not communicate with each other, except when used as proxy to forward commands to another server, and do not "know" about other servers. + - Routers do not communicate with each other, except when used as proxy to forward commands to another router, and do not "know" about other routers. - - Clients only communicate with servers (excluding the initial out-of-band message), so the message passing is asynchronous. + - Clients only communicate with routers (excluding the initial out-of-band message), so the message passing is asynchronous. - - For each queue, the message recipient defines the server through which the sender should send messages. To protect transport anonymity the sender can use their chosen server to forward commands to the server chosen by the recipient. + - For each queue, the message recipient defines the router through which the sender should send messages. To protect transport anonymity the sender can use their chosen router to forward commands to the router chosen by the recipient. 
- - While multiple servers and multiple queues can be used to pass each message, it is in scope of application level protocol(s), and out of scope of this protocol. + - While multiple routers and multiple queues can be used to pass each message, it is in scope of application level protocol(s), and out of scope of this protocol. - - Servers store messages only until they are retrieved by the recipients, and in any case, for a limited time. + - Routers store messages only until they are retrieved by the recipients, and in any case, for a limited time. - - Servers are required to NOT store any message history or delivery log, but even if the server is compromised, it does not allow to decrypt the messages or to determine the list of queues established by any participant - this information is only stored on client devices. + - Routers are required to NOT store any message history or delivery log, but even if the router is compromised, it does not allow to decrypt the messages or to determine the list of queues established by any participant - this information is only stored on client devices. -- The only element provided by SMP servers is simplex queues: +- The only element provided by SMP routers is simplex queues: - Each queue is created and managed by the queue recipient. - Asymmetric encryption is used to authorize and verify the requests to send and receive the messages. - - One ephemeral public key is used by the servers to verify requests to send the messages into the queue, and another ephemeral public key - to verify requests to retrieve the messages from the queue. These ephemeral keys are used only for one queue, and are not used for any other context - this key does not represent any participant identity. + - One ephemeral public key is used by the routers to verify requests to send the messages into the queue, and another ephemeral public key - to verify requests to retrieve the messages from the queue. 
These ephemeral keys are used only for one queue, and are not used for any other context - this key does not represent any participant identity. - - Both recipient and sender public keys are provided to the server by the queue recipient. "Public" key `RK` is provided when the queue is created, public key `SK` is provided when the queue is secured. V9 of SMP protocol allows senders to provide their key to the server directly or via proxy, to avoid waiting until the recipient is online to secure the queue. + - Both recipient and sender public keys are provided to the router by the queue recipient. "Public" key `RK` is provided when the queue is created, public key `SK` is provided when the queue is secured. V9 of SMP protocol allows senders to provide their key to the router directly or via proxy, to avoid waiting until the recipient is online to secure the queue. - - The "public" keys known to the server and used to verify commands from the participants are unrelated to the keys used to encrypt and decrypt the messages - the latter keys are also unique per each queue but they are only known to participants, not to the servers. + - The "public" keys known to the router and used to verify commands from the participants are unrelated to the keys used to encrypt and decrypt the messages - the latter keys are also unique per each queue but they are only known to participants, not to the routers. - Messaging graph can be asymmetric: Bob's ability to send messages to Alice does not automatically lead to the Alice's ability to send messages to Bob. 
@@ -368,7 +368,7 @@ Simplex Messaging Protocol: Simplex messaging clients must cryptographically authorize commands for the following operations: -- With the recipient's key `RK` (server to verify): +- With the recipient's key `RK` (router to verify): - create the queue (`NEW`) - subscribe to queue (`SUB`) - secure the queue (`KEY`) @@ -377,19 +377,19 @@ Simplex messaging clients must cryptographically authorize commands for the foll - acknowledge received messages (`ACK`) - suspend the queue (`OFF`) - delete the queue (`DEL`) -- With the sender's key `SK` (server to verify): +- With the sender's key `SK` (router to verify): - secure queue (`SKEY`) - send messages (`SEND`) - With the optional notifier's key: - subscribe to message notifications (`NSUB`) -To authorize/verify transmissions clients and servers MUST use either signature algorithm Ed25519 algorithm defined in [RFC8709][15] or [deniable authentication scheme](#deniable-client-authentication-scheme) based on NaCL crypto_box. +To authorize/verify transmissions clients and routers MUST use either the Ed25519 signature algorithm defined in [RFC8709][15] or [deniable authentication scheme](#deniable-client-authentication-scheme) based on NaCL crypto_box. It is recommended that clients use signature algorithm for the recipient commands and deniable authentication scheme for sender commands (to have non-repudiation quality in the whole protocol stack). -To encrypt/decrypt message bodies delivered to the recipients, servers/clients MUST use NaCL crypto_box. +To encrypt/decrypt message bodies delivered to the recipients, routers/clients MUST use NaCL crypto_box. -Clients MUST encrypt message bodies sent via SMP servers using use NaCL crypto_box. +Clients MUST encrypt message bodies sent via SMP routers using NaCL crypto_box.
## Deniable client authentication scheme @@ -397,47 +397,47 @@ While e2e encryption algorithms used in the client applications have repudiation SMP protocol supports repudiable authenticators to authorize client commands. These authenticators use NaCl crypto_box that proves authentication and third party unforgeability and, unlike signature, provides repudiation guarantee. See [crypto_box docs](https://nacl.cr.yp.to/box.html). -When queue is created or secured, the recipient would provide a DH key (X25519) to the server (either their own or received from the sender, in case of KEY command), and the server would provide its own random X25519 key per session in the handshake header. The authenticator is computed in this way: +When queue is created or secured, the recipient would provide a DH key (X25519) to the router (either their own or received from the sender, in case of KEY command), and the router would provide its own random X25519 key per session in the handshake header. The authenticator is computed in this way: ```abnf transmission = authenticator authorized -authenticator = crypto_box(sha512(authorized), secret = dh(client long term queue key, server session key), nonce = correlation ID) +authenticator = crypto_box(sha512(authorized), secret = dh(client long term queue key, router session key), nonce = correlation ID) authorized = sessionIdentifier corrId queueId protocol_command ; same as the currently signed part of the transmission ``` ## Simplex queue IDs -Simplex messaging servers MUST generate 2 different IDs for each new queue - for the recipient (that created the queue) and for the sender. It is REQUIRED that: +Simplex messaging routers MUST generate 2 different IDs for each new queue - for the recipient (that created the queue) and for the sender. It is REQUIRED that: -- These IDs are different and unique within the server. +- These IDs are different and unique within the router. 
- Based on random bytes generated with cryptographically strong pseudo-random number generator. -## Server security requirements +## Router security requirements -Simplex messaging server implementations MUST NOT create, store or send to any other servers: +Simplex messaging router implementations MUST NOT create, store or send to any other routers: - Logs of the client commands and transport connections in the production environment. - History of deleted queues, retrieved or acknowledged messages (deleted queues MAY be stored temporarily as part of the queue persistence implementation). -- Snapshots of the database they use to store queues and messages (instead simplex messaging clients must manage redundancy by using more than one simplex messaging server). In-memory persistence is recommended. +- Snapshots of the database they use to store queues and messages (instead simplex messaging clients must manage redundancy by using more than one simplex messaging router). In-memory persistence is recommended. -- Any other information that may compromise privacy or [forward secrecy][4] of communication between clients using simplex messaging servers (the servers cannot compromise forward secrecy of any application layer protocol, such as double ratchet). +- Any other information that may compromise privacy or [forward secrecy][4] of communication between clients using simplex messaging routers (the routers cannot compromise forward secrecy of any application layer protocol, such as double ratchet). ## Message delivery notifications Supporting message delivery while the client mobile app is not running requires sending push notifications with the device token. All alternative mechanisms for background message delivery are unreliable, particularly on iOS platform. 
-To protect the privacy of the recipients, there are several commands in SMP protocol that allow enabling and subscribing to message notifications from SMP queues, using separate set of "notifier keys" and via separate queue IDs - as long as SMP server is not compromised, these notifier queue IDs cannot be correlated with recipient or sender queue IDs. +To protect the privacy of the recipients, there are several commands in SMP protocol that allow enabling and subscribing to message notifications from SMP queues, using separate set of "notifier keys" and via separate queue IDs - as long as SMP router is not compromised, these notifier queue IDs cannot be correlated with recipient or sender queue IDs. -The clients can optionally instruct a dedicated push notification server to subscribe to notifications and deliver push notifications to the device, which can then retrieve the messages in the background and send local notifications to the user - this is out of scope of SMP protocol. The commands that SMP protocol provides to allow it: +The clients can optionally instruct a dedicated push notification router to subscribe to notifications and deliver push notifications to the device, which can then retrieve the messages in the background and send local notifications to the user - this is out of scope of SMP protocol. The commands that SMP protocol provides to allow it: - `enableNotifications` (`"NKEY"`) with `notifierIdResp` (`"NID"`) response - see [Enable notifications command](#enable-notifications-command). - `disableNotifications` (`"NDEL"`) - see [Disable notifications command](#disable-notifications-command). - `subscribeNotifications` (`"NSUB"`) - see [Subscribe to queue notifications](#subscribe-to-queue-notifications). - `messageNotification` (`"NMSG"`) - see [Deliver message notification](#deliver-message-notification). 
-[`SEND` command](#send-message) includes the notification flag to instruct SMP server whether to send the notification - this flag is forwarded to the recipient inside encrypted envelope, together with the timestamp and the message body, so even if TLS is compromised this flag cannot be used for traffic correlation. +[`SEND` command](#send-message) includes the notification flag to instruct SMP router whether to send the notification - this flag is forwarded to the recipient inside encrypted envelope, together with the timestamp and the message body, so even if TLS is compromised this flag cannot be used for traffic correlation. ## Client services @@ -451,19 +451,19 @@ A client service can have one of three roles: - **Notifications** (`"N"`) - Notification service that subscribes to queue notifications and delivers push notifications to user devices. -- **Proxy** (`"P"`) - Proxy service that forwards sender commands to destination servers. +- **Proxy** (`"P"`) - Proxy service that forwards sender commands to destination routers. Service role is identified in the transport handshake and determines what commands the service is authorized to send. ### Service certificates -To send service commands, services should authenticate themselves to SMP servers using service certificates. This provides: +To send service commands, services should authenticate themselves to SMP routers using service certificates. This provides: -- **Service identity** - The server assigns a unique service ID based on the service certificate, allowing associating multiple SMP queues with a service. +- **Service identity** - The router assigns a unique service ID based on the service certificate, allowing associating multiple SMP queues with a service. - **Subscription management** - Services can efficiently manage subscriptions across reconnections without re-subscribing to individual queues. -- **Rate limiting** - Servers can apply rate limits per service identity rather than per connection. 
+- **Rate limiting** - Routers can apply rate limits per service identity rather than per connection. -Service certificates are included in the client handshake and verified by the server. The service receives a service ID in the handshake response, which is then used as entity ID in service transmissions. +Service certificates are included in the client handshake and verified by the router. The service receives a service ID in the handshake response, which is then used as entity ID in service transmissions. ```abnf clientHandshakeService = serviceRole serviceCertKey @@ -477,8 +477,8 @@ Services use batch subscription commands to subscribe to multiple queues: - **SUBS** - Subscribe to messages from all associated SMP queues at once. The service provides a count and hash of queue IDs, and receives `SOKS` response with the service ID. - **NSUBS** - Subscribe to notifications from all associated SMP queues. Similar to SUBS. -- **SOKS** - Server response confirming batch subscription success. -- **ENDS** - Server notification when batch subscriptions are terminated (e.g., when another instance of service connects). +- **SOKS** - Router response confirming batch subscription success. +- **ENDS** - Router notification when batch subscriptions are terminated (e.g., when another instance of service connects). 
## SMP Transmission and transport block structure @@ -499,7 +499,7 @@ paddedNotation = ; paddedLength - required length after padding, including 2 bytes for originalLength ``` -Transport block for SMP transmission between the client and the server must have this syntax: +Transport block for SMP transmission between the client and the router must have this syntax: ```abnf paddedTransportBlock = @@ -511,14 +511,14 @@ transmissionLength = 2*2 OCTET ; word16 encoded in network byte order transmission = authorization [serviceSig] authorized authorized = sessionIdentifier corrId entityId smpCommand corrId = %x18 24*24 OCTET / %x0 "" - ; corrId is required in client commands and server responses, - ; it is empty (0-length) in server notifications. + ; corrId is required in client commands and router responses, + ; it is empty (0-length) in router notifications. ; %x18 is 24 - the random correlation ID must be 24 bytes as it is used as a nonce for NaCL crypto_box in some contexts. entityId = shortString ; queueId or proxySessionId - ; empty entityId ID is used with "create" command and in some server responses + ; empty entityId ID is used with "create" command and in some router responses authorization = shortString ; signature or authenticator ; empty authorization can be used with "send" before the queue is secured with secure command - ; authorization is always empty with "ping" and server responses + ; authorization is always empty with "ping" and router responses serviceSig = shortString ; optional Ed25519 service signature (v16+) ; present only in service sessions when authorization is non-empty sessionIdentifier = "" ; @@ -535,7 +535,7 @@ Commands syntax below is provided using [ABNF][8] with [case-sensitive strings e ```abnf smpCommand = ping / recipientCmd / senderCommand / - proxyCommand / notifierCommand / linkCommand / serverMsg + proxyCommand / notifierCommand / linkCommand / routerMsg recipientCmd = create / subscribe / subscribeMultiple / rcvSecure / 
recipientKeys / enableNotifications / disableNotifications / getMessage acknowledge / suspend / delete / getQueueInfo / setShortLink / deleteShortLink @@ -543,7 +543,7 @@ senderCommand = send / sndSecure linkCommand = setLinkKey / getLinkData proxyCommand = proxySession / proxyForward / relayForward notifierCommand = subscribeNotifications / subscribeNotificationsMultiple -serverMsg = queueIds / linkResponse / serviceOk / serviceOkMultiple / +routerMsg = queueIds / linkResponse / serviceOk / serviceOkMultiple / message / allReceived / notifierIdResp / messageNotification / proxySessionKey / proxyResponse / relayResponse / unsubscribed / serviceUnsubscribed / deleted / @@ -554,17 +554,17 @@ The syntax of specific commands and responses is defined below. ### Correlating responses with commands -The server should send `queueIds`, `error` and `ok` responses in the same order within each queue ID as the commands received in the transport connection, so that they can be correlated by the clients. To simplify correlation of commands and responses, the server must use the same `corrId` in the response as in the command sent by the client. +The router should send `queueIds`, `error` and `ok` responses in the same order within each queue ID as the commands received in the transport connection, so that they can be correlated by the clients. To simplify correlation of commands and responses, the router must use the same `corrId` in the response as in the command sent by the client. If the transport connection is closed before some responses are sent, these responses should be discarded. ### Command verification -SMP servers must verify all transmissions (excluding `ping` and initial `send` commands) by verifying the client authorizations. 
Command authorization should be generated by applying the algorithm specified for the queue to the `signed` block of the transmission, using the key associated with the queue ID (recipient's, sender's or notifier's, depending on which queue ID is used). +SMP routers must verify all transmissions (excluding `ping` and initial `send` commands) by verifying the client authorizations. Command authorization should be generated by applying the algorithm specified for the queue to the `signed` block of the transmission, using the key associated with the queue ID (recipient's, sender's or notifier's, depending on which queue ID is used). ### Keep-alive command -To keep the transport connection alive and to generate noise traffic the clients should use `ping` command to which the server responds with `pong` response. This command should be sent unsigned and without queue ID. +To keep the transport connection alive and to generate noise traffic the clients should use `ping` command to which the router responds with `pong` response. This command should be sent unsigned and without queue ID. ```abnf ping = %s"PING" @@ -575,13 +575,13 @@ This command is always sent unsigned. ### Recipient commands -Sending any of the commands in this section (other than `create`, that is sent without queue ID) is only allowed with recipient's ID (`RID`). If sender's ID is used the server must respond with `"ERR AUTH"` response (see [Error responses](#error-responses)). +Sending any of the commands in this section (other than `create`, that is sent without queue ID) is only allowed with recipient's ID (`RID`). If sender's ID is used the router must respond with `"ERR AUTH"` response (see [Error responses](#error-responses)). #### Create queue command -This command is sent by the recipient to the SMP server to create a new queue. +This command is sent by the recipient to the SMP router to create a new queue. 
-Servers SHOULD support basic auth with this command, to allow only server owners and trusted users to create queues on the destiation servers. +Routers SHOULD support basic auth with this command, to allow only router owners and trusted users to create queues on the destination routers. The syntax is: @@ -591,9 +591,9 @@ recipientAuthPublicKey = length x509encoded ; the recipient's Ed25519 or X25519 public key to verify commands for this queue recipientDhPublicKey = length x509encoded ; the recipient's Curve25519 key for DH exchange to derive the secret -; that the server will use to encrypt delivered message bodies +; that the router will use to encrypt delivered message bodies ; using [NaCl crypto_box][16] encryption scheme (curve25519xsalsa20poly1305). -optBasicAuth = %s"0" / (%s"1" shortString) ; optional server password +optBasicAuth = %s"0" / (%s"1" shortString) ; optional router password subscribeMode = %s"S" / %s"C" ; S - create and subscribe, C - only create optQueueReqData = %s"0" / (%s"1" queueReqData) ; optional queue request data queueReqData = queueReqMessaging / queueReqContact @@ -616,13 +616,13 @@ length = 1*1 OCTET length2 = 2*2 OCTET ; Word16, network byte order ``` -If the queue is created successfully, the server must send `queueIds` response with the recipient's and sender's queue IDs and public key to encrypt delivered message bodies: +If the queue is created successfully, the router must send `queueIds` response with the recipient's and sender's queue IDs and public key to encrypt delivered message bodies: ```abnf -queueIds = %s"IDS " recipientId senderId srvDhPublicKey optQueueMode optLinkId optServiceId optServerNtfCreds +queueIds = %s"IDS " recipientId senderId srvDhPublicKey optQueueMode optLinkId optServiceId optRouterNtfCreds srvDhPublicKey = length x509encoded -; the server's Curve25519 key for DH exchange to derive the secret -; that the server will use to encrypt delivered message bodies to the recipient +; the router's Curve25519
key for DH exchange to derive the secret +; that the router will use to encrypt delivered message bodies to the recipient recipientId = shortString ; 16-24 bytes senderId = shortString ; 16-24 bytes optQueueMode = %s"0" / (%s"1" queueMode) @@ -631,7 +631,7 @@ optLinkId = %s"0" / (%s"1" linkId) linkId = shortString optServiceId = %s"0" / (%s"1" serviceId) serviceId = shortString -optServerNtfCreds = %s"0" / (%s"1" srvNtfId srvNtfDhKey) +optRouterNtfCreds = %s"0" / (%s"1" srvNtfId srvNtfDhKey) srvNtfId = shortString srvNtfDhKey = length x509encoded ``` @@ -650,7 +650,7 @@ When the simplex queue was not created in the current transport connection, the subscribe = %s"SUB" ``` -If subscription is successful the server must respond with the first available message or with [queue subscription response](#queue-subscription-response) (`SOK`) if no messages are available. The recipient will continue receiving the messages from this queue until the transport connection is closed or until another transport connection subscribes to the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. +If subscription is successful the router must respond with the first available message or with [queue subscription response](#queue-subscription-response) (`SOK`) if no messages are available. The recipient will continue receiving the messages from this queue until the transport connection is closed or until another transport connection subscribes to the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. The first message will be delivered either immediately or as soon as it is available; to receive the following message the recipient must acknowledge the reception of the message (see [Acknowledge message delivery](#acknowledge-message-delivery)). 
@@ -666,13 +666,13 @@ count = 8*8 OCTET ; Int64, network byte order (big-endian) idsHash = 16*16 OCTET ; XOR of MD5 hashes of all queue IDs ``` -The count and idsHash allow the server to detect subscription drift. The server responds with `serviceOkMultiple` (`SOKS`) response. +The count and idsHash allow the router to detect subscription drift. The router responds with `serviceOkMultiple` (`SOKS`) response. #### Secure queue by recipient This command is only used until v8 of SMP protocol. V9 uses [SKEY](#secure-queue-by-sender). -This command is sent by the recipient to the server to add sender's key to the queue: +This command is sent by the recipient to the router to add sender's key to the queue: ```abnf rcvSecure = %s"KEY " senderAuthPublicKey @@ -712,7 +712,7 @@ largeString = length2 *OCTET length2 = 2*2 OCTET ; Word16, network byte order (big-endian) ``` -The server responds with `OK` response if successful. +The router responds with `OK` response if successful. #### Delete short link @@ -722,11 +722,11 @@ This command is used to remove a short link association from the queue: deleteShortLink = %s"LDEL" ``` -The server responds with `OK` or `ERR` +The router responds with `OK` or `ERR` #### Enable notifications command -This command is sent by the recipient to the server to add notifier's key to the queue, to allow push notifications server to receive notifications when the message arrives, via a separate queue ID, without receiving message content. +This command is sent by the recipient to the router to add notifier's key to the queue, to allow push notifications router to receive notifications when the message arrives, via a separate queue ID, without receiving message content. 
```abnf enableNotifications = %s"NKEY " notifierKey recipientNotificationDhPublicKey @@ -735,18 +735,18 @@ notifierKey = length x509encoded recipientNotificationDhPublicKey = length x509encoded ; the recipient's Curve25519 key for DH exchange to derive the secret -; that the server will use to encrypt notification metadata (encryptedNMsgMeta in NMSG) +; that the router will use to encrypt notification metadata (encryptedNMsgMeta in NMSG) ; using [NaCl crypto_box][16] encryption scheme (curve25519xsalsa20poly1305). ``` -The server will respond with `NID` response if notifications were enabled and the notifier's key was successfully added to the queue: +The router will respond with `NID` response if notifications were enabled and the notifier's key was successfully added to the queue: ```abnf notifierIdResponse = %s"NID " notifierId srvNotificationDhPublicKey notifierId = shortString ; 16-24 bytes srvNotificationDhPublicKey = length x509encoded -; the server's Curve25519 key for DH exchange to derive the secret -; that the server will use to encrypt notification metadata to the recipient (encryptedNMsgMeta in NMSG) +; the router's Curve25519 key for DH exchange to derive the secret +; that the router will use to encrypt notification metadata to the recipient (encryptedNMsgMeta in NMSG) ``` This response is sent with the recipient's queue ID (the third part of the transmission). @@ -755,15 +755,15 @@ To receive the message notifications, `subscribeNotifications` command ("NSUB") #### Disable notifications command -This command is sent by the recipient to the server to remove notifier's credentials from the queue: +This command is sent by the recipient to the router to remove notifier's credentials from the queue: ```abnf disableNotifications = %s"NDEL" ``` -The server must respond `ok` to this command if it was successful. +The router must respond `ok` to this command if it was successful. 
-Once notifier's credentials are removed server will no longer send "NMSG" for this queue to notifier. +Once notifier's credentials are removed router will no longer send "NMSG" for this queue to notifier. #### Get message command @@ -777,18 +777,18 @@ getMessage = %s"GET" #### Acknowledge message delivery -The recipient should send the acknowledgement of message delivery once the message was stored in the client, to notify the server that the message should be deleted: +The recipient should send the acknowledgement of message delivery once the message was stored in the client, to notify the router that the message should be deleted: ```abnf acknowledge = %s"ACK" SP msgId msgId = shortString ``` -Client must send message ID to acknowledge a particular message - to prevent double acknowledgement (e.g., when command response times out) resulting in message being lost. If the message was not delivered or if the ID of the message does not match the last delivered message, the server SHOULD respond with `ERR NO_MSG` error. +Client must send message ID to acknowledge a particular message - to prevent double acknowledgement (e.g., when command response times out) resulting in message being lost. If the message was not delivered or if the ID of the message does not match the last delivered message, the router SHOULD respond with `ERR NO_MSG` error. -The server should limit the time the message is stored, even if the message was not delivered or if acknowledgement is not sent by the recipient. +The router should limit the time the message is stored, even if the message was not delivered or if acknowledgement is not sent by the recipient. -Having received the acknowledgement, SMP server should delete the message and then send the next available message or respond with `ok` if there are no more messages available in this simplex queue. 
+Having received the acknowledgement, SMP router should delete the message and then send the next available message or respond with `ok` if there are no more messages available in this simplex queue. #### Suspend queue @@ -798,13 +798,13 @@ The recipient can suspend a queue prior to deleting it to make sure that no mess suspend = %s"OFF" ``` -The server must respond with `"ERR AUTH"` to any messages sent after the queue was suspended (see [Error responses](#error-responses)). +The router must respond with `"ERR AUTH"` to any messages sent after the queue was suspended (see [Error responses](#error-responses)). -The server must respond `ok` to this command if it was successful. +The router must respond `ok` to this command if it was successful. -This command can be sent multiple times (in case transport connection was interrupted and the response was not delivered), the server should still respond `ok` even if the queue is already suspended. +This command can be sent multiple times (in case transport connection was interrupted and the response was not delivered), the router should still respond `ok` even if the queue is already suspended. -There is no command to resume the queue. Servers must delete suspended queues that were not deleted after some period of time. +There is no command to resume the queue. Routers must delete suspended queues that were not deleted after some period of time. #### Delete queue @@ -828,7 +828,7 @@ queueInfo = %s"INFO " info info = ``` -The format of queue information is implementation specific, and is not part of the specification. For information, [JTD schema][17] for queue information returned by the reference implementation of SMP server is: +The format of queue information is implementation specific, and is not part of the specification. 
For information, [JTD schema][17] for queue information returned by the reference implementation of SMP router is: ```json { @@ -859,13 +859,13 @@ The format of queue information is implementation specific, and is not part of t ### Sender commands -Currently SMP defines only one command that can be used by senders - `send` message. This command must be used with sender's ID, if recipient's ID is used the server must respond with `"ERR AUTH"` response (see [Error responses](#error-responses)). +Currently SMP defines only one command that can be used by senders - `send` message. This command must be used with sender's ID, if recipient's ID is used the router must respond with `"ERR AUTH"` response (see [Error responses](#error-responses)). #### Secure queue by sender This command is used from v8 of SMP protocol. V8 and earlier uses [KEY](#secure-queue-by-recipient). -This command is sent by the sender to the server to add sender's key to the queue: +This command is sent by the sender to the router to add sender's key to the queue: ```abnf sndSecure = %s"SKEY " senderAuthPublicKey @@ -879,7 +879,7 @@ This command MUST be used in transmission with sender queue ID. #### Send message -This command is sent to the server by the sender both to confirm the queue after the sender received out-of-band message from the recipient and to send messages after the queue is secured: +This command is sent to the router by the sender both to confirm the queue after the sender received out-of-band message from the recipient and to send messages after the queue is secured: ```abnf send = %s"SEND " msgFlags SP smpEncMessage @@ -904,19 +904,19 @@ msgNonce = 24*24 OCTET word16 = 2*2 OCTET ``` -The first message is sent to confirm the queue - it should contain sender's server key (see decrypted message syntax below) - this first message may be sent without authorization. 
+The first message is sent to confirm the queue - it should contain sender's router key (see decrypted message syntax below) - this first message may be sent without authorization. Once the queue is secured (see [Secure queue by sender](#secure-queue-by-sender)), the subsequent `SEND` commands must be sent with the authorization. -The server must respond with `"ERR AUTH"` response in the following cases: +The router must respond with `"ERR AUTH"` response in the following cases: - the queue does not exist or is suspended - the queue is secured but the transmission does NOT have a authorization - the queue is NOT secured but the transmission has a authorization -The server must respond with `"ERR QUOTA"` response when queue capacity is exceeded. The number of messages that the server can hold is defined by the server configuration. When sender reaches queue capacity the server will not accept any further messages until the recipient receives ALL messages from the queue. After the last message is delivered, the server will deliver an additional special message indicating that the queue capacity was reached. See [Deliver queue message](#deliver-queue-message) +The router must respond with `"ERR QUOTA"` response when queue capacity is exceeded. The number of messages that the router can hold is defined by the router configuration. When sender reaches queue capacity the router will not accept any further messages until the recipient receives ALL messages from the queue. After the last message is delivered, the router will deliver an additional special message indicating that the queue capacity was reached. See [Deliver queue message](#deliver-queue-message) -Until the queue is secured, the server should accept any number of unsigned messages (up to queue capacity) - it allows the sender to resend the confirmation in case of failure. 
+Until the queue is secured, the router should accept any number of unsigned messages (up to queue capacity) - it allows the sender to resend the confirmation in case of failure. The body should be encrypted with the shared secret based on recipient's "public" key (`EK`); once decrypted it must have this format: @@ -937,7 +937,7 @@ senderKey = length x509encoded ; the sender's Ed25519 or X25519 public key to authorize SEND commands for this queue ``` -`clientHeader` in the initial unsigned message is used to transmit sender's server key and can be used in the future revisions of SMP protocol for other purposes. +`clientHeader` in the initial unsigned message is used to transmit sender's router key and can be used in the future revisions of SMP protocol for other purposes. SMP transmission structure for directly sent messages: @@ -973,7 +973,7 @@ SMP transmission structure for received messages: 2 | originalLength 283- | authorization sessionId corrId queueId %s"MSG" SP msgId (1+114 + 1+32? + 1+24 + 1+24 + 3+1 + 1+24 = 227) 16 | auth tag (msgId is used as nonce) - ------- serverEncryptedMsg (= 16082 bytes = 16384 - 302 bytes) + ------- routerEncryptedMsg (= 16082 bytes = 16384 - 302 bytes) 2 | originalLength 8 | timestamp 8- | message flags @@ -1013,22 +1013,22 @@ SMP transmission structure for received messages: ------- smpClientMessage end | ....... smpEncMessage end - 0+ | serverEncryptedMsg pad - ------- serverEncryptedMsg end + 0+ | routerEncryptedMsg pad + ------- routerEncryptedMsg end 0+ | transmission pad ------- transmission end ``` ### Proxying sender commands -To protect transport (IP address and session) anonymity of the sender from the server chosen (and, potentially, controlled) by the recipient SMP v8 added support for proxying sender's command to the recipient's server via the server chosen by the sender. 
+To protect transport (IP address and session) anonymity of the sender from the router chosen (and, potentially, controlled) by the recipient SMP v8 added support for proxying sender's command to the recipient's router via the router chosen by the sender. Sequence diagram for sending the message and `SKEY` commands via SMP proxy: ``` ------------- ------------- ------------- ------------- | sending | | SMP | | SMP | | receiving | -| client | | proxy | | server | | client | +| client | | proxy | | router | | client | ------------- ------------- ------------- ------------- | `PRXY` | | | | -------------------------> | | | @@ -1052,17 +1052,17 @@ Sequence diagram for sending the message and `SKEY` commands via SMP proxy: | | | | ``` -1. The client requests (`PRXY` command) the chosen server to connect to the destination SMP server and receives (`PKEY` response) the session information, including server certificate and the session key signed by this certificate. To protect client session anonymity the proxy MUST re-use the same session with all clients that request connection with any given destination server. +1. The client requests (`PRXY` command) the chosen router to connect to the destination SMP router and receives (`PKEY` response) the session information, including router certificate and the session key signed by this certificate. To protect client session anonymity the proxy MUST re-use the same session with all clients that request connection with any given destination router. -2. The client encrypts the transmission (`SKEY` or `SEND`) to the destination server using the shared secret computed from per-command random key and server's session key and sends it to proxying server in `PFWD` command. +2. The client encrypts the transmission (`SKEY` or `SEND`) to the destination router using the shared secret computed from per-command random key and router's session key and sends it to proxying router in `PFWD` command. 3. 
Proxy additionally encrypts the body to prevent correlation by ciphertext (in case TLS is compromised) and forwards it to proxy in `RFWD` command. -4. Proxy receives the double-encrypted response from the destination server, removes one encryption layer and forwards it to the client. +4. Proxy receives the double-encrypted response from the destination router, removes one encryption layer and forwards it to the client. The diagram below shows the encryption layers for `PFWD`/`RFWD` commands and `RRES`/`PRES` responses: -- s2r - encryption between client and SMP relay, with relay key returned in relay handshake, with MITM by proxy mitigated by verifying the certificate fingerprint included in the relay address. This encryption prevents proxy server from observing commands and responses - proxy does not know how many different queues a connected client sends messages and commands to. +- s2r - encryption between client and SMP relay, with relay key returned in relay handshake, with MITM by proxy mitigated by verifying the certificate fingerprint included in the relay address. This encryption prevents proxy router from observing commands and responses - proxy does not know how many different queues a connected client sends messages and commands to. - e2e - end-to-end encryption per SMP queue, with additional client encryption inside it. - p2r - additional encryption between proxy and SMP relay with the shared secret agreed in the handshake, to mitigate traffic correlation inside TLS. - r2c - additional encryption between SMP relay and client to prevent traffic correlation inside TLS. 
@@ -1072,35 +1072,35 @@ The diagram below shows the encryption layers for `PFWD`/`RFWD` commands and `RR | | -- TLS -- | | -- p2r -- | | -- TLS -- | | | | -- s2r -- | | -- s2r -- | | -- r2c -- | | | sending | -- e2e -- | | -- e2e -- | | -- e2e -- | receiving | -| client | MSG | SMP proxy | MSG | SMP server | MSG | client | +| client | MSG | SMP proxy | MSG | SMP router | MSG | client | | | -- e2e -- | | -- e2e -- | | -- e2e -- | | | | -- s2r -- | | -- s2r -- | | -- r2c -- | | | | -- TLS -- | | -- p2r -- | | -- TLS -- | | ----------------- ----------------- -- TLS -- ----------------- ----------------- ``` -SMP proxy is not another type of the server, it is a role that any SMP server can play when forwarding the commands. +SMP proxy is not another type of the router, it is a role that any SMP router can play when forwarding the commands. #### Request proxied session The sender uses this command to request the session with the destination proxy. -Servers SHOULD support basic auth with this command, to allow only server owners and trusted users to proxy commands to the destination servers. +Routers SHOULD support basic auth with this command, to allow only router owners and trusted users to proxy commands to the destination routers. ```abnf -proxySession = %s"PRXY" SP smpServer basicAuth -smpServer = hosts port fingerprint +proxySession = %s"PRXY" SP smpRouter basicAuth +smpRouter = hosts port fingerprint hosts = length 1*host host = shortString port = shortString fingerprint = shortString -basicAuth = "0" / "1" shortString ; server password +basicAuth = "0" / "1" shortString ; router password ``` ```abnf proxySessionKey = %s"PKEY" SP sessionId smpVersionRange certChain signedKey sessionId = shortString - ; Session ID (tlsunique) of the proxy with the destination server. + ; Session ID (tlsunique) of the proxy with the destination router. 
; This session ID should be used as entity ID in transmission with `PFWD` command certChain = length 1*cert cert = originalLength x509encoded @@ -1109,18 +1109,18 @@ originalLength = 2*2 OCTET ``` When the client receives PKEY response it MUST validate that: -- the fingerprint of the received certificate matches fingerprint in the server address - it mitigates MITM attack by proxy. -- the server session key is correctly signed with the received certificate. +- the fingerprint of the received certificate matches fingerprint in the router address - it mitigates MITM attack by proxy. +- the router session key is correctly signed with the received certificate. -The proxy server may respond with error response in case the destination server is not available or in case it has an earlier version that does not support proxied commands. +The proxy router may respond with error response in case the destination router is not available or in case it has an earlier version that does not support proxied commands. #### Send command via proxy Sender can send `SKEY` and `SEND` commands via proxy after obtaining the session ID with `PRXY` command (see [Request proxied session](#request-proxied-session)). -Transmission sent to proxy server should use session ID as entity ID and use a random correlation ID of 24 bytes as a nonce for crypto_box encryption of transmission to the destination server. The random ephemeral X25519 key to encrypt transmission should be unique per command, and it should be combined with the key sent by the server in the handshake header to proxy and to the client in `PKEY` command. +Transmission sent to proxy router should use session ID as entity ID and use a random correlation ID of 24 bytes as a nonce for crypto_box encryption of transmission to the destination router. 
The random ephemeral X25519 key to encrypt transmission should be unique per command, and it should be combined with the key sent by the router in the handshake header to proxy and to the client in `PKEY` command. -Encrypted transmission should use the received session ID from the connection between proxy server and destination server in the authorized body. +Encrypted transmission should use the received session ID from the connection between proxy router and destination router in the authorized body. ```abnf proxyCommand = %s"PFWD" SP smpVersion commandKey @@ -1128,34 +1128,34 @@ smpVersion = 2*2 OCTET commandKey = length x509encoded ``` -The proxy server will forward the encrypted transmission in `RFWD` command (see below). +The proxy router will forward the encrypted transmission in `RFWD` command (see below). -Having received the `RRES` response from the destination server, proxy server will forward `PRES` response to the client. `PRES` response should use the same correlation ID as `PFWD` command. The destination server will use this correlation ID increased by 1 as a nonce for encryption of the response. +Having received the `RRES` response from the destination router, proxy router will forward `PRES` response to the client. `PRES` response should use the same correlation ID as `PFWD` command. The destination router will use this correlation ID increased by 1 as a nonce for encryption of the response. 
```abnf proxyResponse = %s"PRES" SP ``` -#### Forward command to destination server +#### Forward command to destination router -Having received `PFWD` command from the client, the server should additionally encrypt it (without padding, as the received transmission is already encrypted by the client and padded to a fixed size) together with the correlation ID, sender command key, and protocol version, and forward it to the destination server as `RFWD` command: +Having received `PFWD` command from the client, the router should additionally encrypt it (without padding, as the received transmission is already encrypted by the client and padded to a fixed size) together with the correlation ID, sender command key, and protocol version, and forward it to the destination router as `RFWD` command: -Transmission forwarded to relay uses empty entity ID and its unique random correlation ID is used as a nonce to encrypt forwarded transmission. Correlation ID increased by 1 is used by the destination server as a nonce to encrypt responses. +Transmission forwarded to relay uses empty entity ID and its unique random correlation ID is used as a nonce to encrypt forwarded transmission. Correlation ID increased by 1 is used by the destination router as a nonce to encrypt responses. ```abnf relayCommand = %s"RFWD" SP forwardedTransmission = fwdCorrId fwdSmpVersion fwdCommandKey transmission fwdCorrId = length 24*24 OCTET ; `fwdCorrId` - correlation ID used in `PFWD` command transmission - it is used as a nonce for client encryption, - ; and `fwdCorrId + 1` is used as a nonce for the destination server response encryption. + ; and `fwdCorrId + 1` is used as a nonce for the destination router response encryption. 
fwdSmpVersion = 2*2 OCTET fwdCommandKey = length x509encoded transmission = *OCTET ; note that it is not prefixed with the length ``` -The destination server having received this command decrypts both encryption layers (proxy and client), verifies client authorization as usual, processes it, and send the double encrypted `RRES` response to proxy. +The destination router having received this command decrypts both encryption layers (proxy and client), verifies client authorization as usual, processes it, and sends the double-encrypted `RRES` response to proxy. -The shared secret for encrypting transmission bodies between proxy server and destination server is agreed from proxy and destination server keys exchanged in handshake headers - proxy and server use the same shared secret during the session for the encryption between them. +The shared secret for encrypting transmission bodies between proxy router and destination router is agreed from proxy and destination router keys exchanged in handshake headers - proxy and router use the same shared secret during the session for the encryption between them. ```abnf @@ -1175,7 +1175,7 @@ setLinkKey = %s"LKEY " senderAuthPublicKey senderAuthPublicKey = length x509encoded ``` -The server secures the queue with the provided key and responds with `LNK` response containing the sender ID and encrypted link data. +The router secures the queue with the provided key and responds with `LNK` response containing the sender ID and encrypted link data. Once this command is used, the queue is secured, and the command can only be repeated with the same key. @@ -1187,7 +1187,7 @@ This command is used to retrieve the link data associated with a "contact" queue getLinkData = %s"LGET" ``` -The server responds with `LNK` response containing the sender ID and encrypted link data. +The router responds with `LNK` response containing the sender ID and encrypted link data. This command may be repeated multiple times. 
@@ -1195,13 +1195,13 @@ This command may be repeated multiple times. #### Subscribe to queue notifications -The push notifications server (notifier) must use this command to start receiving message notifications from the queue: +The push notifications router (notifier) must use this command to start receiving message notifications from the queue: ```abnf subscribeNotifications = %s"NSUB" ``` -If subscription is successful the server must respond with [queue subscription response](#queue-subscription-response) (`SOK`). The notifier will be receiving the message notifications from this queue until the transport connection is closed or until another transport connection subscribes to notifications from the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. +If subscription is successful the router must respond with [queue subscription response](#queue-subscription-response) (`SOK`). The notifier will be receiving the message notifications from this queue until the transport connection is closed or until another transport connection subscribes to notifications from the same simplex queue - in this case the first subscription should be cancelled and [subscription END notification](#subscription-end-notification) delivered. The first message notification will be delivered either immediately or as soon as the message is available. @@ -1215,11 +1215,11 @@ count = 8*8 OCTET ; Int64, network byte order (big-endian) idsHash = 16*16 OCTET ; XOR of MD5 hashes of all queue IDs ``` -The server responds with `serviceOkMultiple` (`SOKS`) response. +The router responds with `serviceOkMultiple` (`SOKS`) response. -### Server messages +### Router messages -This section includes server events and generic command responses used for several commands. +This section includes router events and generic command responses used for several commands. 
The syntax for command-specific responses is shown together with the commands. @@ -1266,16 +1266,16 @@ allReceived = %s"ALLS" #### Deliver queue message -When server delivers the messages to the recipient, message body should be encrypted with the secret derived from DH exchange using the keys passed during the queue creation and returned with `queueIds` response. +When router delivers the messages to the recipient, message body should be encrypted with the secret derived from DH exchange using the keys passed during the queue creation and returned with `queueIds` response. -This is done to prevent the possibility of correlation of incoming and outgoing traffic of SMP server inside transport protocol. +This is done to prevent the possibility of correlation of incoming and outgoing traffic of SMP router inside transport protocol. -The server must deliver messages to all subscribed simplex queues on the currently open transport connection. The syntax for the message delivery is: +The router must deliver messages to all subscribed simplex queues on the currently open transport connection. The syntax for the message delivery is: ```abnf message = %s"MSG" SP msgId encryptedRcvMsgBody encryptedRcvMsgBody = - ; server-encrypted padded sent msgBody + ; router-encrypted padded sent msgBody ; maxMessageLength = 16048 (v11+) rcvMsgBody = timestamp msgFlags SP sentMsgBody / msgQuotaExceeded msgQuotaExceeded = %s"QUOTA" SP timestamp @@ -1285,26 +1285,26 @@ timestamp = 8*8OCTET If the sender exceeded queue capacity the recipient will receive a special message indicating the quota was exceeded. This can be used in the higher level protocol to notify sender client that it can continue sending messages. -`msgId` - unique message ID generated by the server based on cryptographically strong random bytes. 
It should be used by the clients to detect messages that were delivered more than once (in case the transport connection was interrupted and the server did not receive the message delivery acknowledgement). Message ID is used as a nonce for server/recipient encryption of message bodies. +`msgId` - unique message ID generated by the router based on cryptographically strong random bytes. It should be used by the clients to detect messages that were delivered more than once (in case the transport connection was interrupted and the router did not receive the message delivery acknowledgement). Message ID is used as a nonce for router/recipient encryption of message bodies. -`timestamp` - system time when the server received the message from the sender as **a number of seconds** since Unix epoch (1970-01-01) encoded as 64-bit integer in network byte order. If a client system/language does not support 64-bit integers, until 2106 it is safe to simply skip the first 4 zero bytes and decode 32-bit unsigned integer (or as signed integer until 2038). +`timestamp` - system time when the router received the message from the sender as **a number of seconds** since Unix epoch (1970-01-01) encoded as 64-bit integer in network byte order. If a client system/language does not support 64-bit integers, until 2106 it is safe to simply skip the first 4 zero bytes and decode 32-bit unsigned integer (or as signed integer until 2038). `sentMsgBody` - message sent by `SEND` command. See [Send message](#send-message). #### Deliver message notification -The server must deliver message notifications to all simplex queues that were subscribed with `subscribeNotifications` command (`NSUB`) on the currently open transport connection. The syntax for the message notification delivery is: +The router must deliver message notifications to all simplex queues that were subscribed with `subscribeNotifications` command (`NSUB`) on the currently open transport connection. 
The syntax for the message notification delivery is: ```abnf messageNotification = %s"NMSG " nmsgNonce encryptedNMsgMeta encryptedNMsgMeta = -; metadata E2E encrypted between server and recipient containing server's message ID and timestamp (allows extension), +; metadata E2E encrypted between router and recipient containing router's message ID and timestamp (allows extension), ; to be passed to the recipient by the notifier for them to decrypt ; with key negotiated in NKEY and NID commands using nmsgNonce nmsgNonce = -; nonce used by the server for encryption of message metadata, to be passed to the recipient by the notifier +; nonce used by the router for encryption of message metadata, to be passed to the recipient by the notifier ; for them to use in decryption of E2E encrypted metadata ``` @@ -1312,7 +1312,7 @@ Message notification does not contain any message data or non E2E encrypted meta #### Subscription END notification -When another transport connection is subscribed to the same simplex queue, the server should unsubscribe and to send the notification to the previously subscribed transport connection: +When another transport connection is subscribed to the same simplex queue, the router should unsubscribe and send the notification to the previously subscribed transport connection: ```abnf unsubscribed = %s"END" @@ -1351,27 +1351,27 @@ deleted = %s"DELD" - transmission has no required authorization or queue ID (`NO_AUTH`) - transmission has unexpected credentials (`HAS_AUTH`) - transmission has no required queue ID (`NO_ENTITY`) -- proxy server errors (`PROXY`): +- proxy router errors (`PROXY`): - `PROTOCOL` - any error. - `BASIC_AUTH` - incorrect basic auth. - - `NO_SESSION` - no destination server session with passed ID. + - `NO_SESSION` - no destination router session with passed ID. 
+ - `BROKER` - destination router error: + - `RESPONSE` - invalid router response (failed to parse). - `UNEXPECTED` - unexpected response. - `NETWORK` - network error. - `TIMEOUT` - command response timeout. - - `HOST` - no compatible server host (e.g. onion when public is required, or vice versa) + - `HOST` - no compatible router host (e.g. onion when public is required, or vice versa) - `NO_SERVICE` - service unavailable client-side. - `TRANSPORT` - handshake or other transport error: - `BLOCK` - error parsing transport block. - - `VERSION` - incompatible client or server version. + - `VERSION` - incompatible client or router version. - `LARGE_MSG` - message too large. - `SESSION` - incorrect session ID. - - `NO_AUTH` - absent server key - when the server did not provide a DH key to authorize commands for the queue that should be authorized with a DH key. + - `NO_AUTH` - absent router key - when the router did not provide a DH key to authorize commands for the queue that should be authorized with a DH key. - `HANDSHAKE` - transport handshake error: - `PARSE` - handshake syntax (parsing) error. - - `IDENTITY` - incorrect server identity (certificate fingerprint does not match server address). - - `BAD_AUTH` - incorrect or missing server credentials in handshake. + - `IDENTITY` - incorrect router identity (certificate fingerprint does not match router address). + - `BAD_AUTH` - incorrect or missing router credentials in handshake. - authentication error (`AUTH`) - incorrect authorization, unknown (or suspended) queue, sender's ID is used in place of recipient's and vice versa, and some other cases (see [Send message](#send-message) command). - blocked entity error (`BLOCKED`) - the entity (queue or message) was blocked due to policy violation (added in v12). Contains blocking information: - `reason` - blocking reason (`spam` or `content`). @@ -1379,12 +1379,12 @@ deleted = %s"DELD" - service error (`SERVICE`) - service-related error. 
- crypto error (`CRYPTO`) - cryptographic operation failed. - message queue quota exceeded error (`QUOTA`) - too many messages were sent to the message queue. Further messages can only be sent after the recipient retrieves the messages. -- store error (`STORE`) - server storage error with error message. +- store error (`STORE`) - router storage error with error message. - relay public key expired (`EXPIRED`) - relay public key has expired. - no message (`NO_MSG`) - no message available or message ID mismatch. - sent message is too large (> maxMessageLength) to be delivered (`LARGE_MSG`). -- internal server error (`INTERNAL`). -- duplicate error (`DUPLICATE_`) - internal duplicate detection error (not returned by server). +- internal router error (`INTERNAL`). +- duplicate error (`DUPLICATE_`) - internal duplicate detection error (not returned by router). The syntax for error responses: @@ -1411,93 +1411,93 @@ jsonNotice = storeError = *OCTET ``` -Server implementations must aim to respond within the same time for each command in all cases when `"ERR AUTH"` response is required to prevent timing attacks (e.g., the server should verify authorization even when the queue does not exist on the server or the authorization of different type is sent, using any dummy key compatible with the used authorization). +Router implementations must aim to respond within the same time for each command in all cases when `"ERR AUTH"` response is required to prevent timing attacks (e.g., the router should verify authorization even when the queue does not exist on the router or the authorization of different type is sent, using any dummy key compatible with the used authorization). 
### OK response -When the command is successfully executed by the server, it should respond with OK response: +When the command is successfully executed by the router, it should respond with OK response: ```abnf ok = %s"OK" ``` -## Transport connection with the SMP server +## Transport connection with the SMP router ### General transport protocol considerations -Both the recipient and the sender can use TCP or some other, possibly higher level, transport protocol to communicate with the server. The default TCP port for SMP server is 5223. +Both the recipient and the sender can use TCP or some other, possibly higher level, transport protocol to communicate with the router. The default TCP port for SMP router is 5223. The transport protocol should provide the following: -- server authentication (by matching server certificate hash with `serverIdentity`), +- server authentication (by matching router certificate hash with `routerIdentity`), - forward secrecy (by encrypting the traffic using ephemeral keys agreed during transport handshake), - integrity (preventing data modification by the attacker without detection), - unique channel binding (`sessionIdentifier`) to include in the signed part of SMP transmissions. ### TLS transport encryption -The client and server communicate using [TLS 1.3 protocol][13] restricted to: +The client and router communicate using [TLS 1.3 protocol][13] restricted to: - TLS_CHACHA20_POLY1305_SHA256 cipher suite (for better performance on mobile devices), - ed25519 EdDSA algorithms for signatures, - x25519 ECDHE groups for key exchange. -- servers must send the chain of 2, 3 or 4 self-signed certificates in the handshake (see [Server certificate](#server-certificate)), with the first (offline) certificate one signing the second (online) certificate. Offline certificate fingerprint is used as a server identity - it is a part of SMP server address. 
+- routers must send the chain of 2, 3 or 4 self-signed certificates in the handshake (see [Router certificate](#router-certificate)), with the first (offline) certificate signing the second (online) certificate. Offline certificate fingerprint is used as a router identity - it is a part of SMP router address. - The clients must abort the connection in case a different number of certificates is sent. -- server and client TLS configuration should not allow resuming the sessions. +- router and client TLS configuration should not allow resuming the sessions. -During TLS handshake the client must validate that the fingerprint of the online server certificate is equal to the `serverIdentity` the client received as part of SMP server address; if the server identity does not match the client must abort the connection. +During TLS handshake the client must validate that the fingerprint of the offline router certificate is equal to the `routerIdentity` the client received as part of SMP router address; if the router identity does not match the client must abort the connection. -### Server certificate +### Router certificate -Servers use self-signed certificates that the clients validate by comparing the fingerprint of one of the certificates in the chain with the certificate fingerprint present in the server address. +Routers use self-signed certificates that the clients validate by comparing the fingerprint of one of the certificates in the chain with the certificate fingerprint present in the router address. -Clients SHOULD support the chains of 2, 3 and 4 server certificates: +Clients SHOULD support the chains of 2, 3 and 4 router certificates: **2 certificates**: -1. offline server certificate: - - its fingerprint is present in the server address. - - its private key is not stored on the server. -2. online server certificate: +1. offline router certificate: + - its fingerprint is present in the router address. + - its private key is not stored on the router. +2. 
online router certificate: - it must be signed by offline certificate. - - its private key is stored on the server and is used in TLS session. + - its private key is stored on the router and is used in TLS session. **3 certificates**: -1. offline server certificate - same as with 2 certificates. -2. online server certificate: +1. offline router certificate - same as with 2 certificates. +2. online router certificate: - it must be signed by offline certificate. - - its private key is stored on the server. + - its private key is stored on the router. 3. session certificate: - - generated automatically on every server start and/or on schedule. - - signed by online server certificate. + - generated automatically on every router start and/or on schedule. + - signed by online router certificate. - its private key is used in TLS session. **4 certificates**: 0. offline operator identity certificate: - - used for all servers operated by the same entity. - - its private key is not stored on the server. -1. offline server certificate: + - used for all routers operated by the same entity. + - its private key is not stored on the router. +1. offline router certificate: - signed by offline operator certificate. - same as with 2 certificates. -2. online server certificate - same as with 3 certificates. +2. online router certificate - same as with 3 certificates. 3. session certificate - same as with 3 certificates. ### ALPN to agree handshake version -Client and server use [ALPN extension][18] of TLS to agree handshake version. +Client and router use [ALPN extension][18] of TLS to agree handshake version. -Server SHOULD send `smp/1` protocol name and the client should confirm this name in order to use the current protocol version. This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax. 
+Router SHOULD send `smp/1` protocol name and the client should confirm this name in order to use the current protocol version. This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax. -If the client does not confirm this protocol name, the server would fall back to v6 of SMP protocol. +If the client does not confirm this protocol name, the router would fall back to v6 of SMP protocol. ### Transport handshake -Once TLS handshake is complete, client and server will exchange blocks of fixed size (16384 bytes). +Once TLS handshake is complete, client and router will exchange blocks of fixed size (16384 bytes). -The first block sent by the server should be `paddedServerHello` and the client should respond with `paddedClientHello` - these blocks are used to agree SMP protocol version: +The first block sent by the router should be `paddedRouterHello` and the client should respond with `paddedClientHello` - these blocks are used to agree SMP protocol version: ```abnf -paddedServerHello = -serverHello = smpVersionRange sessionIdentifier [serverCertKey] ignoredPart +paddedRouterHello = +routerHello = smpVersionRange sessionIdentifier [routerCertKey] ignoredPart smpVersionRange = minSmpVersion maxSmpVersion minSmpVersion = smpVersion maxSmpVersion = smpVersion @@ -1505,18 +1505,18 @@ sessionIdentifier = shortString ; unique session identifier derived from transport connection handshake ; it should be included in authorized part of all SMP transmissions sent in this transport connection, ; but it must not be sent as part of the transmission in the current protocol version. 
-serverCertKey = certChain signedServerKey +routerCertKey = certChain signedRouterKey certChain = count 1*cert ; 2-4 certificates cert = originalLength x509encoded -signedServerKey = originalLength x509encoded ; X25519 key signed by server certificate +signedRouterKey = originalLength x509encoded ; X25519 key signed by router certificate paddedClientHello = -clientHello = smpVersion keyHash [clientKey] proxyServer optClientService ignoredPart +clientHello = smpVersion keyHash [clientKey] proxyRouter optClientService ignoredPart ; chosen SMP protocol version - it must be the maximum supported version -; within the range offered by the server -keyHash = shortString ; server identity - CA certificate fingerprint +; within the range offered by the router +keyHash = shortString ; router identity - CA certificate fingerprint clientKey = length x509encoded ; X25519 public key for session encryption - only present if needed -proxyServer = %s"T" / %s"F" ; true if connecting client is a proxy server +proxyRouter = %s"T" / %s"F" ; true if connecting client is a proxy router optClientService = %s"0" / (%s"1" clientService) ; optional service client credentials clientService = serviceRole serviceCertKey serviceRole = %s"M" / %s"N" / %s"P" ; Messaging / Notifier / Proxy @@ -1530,24 +1530,24 @@ ignoredPart = *OCTET pad = *OCTET ``` -`signedServerKey` is used to compute a shared secret to authorize client transmissions - it is combined with the per-queue key that was used when the queue was created. +`signedRouterKey` is used to compute a shared secret to authorize client transmissions - it is combined with the per-queue key that was used when the queue was created. -`clientKey` is used only by SMP proxy server when it connects to the destination server to agree shared secret for the additional encryption layer, end user clients do not use this key. 
+`clientKey` is used only by SMP proxy router when it connects to the destination router to agree shared secret for the additional encryption layer, end user clients do not use this key. -`proxyServer` flag (v14+) disables additional transport encryption inside TLS for proxy connections, since proxy server connection already has additional encryption. +`proxyRouter` flag (v14+) disables additional transport encryption inside TLS for proxy connections, since proxy router connection already has additional encryption. -`clientService` (v16+) provides long-term service client certificate for high-volume services using SMP server (chat relays, notification servers, high traffic bots). The server responds with a third handshake message containing the assigned service ID: +`clientService` (v16+) provides long-term service client certificate for high-volume services using SMP router (chat relays, notification routers, high traffic bots). The router responds with a third handshake message containing the assigned service ID: ```abnf -paddedServerHandshakeResponse = -serverHandshakeResponse = %s"R" serviceId / %s"E" handshakeError +paddedRouterHandshakeResponse = +routerHandshakeResponse = %s"R" serviceId / %s"E" handshakeError serviceId = shortString handshakeError = transportError ``` -`ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version - the client and servers must ignore any extra bytes within the original block length. +`ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version - the client and routers must ignore any extra bytes within the original block length. -For TLS transport client should assert that `sessionIdentifier` is equal to `tls-unique` channel binding defined in [RFC 5929][14] (TLS Finished message struct); we pass it in `serverHello` block to allow communication over some other transport protocol (possibly, with another channel binding). 
+For TLS transport client should assert that `sessionIdentifier` is equal to `tls-unique` channel binding defined in [RFC 5929][14] (TLS Finished message struct); we pass it in `routerHello` block to allow communication over some other transport protocol (possibly, with another channel binding). ### Additional transport privacy @@ -1557,7 +1557,7 @@ For scenarios when meta-data privacy is critical, it is recommended that clients - establish a separate connection for each SMP queue, - send noise traffic (using PING command). -In addition to that, the servers can be deployed as Tor onion services. +In addition to that, the routers can be deployed as Tor onion services. [1]: https://en.wikipedia.org/wiki/Man-in-the-middle_attack [2]: https://en.wikipedia.org/wiki/End-to-end_encryption diff --git a/protocol/xftp.md b/protocol/xftp.md index 1a17524d71..855d3f62e8 100644 --- a/protocol/xftp.md +++ b/protocol/xftp.md @@ -11,12 +11,12 @@ Version 3, 2025-01-24 - [XFTP procedure](#xftp-procedure) - [File description](#file-description) - [URIs syntax](#uris-syntax) - - [XFTP server URI](#xftp-server-uri) + - [XFTP router URI](#xftp-router-uri) - [File description URI](#file-description-URI) - [XFTP qualities and features](#xftp-qualities-and-features) - [Cryptographic algorithms](#cryptographic-algorithms) -- [File chunk IDs](#file-chunk-ids) -- [Server security requirements](#server-security-requirements) +- [Data packet IDs](#data-packet-ids) +- [Router security requirements](#router-security-requirements) - [Transport protocol](#transport-protocol) - [TLS ALPN](#tls-alpn) - [Connection handshake](#connection-handshake) @@ -26,13 +26,13 @@ Version 3, 2025-01-24 - [Command authentication](#command-authentication) - [Keep-alive command](#keep-alive-command) - [File sender commands](#file-sender-commands) - - [Register new file chunk](#register-new-file-chunk) - - [Add file chunk recipients](#add-file-chunk-recipients) - - [Upload file chunk](#upload-file-chunk) - - [Delete 
file chunk](#delete-file-chunk) + - [Register new data packet](#register-new-data-packet) + - [Add data packet recipients](#add-data-packet-recipients) + - [Upload data packet](#upload-data-packet) + - [Delete data packet](#delete-data-packet) - [File recipient commands](#file-recipient-commands) - - [Download file chunk](#download-file-chunk) - - [Acknowledge file chunk download](#acknowledge-file-chunk-download) + - [Download data packet](#download-data-packet) + - [Acknowledge data packet download](#acknowledge-data-packet-download) - [Error responses](#error-responses) - [Threat model](#threat-model) @@ -46,7 +46,7 @@ It is designed as a application level protocol to solve the problem of secure an ## Introduction -The objective of SimpleX File Transfer Protocol (XFTP) is to facilitate the secure and private unidirectional transfer of files from senders to recipients via persistent file chunks stored by the xftp server. +The objective of SimpleX File Transfer Protocol (XFTP) is to facilitate the secure and private unidirectional transfer of files from senders to recipients via persistent data packets stored by the xftp router. XFTP is implemented as an application level protocol on top of HTTP2 and TLS. @@ -56,19 +56,21 @@ This document describes XFTP protocol version 3. The version history: - v2: authenticated commands - added basic auth support for commands - v3: blocked files - added BLOCKED error type for policy violations -The protocol describes the set of commands that senders and recipients can send to XFTP servers to create, upload, download and delete file chunks of several pre-defined sizes. XFTP servers SHOULD support chunks of 4 sizes: 64KB, 256KB, 1MB and 4MB (1KB = 1024 bytes, 1MB = 1024KB). +The protocol describes the set of commands that senders and recipients can send to XFTP routers to create, upload, download and delete data packets of several pre-defined sizes. 
XFTP routers SHOULD support packets of 4 sizes: 64KB, 256KB, 1MB and 4MB (1KB = 1024 bytes, 1MB = 1024KB). -The protocol is designed with the focus on meta-data privacy and security. While using TLS, the protocol does not rely on TLS security by using additional encryption to achieve that there are no identifiers or ciphertext in common in received and sent server traffic, frustrating traffic correlation even if TLS is compromised. +The protocol is designed with the focus on meta-data privacy and security. While using TLS, the protocol does not rely on TLS security by using additional encryption to achieve that there are no identifiers or ciphertext in common in received and sent router traffic, frustrating traffic correlation even if TLS is compromised. -XFTP does not use any form of participants' identities. It relies on out-of-band passing of "file description" - a human-readable YAML document with the list of file chunk locations, hashes and necessary cryptographic keys. +XFTP does not use any form of participants' identities. It relies on out-of-band passing of "file description" - a human-readable YAML document with the list of data packet locations, hashes and necessary cryptographic keys. + +> **Note:** While this protocol was originally designed for file transfer, it handles generic addressed data packets. File-specific semantics (splitting files into packets, assembly, naming) are application-level concerns defined in the [agent protocol](./agent-protocol.md). ## XFTP Model -The XFTP model has three communication participants: the recipient, the file server (XFTP server) that is chosen and, possibly, controlled by the sender, and the sender. +The XFTP model has three communication participants: the recipient, the XFTP router that is chosen and, possibly, controlled by the sender, and the sender. -XFTP server allows uploading fixed size file chunks, with or without basic authentication. 
The same party that can be the sender of one file chunk can be the recipient of another, without exposing it to the server. +XFTP router allows uploading fixed size data packets, with or without basic authentication. The same party that can be the sender of one data packet can be the recipient of another, without exposing it to the router. -Each file chunk allows multiple recipients, each recipient can download the same chunk multiple times. It allows depending on the threat model use the same recipient credentials for multiple parties, thus reducing server ability to understand the number of intended recipients (but server can still track IP addresses to determine it), or use one unique set of credentials for each recipient, frustrating traffic correlation on the assumption of compromised TLS. In the latter case, senders can create a larger number of recipient credentials to hide the actual number of intended recipients from the servers (which is what SimpleX clients do). +Each data packet allows multiple recipients, and each recipient can download the same packet multiple times. Depending on the threat model, this allows either using the same recipient credentials for multiple parties, thus reducing the router's ability to understand the number of intended recipients (but the router can still track IP addresses to determine it), or using one unique set of credentials for each recipient, frustrating traffic correlation on the assumption of compromised TLS. In the latter case, senders can create a larger number of recipient credentials to hide the actual number of intended recipients from the routers (which is what SimpleX clients do). 
``` Sender Internet XFTP relays Internet Recipient @@ -76,7 +78,7 @@ Each file chunk allows multiple recipients, each recipient can download the same | | | | | | (can be self-hosted) | | | | +---------+ | | - chunk 1 ----- HTTP2 over TLS ------ | XFTP | ---- HTTP2 / TLS ----- chunk 1 + packet 1 ----- HTTP2 over TLS ------ | XFTP | ---- HTTP2 / TLS ----- packet 1 |---> SimpleX File Transfer Protocol (XFTP) --> | Relay | ---> XFTP ------------->| | --------------------------- +---------+ ---------------------- | | | | | | | @@ -90,21 +92,21 @@ file ---> | XFTP | ------> XFTP ----> | Relay | ---> | | | +---------+ | | | | ------- HTTP2 / TLS ------- | XFTP | ---- HTTP2 / TLS ---- | |-------------> XFTP ----> | Relay | ---> XFTP ------------->| - chunk N --------------------------- +---------+ --------------------- chunk N - | | (store file chunks) | | + packet N --------------------------- +---------+ --------------------- packet N + | | (store data packets) | | | | | | | | | | ``` -When sender client uploads a file chunk, it has to register it first with one sender ID and multiple recipient IDs, and one random unique key per ID to authenticate sender and recipients, and also provide its size and hash that will be validated when chunk is uploaded. +When sender client uploads a data packet, it has to register it first with one sender ID and multiple recipient IDs, and one random unique key per ID to authenticate sender and recipients, and also provide its size and hash that will be validated when packet is uploaded. -To send the actual file, the sender client MUST pad it and encrypt it with a random symmetric key and distribute chunks of fixed sized across multiple XFTP servers. Information about chunk locations, keys, hashes and required keys is passed to the recipients as "[file description](#file-description)" out-of-band. 
+To send the actual file, the sender client MUST pad it and encrypt it with a random symmetric key and distribute packets of fixed size across multiple XFTP routers. Information about packet locations, keys, hashes and required keys is passed to the recipients as "[file description](#file-description)" out-of-band. -Creating, uploading, downloading and deleting file chunks requires sending commands to the XFTP server - they are described in detail in [XFTP commands](#xftp-commands) section. +Creating, uploading, downloading and deleting data packets requires sending commands to the XFTP router - they are described in detail in [XFTP commands](#xftp-commands) section. ## Persistence model -Server stores file chunk records in memory, with optional adding to append-only log, to allow restoring them on server restart. File chunk bodies can be stored as files or as objects in any object store (e.g. S3). +Router stores data packet records in memory, with optional adding to append-only log, to allow restoring them on router restart. Data packet bodies can be stored as files or as objects in any object store (e.g. S3). ## XFTP procedure @@ -114,28 +116,28 @@ To send the file, the sender will: 1) Prepare file - compute its SHA512 digest. - - prepend header with the name and pad the file to match the whole number of chunks in size. It is RECOMMENDED to use 2 of 4 allowed chunk sizes, to balance upload size and metadata privacy. + - prepend header with the name and pad the file to match the whole number of packets in size. It is RECOMMENDED to use 2 of 4 allowed packet sizes, to balance upload size and metadata privacy. - encrypt it with a randomly chosen symmetric key and IV (e.g., using NaCL secret_box). - - split into allowed size chunks. + - split into allowed size packets. - generate per-recipient keys. 
It is recommended that the sending client generates more per-recipient keys than the actual number of recipients, rounding up to a power of 2, to conceal the actual number of intended recipients. -2) Upload file chunks - - register each chunk record with randomly chosen one or more (for redundancy) XFTP server(s). +2) Upload data packets + - register each packet record with randomly chosen one or more (for redundancy) XFTP router(s). - optionally request additional recipient IDs, if required number of recipient keys didn't fit into register request. - - upload each chunk to chosen server(s). + - upload each packet to chosen router(s). 3) Prepare file descriptions, one per recipient. -The sending client combines addresses of all chunks and other information into "file description", different for each file recipient, that will include: +The sending client combines addresses of all packets and other information into "file description", different for each file recipient, that will include: - an encryption key used to encrypt/decrypt the full file (the same for all recipients). - file SHA512 digest to validate download. -- list of chunk descriptions; information for each chunk: - - private Ed25519 key to sign commands for file transfer server. - - chunk address (server host and chunk ID). - - chunk sha512 digest. +- list of packet descriptions; information for each packet: + - private Ed25519 key to sign commands for file transfer router. + - packet address (router host and packet ID). + - packet sha512 digest. -To reduce the size of file description, chunks are grouped by the server host. +To reduce the size of file description, packets are grouped by the router host. 4) Send file description(s) to the recipient(s) out-of-band, via pre-existing secure and authenticated channel. E.g., SimpleX clients send it as messages via SMP protocol, but it can be done via any other channel. 
@@ -145,16 +147,16 @@ To reduce the size of file description, chunks are grouped by the server host. Having received the description, the recipient will: -1) Download all chunks. +1) Download all packets. -The receiving client can fall back to secondary servers, if necessary: -- if the server is not available. -- if the chunk is not present on the server (ERR AUTH response). -- if the hash of the downloaded file chunk does not match the description. +The receiving client can fall back to secondary routers, if necessary: +- if the router is not available. +- if the packet is not present on the router (ERR AUTH response). +- if the hash of the downloaded data packet does not match the description. -Optionally recipient can acknowledge file chunk reception to delete file ID from server for this recipient. +Optionally recipient can acknowledge data packet reception to delete file ID from router for this recipient. -2) Combine the chunks into a file. +2) Combine the packets into a file. 3) Decrypt the file using the key in file description. @@ -170,35 +172,35 @@ Optionally recipient can acknowledge file chunk reception to delete file ID from It includes these fields: - `party` - "sender" or "recipient". Sender's file description is required to delete the file. -- `size` - padded file size equal to total size of all chunks, see `fileSize` syntax below. +- `size` - padded file size equal to total size of all packets, see `fileSize` syntax below. - `digest` - SHA512 hash of encrypted file, base64url encoded string. - `key` - symmetric encryption key to decrypt the file, base64url encoded string. - `nonce` - nonce to decrypt the file, base64url encoded string. -- `chunkSize` - default chunk size, see `fileSize` syntax below. -- `replicas` - the array of file chunk replicas descriptions. +- `packetSize` - default packet size, see `fileSize` syntax below. +- `replicas` - the array of data packet replicas descriptions. 
- `redirect` - optional property for redirect information indicating that the file is itself a description to another file, allowing to use file description as a short URI. Each replica description is an object with 2 fields: -- `chunks` - and array of chunk replica descriptions stored on one server. -- `server` - [server address](#xftp-server-uri) where the chunks can be downloaded from. +- `packets` - an array of packet replica descriptions stored on one router. +- `router` - [router address](#xftp-router-uri) where the packets can be downloaded from. -Each server replica description is a string with this syntax: +Each router replica description is a string with this syntax: ```abnf -chunkReplica = chunkNo ":" replicaId ":" replicaKey [":" chunkDigest [":" chunkSize]] -chunkNo = 1*DIGIT - ; a sequential 1-based chunk number in the original file. +packetReplica = packetNo ":" replicaId ":" replicaKey [":" packetDigest [":" packetSize]] +packetNo = 1*DIGIT + ; a sequential 1-based packet number in the original file. replicaId = base64url - ; server-assigned random chunk replica ID. + ; router-assigned random packet replica ID. replicaKey = base64url - ; sender-generated random key to receive (or to delete, in case of sender's file description) the chunk replica. -chunkDigest = base64url - ; chunk digest that MUST be specified for the first replica of each chunk, + ; sender-generated random key to receive (or to delete, in case of sender's file description) the packet replica. 
+packetDigest = base64url + ; packet digest that MUST be specified for the first replica of each packet, ; and SHOULD be omitted (or be the same) on the subsequent replicas -chunkSize = fileSize +packetSize = fileSize fileSize = sizeInBytes / sizeInUnits - ; chunk size SHOULD only be specified on the first replica and only if it is different from default chunk size + ; packet size SHOULD only be specified on the first replica and only if it is different from default packet size sizeInBytes = 1*DIGIT sizeInUnits = 1*DIGIT sizeUnit sizeUnit = %s"kb" / %s"mb" / %s"gb" @@ -211,22 +213,22 @@ Optional redirect information has two fields: ## URIs syntax -### XFTP server URI +### XFTP router URI -The XFTP server address is a URI with the following syntax: +The XFTP router address is a URI with the following syntax: ```abnf -xftpServerURI = %s"xftp://" xftpServer -xftpServer = serverIdentity [":" basicAuth] "@" srvHost [":" port] +xftpRouterURI = %s"xftp://" xftpRouter +xftpRouter = routerIdentity [":" basicAuth] "@" srvHost [":" port] srvHost = ; RFC1123, RFC5891 port = 1*DIGIT -serverIdentity = base64url +routerIdentity = base64url basicAuth = base64url ``` ### File description URI -This file description URI can be generated by the client application to share a small file description as a QR code or as a link. Practically, to be able to scan a QR code it should be under 1000 characters, so only file descriptions with 1-2 chunks can be used in this case. This is supported with `redirect` property when file description leads to a file which in itself is a larger file description to another file - akin to URL shortener. +This file description URI can be generated by the client application to share a small file description as a QR code or as a link. Practically, to be able to scan a QR code it should be under 1000 characters, so only file descriptions with 1-2 packets can be used in this case. 
This is supported with `redirect` property when file description leads to a file which in itself is a larger file description to another file - akin to URL shortener. File description URI syntax: @@ -247,47 +249,47 @@ clientAppServer is not a server the client connects to - it is a server that sho XFTP stands for SimpleX File Transfer Protocol. Its design is based on the same ideas and has some of the qualities of SimpleX Messaging Protocol: -- recipient cannot see sender's IP address, as the file fragments (chunks) are temporarily stored on multiple XFTP relays. +- recipient cannot see sender's IP address, as the file fragments (packets) are temporarily stored on multiple XFTP relays. - file can be sent asynchronously, without requiring the sender to be online for file to be received. - there is no network of peers that can observe this transfer - sender chooses which XFTP relays to use, and can self-host their own. -- XFTP relays do not have any file metadata - they only see individual chunks, with access to each chunk authorized with anonymous credentials (using Edwards curve cryptographic signature) that are random per chunk. -- chunks have one of the sizes allowed by the servers - 64KB, 256KB, 1MB and 4MB chunks, so sending a large file looks indistinguishable from sending many small files to XFTP server. If the same transport connection is reused, server would only know that chunks are sent by the same user. -- each chunk can be downloaded by multiple recipients, but each recipient uses their own key and chunk ID to authorize access, and the chunk is encrypted by a different key agreed via ephemeral DH keys (NaCl crypto_box (SalsaX20Poly1305 authenticated encryption scheme ) with shared secret derived from Curve25519 key exchange) on the way from the server to each recipient. 
XFTP protocol as a result has the same quality as SMP protocol - there are no identifiers and ciphertext in common between sent and received traffic inside TLS connection, so even if TLS is compromised, it complicates traffic correlation attacks. -- XFTP protocol supports redundancy - each file chunk can be sent via multiple relays, and the recipient can choose the one that is available. Current implementation of XFTP protocol in SimpleX Chat does not support redundancy though. +- XFTP relays do not have any file metadata - they only see individual packets, with access to each packet authorized with anonymous credentials (using Edwards curve cryptographic signature) that are random per packet. +- packets have one of the sizes allowed by the routers - 64KB, 256KB, 1MB and 4MB packets, so sending a large file looks indistinguishable from sending many small files to XFTP router. If the same transport connection is reused, router would only know that packets are sent by the same user. +- each packet can be downloaded by multiple recipients, but each recipient uses their own key and packet ID to authorize access, and the packet is encrypted by a different key agreed via ephemeral DH keys (NaCl crypto_box (XSalsa20Poly1305 authenticated encryption scheme) with shared secret derived from Curve25519 key exchange) on the way from the router to each recipient. XFTP protocol as a result has the same quality as SMP protocol - there are no identifiers and ciphertext in common between sent and received traffic inside TLS connection, so even if TLS is compromised, it complicates traffic correlation attacks. +- XFTP protocol supports redundancy - each data packet can be sent via multiple relays, and the recipient can choose the one that is available. Current implementation of XFTP protocol in SimpleX Chat does not support redundancy though. - the file as a whole is encrypted with a random symmetric key using NaCl secret_box. 
## Cryptographic algorithms Clients must cryptographically authorize XFTP commands, see [Command authentication](#command-authentication). -To authorize/verify transmissions clients and servers MUST use either signature algorithm Ed25519 algorithm defined in RFC8709 or using deniable authentication scheme based on NaCL crypto_box (see Simplex Messaging Protocol). +To authorize/verify transmissions clients and routers MUST use either the Ed25519 signature algorithm defined in RFC8709 or the deniable authentication scheme based on NaCL crypto_box (see Simplex Messaging Protocol). -To encrypt/decrypt file chunk bodies delivered to the recipients, servers/clients MUST use NaCL crypto_box. +To encrypt/decrypt data packet bodies delivered to the recipients, routers/clients MUST use NaCL crypto_box. -Clients MUST encrypt file chunk bodies sent via XFTP servers using use NaCL crypto_box. +Clients MUST encrypt data packet bodies sent via XFTP routers using NaCL crypto_box. -## File chunk IDs +## Data packet IDs -XFTP servers MUST generate a separate new set of IDs for each new chunk - for the sender (that uploads the chunk) and for each intended recipient. It is REQUIRED that: +XFTP routers MUST generate a separate new set of IDs for each new packet - for the sender (that uploads the packet) and for each intended recipient. It is REQUIRED that: -- These IDs are different and unique within the server. +- These IDs are different and unique within the router. - Based on random bytes generated with cryptographically strong pseudo-random number generator. -## Server security requirements +## Router security requirements -XFTP server implementations MUST NOT create, store or send to any other servers: +XFTP router implementations MUST NOT create, store or send to any other routers: - Logs of the client commands and transport connections in the production environment. - History of retrieved files. 
-- Snapshots of the database they use to store file chunks (instead clients can manage redundancy by creating chunk replicas using more than one XFTP server). In-memory persistence is recommended for file chunks records. +- Snapshots of the database they use to store data packets (instead clients can manage redundancy by creating packet replicas using more than one XFTP router). In-memory persistence is recommended for data packets records. -- Any other information that may compromise privacy or [forward secrecy][4] of communication between clients using XFTP servers. +- Any other information that may compromise privacy or [forward secrecy][4] of communication between clients using XFTP routers. ## Transport protocol -- binary-encoded commands sent as fixed-size padded block in the body of HTTP2 POST request, similar to SMP and notifications server protocol transmission encodings. +- binary-encoded commands sent as fixed-size padded block in the body of HTTP2 POST request, similar to SMP and notifications router protocol transmission encodings. - HTTP2 POST with a fixed size padded block body for file upload and download. Block size - 16384 bytes (it would fit ~350 Ed25519 recipient keys). @@ -306,41 +308,41 @@ The reason not to use URI segments / HTTP verbs / REST semantics is to have cons ### ALPN to agree handshake version -Client and server use [ALPN extension][18] of TLS to agree handshake version. +Client and router use [ALPN extension][18] of TLS to agree handshake version. -Server SHOULD send `xftp/1` protocol name and the client should confirm this name in order to use the current protocol version. This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax. +Router SHOULD send `xftp/1` protocol name and the client should confirm this name in order to use the current protocol version. 
This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax. -If the client does not confirm this protocol name, the server would fall back to v1 of XFTP protocol. +If the client does not confirm this protocol name, the router would fall back to v1 of XFTP protocol. ### Transport handshake -When a client and a server agree on handshake version using ALPN extension, they should proceed with XFTP handshake. +When a client and a router agree on handshake version using ALPN extension, they should proceed with XFTP handshake. -As with SMP, a client doesn't reveal its version range to avoid version fingerprinting. Unlike SMP, XFTP runs a HTTP2 protocol over TLS and the server can't just send its handshake right away. So a session handshake is driven by client-sent requests: +As with SMP, a client doesn't reveal its version range to avoid version fingerprinting. Unlike SMP, XFTP runs a HTTP2 protocol over TLS and the router can't just send its handshake right away. So a session handshake is driven by client-sent requests: -1. To pass initiative to the server, the client sends a request with empty body. -2. Server responds with its `paddedServerHello` block. +1. To pass initiative to the router, the client sends a request with empty body. +2. Router responds with its `paddedRouterHello` block. 3. Clients sends a request containing `paddedClientHello` block, -4. Server sends an empty response, finalizing the handshake. +4. Router sends an empty response, finalizing the handshake. -Once TLS handshake is complete, client and server will exchange blocks of fixed size (16384 bytes). +Once TLS handshake is complete, client and router will exchange blocks of fixed size (16384 bytes). 
```abnf -paddedServerHello = -serverHello = xftpVersionRange sessionIdentifier serverCerts signedServerKey ignoredPart +paddedRouterHello = +routerHello = xftpVersionRange sessionIdentifier routerCerts signedRouterKey ignoredPart xftpVersionRange = minXftpVersion maxXftpVersion minXftpVersion = xftpVersion maxXftpVersion = xftpVersion sessionIdentifier = shortString ; unique session identifier derived from transport connection handshake -serverCerts = length 1*serverCert ; NonEmpty list of certificates in chain -serverCert = originalLength -signedServerKey = originalLength ; signed by server certificate +routerCerts = length 1*routerCert ; NonEmpty list of certificates in chain +routerCert = originalLength +signedRouterKey = originalLength ; signed by router certificate paddedClientHello = clientHello = xftpVersion keyHash ignoredPart ; chosen XFTP protocol version - must be the maximum supported version -; within the range offered by the server +; within the range offered by the router xftpVersion = 2*2OCTET ; Word16 version number keyHash = shortString @@ -350,47 +352,47 @@ originalLength = 2*2OCTET ignoredPart = *OCTET ``` -In XFTP v2 the handshake is only used for version negotiation, but `serverCert` and `signedServerKey` must be validated by the client. +In XFTP v2 the handshake is only used for version negotiation, but `routerCert` and `signedRouterKey` must be validated by the client. -`keyHash` is the CA fingerprint used by client to validate TLS certificate chain and is checked by a server against its own key. +`keyHash` is the CA fingerprint used by client to validate TLS certificate chain and is checked by a router against its own key. -`ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version - the client and servers must ignore any extra bytes within the original block length. 
+`ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version - the client and routers must ignore any extra bytes within the original block length. -For TLS transport client should assert that `sessionIdentifier` is equal to `tls-unique` channel binding defined in [RFC 5929][14] (TLS Finished message struct); we pass it in `serverHello` block to allow communication over some other transport protocol (possibly, with another channel binding). +For TLS transport client should assert that `sessionIdentifier` is equal to `tls-unique` channel binding defined in [RFC 5929][14] (TLS Finished message struct); we pass it in `routerHello` block to allow communication over some other transport protocol (possibly, with another channel binding). ### Requests and responses - File sender: - - create file chunk record. + - create data packet record. - Parameters: - Ed25519 key for subsequent sender commands and Ed25519 keys for commands of each recipient. - - chunk size. + - packet size. - Response: - - chunk ID for the sender and different IDs for all recipients. - - add recipients to file chunk + - packet ID for the sender and different IDs for all recipients. + - add recipients to data packet - Parameters: - - sender's chunk ID + - sender's packet ID - Ed25519 keys for commands of each recipient. - Response: - - chunk IDs for new recipients. - - upload file chunk. - - delete file chunk (invalidates all recipient IDs). + - packet IDs for new recipients. + - upload data packet. + - delete data packet (invalidates all recipient IDs). - File recipient: - - download file chunk: - - chunk ID - - DH key for additional encryption of the chunk. - - command should be signed with the key passed by the sender when creating chunk record. - - delete file chunk ID (only for one recipient): signed with the same key. + - download data packet: + - packet ID + - DH key for additional encryption of the packet. 
+ - command should be signed with the key passed by the sender when creating packet record. + - delete data packet ID (only for one recipient): signed with the same key. ## XFTP commands Commands syntax below is provided using ABNF with case-sensitive strings extension. ```abnf -xftpCommand = ping / senderCommand / recipientCmd / serverMsg +xftpCommand = ping / senderCommand / recipientCmd / routerMsg senderCommand = register / add / put / delete recipientCmd = get / ack -serverMsg = pong / sndIds / rcvIds / ok / file / error +routerMsg = pong / sndIds / rcvIds / ok / file / error ``` The syntax of specific commands and responses is defined below. @@ -401,11 +403,11 @@ Commands are made via HTTP2 requests, responses to commands are correlated as HT ### Command authentication -XFTP servers must authenticate all transmissions (excluding `ping`) by verifying the client signatures. Command signature should be generated by applying the algorithm specified for the file to the `signed` block of the transmission, using the key associated with the file chunk ID (recipient's or sender's depending on which file chunk ID is used). +XFTP routers must authenticate all transmissions (excluding `ping`) by verifying the client signatures. Command signature should be generated by applying the algorithm specified for the file to the `signed` block of the transmission, using the key associated with the data packet ID (recipient's or sender's depending on which data packet ID is used). ### Keep-alive command -To keep the transport connection alive and to generate noise traffic the clients should use `ping` command to which the server responds with `pong` response. This command should be sent unsigned and without file chunk ID. +To keep the transport connection alive and to generate noise traffic the clients should use `ping` command to which the router responds with `pong` response. This command should be sent unsigned and without data packet ID. 
```abnf ping = %s"PING" @@ -421,13 +423,13 @@ pong = %s"PONG" ### File sender commands -Sending any of the commands in this section (other than `register`, that is sent without file chunk ID) is only allowed with sender's ID. +Sending any of the commands in this section (other than `register`, that is sent without data packet ID) is only allowed with sender's ID. -#### Register new file chunk +#### Register new data packet -This command is sent by the sender to the XFTP server to register a new file chunk. +This command is sent by the sender to the XFTP router to register a new data packet. -Servers SHOULD support basic auth with this command, to allow only server owners and trusted users to create file chunks on the servers. +Routers SHOULD support basic auth with this command, to allow only router owners and trusted users to create data packets on the routers. The syntax is: @@ -446,7 +448,7 @@ x509encoded = length = 1*1 OCTET ``` -If the file chunk is registered successfully, the server must send `sndIds` response with the sender's and recipients' file chunk IDs: +If the data packet is registered successfully, the router must send `sndIds` response with the sender's and recipients' data packet IDs: ```abnf sndIds = %s"SIDS " senderId recipientIds @@ -455,9 +457,9 @@ recipientIds = length 1*recipientId recipientId = length *OCTET ``` -#### Add file chunk recipients +#### Add data packet recipients -This command is sent by the sender to the XFTP server to add additional recipient keys to the file chunk record, in case number of keys requested by client didn't fit into `register` command. The syntax is: +This command is sent by the sender to the XFTP router to add additional recipient keys to the data packet record, in case number of keys requested by client didn't fit into `register` command. 
The syntax is: ```abnf add = %s"FADD " rcvPublicAuthKeys @@ -465,7 +467,7 @@ rcvPublicAuthKeys = length 1*rcvPublicAuthKey rcvPublicAuthKey = length x509encoded ``` -If additional keys were added successfully, the server must send `rcvIds` response with the added recipients' file chunk IDs: +If additional keys were added successfully, the router must send `rcvIds` response with the added recipients' data packet IDs: ```abnf rcvIds = %s"RIDS " recipientIds @@ -473,46 +475,46 @@ recipientIds = length 1*recipientId recipientId = length *OCTET ``` -#### Upload file chunk +#### Upload data packet -This command is sent by the sender to the XFTP server to upload file chunk body to server. The syntax is: +This command is sent by the sender to the XFTP router to upload data packet body to router. The syntax is: ```abnf put = %s"FPUT" ``` -Chunk body is streamed via HTTP2 request. +Packet body is streamed via HTTP2 request. -If file chunk body was successfully received, the server must send `ok` response. +If data packet body was successfully received, the router must send `ok` response. ```abnf ok = %s"OK" ``` -#### Delete file chunk +#### Delete data packet -This command is sent by the sender to the XFTP server to delete file chunk from the server. The syntax is: +This command is sent by the sender to the XFTP router to delete data packet from the router. The syntax is: ```abnf delete = %s"FDEL" ``` -Server should delete file chunk record, invalidating all recipient IDs, and delete file body from file storage. If file chunk was successfully deleted, the server must send `ok` response. +Router should delete data packet record, invalidating all recipient IDs, and delete file body from file storage. If data packet was successfully deleted, the router must send `ok` response. ### File recipient commands Sending any of the commands in this section is only allowed with recipient's ID. 
-#### Download file chunk +#### Download data packet -This command is sent by the recipient to the XFTP server to download file chunk body from the server. The syntax is: +This command is sent by the recipient to the XFTP router to download data packet body from the router. The syntax is: ```abnf get = %s"FGET " rDhKey rDhKey = length x509encoded ``` -If requested file is successfully located, the server must send `file` response. File chunk body is sent as HTTP2 response body. +If requested file is successfully located, the router must send `file` response. Data packet body is sent as HTTP2 response body. ```abnf file = %s"FILE " sDhKey cbNonce @@ -520,23 +522,23 @@ sDhKey = length x509encoded cbNonce = 24*24 OCTET ; NaCl crypto_box nonce ``` -Chunk is additionally encrypted on the way from the server to the recipient using a key agreed via ephemeral DH keys `rDhKey` and `sDhKey`, so there is no ciphertext in common between sent and received traffic inside TLS connection, in order to complicate traffic correlation attacks, if TLS is compromised. +Packet is additionally encrypted on the way from the router to the recipient using a key agreed via ephemeral DH keys `rDhKey` and `sDhKey`, so there is no ciphertext in common between sent and received traffic inside TLS connection, in order to complicate traffic correlation attacks, if TLS is compromised. -#### Acknowledge file chunk download +#### Acknowledge data packet download -This command is sent by the recipient to the XFTP server to acknowledge file reception, deleting file ID from server for this recipient. The syntax is: +This command is sent by the recipient to the XFTP router to acknowledge file reception, deleting file ID from router for this recipient. The syntax is: ```abnf ack = %s"FACK" ``` -If file recipient ID is successfully deleted, the server must send `ok` response. +If file recipient ID is successfully deleted, the router must send `ok` response. 
-In current implementation of XFTP protocol in SimpleX Chat clients don't use FACK command. Files are automatically expired on servers after configured time interval. +In current implementation of XFTP protocol in SimpleX Chat clients don't use FACK command. Files are automatically expired on routers after configured time interval. ### Error responses -The server responds with `ERR` followed by the error type: +The router responds with `ERR` followed by the error type: ```abnf error = %s"ERR " errorType @@ -556,17 +558,17 @@ Error types: - `SESSION` - incorrect session ID (TLS Finished message / tls-unique binding). - `HANDSHAKE` - incorrect handshake command. - `CMD` - command syntax errors (UNKNOWN, SYNTAX, PROHIBITED, NO_AUTH, HAS_AUTH, NO_ENTITY). -- `AUTH` - command authorization error - bad signature or non-existing file chunk. -- `BLOCKED` - file chunk was blocked due to policy violation (added in v3). Contains blocking reason and optional notice. +- `AUTH` - command authorization error - bad signature or non-existing data packet. +- `BLOCKED` - data packet was blocked due to policy violation (added in v3). Contains blocking reason and optional notice. - `SIZE` - incorrect file size. - `QUOTA` - storage quota exceeded. - `DIGEST` - incorrect file digest. - `CRYPTO` - file encryption/decryption failed. -- `NO_FILE` - no expected file body in request/response or no file on the server. +- `NO_FILE` - no expected file body in request/response or no file on the router. - `HAS_FILE` - unexpected file body. - `FILE_IO` - file IO error. - `TIMEOUT` - file sending or receiving timeout. -- `INTERNAL` - internal server error. +- `INTERNAL` - internal router error. ## Threat model @@ -575,7 +577,7 @@ Error types: - A user protects their local database and key material. - The user's application is authentic, and no local malware is running. - The cryptographic primitives in use are not broken. 
- - A user's choice of servers is not directly tied to their identity or otherwise represents distinguishing information about the user. + - A user's choice of routers is not directly tied to their identity or otherwise represents distinguishing information about the user. #### A passive adversary able to monitor the traffic of one user @@ -583,7 +585,7 @@ Error types: - identify that and when a user is sending files over XFTP protocol. - - determine which servers the user sends/receives files to/from. + - determine which routers the user sends/receives files to/from. - observe how much traffic is being sent, and make guesses as to its purpose. @@ -595,11 +597,11 @@ Error types: *can:* - - learn which XFTP servers are used to send and receive files for which users. + - learn which XFTP routers are used to send and receive files for which users. - learn when files are sent and received. - - perform traffic correlation attacks against senders and recipients and correlate senders and recipients within the monitored set, frustrated by the number of users on the servers. + - perform traffic correlation attacks against senders and recipients and correlate senders and recipients within the monitored set, frustrated by the number of users on the routers. - observe how much traffic is being sent, and make guesses as to its purpose. @@ -609,31 +611,31 @@ Error types: - perform traffic correlation attacks. -#### XFTP server +#### XFTP router *can:* - learn when file senders and recipients are online. -- know how many file chunks and chunk sizes are sent via the server. +- know how many data packets and packet sizes are sent via the router. -- perform the correlation of the file chunks as belonging to one file via either a re-used transport connection, user's IP address, or connection timing regularities. +- perform the correlation of the data packets as belonging to one file via either a re-used transport connection, user's IP address, or connection timing regularities. 
- learn file senders' and recipients' IP addresses, and infer information (e.g. employer) based on the IP addresses, as long as Tor is not used. -- delete file chunks, preventing file delivery, as long as redundant delivery is not used. +- delete data packets, preventing file delivery, as long as redundant delivery is not used. -- lie about the state of a file chunk to the recipient and/or to the sender (e.g. deleted when it is not). +- lie about the state of a data packet to the recipient and/or to the sender (e.g. deleted when it is not). - refuse deleting the file when instructed by the sender. *cannot:* -- undetectably corrupt file chunks. +- undetectably corrupt data packets. - learn the contents, name or the exact size of sent files. -- learn approximate size of sent files, as long as more than one server is used to send file chunks. +- learn approximate size of sent files, as long as more than one router is used to send data packets. - compromise the users' end-to-end encryption of files with an active attack. @@ -645,7 +647,7 @@ Error types: - receive all files sent and received by Alice that did not expire yet, as long as information about these files was not removed from the database. -- prevent Alice's contacts from receiving the files she sent by deleting all or some of the file chunks from XFTP servers. +- prevent Alice's contacts from receiving the files she sent by deleting all or some of the data packets from XFTP routers. #### A user's contact @@ -667,10 +669,10 @@ Error types: *can:* -- Denial of Service XFTP servers. +- Denial of Service XFTP routers. *cannot:* - send files to a user who they are not connected with. -- enumerate file chunks on an XFTP server. +- enumerate data packets on an XFTP router. 
diff --git a/rfcs/2022-04-20-smp-conf-timeout-recovery.md b/rfcs/2022-04-20-smp-conf-timeout-recovery.md index 7c7f84caaa..5941e02595 100644 --- a/rfcs/2022-04-20-smp-conf-timeout-recovery.md +++ b/rfcs/2022-04-20-smp-conf-timeout-recovery.md @@ -3,9 +3,9 @@ ## Problem When sending an SMP confirmation a network timeout can lead to the following race condition: -- server receives the confirmation while the joining party fails to receive the server's response; +- router receives the confirmation while the joining party fails to receive the router's response; - joining party deletes the connection together with credentials sent in the confirmation for securing the queue; -- initiating party will receive the confirmation from the server and secure the queue; +- initiating party will receive the confirmation from the router and secure the queue; - on subsequent attempt to join via the same invitation link initiating party will generate new credentials and fail authorization. This renders the joining party permanently unable to join via that invitation link and complete the connection. diff --git a/rfcs/2024-07-06-ios-notifications.md b/rfcs/2024-07-06-ios-notifications.md index c60d7baf39..c2d7b54729 100644 --- a/rfcs/2024-07-06-ios-notifications.md +++ b/rfcs/2024-07-06-ios-notifications.md @@ -3,12 +3,12 @@ ## Problem iOS notifications may fail to deliver for several reasons, but there are two important reasons that we could address: -- when notification server is not subscribed to SMP server(s), the notifications can be dropped - it can happen because either notification server restarts or becuase SMP server restarted and some messages are received before notification server resubscribed. We lose approximately 3% of notifications because of this reason. 
+- when notification router is not subscribed to SMP router(s), the notifications can be dropped - it can happen because either notification router restarts or because SMP router restarted and some messages are received before notification router resubscribed. We lose approximately 3% of notifications because of this reason. - when user device is offline or has low power condition, Apple does not deliver notification, but puts them to storage. If while the notification is in storage a new one arrives it would overwrite the previous notification. If it was the message to the same message queue, the client will download messages anyway, up to a limit, but if the message was to another queue, it will not be delivered until the app is opened. Apple delivers about 88% of notifications that should be delivered (not accounting for uninstalled apps), the rest is replaced with the newer notifications. ## Solution -The first problem can be solved by preserving notifications for a limited time (say 1 hour) in case there is no subscription to notification from notification server. At the very least, they can be preserved in SMP server memory but can also be stored to a file on restart, similar to messages, and be delivered when notification server resubscribes. It is sufficient to store one notification per messaging queue. +The first problem can be solved by preserving notifications for a limited time (say 1 hour) in case there is no subscription to notification from notification router. At the very least, they can be preserved in SMP router memory but can also be stored to a file on restart, similar to messages, and be delivered when notification router resubscribes. It is sufficient to store one notification per messaging queue. The second problem is both more damaging and more complex to solve.
The solution could be to always deliver several last notifications to different queues in one packet (Apple allows up to ~4-5kb notification size, and we are sending packets of fixed size 512 bytes, so we could fit up to 8-10 of them in each notification). diff --git a/rfcs/2024-09-05-queue-storage.md b/rfcs/2024-09-05-queue-storage.md index 9b67cc4cf8..0e8fa53a9f 100644 --- a/rfcs/2024-09-05-queue-storage.md +++ b/rfcs/2024-09-05-queue-storage.md @@ -8,7 +8,7 @@ See [Short invitation links](./2024-06-21-short-links.md). 2) clients only delete queue records based on some user action, pending connections do not expire. -While part 2 should be improved in the client, indefinite storage of queue records becomes a much bigger issue if each of them would result in a permanent storage of 4-16kb blob in server memory, without server-side expiration for short invitation links. +While part 2 should be improved in the client, indefinite storage of queue records becomes a much bigger issue if each of them would result in a permanent storage of 4-16kb blob in router memory, without router-side expiration for short invitation links. ## Possible solutions @@ -16,15 +16,15 @@ While part 2 should be improved in the client, indefinite storage of queue recor The problem with this approach is that contact addresses are also unsecured queues, and they should not be expired. -We could set really large expiration time, and require that clients "update" the unsecured queues they need at least every 1-2 years, but it would not solve the problem of storing a large number of blobs in the server memory for unused/abandoned 1-time invitations. +We could set really large expiration time, and require that clients "update" the unsecured queues they need at least every 1-2 years, but it would not solve the problem of storing a large number of blobs in the router memory for unused/abandoned 1-time invitations. 
-2) Do not store blobs in memory / append-only log, and instead use something like RocksDB. While it may be a correct long term solution, it may be not expedient enough at the current POC stage for this feature. Also, the lack of expiration is wrong in any case and would indefinitely grow server storage. +2) Do not store blobs in memory / append-only log, and instead use something like RocksDB. While it may be a correct long term solution, it may be not expedient enough at the current POC stage for this feature. Also, the lack of expiration is wrong in any case and would indefinitely grow router storage. -3) Add flag allowing the server to differentiate permanent queues used as contact addresses, also using different blob sizes for them. In this case, messaging queues will be expired if not secured after 3 weeks, and contact address queues would be expired if not "updated" by the owner within 2 years. +3) Add flag allowing the router to differentiate permanent queues used as contact addresses, also using different blob sizes for them. In this case, messaging queues will be expired if not secured after 3 weeks, and contact address queues would be expired if not "updated" by the owner within 2 years. Probably all three solutions need to be used, to avoid creating a non-expiring blob storage in memory, as in case too many of such blobs are created it would not be possible to differentiate between real users and resource exhaustion attacks, and unlike with messages, they won't be expiring too. -Servers already can differentiate messaging queues and contact address queues, if they want to: +Routers already can differentiate messaging queues and contact address queues, if they want to: - with the old 4-message handshake, the confirmation message on a normal queue was different, and also KEY command was eventually used. 
- with the fast 2-message handshake, while the confirmation message has the same syntax, and the differences are inside encrypted envelope, the client still uses SKEY command. - in both cases, the usual messaging queues are secured, and contact addresses are not, so this difference is visible in the storage as well (although it is not easy to differentiate between abandoned 1-time invitations and contact addresses). @@ -33,7 +33,7 @@ Differentiating these queues can also allow different message retention times - ## Proposed solution -1. Add queue updated_at date into queue records. While it adds some metadata, it seems necessary to manage retention and quality of service. It will not include exact time, only date, and the time of creation will be replaced by the time of any update - queue secured, a message is sent, or queue owner subscribes to the queue. To avoid the need to update store log on every message this information can be appended to store log on server termination. Or given that only one update per day is needed it may be ok to make these updates as they happen (temporarily making the sequence and time of these events available in storage). +1. Add queue updated_at date into queue records. While it adds some metadata, it seems necessary to manage retention and quality of service. It will not include exact time, only date, and the time of creation will be replaced by the time of any update - queue secured, a message is sent, or queue owner subscribes to the queue. To avoid the need to update store log on every message this information can be appended to store log on router termination. Or given that only one update per day is needed it may be ok to make these updates as they happen (temporarily making the sequence and time of these events available in storage). 2. Add flag to indicate the queue usage - messaging queue or queue for contact address connection requests. 
This would result in different queue size and different retention policy for queue and its messages. We already have "sender can secure flag" which is, effectively, this flag - contact address queues are never secured. So this does not increase stored metadata in any way. @@ -41,11 +41,11 @@ Differentiating these queues can also allow different message retention times - This is a design considerations and a concept, not a design yet. -Instead of implementing a generic blob storage that can be used as an attack vector, and adds additional failure point (another server storing blob that is necessary to connect to the queue on the current server), but instead adds an extended queue information blobs, most of which could be dropped without the loss of connectivity, so that the attack can be mitigated by deleting these blobs without users losing the ability to connect, as long as the queue and minimal extended information is retained. +Instead of implementing a generic blob storage that can be used as an attack vector, and adds additional failure point (another router storing blob that is necessary to connect to the queue on the current router), but instead adds an extended queue information blobs, most of which could be dropped without the loss of connectivity, so that the attack can be mitigated by deleting these blobs without users losing the ability to connect, as long as the queue and minimal extended information is retained. So, to make the connection there need to be these elements: -- queue server and queue ID - mandatory part, that can be included in short link +- queue router and queue ID - mandatory part, that can be included in short link - SMP key - mandatory part for all queues. We are considering initializing ratchets earlier for contact addresses, and include ratchet keys and pre-keys into queue data as well, but it is out of scope here. - Ratchet keys - mandatory part for 1-time invitation that won't fit in short link. 
- PQ key - optional part that can be stored with addresses if ratchet keys are added and with 1-time invitations. @@ -56,8 +56,8 @@ So rather that storing one blob with a large address inside it, not associated w Also, we need the address shared with the sender (party accepting the connection) to be short. We could use a similar approach that was proposed for data blobs, using a single random seed per queues to derive multiple keys and IDs from it. For example: 1. The queue owner: - - generates Ed25529 key pair `(sk, spk)` and X25519 key pair `(dhk, dhpk)` to use with the server, same as now sent in NEW command. - - generates queue recipient ID (this ID can still be server-generated). + - generates Ed25519 key pair `(sk, spk)` and X25519 key pair `(dhk, dhpk)` to use with the router, same as now sent in NEW command. + - generates queue recipient ID (this ID can still be router-generated). - generates X25519 key pair `(k, pk)` to use with the accepting party. - derives from `k`: - sender ID. @@ -73,9 +73,9 @@ The algorithm used to derive key and ID from `k` needs to be cryptographically s So, coupling blob storage with messaging queues has these pros/cons: Cons: -- no additional layer of privacy - the server used for connection is visible in the link, even after the blobs are removed from the server. +- no additional layer of privacy - the router used for connection is visible in the link, even after the blobs are removed from the router. Pros: -- no additional point of failure in the connection process - the same server will be used to retrieve necessary blobs as for connection. +- no additional point of failure in the connection process - the same router will be used to retrieve necessary blobs as for connection. - queue blobs of messaging blobs will be automatically removed once the queue is secured or expired, without additional request from the recipient - reducing the storage and the time these blobs are available.
- queue blobs for contact addresses will be structured and some of the large blobs can be removed in case of resource exhaustion attack (and recreated by the client if needed), with the only downside that PQ handshake will be postponed (which is the case now) and profile will not be available at a point of connection. diff --git a/rfcs/2024-09-10-private-rendezvous.md b/rfcs/2024-09-10-private-rendezvous.md index 744596ea68..4f549d2ef7 100644 --- a/rfcs/2024-09-10-private-rendezvous.md +++ b/rfcs/2024-09-10-private-rendezvous.md @@ -2,25 +2,25 @@ ## Problem -Our current handshake protocol is open to this attack: whoever observes the link exchange, knows on which server connection is being made, and if the traffic on this server is observed, then it can confirm communication between parties. Further, even with the [last proposal](./2024-09-09-smp-blobs.md#possible-privacy-improvement), having real-time access to the server data allows to establish the exact messaging queue that is used to send messages. +Our current handshake protocol is open to this attack: whoever observes the link exchange, knows on which router connection is being made, and if the traffic on this router is observed, then it can confirm communication between parties. Further, even with the [last proposal](./2024-09-09-smp-blobs.md#possible-privacy-improvement), having real-time access to the router data allows to establish the exact messaging queue that is used to send messages. ## Solution -We could make the initial link exchange more private by making it harder for any observer to discover which server will be used for messaging by hiding this information from the server that hosts the initial link. +We could make the initial link exchange more private by making it harder for any observer to discover which router will be used for messaging by hiding this information from the router that hosts the initial link. Preliminary, the protocol could be the following: -1. 
Connection initiator stores 224-256 bytes of encrypted connection link on a rendezvous server (link contains server host and linkId on another messaging server, not a rendezvous one). +1. Connection initiator stores 224-256 bytes of encrypted connection link on a rendezvous router (link contains router host and linkId on another messaging router, not a rendezvous one). -2. Rendezvous server adds these links to buckets, up to 64 links per bucket. Bucket ID is the timestamp when the bucket was created + a sequential bucket number, in case more than one bucket is created per second. +2. Rendezvous router adds these links to buckets, up to 64 links per bucket. Bucket ID is the timestamp when the bucket was created + a sequential bucket number, in case more than one bucket is created per second. -3. The server responds to the link creator with a bucket ID where this link was added. That bucket ID is its timestamp + a number prevents server "fingerprinting" clients and using say one bucket for each client. If timestamp is different or a bucket number within this timestamp is too large, the client can refuse to use it, depending on the client settings. +3. The router responds to the link creator with a bucket ID where this link was added. That bucket ID is its timestamp + a number prevents router "fingerprinting" clients and using say one bucket for each client. If timestamp is different or a bucket number within this timestamp is too large, the client can refuse to use it, depending on the client settings. -4. The initiating party will pass to the accepting party the rendezvous server host, the hash of this bucket ID (bucket link) and the passphrase to derive the key from. The initiating party has an option to pass a link and passphrase via two channels - in which case the link will only contain the bucket ID. +4. 
The initiating party will pass to the accepting party the rendezvous router host, the hash of this bucket ID (bucket link) and the passphrase to derive the key from. The initiating party has an option to pass a link and passphrase via two channels - in which case the link will only contain the bucket ID. -5. The accepting party would then request the bucket via its ID hash (the server would store hashes to be able to look up - hash is used to prevent showing time in the link) and attempt to decrypt all contained links using the provided key. +5. The accepting party would then request the bucket via its ID hash (the router would store hashes to be able to look up - hash is used to prevent showing time in the link) and attempt to decrypt all contained links using the provided key. The accepting party then will continue the connection via the decrypted link. -This obviously does not protect accepting party from the initiating party, if it can choose rendezvous server it controls. It also does not protect from the malicious rendezvous server that would collaborate with link observers. I think reunion doesn’t protect from it too. +This obviously does not protect accepting party from the initiating party, if it can choose rendezvous router it controls. It also does not protect from the malicious rendezvous router that would collaborate with link observers. I think reunion doesn’t protect from it too. But it does protect connection from whoever observes the link, particularly if this link only contains the bucket and the key is passed separately, via some other channel. 
diff --git a/rfcs/2024-09-25-ios-notifications-2.md b/rfcs/2024-09-25-ios-notifications-2.md index 79416b83f1..e17ee90aed 100644 --- a/rfcs/2024-09-25-ios-notifications-2.md +++ b/rfcs/2024-09-25-ios-notifications-2.md @@ -2,7 +2,7 @@ ## Problem -For iOS notifications to be delivered the client has to create credentials for notification subscription on SMP server using NKEY command and after that create a subscription on notification server using SNEW command. These two commands are sent in sequence, after the connections are created, and for it to happen the client needs to be online and in foreground. +For iOS notifications to be delivered the client has to create credentials for notification subscription on SMP router using NKEY command and after that create a subscription on notification router using SNEW command. These two commands are sent in sequence, after the connections are created, and for it to happen the client needs to be online and in foreground. iOS users tend to close the app when it is not used, and iOS has very limited permissions for background activities, so these notification subscriptions are created with a substantial delay, and notifications do not work. @@ -12,19 +12,19 @@ This problem is distinct from and probably more common than other problems affec 1. When the new connection is created, the client already knows if it needs to create notification subscription or not, based on the conversation setting (e.g., if the group is muted, the client will not create notification subscription as well.). We should extend NEW command to avoid the need to send additional NKEY command with an option to create notification subscription at the point where connection is created. NDEL would still be used to disable this notification, and NKEY will be used to re-enable it. -2. 
In the same way we stopped using SDEL command (NDEL sends notification DELD to subscribed notification server) to delete notificaiton subscriptions from notification server, we should delegate creating notification subscription on notification server to SMP servers. Clients could use keys agreed with ntf server for e2e encryption and for command authorization to encrypt and sign instruction to create notification subscription that will be forwarded to notification server using protocol similar to SMP proxies. This will avoid the need for clients to separately contact notification servers that won't happen until they are online. +2. In the same way we stopped using SDEL command (NDEL sends notification DELD to subscribed notification router) to delete notification subscriptions from notification router, we should delegate creating notification subscription on notification router to SMP routers. Clients could use keys agreed with ntf router for e2e encryption and for command authorization to encrypt and sign instruction to create notification subscription that will be forwarded to notification router using protocol similar to SMP proxies. This will avoid the need for clients to separately contact notification routers that won't happen until they are online. -3. Instead of making Ntf server trust DELD notifications, we could send deletion instructions signed by the client, which will only fail to send in case notification server is down (and they won't be sent later after server restart). +3. Instead of making Ntf router trust DELD notifications, we could send deletion instructions signed by the client, which will only fail to send in case notification router is down (and they won't be sent later after router restart). Cons: -- If SMP servers were to retain in the storage the information about which notification server is used for which queue, it would reduce metadata privacy.
While currently it is not an issue, as all notification servers are known and operated by us, once there are other client apps, this can be used for app users fingerprinting, which would act as a deterrence from using new apps – but only if app users use servers of operators who are different from the app provider. To mitigate it, we could only store it in server memory and include notification instruction in subscription commands (SUB) and include notification subscription status in SUB responses. We don't need to mitigate the problem of server being able to store this information, as messaging servers can observe which notification servers connect to them anyway. -- If SMP server is restarted before the subscription request is forwared to the notification server, then it will have to be forwarded again, once the client subscribes. The problem here is that if the client is offline, it will neither subscribe to the queue to send notification subscription request, nor receive notifications from this queue. Storing notification server and subscription request would mitigate that, as in this case we could send all pending requests on server start, without depending on client subscriptions. -- "Small" agent will need to support connections to ntf servers and manage workers that retry sending pending subscription requests. -- Until the client learns the public keys of notification server, it will not be able to decrypt notifications. It potentially can be mitigated by using the public key of the server returned when token is created, in this way different client keys (per-queue) will be combined with the same ntf server key (per-token). +- If SMP routers were to retain in the storage the information about which notification router is used for which queue, it would reduce metadata privacy. 
While currently it is not an issue, as all notification routers are known and operated by us, once there are other client apps, this can be used for app users fingerprinting, which would act as a deterrence from using new apps – but only if app users use routers of operators who are different from the app provider. To mitigate it, we could only store it in router memory and include notification instruction in subscription commands (SUB) and include notification subscription status in SUB responses. We don't need to mitigate the problem of router being able to store this information, as messaging routers can observe which notification routers connect to them anyway. +- If SMP router is restarted before the subscription request is forwarded to the notification router, then it will have to be forwarded again, once the client subscribes. The problem here is that if the client is offline, it will neither subscribe to the queue to send notification subscription request, nor receive notifications from this queue. Storing notification router and subscription request would mitigate that, as in this case we could send all pending requests on router start, without depending on client subscriptions. +- "Small" agent will need to support connections to ntf routers and manage workers that retry sending pending subscription requests. +- Until the client learns the public keys of notification router, it will not be able to decrypt notifications. It potentially can be mitigated by using the public key of the router returned when token is created, in this way different client keys (per-queue) will be combined with the same ntf router key (per-token). ## Implementation details -1. NEW and NKEY commands will need to be extended to include notification subscription request. As the notifier ID needs to be sent to notification server, this notifier ID will have to be client-generated and supplied as part of NEW command. +1.
NEW and NKEY commands will need to be extended to include notification subscription request. As the notifier ID needs to be sent to notification router, this notifier ID will have to be client-generated and supplied as part of NEW command. now: @@ -46,4 +46,4 @@ NKEY :: NtfPublicAuthKey -> RcvNtfPublicDhKey -> Maybe NtfServerRequest -> Comma -- NotifierID is passed in entity ID field of the transmission ``` -2. Notification server will need to support an additional command to receive "proxied" subscription commands, `SFWD`, that would include `NtfServerRequest`. This command can include both `SNEW` and `SDEL` commands. +2. Notification router will need to support an additional command to receive "proxied" subscription commands, `SFWD`, that would include `NtfServerRequest`. This command can include both `SNEW` and `SDEL` commands. diff --git a/rfcs/2024-11-25-queue-blobs-2.md b/rfcs/2024-11-25-queue-blobs-2.md index a4913c1189..e18162c126 100644 --- a/rfcs/2024-11-25-queue-blobs-2.md +++ b/rfcs/2024-11-25-queue-blobs-2.md @@ -5,7 +5,7 @@ This document evolves the design proposed [here](./2024-09-09-smp-blobs.md). ## Problems In addition to problems in the first doc, we have these issues with in-memory queue record storage: -- many queues are idle or rarely used, but they are loaded to memory, and currently just loading all queues uses 20gb RAM on each server, and takes 10 min to process, increasing downtimes during restarts. +- many queues are idle or rarely used, but they are loaded to memory, and currently just loading all queues uses 20gb RAM on each router, and takes 10 min to process, increasing downtimes during restarts. - adding blobs to memory would make this problem much worse. 
## Proposed solution diff --git a/rfcs/2025-03-30-ios-notifications-3.md b/rfcs/2025-03-30-ios-notifications-3.md index 4415257671..0922b5a3e1 100644 --- a/rfcs/2025-03-30-ios-notifications-3.md +++ b/rfcs/2025-03-30-ios-notifications-3.md @@ -6,65 +6,65 @@ iOS notifications have these problems: - iOS notification service crashes exceeding memory limit. This is being addressed by changes in GHC RTS. - there is a large number of connections, because each member in a group requires individual connection. This will improve with chat relays when each group would require 2-3 connections. - some notification may be not shown if notification with reply/mention is skipped, and instead some other message is delivered, which may be muted. This would not improve without some changes, as notifications may be skipped anyway. -- client devices delay communication with ntf server because it is done in background, and by that time the app may be suspended. -- notification server represents a bottleneck, as it has to be owned by the app vendor, and the current design when ntf server subscribes to notifications scales very badly. +- client devices delay communication with ntf router because it is done in background, and by that time the app may be suspended. +- notification router represents a bottleneck, as it has to be owned by the app vendor, and the current design when ntf router subscribes to notifications scales very badly. This RFC is based on the previous [RFC related to notifications](./2024-09-25-ios-notifications-2.md). ## Solution -As notification server has to know client token and currently it associates subscriptions with this token anyway, we are not gaining any privacy and security by using per-subscription keys - both authorization and encryption keys of notification subscription can be dropped. 
+As notification router has to know client token and currently it associates subscriptions with this token anyway, we are not gaining any privacy and security by using per-subscription keys - both authorization and encryption keys of notification subscription can be dropped. -We still need to store the list of queue IDs associated with the token on the notification server, but we do not need any per-queue keys on the notification server, and we don't need subscriptions - it's effectively a simple set of IDs, with no other information. +We still need to store the list of queue IDs associated with the token on the notification router, but we do not need any per-queue keys on the notification router, and we don't need subscriptions - it's effectively a simple set of IDs, with no other information. In this case, when queue is created the client would supply notifier ID - it has to be derived from correlation ID, to prevent existense check (see previous RFC). As we also supply sender ID, instead of deriving it as sha3-192 of correlation ID, they both can be derived as sha3-384 and split to two IDs - 24 bytes each. -The notification server will maintain a rotating list of server keys with the latest key communicated to the client every time the token is registered and checked. The keys would expire after, say, 1 week or 1 month, and removed from notification server on expiration. +The notification router will maintain a rotating list of router keys with the latest key communicated to the client every time the token is registered and checked. The keys would expire after, say, 1 week or 1 month, and removed from notification router on expiration. -The packet containing association between notifier queue ID and token will be crypto_box encrypted using key agreement between identified notification server master key and an ephemeral per packet (effectively, per-queue) client-key. 
+The packet containing association between notifier queue ID and token will be crypto_box encrypted using key agreement between identified notification router master key and an ephemeral per packet (effectively, per-queue) client-key. Deleting the queue may also include encrypted packet that would verify that the client deleted the queue. -Instead of notification server subscribing to the notifications creating a lot of traffic for the queues without messages, the SMP server would push notifications via NTF server connection (whether via NTF or via SMP protocol). This could be used as a mechanism to migrate existing queues when with the next subscription the notification server would communicate it's address to SMP server and this association would be stored together with the queue. +Instead of notification router subscribing to the notifications creating a lot of traffic for the queues without messages, the SMP router would push notifications via NTF router connection (whether via NTF or via SMP protocol). This could be used as a mechanism to migrate existing queues when with the next subscription the notification router would communicate its address to SMP router and this association would be stored together with the queue.
## Protocol design Additional/changed SMP commands: ```haskell --- register notification server --- should be signed with server key +-- register notification router +-- should be signed with router key NSRV :: NtfServerCreds -> Command NtfServer -- response NSID :: NtfServerId -> BrokerMsg --- to communicate which server is responsible for the queue +-- to communicate which router is responsible for the queue -- should be signed with queue key NSUB :: Maybe NtfServerId -> Command Notifier --- subscribe to notificaions from all queues associated with the server --- should be signed with server key +-- subscribe to notifications from all queues associated with the router +-- should be signed with router key -- entity ID - NtfServerId NSSUB :: Command NtfServer data NtfServerCreds = NtfServerCreds { server :: NtfServer, - -- NTF server certificate chain that should match fingerpring in address + -- NTF router certificate chain that should match fingerprint in address cert :: X.CertificateChain, - -- server autorizatio key to sign server subscription requests + -- router authorization key to sign router subscription requests authKey :: X.SignedExact X.PubKey } -- entity ID is recipient ID -NSKEY :: NtfSubscription -> Command Recipient +NSKEY :: NtfSubscription -> Command Recipient data NtfSubscription = NtfSubscription -- key to encrypt notifications e2e with the client { ntfPubDbKey :: RcvNtfPublicDhKey, ntfServer :: NtfServer, -- should be linked to correlation ID to prevent existense check - -- the ID sent to notification server could be its hash? + -- the ID sent to notification router could be its hash? ntfId :: NotifierId, encNtfTokenAssoc :: EncDataBytes } @@ -77,12 +77,12 @@ data NtfTokenAssoc = NtfTokenAssoc } ``` -SMP server will need to maintain the list of Ntf servers and their credentials, and when NSSUB arrives to make only one subscription. When message arrives it would deliver notification to the correct connection via queue / ntf server association.
+SMP router will need to maintain the list of Ntf routers and their credentials, and when NSSUB arrives to make only one subscription. When message arrives it would deliver notification to the correct connection via queue / ntf router association. -Ntf server needs to maintain three indices to the same data: +Ntf router needs to maintain three indices to the same data: - `(smpServer, queueId) -> tokenId` - to deliver notification to the correct token -- `tokenId -> [smpServer -> [queueId]]` - to remove all queues when token is removed, and to store/update these associations effficiently - store log may have one compact line per token (after compacting), or per token/server combination. -- `[smpServer]` - array of SMP servers to subscribe to. +- `tokenId -> [smpServer -> [queueId]]` - to remove all queues when token is removed, and to store/update these associations efficiently - store log may have one compact line per token (after compacting), or per token/router combination. +- `[smpServer]` - array of SMP routers to subscribe to. ## Mention notifications @@ -90,4 +90,4 @@ Currently we are marking messages with T (true) for messages that require notifi The proposal is to: - add additional values to this metadata, e.g. 2 (priority) and 3 (high priority) (and T/F could be sent as 0/1 respectively) - that is, to deliver notifications even if notifications are generally disabled (they can still be further filtered by the client). -- instead of deleting notification credentials when notifications are disabled - which is costly - communicate to SMP server the change of notificaion priority level, e.g. the client could set minimal notification priority to deliver notifications, where 0 would mean disabling it completely, 1 enable for all, 2 for priority 2+, 3 for priority 3. The downside here is that it could be used for timing correlation of queues in the group, but it already can be used on bulk deletions of ntf credentials for these queues and when sending messages.
+- instead of deleting notification credentials when notifications are disabled - which is costly - communicate to SMP router the change of notification priority level, e.g. the client could set minimal notification priority to deliver notifications, where 0 would mean disabling it completely, 1 enable for all, 2 for priority 2+, 3 for priority 3. The downside here is that it could be used for timing correlation of queues in the group, but it already can be used on bulk deletions of ntf credentials for these queues and when sending messages. diff --git a/rfcs/2025-04-04-short-links-for-groups.md b/rfcs/2025-04-04-short-links-for-groups.md index 90938acec0..835f2592c0 100644 --- a/rfcs/2025-04-04-short-links-for-groups.md +++ b/rfcs/2025-04-04-short-links-for-groups.md @@ -35,18 +35,18 @@ This could possibly be evolved into the requirement to have a direct connection 3. Allow "joint management" of SMP queues. -SMP servers can support multiple recipients for contact queues:\ +SMP routers can support multiple recipients for contact queues:\ - subscription would be possible to the "subscriber recipient". - all other changes (update data, change subscriber recipient, add or remove recipients) would require multiple recipient signatures on SMP command in line with n-of-m multisig rules, that the command sender would have to collect out-of-band (from SMP protocol point of view). Pros: allows joint ownership, and protects from losing access to master owner device. Cons: - complicates queue abstraction with approach that is not needed for most queues. -- still retains the server as a single point of failure. +- still retains the router as a single point of failure. -4. Introduce "group" as a new type of entity managed by SMP servers. +4. Introduce "group" as a new type of entity managed by SMP routers.
-SMP servers would provide a separate set of commands for managing group records that would include in an encrypted container: +SMP routers would provide a separate set of commands for managing group records that would include in an encrypted container: - the group profile - the list of chat relay links - the list of owner member IDs with their public keys @@ -54,7 +54,7 @@ SMP servers would provide a separate set of commands for managing group records - alternative group entity locations - possibly, a globally unique group identity (as the hash of the initial/seed group data). -While the server domain would be used as the hostname in group link, it may contain alternative hosts (not just hostnames of the same server), both in the link and in the group record data. +While the router domain would be used as the hostname in group link, it may contain alternative hosts (not just hostnames of the same router), both in the link and in the group record data. Pros: separates additional complexity to where it is needed, allowing reliability and redundancy for group ownership. Cons: complexity, coupling between SMP and chat protocol. @@ -86,7 +86,7 @@ Cons: - if no messages are accepted, this is not even a queue. - no way to directly contact owners (maybe it is not a downside, as for relays there would be a communication channel anyway as part of the group). -Option 2 looks more simple and attractive, implementing server broadcast for SMP seems unnecessary, as while it could have been used for simple groups, it does not solve such problems as spam and pre-moderation anyway - it requires a higher level protocol. +Option 2 looks more simple and attractive, implementing router broadcast for SMP seems unnecessary, as while it could have been used for simple groups, it does not solve such problems as spam and pre-moderation anyway - it requires a higher level protocol. 
The command to update owner keys would be `RKEY` with the list of keys, and we can make `NEW` accept multiple keys too, although the use case here is less clear. @@ -96,7 +96,7 @@ Option 1: Use the same keys in SMP as when signing queue data. Option 2: Use different keys. -The value here could be that the server could validate these signatures too, and also maintain the chain of key changes. While tempting, it is probably unnecessary, and this chain of ownership is better to be maintained on chat relay level, as there are no size constraints on the size of this chain. Also, it is better for metadata privacy to not couple transport and chat protocol keys. +The value here could be that the router could validate these signatures too, and also maintain the chain of key changes. While tempting, it is probably unnecessary, and this chain of ownership is better to be maintained on chat relay level, as there are no size constraints on the size of this chain. Also, it is better for metadata privacy to not couple transport and chat protocol keys. We still need to bind the mutable data updates to the "genesis" signature key (the one included in the immutable data). @@ -147,12 +147,12 @@ The size of the OwnerInfo record encoding is: ~189 bytes, so we should practically limit the number of owners to say 8 - 1 original + 7 addiitonal. Original creator could use a different key as a "genesis" key, to conceal creator identity from other members, and it needs to include the record with memberId anyway. -The structure is simplified, and it does not allow arbitrary ownership changes. Its purpose is not to comprehensively manage ownership changes - while it is possible with a generic blockchain, it seems not appropriate at this stage, - but rather to ensure access continuity and that the server cannot modify the data (although nothing prevents the server from removing the data completely or from serving the previous version of the data). 
+The structure is simplified, and it does not allow arbitrary ownership changes. Its purpose is not to comprehensively manage ownership changes - while it is possible with a generic blockchain, it seems not appropriate at this stage, - but rather to ensure access continuity and that the router cannot modify the data (although nothing prevents the router from removing the data completely or from serving the previous version of the data). For example it would only allow any given owner to remove subsequenty added owners, preserving the group link and identity, but it won't allow removing owners that signed this owner authorization. So owners are not equal, with the creator having the highest rank and being able to remove all additional owners, and owners authorise by creator can remove all other owners but themselves and creator, and so on - they have to maintain the chain that authorized themselves, at least. We could explicitely include owner rank into OwnerInfo, or we could require that they are sorted by rank, or the rank can be simply derived from signatures. When additional owners want to be added to the group, they would have to provide any of the current owners: -- the key for SMP commands authorization - this will be passed to SMP server together with other keys. There could be either RKEY to pass all keys (some risk to miss some, or of race conditions), or RADD/RGET/RDEL to add and remove recipient keys, which has no risk of race conditions. +- the key for SMP commands authorization - this will be passed to SMP router together with other keys. There could be either RKEY to pass all keys (some risk to miss some, or of race conditions), or RADD/RGET/RDEL to add and remove recipient keys, which has no risk of race conditions. - the signature of the immutable data by their member key included in their profile. - the current owner would then include their member key into the queue data, and update it with LSET command. 
In any case there should be some simple consensus protocol between owners for owner changes, and it has to be maintained as a blockchain by owners and by chat relays, as otherwise it may lead to race conditions with LSET command. diff --git a/rfcs/2025-07-15-multi-device.md b/rfcs/2025-07-15-multi-device.md index 56a09fea29..65e5e30a61 100644 --- a/rfcs/2025-07-15-multi-device.md +++ b/rfcs/2025-07-15-multi-device.md @@ -12,13 +12,13 @@ In addition to that, the specific implementation of this approach in Signal comp While this limitation can be addressed with notifications when a new device is added and per-device keys, we still find the remaining attack vectors on user security and privacy to be unacceptable, and opening unsuspecting users to various criminal actions - and it is wrong to say that would only affect security conscious users, and most people would not be affected by these risks. Allowing potential criminals in groups to know which device you are currently using is a real risk for all users. -Another approach was offered by Threema that is ["mediator" server](https://threema.com/en/blog/md-architectural-overview) where the state of encryption ratchets is stored server-side. While it protects the user from their communication peers, it increases required level of trust to the servers, and in case of SimpleX network it would expose the knowledge of who communicates to whom. So while the idea of server-side storage of encryption state is promising, it has to be per-connection, to retain "no-accounts" property of SimpleX messaging network. +Another approach was offered by Threema that is ["mediator" router](https://threema.com/en/blog/md-architectural-overview) where the state of encryption ratchets is stored router-side. While it protects the user from their communication peers, it increases required level of trust to the routers, and in case of SimpleX network it would expose the knowledge of who communicates to whom. 
So while the idea of router-side storage of encryption state is promising, it has to be per-connection, to retain "no-accounts" property of SimpleX messaging network. Also see [FAQ](https://simplex.chat/faq/#why-cant-i-use-the-same-profile-on-different-devices) and [this issue](https://github.com/simplex-chat/simplex-chat/issues/444#issuecomment-3066968358). ## Proposed solution -One of the ideas presented in FAQ - to store the state of Double Ratchet algorithm in the encrypted container on the server seems promising. The RFC develops this idea. +One of the ideas presented in FAQ - to store the state of Double Ratchet algorithm in the encrypted container on the router seems promising. The RFC develops this idea. ### Considerations for the design @@ -26,21 +26,21 @@ One of the ideas presented in FAQ - to store the state of Double Ratchet algorit 2. Protocol commands and events may be changed (even if at the cost of slightly reducing message size) can fit the hash of the ratchet state (32 bytes sha256 would be sufficient), so that the client can determine whether it has the most recent ratchet state or if it needs to retrieve the latest copy. Message size reduction won't affect the users because we use compression, and there is a substantial reserve. -3. Client commands that modify ratchet state would include the hash of the previous ratchet state so that the server can reject or ignore the command in case the previous ratchet state is different or in case command is repeated in case of lost response). +3. Client commands that modify ratchet state would include the hash of the previous ratchet state so that the router can reject or ignore the command in case the previous ratchet state is different or in case command is repeated in case of lost response). 4. 
The client does not need to retrieve message state for each encryption and decryption operation - it can "speculatively" use the ratchet state it has, and receive correct ratchet state in the "error" response after attempting encryption based on incorrect ratchet state. ## Proposed protocol design -Ratchet state will be stored on the same server that stores message queue, as part of message queue record. 8kb is a sufficient size for this blob (the actual max size is 7800 bytes). The server would also store the hashes of the current and, possibly, the previous ratchet states (TBC). +Ratchet state will be stored on the same router that stores message queue, as part of message queue record. 8kb is a sufficient size for this blob (the actual max size is 7800 bytes). The router would also store the hashes of the current and, possibly, the previous ratchet states (TBC). While ratchet is used for duplex connection, the connection still has primary queue, and with redundancy the same ratchet state can be stored on all secondary queues. -Ratchet state will be encrypted using secret_box - a symmetric encryption scheme, so PQ-resistant. If ratchet state is stored on more than one server, it has to be encrypted with a different key for each server. +Ratchet state will be encrypted using secret_box - a symmetric encryption scheme, so PQ-resistant. If ratchet state is stored on more than one router, it has to be encrypted with a different key for each router. Questions: how to rotate the key used to store ratchet? Should key used to encrypt ratchet rotate at the same time when queue is rotated? The latter is a logical option, as it prevents additional complexity and solves the problem anyway. A possible option is to have "ratchet version" that will be used to advance the key used to encrypt ratchet via HKDF. 
-Security considerations: the scheme may reduce break-in recovery to the points queues are rotated, unless there is some randomness mixed-in into the key derivation (the key used to encrypt ratchet state). But including randomness would defeat the purpose, as other devices wouldn't be able to access the ratchets. Another approach would be to have each device use its own key for encryption, and encrypt to all keys of all devices (or to encrypt key, to avoid size increase). Having multiple encryptions would show how many devices use the queue, but servers already can observe it, so it is a better tradeoff. Another idea would be to rotate the key used to authorize queue commands - we already support multiple recipient keys, and it can be used for multi-device scenario. That would partially mitigate break-in attacks as the attacker who obtained the key from ratchet state would be able to decrypt it, but won't be able to decrypt it (the attacker collusion with the server is not mitigated). Yet another idea would be for each party (device) to share its private (or encapsulation) key and to have a symmetric key (used to encrypt the ratchet state) encrypted (encapsulated) separately for each device. This would reduce the size of the stored data to `ratchet size` + `encrypted key size` * N, so even in case of PQ encryption (e.g. sntrup) the size required to store the ratchet would be under transport block size, while limiting it to say 4-8 devices, which is sufficient. +Security considerations: the scheme may reduce break-in recovery to the points queues are rotated, unless there is some randomness mixed-in into the key derivation (the key used to encrypt ratchet state). But including randomness would defeat the purpose, as other devices wouldn't be able to access the ratchets. Another approach would be to have each device use its own key for encryption, and encrypt to all keys of all devices (or to encrypt key, to avoid size increase). 
Having multiple encryptions would show how many devices use the queue, but routers already can observe it, so it is a better tradeoff. Another idea would be to rotate the key used to authorize queue commands - we already support multiple recipient keys, and it can be used for multi-device scenario. That would partially mitigate break-in attacks as the attacker who obtained the key from ratchet state would be able to decrypt it, but won't be able to decrypt it (the attacker collusion with the router is not mitigated). Yet another idea would be for each party (device) to share its private (or encapsulation) key and to have a symmetric key (used to encrypt the ratchet state) encrypted (encapsulated) separately for each device. This would reduce the size of the stored data to `ratchet size` + `encrypted key size` * N, so even in case of PQ encryption (e.g. sntrup) the size required to store the ratchet would be under transport block size, while limiting it to say 4-8 devices, which is sufficient. To participate in multi-device scheme the devices would join the usual group that will be used to share public (encapsulation) device keys and to communicate updates to conversations that were received by the currently "active" device. "Active" means the device that received or sent and processed the message, and while only one device can receive messages from a given queue, device "active" state may be determined per queue, allowing concurrent usage. @@ -50,7 +50,7 @@ The scheme must be resilient to state updates being lost, and in case of direct `rsi` - ratchet state on device `i`. -`enc(rs)` - current authoritative ratchet state on the server. +`enc(rs)` - current authoritative ratchet state on the router. `pt` and `ct` - plaintext and ciphertext messages. @@ -58,13 +58,13 @@ Encryption is a state transition function ratchetEnc: `(ct, rs') = ratchetEnc(pt 1. Device encrypts the message using the stored ratchet state: `(ct, rsi') = ratchetEnc(pt, rsi)` -2. 
Device sends modified encrypted ratchet state and the hash of the previous encrypted state to the server that stores the queue: `RSET (hash(enc(rsi)), enc(rsi'))`. +2. Device sends modified encrypted ratchet state and the hash of the previous encrypted state to the router that stores the queue: `RSET (hash(enc(rsi)), enc(rsi'))`. -3. If the hash of the previous state matches state stored on the server (`hash(enc(rsi)) == hash(enc(rs))`), the server updates the state and responds with `ratchet_ok` (that may include the current state or it's hash, for validation). If the hash is different, the server responds with `bad_ratchet(enc(rs))` message that includes the correct ratchet state. These updates must be atomic. In this case device has to update the local ratchet state (provided it can decrypt it), and repeat encryption attempt. If device cannot decrypt the provided ratchet state, it means that the connection is disrupted (possibly, device is removed from device group, but missed the notifications). +3. If the hash of the previous state matches state stored on the router (`hash(enc(rsi)) == hash(enc(rs))`), the router updates the state and responds with `ratchet_ok` (that may include the current state or its hash, for validation). If the hash is different, the router responds with `bad_ratchet(enc(rs))` message that includes the correct ratchet state. These updates must be atomic. In this case device has to update the local ratchet state (provided it can decrypt it), and repeat encryption attempt. If device cannot decrypt the provided ratchet state, it means that the connection is disrupted (possibly, device is removed from device group, but missed the notifications). 4. After successful state update in primary receiving queue, the device would update it in secondary receiving queues. -5. Device sends encrypted message as usual, via proxy that must be different both from the server that stores the ratchet and from the destination server. +5. 
Device sends encrypted message as usual, via proxy that must be different both from the router that stores the ratchet and from the destination router. 6. Device broadcasts sent message and new ratchet state to other devices in the device group. @@ -74,17 +74,17 @@ This protocol is simple, and it minimizes requests when sending the message to o Decryption is also a state transition function: `(pt, rs') = ratchetDec(ct, rs)` -1. Server sends the message to the device (can be in response to SUB or ACK commands, or with active subscription). Pushed message would include the hash of the currently stored ratchet state: `hash(enc(rs))`. +1. Router sends the message to the device (can be in response to SUB or ACK commands, or with active subscription). Pushed message would include the hash of the currently stored ratchet state: `hash(enc(rs))`. 2. If device has the ratchet state with the same hash (`hash(enc(rs)) == hash(enc(rsi))`), it decrypts the message: `(pt, rsi') = ratchetDec(ct, rsi)`. -3. If device has ratchet state with a different hash, it requests ratchet from the server with additional protocol command `RGET` with response `RCHT (enc(rs))` and updates the local state. +3. If device has ratchet state with a different hash, it requests ratchet from the router with additional protocol command `RGET` with response `RCHT (enc(rs))` and updates the local state. 4. Device decrypts the message `(pt, rsi') = ratchetDec(ct, rsi)` and processes it as usual. -5. Device sends acknowledgement to the server as usual, but now it includes the new ratchet state and the hash of the previous state: `ACK msgId (hash(enc(rsi)), enc(rsi'))` +5. Device sends acknowledgement to the router as usual, but now it includes the new ratchet state and the hash of the previous state: `ACK msgId (hash(enc(rsi)), enc(rsi'))` -6. 
The server compares ratchet state with stored state hash, and in case it matches it processes `ACK` and responds with `OK` as usual (or `NO_MSG` in case msgId is incorrect, also as usual - it would happen in repeated ACK requests). If ratchet state hash does not match, the server would respond with `bad_ratchet(enc(rs))` - which means that the message was already processed by another device and ratchet was advanced. This is a complex scenario, as the client has to either revert the change from message processing or somehow combine the change with the updates communicated via device group (as a side note, device group can simply re-broadcast messages, not state updates, but it will result in state divergence between devices when different messages are lost). +6. The router compares ratchet state with stored state hash, and in case it matches it processes `ACK` and responds with `OK` as usual (or `NO_MSG` in case msgId is incorrect, also as usual - it would happen in repeated ACK requests). If ratchet state hash does not match, the router would respond with `bad_ratchet(enc(rs))` - which means that the message was already processed by another device and ratchet was advanced. This is a complex scenario, as the client has to either revert the change from message processing or somehow combine the change with the updates communicated via device group (as a side note, device group can simply re-broadcast messages, not state updates, but it will result in state divergence between devices when different messages are lost). Unlike sending messages, this flow does not require any additional requests in most cases, only requiring requesting message state reconciliation when the same message was received and processed by more than one client, but it does not require re-acknowledgement. 
diff --git a/rfcs/2025-08-20-service-subs-drift.md b/rfcs/2025-08-20-service-subs-drift.md index 1ca9e6018a..d4182fd5d5 100644 --- a/rfcs/2025-08-20-service-subs-drift.md +++ b/rfcs/2025-08-20-service-subs-drift.md @@ -2,16 +2,16 @@ ## Problem -While service certificates and subscriptions hugely decrease startup time and delivery delays on server restarts, they introduce the risk of losing subscriptions in case of state drifts. They also do not provide efficient mechanism for validating that the list of subscribed queues is in sync. +While service certificates and subscriptions hugely decrease startup time and delivery delays on router restarts, they introduce the risk of losing subscriptions in case of state drifts. They also do not provide efficient mechanism for validating that the list of subscribed queues is in sync. How can the state drift happen? There are several possibilities: - lost broker response would make the broker consider that the queue is associated, but the client won't know it, and will have to re-associate. While in itself it is not a problem, as it'll be resolved, it would make drift detected more frequently (regardless of the detection logic used). That service certificates are used on clients with good connection would make it less likely though. -- server state restored from the backup, in case of some failure. Nothing can be done to recover lost queues, but we may restore lost service associations. -- queue blocking or removal by server operator because of policy violation. -- server downgrade (when it loses all service associations) with subsequent upgrade - the client would think queues are associated, while they are not, and won't receive any messages at all in this scenario. -- any other server-side error or logic error. +- router state restored from the backup, in case of some failure. Nothing can be done to recover lost queues, but we may restore lost service associations. 
+- queue blocking or removal by router operator because of policy violation. +- router downgrade (when it loses all service associations) with subsequent upgrade - the client would think queues are associated, while they are not, and won't receive any messages at all in this scenario. +- any other router-side error or logic error. In addition to the possibility of the drift, we simply need to have confidence that service subscriptions work as intended, without skipping queues. We ignored this consideration for notifications, as the tolerance to lost notifications is higher, but we can't ignore it for messages. @@ -25,7 +25,7 @@ An approach of having an efficient way to detect drift, but load the full list o ### Drift detection -Both client and server would maintain the number of associated queues and the "symmetric" hash over the set of queue IDs. The requirements for this hash algorithm are: +Both client and router would maintain the number of associated queues and the "symmetric" hash over the set of queue IDs. The requirements for this hash algorithm are: - not cryptographically strong, to be fast. - 128 bits to minimize collisions over the large set of millions of queues. - symmetric - the result should not depend on ID order. @@ -33,11 +33,11 @@ Both client and server would maintain the number of associated queues and the "s In this way, every time association is added or removed (including queue marked as deleted), both peers would recompute this hash in the same transaction. -The client would suspend sending and processing any other commands on the server and the queues of this server until SOKS response is received from this server, to prevent drift. It can be achieved with per-server semaphores/locks in memory. UI clients need to become responsive sooner than these responses are received, but we do not service certificates on UI clients, and chat relays may prevent operations on server queues until SOKS response is received. 
+The client would suspend sending and processing any other commands on the router and the queues of this router until SOKS response is received from this router, to prevent drift. It can be achieved with per-router semaphores/locks in memory. UI clients need to become responsive sooner than these responses are received, but we do not use service certificates on UI clients, and chat relays may prevent operations on router queues until SOKS response is received. SOKS response would include both the count of associated queues (as now) and the hash over all associated queue IDs (to be added). If both count and hash match, the client will not do anything. If either does not match the client would perform full sync (see below). -There is a value from doing the same in notification server as well to detect and "fix" drifts. +There is a value from doing the same in notification router as well to detect and "fix" drifts. The algorithm to compute hashes can be the following. @@ -46,7 +46,7 @@ The algorithm to compute hashes can be the following. 3. Adding queue ID to pre-computed hash requires a single XOR with ID hash: `new_aggregate = aggregate XOR hash(queue_id)`. 4. Removing queue ID from pre-computed hash also requires the same XOR (XOR is involutory, it undoes itself): `new_aggregate = aggregate XOR hash(queue_id)`. -These hashes need to be computed per user/server in the client and per service certificate in the server - on startup both have to validate and compute them once if necessary. +These hashes need to be computed per user/router in the client and per service certificate in the router - on startup both have to validate and compute them once if necessary. There can be also a start-up option to recompute hashe(s) to detect and fix any errors. @@ -54,35 +54,35 @@ This is all rather simple and would help detecting drifts. 
### Synchronization when drift is detected -The assumption here is that in most cases drifts are rare, and isolated to few IDs (e.g., this is the case with notification server). +The assumption here is that in most cases drifts are rare, and isolated to few IDs (e.g., this is the case with notification router). But the algorithm should be resilient to losing all associations, and it should not be substantially worse than simply restoring all associations or loading all IDs. -We have `c_n` and `c_hash` for client-side count and hash of queue IDs and `s_n` and `s_hash` for server-side, which are returned in SOKS response to SUBS command. +We have `c_n` and `c_hash` for client-side count and hash of queue IDs and `s_n` and `s_hash` for router-side, which are returned in SOKS response to SUBS command. 1. If `c_n /= s_n || c_hash /= s_hash`, the client must perform sync. -2. If `abs(c_n - s_n) / max(c_n, s_n) > 0.5`, the client will request the full list of queues (more than half of the queues are different), and will perform diff with the queues it has. While performing the diff the client will continue block operations with this user/server. +2. If `abs(c_n - s_n) / max(c_n, s_n) > 0.5`, the client will request the full list of queues (more than half of the queues are different), and will perform diff with the queues it has. While performing the diff the client will continue block operations with this user/router. -3. Otherwise would perform some algorithm for determining the difference between queue IDs between client and server. This algorithm can be made efficient (`O(log N)`) by relying on efficient sorting of IDs and database loading of ranges, via computing and communicating hashes of ranges, and performing a binary search on ranges, with batching to optimize network traffic. +3. Otherwise would perform some algorithm for determining the difference between queue IDs between client and router. 
This algorithm can be made efficient (`O(log N)`) by relying on efficient sorting of IDs and database loading of ranges, via computing and communicating hashes of ranges, and performing a binary search on ranges, with batching to optimize network traffic. This algorithm is similar to Merkle tree reconcilliation, but it is optimized for database reading of ordered ranges, and for our 16kb block size to minimize network requests. The algorithm: -1. The client would request all ranges from the server. -2. The server would compute hashes for N ranges of IDs and send them to the client. Each range would include start_id, optional end_id (for single ID ranges) and XOR-hash of the range. N is determined based on the block size and the range size. -3. The client would perform the same computation for the same ranges, and compare them with the returned ranges from the server, while detecting any gaps between ranges and missing range boundaries. +1. The client would request all ranges from the router. +2. The router would compute hashes for N ranges of IDs and send them to the client. Each range would include start_id, optional end_id (for single ID ranges) and XOR-hash of the range. N is determined based on the block size and the range size. +3. The client would perform the same computation for the same ranges, and compare them with the returned ranges from the router, while detecting any gaps between ranges and missing range boundaries. 4. If more than half of the ranges don't match, the client would request the full list. Otherwise it would repeat the same algorithm for each mismatched range and for gaps. It can be further optimized by merging adjacent ranges and by batching all range requests, it is quite simple. Once the client determines the list of missing and extra queues it can: - create associations (via SUB) for missing queues, -- request removal of association (a new command, e.g. BUS) for extra queues on the server. 
+- request removal of association (a new command, e.g. BUS) for extra queues on the router. The pseudocode for the algorightm: -For the server to return all ranges or subranges of requested range: +For the router to return all ranges or subranges of requested range: ```haskell getSubRanges :: Maybe (RecipientId, RecipientId) -> [(RecipientId, Maybe RecipientId, Hash)] diff --git a/rfcs/README.md b/rfcs/README.md index a98f8aa10c..51a450ab07 100644 --- a/rfcs/README.md +++ b/rfcs/README.md @@ -143,8 +143,8 @@ As more protocols are designated as Core IP, development naturally transitions t | Location | Contents | Count | |----------|----------|-------| -| `protocol/` | Consolidated specs (SMP v9, Agent v5, XFTP v2, XRCP v1, Push v2, PQDR v1) | 6 specs + overview | -| `rfcs/` root | Active draft proposals | 19 | -| `rfcs/done/` | Implemented, not yet verified | 25 | -| `rfcs/standard/` | Verified against implementation | (to be populated) | +| `protocol/` | Consolidated specs (SMP v19, Agent v7, XFTP v3, XRCP v1, NTF v3, PQDR v1) | 6 specs + overview | +| `rfcs/` root | Active draft proposals | 10 | +| `rfcs/done/` | Implemented, not yet verified | 12 | +| `rfcs/standard/` | Verified against implementation | 25 | | `rfcs/rejected/` | Draft proposals not accepted | 7 | diff --git a/rfcs/2024-02-12-encryption.md b/rfcs/done/2024-02-12-encryption.md similarity index 91% rename from rfcs/2024-02-12-encryption.md rename to rfcs/done/2024-02-12-encryption.md index 37a936ae47..8ecfcadfa8 100644 --- a/rfcs/2024-02-12-encryption.md +++ b/rfcs/done/2024-02-12-encryption.md @@ -4,7 +4,7 @@ ### Protection of meta-data from sending proxy -The SEND commands and message queue IDs need to be encrypted so that sending proxy cannot see how many queues exist on each server. +The SEND commands and message queue IDs need to be encrypted so that sending proxy cannot see how many queues exist on each router. 
Correlation IDs need to be random and can be re-used as nonces so that the destination relay cannot use the increasing correlation IDs that are sent in v6 of the protocol to track the sender. @@ -24,10 +24,10 @@ encRespTransmission = replyNonce encrypted(respTransmission) respTransmission = entityId command ``` -The keys to encrypt and decrypt both the command and responses would be computed as curve25519 from the key sent together with command and server session key. For the requests, the nonce has to be random and sent outside of the encrypted envelopt, but for the response respNonce would be taken from inside of the encrypted envelope and it would also be used for correlating commands and responses. This way the attacker who could compromise TLS would not be able to correlate the commands and responses, and also observe entity IDs. +The keys to encrypt and decrypt both the command and responses would be computed as curve25519 from the key sent together with command and router session key. For the requests, the nonce has to be random and sent outside of the encrypted envelope, but for the response respNonce would be taken from inside of the encrypted envelope and it would also be used for correlating commands and responses. This way the attacker who could compromise TLS would not be able to correlate the commands and responses, and also observe entity IDs. 2. The remaining question is to how encrypt and decrypt messages delivered not in response to the commands. The possible options are: - restore client session key only for that purpose, but do not forward this key to the destination proxy for sent messages. Then the messages can be sent with a random replyNonce and the key would be computed from session keys. The advantage here is that we won't need to parameterize handles as both client and server would have session keys. 
The downside that we would have to either somehow differentiate messages and responses, either by some flag that would allow some correlation or just by the absense of replyNonce in the lookup map - that is if the client can find replyNonce, it would use the associated key to decrypt, and if not it would use session key. -- use the same key that was sent with SUB or ACK command. This is much more complex, and would only have some upside if we were to introduce receiving proxies (to conceal transport sessions from the receiving relays for the recipients). +- use the same key that was sent with SUB or ACK command. This is much more complex, and would only have some upside if we were to introduce receiving proxies (to conceal transport sessions from the receiving routers for the recipients). diff --git a/rfcs/2024-03-20-server-metadata.md b/rfcs/done/2024-03-20-server-metadata.md similarity index 87% rename from rfcs/2024-03-20-server-metadata.md rename to rfcs/done/2024-03-20-server-metadata.md index 22b163c056..3e696a8fb6 100644 --- a/rfcs/2024-03-20-server-metadata.md +++ b/rfcs/done/2024-03-20-server-metadata.md @@ -2,7 +2,7 @@ ## Problem -Currently, the clients configure/choose which servers to use, but they cannot see who operates them, in which geography and hosting provider, what is the server source code (in case it was modified from the reference implementation we provide) and also any administrative and feedback contacts. +Currently, the clients configure/choose which routers to use, but they cannot see who operates them, in which geography and hosting provider, what is the router source code (in case it was modified from the reference implementation we provide) and also any administrative and feedback contacts. Further, we currently use simplex.chat domain to host group links, and as diversity of the groups grows it is beginning to require managing feedback from the users about groups. 
It is important that this feedback is directed to relay owners and not to us, in case they are not our relays, as we are simply providing software here. @@ -21,28 +21,28 @@ While this document is not the end of the journey to decentralize the network, i The proposed solution consists of two parts: -- communicate server metadata via protocol, so it can be observed by the clients. +- communicate router metadata via protocol, so it can be observed by the clients. - create home page for the relays, with all the same metadata. - create invitation and address links in the same domain name as the relay. -The latter point is important so it is clear to the users who operates and owns the relay and where the access point to the content or group is hosted. Even though simplex.chat domain is never accessed by the app, and the meaningful part of the address is never sent to the page hosting server, it creates an impression of centralization, and some dependency on simplex.chat domain for anything other that showing the link QR code. +The latter point is important so it is clear to the users who operates and owns the relay and where the access point to the content or group is hosted. Even though simplex.chat domain is never accessed by the app, and the meaningful part of the address is never sent to the page hosting router, it creates an impression of centralization, and some dependency on simplex.chat domain for anything other than showing the link QR code. Moving invitation links to the domain of the relay (primary relay, in case the link has redundancy) will both clarify relay ownership, solve the incorrect mis-perception of centralization, remove the dependency on simplex-chat domain without any user effort, and provides the means to submit content complaints to the relay operators (should they wish to receive them, which seems reasonable for large public relays, but may be unnecessary for private relays where unidentified parties cannot create links). 
## Solution details -Extend server INI file with information section: +Extend router INI file with information section: ``` [INFORMATION] # Please note that under AGPLv3 license conditions you MUST make -# any source code modifications available to the end users of the server. +# any source code modifications available to the end users of the router. # LICENSE: https://github.com/simplex-chat/simplexmq/blob/stable/LICENSE # Not doing so would constitute a license violation. # Declaring an incorrect information here amounts to a fraud. # The license holders reserve the right to prosecute missing or incorrect # information about the server source code to the fullest extent permitted by the law. -# The server will show warning on start if this field is absent +# The router will show warning on start if this field is absent # and will not launch from v6.0 until this field is added. # If any other information field is present, source code property also MUST be present. source_code: https://github.com/simplex-chat/simplexmq @@ -69,13 +69,13 @@ hosting: Linode / Akamai Inc. hosting_country: US ``` -Server home page would show whether queue creation is allowed and/or password protected, server retention policy (e.g., preserve messages on restart or not, and persist connections or not). +Router home page would show whether queue creation is allowed and/or password protected, router retention policy (e.g., preserve messages on restart or not, and persist connections or not). -Server queue address/contact pages will optionally, provide the UI to submit feedback, comments and complaints directly from the web page (not an MVP, initially we would simply show addresses for feedback, and, probably, create link that opens in the app with pre-populated message, and we could also use this addresses defined in server meta-data to submit feedback from inside of the app - it's also out of MVP scope). 
+Router queue address/contact pages will optionally provide the UI to submit feedback, comments and complaints directly from the web page (not an MVP, initially we would simply show addresses for feedback, and, probably, create link that opens in the app with pre-populated message, and we could also use these addresses defined in router meta-data to submit feedback from inside of the app - it's also out of MVP scope). -If server is available on .onion address, the web pages would show "open via .onion" in Tor browser. +If router is available on .onion address, the web pages would show "open via .onion" in Tor browser. -Extend server handshake header with these information fields: +Extend router handshake header with these information fields: ```haskell data ServerHandshake = ServerHandshake @@ -93,13 +93,13 @@ data ServerInformation = ServerInformation info :: ServerPublicInfo } --- based on server configuration +-- based on router configuration data ServerPublicConfig = ServerPublicConfig { persistence :: SMPServerPersistenceMode, messageExpiration :: Int, statsEnabled :: Bool, newQueuesAllowed :: Bool, - basicAuthEnabled :: Bool -- server is private if enabled + basicAuthEnabled :: Bool -- router is private if enabled } -- based on INFORMATION section of INI file @@ -127,4 +127,4 @@ data ServerContactAddress = ServerContactAddress } ``` -This extended server information will be stored in the chat database every time it changes and shown in the UI of the server configuration. +This extended router information will be stored in the chat database every time it changes and shown in the UI of the router configuration. 
diff --git a/rfcs/2024-06-01-agent-protocol.md b/rfcs/done/2024-06-01-agent-protocol.md similarity index 100% rename from rfcs/2024-06-01-agent-protocol.md rename to rfcs/done/2024-06-01-agent-protocol.md diff --git a/rfcs/2024-06-21-short-links.md b/rfcs/done/2024-06-21-short-links.md similarity index 84% rename from rfcs/2024-06-21-short-links.md rename to rfcs/done/2024-06-21-short-links.md index df028a8ff5..1dd3bf9105 100644 --- a/rfcs/2024-06-21-short-links.md +++ b/rfcs/done/2024-06-21-short-links.md @@ -14,7 +14,7 @@ Additionally, if we store short links, they can also include chat preferences an MITM-resistant link shortening. -Instead of generating the random address that would resolve into the link - doing so would create the possibility of MITM by the server hosting this link - we can use private key as the link ID that will be passed to the accepting party, and the hash of the public key as ID for the server - the accepting party would present this key itself as ID and it will also be used for server to client encryption (see Protocol below). HKDF will be used to derive symmetric key from private key and used in secret_box together with random nonce (to allow replacing data with the same key but with a different nonce - nonce will be sent to the server too). secret_box construction is authenticated encryption, so it would protect from MITM. +Instead of generating the random address that would resolve into the link - doing so would create the possibility of MITM by the router hosting this link - we can use private key as the link ID that will be passed to the accepting party, and the hash of the public key as ID for the router - the accepting party would present this key itself as ID and it will also be used for router to client encryption (see Protocol below). 
HKDF will be used to derive symmetric key from private key and used in secret_box together with random nonce (to allow replacing data with the same key but with a different nonce - nonce will be sent to the router too). secret_box construction is authenticated encryption, so it would protect from MITM. The proposed syntax: @@ -29,7 +29,7 @@ srvHosts = ["," srvHosts] ; RFC1123, RFC5891 linkHash = ``` -If SMP server supports pages, its name can be used as clientAppServer, without repeating it after #, for a shorter link. +If SMP router supports pages, its name can be used as clientAppServer, without repeating it after #, for a shorter link. Example link: @@ -40,12 +40,12 @@ https://simplex.chat/contact/#0YuTwO05YJWS8rkjn9eLJDjQhFKvIYd8d4xG8X1blIU=@smp8. This link has the length of ~136 characters (256 bits), which is shorter than the full contact address (~310 characters) and much shorter than invitation links (~528 characters) even without post-quantum keys added to them. This size can be further reduced by -- use server domain in the link. -- do not include onion address, as the connection happens via proxy anyway, if it's untrusted server. -- not pinning server TLS certificate - the downside here is that while the attack that compromises TLS will not be able to substitute the link (because it's hash will not match), it will be able to intercept and to block it. +- use router domain in the link. +- do not include onion address, as the connection happens via proxy anyway, if it's untrusted router. +- not pinning router TLS certificate - the downside here is that while the attack that compromises TLS will not be able to substitute the link (because its hash will not match), it will be able to intercept and to block it. - using shorter hash, e.g. SHA128 - reducing the collision resistance. 
-If the server is known, the client could use it's hash and onion address, otherwise it could trust the proxy to use any existing session with the same hostname or to accept the risk of interception - given that there is no risk of substitution. +If the router is known, the client could use its hash and onion address, otherwise it could trust the proxy to use any existing session with the same hostname or to accept the risk of interception - given that there is no risk of substitution. With the first two of these "improvements" the link could be ~122 characters: @@ -59,13 +59,13 @@ If onion address is preserved the link will be ~184 characters (won't fit in Twi https://smp8.simplex.im/contact/#0YuTwO05YJWS8rkjn9eLJDjQhFKvIYd8d4xG8X1blIU@beccx4yfxxbvyhqypaavemqurytl6hozr47wfc7uuecacjqdvwpw2xid.onion/abcdefghij0123456789abcdefghij0123456789abc ``` -If we implement it, the request to resolve the link would be made via proxied SMP command (to avoid the direct connection between the client and the recipient's server). +If we implement it, the request to resolve the link would be made via proxied SMP command (to avoid the direct connection between the client and the recipient's router). Pros: - a bit shorter link. - possibility to include post-quantum keys into the full link keeping the same shortened link size. - possibility to include chat profile of contact or group, and preferences, for a much better connection experience, and to show this information when the link sent in the conversation (clients can resolve them automatically, without connecting - it can be resolved by the sending clients). -- server will not have access to the link. +- router will not have access to the link. Cons: - protocol complexity. 
@@ -75,7 +75,7 @@ Pros are a huge improvement of UX of connecting both within and from outside of ## Protocol -To support short links, the SMP servers would provide a simple key-value store enabled by three additional commands: `WRT`, `CLR` and `READ` +To support short links, the SMP routers would provide a simple key-value store enabled by three additional commands: `WRT`, `CLR` and `READ` `WRT` command is used to store and to update values in the store. The size of the value is limited by the same size as sent messages (or, possibly, smaller - as connection information size used in confirmation messages) - the clients would use this fixed size irrespective of the content. `WRT` command will be sent with the data blob ID in the transaction entityId field, public authorization key used to authorize `WRT` and `CLR` commands (subsequent WRT commands to the existing key must use the same key), and the data blob. @@ -89,22 +89,22 @@ To support short links, the SMP servers would provide a simple key-value store e - the data blob owner generates X25519 key pair: `(k, pk)`. - private key `pk` will be included in the short link shared with the other party (only base64url encoded key bytes, not X509 encoding). -- `HKDF(pk)` will be used to encrypt the link data with secret_box before storing it on the server. +- `HKDF(pk)` will be used to encrypt the link data with secret_box before storing it on the router. - the hash of public key `sha256(k)` will be used as ID by the owner to store and to remove the data blob (`WRT` and `CLR` commands). **Retrieve data blob** -- the sender uses the public key `k` derived from the private key `pk` included in the link as entity ID to retrieve data blob (the server will compute the ID used by the owner as `sha256(k)` and will be able to look it up). This provides the quality that the traffic of the parties has no shared IDs inside TLS. 
It also means that unlike message queue creation, the ID to retrieve the blob was never sent to the blob creator, and also is not known to the server in advance (the second part is only an observation, in itself it does not increase security, as server has access to an encrypted blob anyway). +- the sender uses the public key `k` derived from the private key `pk` included in the link as entity ID to retrieve data blob (the router will compute the ID used by the owner as `sha256(k)` and will be able to look it up). This provides the quality that the traffic of the parties has no shared IDs inside TLS. It also means that unlike message queue creation, the ID to retrieve the blob was never sent to the blob creator, and also is not known to the router in advance (the second part is only an observation, in itself it does not increase security, as router has access to an encrypted blob anyway). - note that the sender does not authorize the request to retrieve the blob, as it would not increase security unless a different key is used to authorize, and adding a key would increase link size. -- server session keys with the sender will be `(sk, spk)`, where `sk` is public key shared with the sender during session handshake, and `spk` is the private key known only to the server. -- this public key `k` will also be combined with server session key `spk` using `dh(k, spk)` to encrypt the response, so that there is no ciphertext in common in sent and received traffic for these blobs. Correlation ID will be used as a nonce for this encryption. +- router session keys with the sender will be `(sk, spk)`, where `sk` is public key shared with the sender during session handshake, and `spk` is the private key known only to the router. +- this public key `k` will also be combined with router session key `spk` using `dh(k, spk)` to encrypt the response, so that there is no ciphertext in common in sent and received traffic for these blobs. 
Correlation ID will be used as a nonce for this encryption. - having received the blob, the client can now decrypt it using secret_box with `HKDF(pk)`. Using the same key as ID for the request, and also to additionally encrypt the response allows to use a single key in the link, without increasing the link size. ## Threat model -**Compromised SMP server** +**Compromised SMP router** can: - delete link data. diff --git a/rfcs/2024-09-01-smp-message-storage.md b/rfcs/done/2024-09-01-smp-message-storage.md similarity index 93% rename from rfcs/2024-09-01-smp-message-storage.md rename to rfcs/done/2024-09-01-smp-message-storage.md index 0cad20235c..1acc32f3b8 100644 --- a/rfcs/2024-09-01-smp-message-storage.md +++ b/rfcs/done/2024-09-01-smp-message-storage.md @@ -1,8 +1,8 @@ -# SMP server message storage +# SMP router message storage ## Problem -Currently SMP servers store all queues in server memory. As the traffic grows, so does the number of undelivered messages. What is worse, Haskell is not avoiding heap fragmentation when messages are allocated and then de-allocated - undelivered messages use ByteString and GC cannot move them around, as they use pinned memory. +Currently SMP routers store all queues in router memory. As the traffic grows, so does the number of undelivered messages. What is worse, Haskell is not avoiding heap fragmentation when messages are allocated and then de-allocated - undelivered messages use ByteString and GC cannot move them around, as they use pinned memory. ## Possible solutions @@ -10,7 +10,7 @@ Currently SMP servers store all queues in server memory. As the traffic grows, s Move from ByteString to some other primitive to store messages in memory long term, e.g. ShortByteString, or manage allocation/de-allocation of stored messages manually in some other way. -Pros: the simplest solution that avoids substantial re-engineering of the server. +Pros: the simplest solution that avoids substantial re-engineering of the router. 
Cons: - not a long term solution, as memory growth still has limits. @@ -22,12 +22,12 @@ Use files or RocksDB to store messages. Pros: - much lower memory usage. -- no message loss in case of abnormal server termination (important until clients have delivery redundancy). +- no message loss in case of abnormal router termination (important until clients have delivery redundancy). - this is a long term solution, and at some point it might need to be done anyway. Cons: - substantial re-engineering costs and risks. -- metadata privacy. Currently we only save undelivered messages when server is restarted, with this approach all messages will be stored for some time. this argument is limited, as hosting providers of VMs can make memory snapshots too, on the other hand they are harder to analyze than files. On another hand, with this approach messages will be stored for a shorter time. +- metadata privacy. Currently we only save undelivered messages when router is restarted, with this approach all messages will be stored for some time. this argument is limited, as hosting providers of VMs can make memory snapshots too, on the other hand they are harder to analyze than files. On another hand, with this approach messages will be stored for a shorter time. 
#### RocksDB and other key-value stores @@ -67,7 +67,7 @@ queueLogLine = %s"write_msg=" digits ``` -When queue is first requested by the server: +When queue is first requested by the router: ```c if queue folder exists: @@ -87,7 +87,7 @@ nextReadMsg = read_msg open write_file in AppendMode ``` -When message is added to the queue (assumes that queue state is loaded to server memory, if not the previous section will be done first): +When message is added to the queue (assumes that queue state is loaded to router memory, if not the previous section will be done first): ```c if write_msg > max_queue_messages: @@ -128,7 +128,7 @@ else nextReadByte = current position in file ``` -When message delivery is acknowledged, the read queue needs to be advanced, and possibly switched to read from the current write_queue: +When message delivery is acknowledged, the read queue needs to be advanced, and possibly switched to read from the current write queue: ```c if nextReadByte == read_byte: @@ -162,9 +162,9 @@ Most Linux systems use EXT4 filesystem where the file lookup time scales linearl So storing all queue folders in one folder won't scale. -To solve this problem we could use recipient queue ID in base64url format not as a folder name, but as a folder path, splitting it to path fragments of some length. The number of fragments can be configurable and migration to a different fragment size can be supported as the number of queues on a given server grows. +To solve this problem we could use recipient queue ID in base64url format not as a folder name, but as a folder path, splitting it to path fragments of some length. The number of fragments can be configurable and migration to a different fragment size can be supported as the number of queues on a given router grows. -Currently, queue ID is 24 bytes random number, thus allowing 2^192 possible queue IDs. If we assume that a server must hold 1b queues, it means that we have ~2^162 possible addresses for each existing queue. 
24 bytes in base64 is 32 characters that can be split into say 8 fragments with 4 characters each, so that queue folder path for queue with ID `abcdefghijklmnopqrstuvwxyz012345` would be: +Currently, queue ID is 24 bytes random number, thus allowing 2^192 possible queue IDs. If we assume that a router must hold 1b queues, it means that we have ~2^162 possible addresses for each existing queue. 24 bytes in base64 is 32 characters that can be split into say 8 fragments with 4 characters each, so that queue folder path for queue with ID `abcdefghijklmnopqrstuvwxyz012345` would be: `/var/opt/simplex/messages/abcd/efgh/ijkl/mnop/qrst/uvwx/yz01/2345` @@ -174,6 +174,6 @@ So we could use an unequal split of path, two letters each and the last being lo `/var/opt/simplex/messages/ab/cd/ef/ghijklmnopqrstuvwxyz012345` -The first three levels in this case can have 4096 subfolders each, and it gives 68b possible subfolders (64^2^3), so the last level will be sparse in case of 1b queues on the server. So we could make it 4 levels with 2 letters to never think about it, accounting for a large variance of the random numbers distribution: +The first three levels in this case can have 4096 subfolders each, and it gives 68b possible subfolders (64^2^3), so the last level will be sparse in case of 1b queues on the router. So we could make it 4 levels with 2 letters to never think about it, accounting for a large variance of the random numbers distribution: `/var/opt/simplex/messages/ab/cd/ef/gh/ijklmnopqrstuvwxyz012345` diff --git a/rfcs/2024-09-09-smp-blobs.md b/rfcs/done/2024-09-09-smp-blobs.md similarity index 91% rename from rfcs/2024-09-09-smp-blobs.md rename to rfcs/done/2024-09-09-smp-blobs.md index be990f09c3..b205d36563 100644 --- a/rfcs/2024-09-09-smp-blobs.md +++ b/rfcs/done/2024-09-09-smp-blobs.md @@ -11,13 +11,13 @@ Allow storing extended information with SMP queues to improve UX and security of ## Design -1. 
Queue creation/update date is already added to server persistence, allowing to expire queues and blobs, depending on their usage. +1. Queue creation/update date is already added to router persistence, allowing to expire queues and blobs, depending on their usage. 2. Add "queue type" metadata to NEW command to indicate whether messaging queue is used as public address or as messaging queue (see previous docs on why it doesn't change threat model). While at the moment it would match sndSecure flag there may be future scenarios when they diverge. Initially only "invitation" and "contact" types will be supported. 3. Prohibit sndSecure flag for "contact" queues, prohibit securing contact queues. 4. Add "queue blobs" to NEW command: - - blob0: ratchetKeys up to N0 bytes - priority 0, can't be removed by the server, only in "invitation" - - blob1: PQ key up to N1 bytes - priority 1, can be removed by the server, only used in "invitation" - - blob2: Application data up to N2 bytes - priority 2, can be removed by the server. + - blob0: ratchetKeys up to N0 bytes - priority 0, can't be removed by the router, only in "invitation" + - blob1: PQ key up to N1 bytes - priority 1, can be removed by the router, only used in "invitation" + - blob2: Application data up to N2 bytes - priority 2, can be removed by the router. 5. Add linkId to NEW command 6. linkId and blobs will be removed when queue is secured. 7. Add recipient command to remove/upsert blob2 for contact queues. @@ -28,7 +28,7 @@ Allow storing extended information with SMP queues to improve UX and security of ### Creating a queue: The queue owner: -- generates Ed25529 key pair `(sk, spk)` and X25519 key pair `(dhk, dhpk)` to use with the server, same as now. `sk` and `dhk` will be sent in NEW command. +- generates Ed25519 key pair `(sk, spk)` and X25519 key pair `(dhk, dhpk)` to use with the router, same as now. `sk` and `dhk` will be sent in NEW command.
- generates X25519 key pair `(k, pk)` to use with the accepting party to encrypt queue messages. - derives from `k` using HKDF: - symmetric key `bk` for authenticated encryption of blobs. @@ -73,7 +73,7 @@ Response to GET: blobs = %s"BLOB" senderId [ "0" blob0 ] [ "1" blob1 ] [ "2" blob2 ] ``` -As blobs are retrieved using a separate linkId, once blobs are removed it will be impossible to find senderId from short link - it is a threat model improvement. Once server storage is compacted, it will be impossible to find queue related to the link even with the access to server data (unless server preserves the data). +As blobs are retrieved using a separate linkId, once blobs are removed it will be impossible to find senderId from short link - it is a threat model improvement. Once router storage is compacted, it will be impossible to find queue related to the link even with the access to router data (unless router preserves the data). ### Possible privacy improvement diff --git a/rfcs/2024-09-15-shared-port.md b/rfcs/done/2024-09-15-shared-port.md similarity index 86% rename from rfcs/2024-09-15-shared-port.md rename to rfcs/done/2024-09-15-shared-port.md index 67f55b3ca6..390e9d6c03 100644 --- a/rfcs/2024-09-15-shared-port.md +++ b/rfcs/done/2024-09-15-shared-port.md @@ -1,6 +1,6 @@ # Sharing protocol ports with HTTPS -Some networks block all ports other than web ports, including port 5223 used for SMP protocol by default. Running SMP servers on a common web port 443 would allow them to work on more networks. The servers would need to provide an HTTPS page for browsers (and probes). +Some networks block all ports other than web ports, including port 5223 used for SMP protocol by default. Running SMP routers on a common web port 443 would allow them to work on more networks. The routers would need to provide an HTTPS page for browsers (and probes). ## Problem @@ -8,7 +8,7 @@ Browsers and tools rely on system CA bundles instead of certificate pinning. 
The crypto parameters used by HTTPS are different from what the protocols use. Public certificate providers like LetsEncrypt can only sign specific types of keys and Ed25519 isn't one of them. -This means a server should distinguish browser and protocol clients and adjust its behavior to match. +This means a router should distinguish browser and protocol clients and adjust its behavior to match. ## Solution @@ -16,15 +16,15 @@ This means a server should distinguish browser and protocol clients and adjust i Since LE certificates are only handed out to domain names, TLS client will be sending the SNI. However client transports are constructed over connected sockets and the SNI wouldn't be present unless explicitly requested. -When a client sends SNI, then it's a browser and a web credentials should be used. +When a client sends SNI, then it's a browser and web credentials should be used. Otherwise it's a protocol client to be offered the self-signed ca, cert and key. When a transport colocated with a HTTPS, its ALPN list should be extended with `h2 http/1.1`. The browsers will send it, and it should be checked before running transport client. -If HTTP ALPN is detected, then the client connection is served with HTTP `Application` instead (the same "server information" page). +If HTTP ALPN is detected, then the client connection is served with HTTP `Application` instead (the same "router information" page). -If some client connects to server IP, doesn't send SNI and doesn't send ALPN, it will look like a pre-handshake client. -In that case a server will send its handshake first. +If some client connects to router IP, doesn't send SNI and doesn't send ALPN, it will look like a pre-handshake client. +In that case a router will send its handshake first. This can be mitigated by delaying its handshake and letting the probe to issue its HTTP request. 
## Implementation plan @@ -43,7 +43,7 @@ runServer (tcpPort, ATransport t) = do else runClient serverSignKey t h `runReaderT` env -- performs serverHandshake etc as usual ``` -The web app and server live outside, so `runHttp` has to be provided by the `runSMPServer` caller. +The web app and router live outside, so `runHttp` has to be provided by the `runSMPServer` caller. Additonally, Warp is using its `InternalInfo` object that's scoped to `withII` bracket. ```haskell @@ -69,7 +69,7 @@ The implementation relies on a few modification to upstream code: ### TLS.ServerParams -When a server has port sharing enabled, a new set of TLS params is loaded and combined with transport params: +When a router has port sharing enabled, a new set of TLS params is loaded and combined with transport params: ```haskell newEnv config = do @@ -129,7 +129,7 @@ key: /etc/opt/simplex/web.key # key: /etc/letsencrypt/live/smp.hostname.tld/privkey.pem ``` -When `TRANSPORT.port` matches `WEB.https` the transport server becomes shared. +When `TRANSPORT.port` matches `WEB.https` the transport router becomes shared. Perhaps a more desirable option would be explicit configuration resulting in additional transported to run: @@ -148,16 +148,16 @@ key: /etc/opt/simplex/web.key ## Caveats -Serving static files and the protocols togother may pose a problem for those who currently use dedicated web servers as they should switch to embedded http handlers. +Serving static files and the protocols together may pose a problem for those who currently use dedicated web servers as they should switch to embedded http handlers. As before, using embedded HTTP server is increasing attack surface. -Users who want to run everything on a single host will have to add and extra IP address and bind servers to specific IPs instead of 0.0.0.0. -An amalgamated server binary can be provided that would contain both SMP and XFTP servers, where transport will dispatch connections by handshake ALPN. 
+Users who want to run everything on a single host will have to add an extra IP address and bind routers to specific IPs instead of 0.0.0.0. +An amalgamated router binary can be provided that would contain both SMP and XFTP routers, where transport will dispatch connections by handshake ALPN. ## Alternative: Use transports routable with reverse-proxies An "industrial" reverse proxy may do the ALPN routing, serving HTTP by itself and delegating `smp` and `xftp` to protocol servers. Same with the `websockets`. -Since this in effect does TLS termination, the protocol servers will have to rely on credentials from protocol handshakes. +Since this in effect does TLS termination, the protocol routers will have to rely on credentials from protocol handshakes. diff --git a/rfcs/2024-11-25-journal-expiration.md b/rfcs/done/2024-11-25-journal-expiration.md similarity index 94% rename from rfcs/2024-11-25-journal-expiration.md rename to rfcs/done/2024-11-25-journal-expiration.md index d6281e3b1e..0445122417 100644 --- a/rfcs/2024-11-25-journal-expiration.md +++ b/rfcs/done/2024-11-25-journal-expiration.md @@ -2,7 +2,7 @@ ## Problem -The journal storage servers recently migrated to do not delete delivered or expired messages, they only update pointers to journal file lines. The messages are actually deleted when the whole journal file is deleted (when fully deleted or fully expired). +The journal storage routers recently migrated to do not delete delivered or expired messages, they only update pointers to journal file lines. The messages are actually deleted when the whole journal file is deleted (when fully deleted or fully expired). The problem is that in case the queue stops receiving the new messages then writing of messages won't switch to the new journal file, and the current journal file containing delivered or expired messages would never be deleted. 
diff --git a/rfcs/2025-03-16-smp-queues.md b/rfcs/done/2025-03-16-smp-queues.md similarity index 85% rename from rfcs/2025-03-16-smp-queues.md rename to rfcs/done/2025-03-16-smp-queues.md index d79e6f419f..761d624baf 100644 --- a/rfcs/2025-03-16-smp-queues.md +++ b/rfcs/done/2025-03-16-smp-queues.md @@ -19,18 +19,18 @@ Simply designating queue types would allow to use this information to decide for We want to achieve these objectives for short links and associated queue data: 1. no possibility to provide incorrect SenderId inside link data (e.g. from another queue). -2. link data cannot be accessed by the server unless it has the link. -3. prevent MITM attack by the server, including the server that obtained the link. +2. link data cannot be accessed by the router unless it has the link. +3. prevent MITM attack by the router, including the router that obtained the link. 4. prevent changing of connection request by the user (to prevent MITM via break-in attack in the originating client). -5. for one-time links, prevent accessing link data by link observers who did not compromise the server. +5. for one-time links, prevent accessing link data by link observers who did not compromise the router. 6. allow changing the user-defined part of link data. -7. avoid changing the link when user-defined part of link data changes, while preventing MITM attack by the server on user-defined part, even if it has the link. -8. retain the quality that it is impossible to check the existence of secured queue from having any of its temporary visible IDs (sender ID and link ID in 1-time invitations) - it requires that these IDs remain server-generated (contrary to the previous RFCs). +7. avoid changing the link when user-defined part of link data changes, while preventing MITM attack by the router on user-defined part, even if it has the link. +8. 
retain the quality that it is impossible to check the existence of secured queue from having any of its temporary visible IDs (sender ID and link ID in 1-time invitations) - it requires that these IDs remain router-generated (contrary to the previous RFCs). To achieve these objectives the queue data will include fixed (immutable) and user-defined (mutable) parts. Fixed part would include: -- full connection request (the current long link with all keys, including PQ keys). This includes SenderId that must match server response. +- full connection request (the current long link with all keys, including PQ keys). This includes SenderId that must match router response. - public signature key to verify mutable part of link data. Signed mutable part would include: @@ -41,7 +41,7 @@ The link itself should include both the key and auth tag from the encryption of ## Solution -Current NEW and NKEY commands: +Current NEW and NKEY commands (code identifiers like `QueueIdsKeys` are Haskell type names): ```haskell NEW :: RcvPublicAuthKey -> RcvPublicDhKey -> Maybe BasicAuth -> SubscriptionMode -> SenderCanSecure -> Command Recipient @@ -76,8 +76,8 @@ data QueueReqData | QRContact (Maybe (LinkId, (SenderId, QueueLinkData))) -- SenderId should be computed client-side as the first 24 bytes of sha3-384(correlation_id), --- The server must verify it and reject if it is not. --- It allows to include sender ID inside encrypted associated link data as part of full connection URI without requesting it from the server, but prevents checking if a given sender ID exists (queue creation would fail for a duplicate sender ID), as sha3-384 derivation is not reversible. +-- The router must verify it and reject if it is not. 
+-- It allows to include sender ID inside encrypted associated link data as part of full connection URI without requesting it from the router, but prevents checking if a given sender ID exists (queue creation would fail for a duplicate sender ID), as sha3-384 derivation is not reversible. type QueueLinkData = (EncFixedLinkData, EncUserDataBytes) type EncFixedLinkData = ByteString @@ -86,7 +86,7 @@ type EncUserDataBytes = ByteString -- We need to use binary encoding for ConnectionRequestUri to reduce its size -- The clients would reject changed immutable data and --- ConnectionRequestUri where server or SenderId of the queue do not match. +-- ConnectionRequestUri where router or SenderId of the queue do not match. data FixedLinkData c = FixedLinkData { agentVRange :: VersionRangeSMPA, rootKey :: C.PublicKeyEd25519, @@ -110,11 +110,11 @@ newtype UserLinkData = UserLinkData ByteString -- | Updated queue IDs and keys, returned in IDS response data QueueIdsKeys = QIK - { rcvId :: RecipientId, -- server-generated - sndId :: SenderId, -- server-generated + { rcvId :: RecipientId, -- router-generated + sndId :: SenderId, -- router-generated rcvPublicDhKey :: RcvPublicDhKey, sndSecure :: SenderCanSecure, -- possibly, can be removed? or implied? - linkId :: Maybe LinkId -- server-generated + linkId :: Maybe LinkId -- router-generated } ``` @@ -149,31 +149,31 @@ LGET :: Command Sender LNK :: SenderId -> QueueLinkData -> BrokerMsg ``` -To both include sender_id into the full link before the server response, and to prevent "oracle attack" when a failure to create the queue with the supplied `sender_id` can be used as a proof of queue existence, it is proposed that `sender_id` is computed client-side as the first 24 bytes of 48 in `sha3-384(correlation_id)` and validated server-side, where `corelation_id` is the transmission correlation ID. 
+To both include sender_id into the full link before the router response, and to prevent "oracle attack" when a failure to create the queue with the supplied `sender_id` can be used as a proof of queue existence, it is proposed that `sender_id` is computed client-side as the first 24 bytes of 48 in `sha3-384(correlation_id)` and validated router-side, where `correlation_id` is the transmission correlation ID. -To allow retries, every time the command is sent a new random `correlation_id` and new `sender_id` (and for contact queue, also `link_id`, which would be random as it is derived from hash of fixed link data that includes a random signature key) should be used on each attempt, because other IDs would be generated randomly on the server, and in case the previous command succeeded on the server but failed to be communicated to the client, the retry will fail if the same ID is used. +To allow retries, every time the command is sent a new random `correlation_id` and new `sender_id` (and for contact queue, also `link_id`, which would be random as it is derived from hash of fixed link data that includes a random signature key) should be used on each attempt, because other IDs would be generated randomly on the router, and in case the previous command succeeded on the router but failed to be communicated to the client, the retry will fail if the same ID is used. Alternative solutions that would allow retries that were considered and rejected: -- additional request to save queue data, after `sender_id` is returned by the server. The scenarios that require short links are interactive - creating user addresses and 1-time invitations - so making two requests instead of one would make the UX worse. -- include empty sender_id in the immutable data and have it replaced by the accepting party with `sender_id` received in `LINK` response - both a weird design, and might create possibility for some attacks via server, especially for contact addresses.
+- additional request to save queue data, after `sender_id` is returned by the router. The scenarios that require short links are interactive - creating user addresses and 1-time invitations - so making two requests instead of one would make the UX worse. +- include empty sender_id in the immutable data and have it replaced by the accepting party with `sender_id` received in `LINK` response - both a weird design, and might create possibility for some attacks via router, especially for contact addresses. - making NEW commands idempotent. Doing it would require generating all IDs client-side, not only `sender_id`. It increases complexity, and it is not really necessary as the only scenarios when retries are needed are async NEW commands, that do not require short links. For future short links of chat relays the retries are much less likely, as chat relays will have good network connections. ## Algorithm to prepare and to interpret queue link data. -For contact addresses this approach follows the design proposed in [Short links](./2024-06-21-short-links.md) RFC - when link id is derived from the same random binary as key. For 1-time invitations link ID is independent and server-generated, to prevent existence checks (oracle attack). +For contact addresses this approach follows the design proposed in [Short links](./2024-06-21-short-links.md) RFC - when link id is derived from the same random binary as key. For 1-time invitations link ID is independent and router-generated, to prevent existence checks (oracle attack). This scheme results in 32 byte binary size for contact addresses and 56 bytes for 1-time invitation links. For fixed link data. -1. Generate random `nonce` (also used as a correlation ID for server command) and signature key (public `rootKey` included in fixed data). +1. Generate random `nonce` (also used as a correlation ID for router command) and signature key (public `rootKey` included in fixed data). 2. 
Compute sender ID from `nonce` as the first 24 bytes of sha3-384 of `nonce`. 3. Generate other keys for queue address, including queue e2e encryption keys and double ratchet connection e2e encryption keys. 4. Construct the full connection address to be included in fixed data. 5. `link_key = SHA3-256(fixed_data)` - used as part of the link, and to derive the key to encrypt content. 6. HKDF: 1) contact address: `(link_id, key) = HKDF(link_key, 56 bytes)`. - 2) 1-time invitation: `key = HKDF(link_key, 32 bytes)`, `link-id` - server-generated. + 2) 1-time invitation: `key = HKDF(link_key, 32 bytes)`, `link-id` - router-generated. 7. Encrypt: `(ct1, tag1) = secret_box(fixed_data, key, nonce1)`, where `nonce1` is a random nonce 5. Store: `(nonce1, ct1, tag1)` stored as fixed link data. @@ -202,7 +202,7 @@ While using content hash as encryption key is unconventional, it is not complete ## Threat model -**Compromised SMP server** +**Compromised SMP router** can: - delete link data. @@ -223,22 +223,22 @@ cannot: - undetectably check the existence of messaging queue or 1-time link (objective 8). - replace or delete the link data. -**Queue owner who did not compromise the server**: +**Queue owner who did not compromise the router**: cannot: -- redirect connecting user to another queue, on the same or on another server (objective 1). +- redirect connecting user to another queue, on the same or on another router (objective 1). - replace connection request in the link (objective 4). ## Correlation of design objectives with design elements -1. The presence of `SenderId` in `LNK` response from the server. +1. The presence of `SenderId` in `LNK` response from the router. 2. Encryption of link data with crypto_box. -3. Deriving encryption key from the hash of fixed data prevents it being modified by the server - any change would be detected and rejected by the client, as the hash of fixed data won't match the link. 
Signature verification with the key from fixed data, and signing of mutable data prevents server modification of mutable data. -4. No server command to change fixed data once it's set. Also, changing fixed data would require changing the link. +3. Deriving encryption key from the hash of fixed data prevents it being modified by the router - any change would be detected and rejected by the client, as the hash of fixed data won't match the link. Signature verification with the key from fixed data, and signing of mutable data prevents router modification of mutable data. +4. No router command to change fixed data once it's set. Also, changing fixed data would require changing the link. 5. 1-time link data can only be accessed with `LKEY` command, that while allows retries to mitigate network failures, will require the same key for retries. 6. `LSET` command. -7. The link is derived from fixed data only, so it does not change when mutable link data changes. Mutable part is signed preventing server MITM attacks. -8. SenderId is derived from request correlation ID, so it cannot be arbitrary defined to check existence of some known queue. LinkId for 1-time invitation is generated server-side, so it cannot be provided by the client when creating the queues to check if these IDs are used. +7. The link is derived from fixed data only, so it does not change when mutable link data changes. Mutable part is signed preventing router MITM attacks. +8. SenderId is derived from request correlation ID, so it cannot be arbitrarily defined to check existence of some known queue. LinkId for 1-time invitation is generated router-side, so it cannot be provided by the client when creating the queues to check if these IDs are used. ## Syntax for short links @@ -257,34 +257,34 @@ contactLink = ; 32 bytes / 43 base64 encoded characters param = hostsParam / portParam / certHashParam hostsParam = %s"h=" host *("," host) ; additional hostnames, e.g.
onion -portParam = %s"p=" 1*DIGIT ; server port -certHashParam = %s"c=" +portParam = %s"p=" 1*DIGIT ; router port +certHashParam = %s"c=" ``` -To have shorter links fingerprint and additional server hostnames do not need to be specified for pre-configured servers, even if they are disabled - they can be used from the client code. Any user defined servers will require including additional hosts and server fingerprint. +To have shorter links fingerprint and additional router hostnames do not need to be specified for pre-configured routers, even if they are disabled - they can be used from the client code. Any user defined routers will require including additional hosts and router fingerprint. -Example one-time link for preset server (104 characters): +Example one-time link for preset router (104 characters): ``` https://smp12.simplex.im/i#abcdefghij0123456789abcdefghij01/23456789abcdefghij0123456789abcdefghij01234 ``` -Example contact link for preset server (71 characters): +Example contact link for preset router (71 characters): ``` https://smp12.simplex.im/c#abcdefghij0123456789abcdefghij0123456789abc ``` -Example contact link for user-defined server (with fingerprint, but without onion hostname - 117 characters): +Example contact link for user-defined router (with fingerprint, but without onion hostname - 117 characters): ``` https://smp1.example.com/c#abcdefghij0123456789abcdefghij0123456789abc?c=0YuTwO05YJWS8rkjn9eLJDjQhFKvIYd8d4xG8X1blIU ``` -Example contact link for user-defined server (with fingerprint ant onion hostname - 182 characters): +Example contact link for user-defined router (with fingerprint and onion hostname - 182 characters): ``` https://smp1.example.com/c#abcdefghij0123456789abcdefghij0123456789abc?c=0YuTwO05YJWS8rkjn9eLJDjQhFKvIYd8d4xG8X1blIU&h=beccx4yfxxbvyhqypaavemqurytl6hozr47wfc7uuecacjqdvwpw2xid.onion ``` -For the links to work in the browser the servers must provide server pages. 
+For the links to work in the browser the routers must provide router pages. diff --git a/rfcs/2025-05-05-client-certificates.md b/rfcs/done/2025-05-05-client-certificates.md similarity index 79% rename from rfcs/2025-05-05-client-certificates.md rename to rfcs/done/2025-05-05-client-certificates.md index 00de2f9b32..81455fb577 100644 --- a/rfcs/2025-05-05-client-certificates.md +++ b/rfcs/done/2025-05-05-client-certificates.md @@ -1,12 +1,12 @@ -# Service certificates for high volume servers and services connecting to SMP servers +# Service certificates for high volume routers and services connecting to SMP routers ## Problem -The absense of user and client identification benefits privacy, but it requires separately authorizing subscription for each messaging queue, that doesn't scale when a high volume server or service acts as a client for SMP server even for the current traffic and network size. +The absence of user and client identification benefits privacy, but it requires separately authorizing subscription for each messaging queue, that doesn't scale when a high volume router or service acts as a client for SMP router even for the current traffic and network size. -These servers/services include: +These routers/services include: - operators' chat relays (aka super-peers), -- notification servers, +- notification routers, - high-traffic service chat bots, - high-traffic business support clients. @@ -16,31 +16,31 @@ Self-hosted chat relays may want to retain privacy, so they will not use client Even today, directory service subscribing to all queues may take 15-20 minutes, which is experienced as downtime by the end users. -Notification servers also acting as clients to messaging servers also take 15-20 minutes to subscribe to all notifications, during which time notifications are not delivered. 
+Notification routers also acting as clients to messaging routers also take 15-20 minutes to subscribe to all notifications, during which time notifications are not delivered. -Not only these subscription take a lot of time, they also consume a large amount of memory both in the clients and in the servers, as association between clients and queues is currently session-scoped and not persisted anywhere (and it should not be, because end-users' clients do need privacy). +Not only do these subscriptions take a lot of time, they also consume a large amount of memory both in the clients and in the routers, as association between clients and queues is currently session-scoped and not persisted anywhere (and it should not be, because end-users' clients do need privacy). ## Solution -High volume "clients" (operators' chat relays, directory service, SimpleX Chat team support client, SimpleX Status bot, etc.) that don't need privacy will identify themselves to the messaging servers at a point of connection by providing client sertificate, both in TLS handshake and in SMP handshake (the same certificate must be provided). +High volume "clients" (operators' chat relays, directory service, SimpleX Chat team support client, SimpleX Status bot, etc.) that don't need privacy will identify themselves to the messaging routers at a point of connection by providing client certificate, both in TLS handshake and in SMP handshake (the same certificate must be provided). All the new queues and subscriptions made in this session will be creating a permanent association of the messaging queue with the client, and on subsequent reconnections the client can "subscribe" to all their queues with a single client subscription command. -This will save a lot of time subscribing and resubscribing on server and client restarts, servers' bandwidth, servers' traffic spikes, and memory of both clients and servers.
+This will save a lot of time subscribing and resubscribing on router and client restarts, routers' bandwidth, routers' traffic spikes, and memory of both clients and routers. ## Protocol -An ephemeral per-session signature key signed by long-term client certificate is used for client authorization – this session signature key will be passed in SMP handshake. +An ephemeral per-session signature key signed by long-term client certificate is used for client authorization -- this session signature key will be passed in SMP handshake. To transition existing queues, the subscription command will have to be double-signed - by the queue key, and then by client key. -When server receives such "hand-over" subscription it would create a permanent association between the client certificate and the queue, and on subsequent re-connections the client can subscribe to all the existing queues still associated with the client with one command. +When router receives such "hand-over" subscription it would create a permanent association between the client certificate and the queue, and on subsequent re-connections the client can subscribe to all the existing queues still associated with the client with one command. -The server will respond to the client with the number of queues it was subscribed to - it would both inform the client that it has to re-connect in case of interruption, and can be used for client and server statistics. +The router will respond to the client with the number of queues it was subscribed to - it would both inform the client that it has to re-connect in case of interruption, and can be used for client and router statistics. When client creates a new queue, it would also sign the request with both keys, per-queue and client's. Other queue operations (e.g., deletion, or changing associated queue data for short links) would still require two signatures, both the queue key and the client key. 
-The open question is whether there is any value in allowing to remove the association between the client and the queue. Probably not, as threat model should assume that the server would retain this information, and the use-case for users controlling their servers is narrow. +The open question is whether there is any value in allowing to remove the association between the client and the queue. Probably not, as threat model should assume that the router would retain this information, and the use-case for users controlling their routers is narrow. ## Protocol connection handshake @@ -69,7 +69,7 @@ data ClientHandshake = ClientHandshake } ``` -`ServerHandshake` already contains `authPubKey` with the server certificate chain and the signed key for connection encryption and creating a shared secret for denable authorization (with client entity key) and session encryption layer. +`ServerHandshake` already contains `authPubKey` with the router certificate chain and the signed key for connection encryption and creating a shared secret for deniable authorization (with client entity key) and session encryption layer. `ClientHandshake` contains only ephemeral `authPubKey` to compute a shared secret for session encryption layer, so we need an additional field for an optional client certificate: @@ -77,9 +77,9 @@ data ClientHandshake = ClientHandshake serviceCertKey :: Maybe (X.CertificateChain, X.SignedExact X.PubKey) ``` -Certificate here defines client identity. The actual key to be used to sign commands is session-scoped, and is signed by the certificate key. In case of notification server it MUST be the same certificate that is used for server TLS connections. +Certificate here defines client identity. The actual key to be used to sign commands is session-scoped, and is signed by the certificate key. In case of notification router it MUST be the same certificate that is used for router TLS connections. 
-For operators' clients we may optionally include operators' certificate in the chain, and that would allow servers to identify operators if either wants to. This would improve end-user security, as not only the server would validate that its certificate matches the address, but it would also validate that it is operated by SimpleX Chat or by Flux, preventing any server impersonation (e.g., via DNS manipulations) - the client could then report that the files are hosted on SimpleX Chat servers, but then can stop and show additional warning in case certificate does not match the domain - same as the browsers do with CA stores in the client. +For operators' clients we may optionally include operators' certificate in the chain, and that would allow routers to identify operators if either wants to. This would improve end-user security, as not only the router would validate that its certificate matches the address, but it would also validate that it is operated by SimpleX Chat or by Flux, preventing any router impersonation (e.g., via DNS manipulations) - the client could then report that the files are hosted on SimpleX Chat routers, but then can stop and show additional warning in case certificate does not match the domain - same as the browsers do with CA stores in the client. ## Protocol transmissions @@ -104,9 +104,9 @@ authenticator = queue_authenticator ("0" / "1" service_authenticator) In case service_authenticator is present, queue_authenticator should authorize over `fingerprint authorized` (concatenation of service identity certificate fingerprint and the rest of the transmission). -All queues created with client key will have to be double-authorized with both the queue key and the client key - both the client and the server would have to maintain this knowledge, whether the queue is associated with the client or not. 
+All queues created with client key will have to be double-authorized with both the queue key and the client key - both the client and the router would have to maintain this knowledge, whether the queue is associated with the client or not. -Asymmetric retries have to be supported - the first request creating this association may succeed on the server and timeout on the client. +Asymmetric retries have to be supported - the first request creating this association may succeed on the router and timeout on the client. ## Subscription @@ -118,7 +118,7 @@ The command and response: SUBS :: Command Recipient -- to enable all client subscriptions, empty entity ID in the transmission, signed by client key - it must be the same as was used in handover subscription signature. NSUBS :: Command Recipient -- notification subscription SOK :: Maybe ServiceId -- new subscription response -SOKS :: Int64 -> BrokerMsg -- response from the server, includes the number of subscribed queues +SOKS :: Int64 -> BrokerMsg -- response from the router, includes the number of subscribed queues ENDS :: Int64 -> BrokerMsg -- when another session subscribes with the same certificate ``` @@ -133,7 +133,7 @@ This was considered to reduce costs for the usual clients to re-subscribe. Curre For some very busy end-user clients it may help. -Given that server has access to an ephemeral association between recipient client session and queues anyway (even with clients connecting via Tor, unless per-connection transport isolation is used), introducing `sessionPubKey` to allow resubscription to the previously subscribed queues may reduce the traffic. This won't change threat model as the server would only keep this association in memory, and not persist it. Clients on another hand may safely persist this association for fast resubscription on client restarts. 
+Given that router has access to an ephemeral association between recipient client session and queues anyway (even with clients connecting via Tor, unless per-connection transport isolation is used), introducing `sessionPubKey` to allow resubscription to the previously subscribed queues may reduce the traffic. This won't change threat model as the router would only keep this association in memory, and not persist it. Clients, on the other hand, may safely persist this association for fast resubscription on client restarts. This is not planned for the forseable future, as migrating to chat relays would solve most of the problem. diff --git a/rfcs/2026-01-30-send-file-page.md b/rfcs/done/2026-01-30-send-file-page.md similarity index 88% rename from rfcs/2026-01-30-send-file-page.md rename to rfcs/done/2026-01-30-send-file-page.md index 0e35d44994..00ac027aec 100644 --- a/rfcs/2026-01-30-send-file-page.md +++ b/rfcs/done/2026-01-30-send-file-page.md @@ -2,14 +2,14 @@ ## 1. Problem & Business Case -There is no way to send or receive files using SimpleX without installing the app. A static web page that implements the XFTP protocol client-side would allow anyone with a browser to upload and download files via XFTP servers, promoting app adoption. +There is no way to send or receive files using SimpleX without installing the app. A static web page that implements the XFTP protocol client-side would allow anyone with a browser to upload and download files via XFTP routers, promoting app adoption. **Business constraints:** - Web page allows up to 100 MB uploads; app allows up to 1 GB. - Page must promote app installation (e.g., banner, messaging around limits). **Security constraint:** -- The server hosting the page must never access file content or file descriptions. The file description is carried in the URL hash fragment (`#`), which browsers do not send to the server. +- The router hosting the page must never access file content or file descriptions.
The file description is carried in the URL hash fragment (`#`), which browsers do not send to the router. - The only way to compromise transfer security is page substitution (serving malicious JS). Mitigations: standard web security (HTTPS, CSP, SRI) and IPFS hosting with page fingerprints published in multiple independent locations. ## 2. Design Overview @@ -29,7 +29,7 @@ There is no way to send or receive files using SimpleX without installing the ap │ fetch() over HTTP/2 │ fetch() over HTTP/2 ▼ ▼ ┌─────────────────┐ ┌─────────────────┐ -│ XFTP Server 1 │ │ XFTP Server 2 │ +│ XFTP Router 1 │ │ XFTP Router 2 │ │ (SNI→web cert) │ │ (SNI→web cert) │ │ (+CORS headers) │ │ (+CORS headers) │ └─────────────────┘ └─────────────────┘ @@ -59,7 +59,7 @@ There is no way to send or receive files using SimpleX without installing the ap ### 3.3 Error States - File too large (> 100 MB): Show limit message with app install CTA. -- Server unreachable: Retry with exponential backoff, show error after exhausting retries. +- Router unreachable: Retry with exponential backoff, show error after exhausting retries. - File expired: "This file is no longer available" message. - Decryption failure: "File corrupted or link invalid" message. @@ -71,7 +71,7 @@ There is no way to send or receive files using SimpleX without installing the ap https://example.com/file/# ``` -- Hash fragment is never sent to the server. +- Hash fragment is never sent to the router. - Compression: DEFLATE (raw, no gzip/zlib wrapper) — better ratio than LZW for structured text like YAML. - Encoding: Base64url (RFC 4648 §5) — no `+`, `/`, `=`, or `%` characters. @@ -79,18 +79,18 @@ Alternative: LZW + base64url if DEFLATE proves problematic. Both should be evalu ### 4.2 Redirect Mechanism -For files with many chunks, the YAML file description can exceed a practical URL length. The threshold is ~600 bytes of compressed+encoded description (configurable). 
+For files with many data packets, the YAML file description can exceed a practical URL length. The threshold is ~600 bytes of compressed+encoded description (configurable). **Flow when description is too large:** 1. Serialize recipient file description to YAML. 2. Encrypt YAML using fresh key + nonce (same XSalsa20-Poly1305 as files). -3. Upload encrypted YAML as a single-chunk "file" to one randomly chosen XFTP server. +3. Upload encrypted YAML as a single-packet "file" to one randomly chosen XFTP router. 4. Create redirect description pointing to this uploaded description. -5. Encode redirect description into URL (always small — single chunk). +5. Encode redirect description into URL (always small — single data packet). **Download with redirect:** 1. Parse URL → redirect description (has `redirect` field with `size` and `digest`). -2. Download the description "file" using the single chunk reference. +2. Download the description "file" using the single data packet reference. 3. Decrypt → get full YAML description. 4. Validate size and digest match redirect metadata. 5. Proceed with normal download using full description. @@ -99,11 +99,11 @@ For files with many chunks, the YAML file description can exceed a practical URL These estimates are preliminary and may be incorrect. -| Scenario | Chunks | Compressed+encoded size | URL length | +| Scenario | Data packets | Compressed+encoded size | URL length | |----------|--------|------------------------|------------| -| Small file (1 chunk, 1 server) | 1 | ~300 bytes | ~350 chars | -| Medium file (5 chunks, 1 server) | 5 | ~500 bytes | ~550 chars | -| Large file (25+ chunks) | 25 | Exceeds threshold → redirect | ~350 chars | +| Small file (1 data packet, 1 router) | 1 | ~300 bytes | ~350 chars | +| Medium file (5 data packets, 1 router) | 5 | ~500 bytes | ~550 chars | +| Large file (25+ data packets) | 25 | Exceeds threshold → redirect | ~350 chars | ## 5. 
TypeScript XFTP Client Library @@ -141,7 +141,7 @@ The XFTP wire format uses a custom binary encoding (from `Simplex.Messaging.Enco - Fields separated by space (0x20). - `signature`: Ed25519 signature over `(sessionId ++ corrId ++ entityId ++ encodedCommand)`. - `corrId`: Correlation ID (arbitrary, echoed in response). - - `entityId`: File/chunk ID on server. + - `entityId`: File/data packet ID on router. - Command: tag + space-separated fields. - **Padding:** 2-byte big-endian length prefix + message + `#` (0x23) fill to block size (16384 bytes). @@ -154,7 +154,7 @@ The XFTP wire format uses a custom binary encoding (from `Simplex.Messaging.Enco | Transit decryption (download) | XSalsa20-Poly1305 (streaming: `cbInit` + `sbDecryptChunk`) | DH shared secret | 24 B | 16 B | libsodium.js | | Command signing | Ed25519 | 64 B (private) | — | 64 B (sig) | libsodium.js | | DH key exchange | X25519 | 32 B | — | — | libsodium.js | -| Chunk digest | SHA-256 | — | — | 32 B | Web Crypto API | +| Data packet digest | SHA-256 | — | — | 32 B | Web Crypto API | | File digest | SHA-512 | — | — | 64 B | Web Crypto API | | Random bytes | ChaCha20-DRBG | — | — | — | libsodium.js `randombytes_buf` | @@ -204,7 +204,7 @@ async function sendXFTPCommand( - Firefox 102+: Supported - Safari 16.4+: Supported -For older browsers, fall back to `ArrayBuffer` body (buffer entire chunk in memory). +For older browsers, fall back to `ArrayBuffer` body (buffer entire data packet in memory). ### 5.5 Upload Orchestration @@ -220,22 +220,22 @@ For older browsers, fall back to `ArrayBuffer` body (buffer entire chunk in memo d. Encrypt `'#'` padding in 65536-byte chunks to fill `encSize - authTagSize - fileSize' - 8` e. Finalize: `sbAuth(state)` → append 16-byte auth tag 6. Compute SHA-512 digest of encrypted data -7. Split into chunks using prepareChunkSizes algorithm: - - > 75% of 4MB → 4MB chunks - - > 75% of 1MB → 1MB + 4MB chunks - - Otherwise → 64KB + 256KB chunks -8. 
For each chunk (parallel, up to 8 concurrent): +7. Split into data packets using prepareChunkSizes algorithm: + - > 75% of 4MB → 4MB data packets + - > 75% of 1MB → 1MB + 4MB data packets + - Otherwise → 64KB + 256KB data packets +8. For each data packet (parallel, up to 8 concurrent): a. Generate Ed25519 sender keypair b. Generate Ed25519 recipient keypair (1 recipient for web) - c. Compute SHA-256 chunk digest - d. Connect to XFTP server (handshake if new connection) + c. Compute SHA-256 data packet digest + d. Connect to XFTP router (handshake if new connection) e. Send FNEW { sndKey, size, digest } + recipient keys → receive (senderId, [recipientId]) - f. Send FPUT with chunk data → receive OK + f. Send FPUT with data packet content → receive OK g. Report progress -9. Build FileDescription YAML from all chunk metadata +9. Build FileDescription YAML from all data packet metadata 10. If YAML size (compressed+encoded) > threshold: a. Encrypt YAML as a file - b. Upload encrypted YAML (single chunk) → get redirect description + b. Upload encrypted YAML (single data packet) → get redirect description c. Use redirect description for URL 11. Compress + base64url encode description 12. Display URL: https://example.com/file/# @@ -247,32 +247,32 @@ For older browsers, fall back to `ArrayBuffer` body (buffer entire chunk in memo 1. Parse URL hash fragment 2. Base64url decode + decompress → YAML 3. Parse YAML → FileDescription -4. Validate description (sequential chunks, sizes match) +4. Validate description (sequential data packets, sizes match) 5. If redirect field present: - a. Download redirect file (single chunk) + a. Download redirect file (single data packet) b. Decrypt, validate size+digest, parse inner description c. Continue with inner description -6. For each chunk (parallel, up to 8 concurrent): +6. For each data packet (parallel, up to 8 concurrent): a. Generate ephemeral X25519 keypair - b. Connect to XFTP server (web handshake) + b. 
Connect to XFTP router (web handshake) c. Send FGET { recipientDhPubKey } → receive (serverDhPubKey, cbNonce) + encrypted body d. Compute DH shared secret - e. Transit-decrypt chunk body (XSalsa20-Poly1305 with DH secret) - f. Verify chunk digest (SHA-256) + e. Transit-decrypt data packet body (XSalsa20-Poly1305 with DH secret) + f. Verify data packet digest (SHA-256) g. Send FACK → receive OK h. Report progress -7. Concatenate all transit-decrypted chunks (in order) → encrypted file +7. Concatenate all transit-decrypted data packets (in order) → encrypted file 8. Verify file digest (SHA-512) 9. File-decrypt entire stream (XSalsa20-Poly1305 with file key + nonce) 10. Extract FileHeader → get original fileName 11. Trigger browser download (Blob + or File System Access API) ``` -## 6. XFTP Server Changes +## 6. XFTP Router Changes ### 6.1 SNI-Based Certificate Switching -The SMP server already implements SNI-based certificate switching (see `Transport/Server.hs:255-269`). The same mechanism must be added to the XFTP server. +The SMP router already implements SNI-based certificate switching (see `Transport/Server.hs:255-269`). The same mechanism must be added to the XFTP router. **Current SMP implementation:** ```haskell @@ -292,14 +292,14 @@ T.onServerNameIndication = case sniCredential of **Certificate setup:** - XFTP identity certificate: Existing self-signed CA chain (used for protocol identity via fingerprint). -- Web certificate: Standard CA-issued TLS certificate (e.g., Let's Encrypt) for the server's FQDN. +- Web certificate: Standard CA-issued TLS certificate (e.g., Let's Encrypt) for the router's FQDN. - Both certificates served on the same port (443). ### 6.2 CORS Support -Browsers enforce same-origin policy. The web page (served from `example.com`) must make cross-origin requests to XFTP servers (`xftp1.simplex.im`, etc.). +Browsers enforce same-origin policy. 
The web page (served from `example.com`) must make cross-origin requests to XFTP routers (`xftp1.simplex.im`, etc.). -**Required server changes:** +**Required router changes:** 1. **Handle OPTIONS preflight requests:** ``` @@ -319,45 +319,45 @@ Browsers enforce same-origin policy. The web page (served from `example.com`) mu Access-Control-Expose-Headers: * ``` -3. **Implementation location:** In `runHTTP2Server` handler or a wrapper around the XFTP request handler. Detect the `Origin` header → add CORS headers. This can be conditional on web mode being enabled in config. +3. **Implementation location:** In `runHTTP2Server` handler or a wrapper around the XFTP request handler. Detect the `Origin` header → add CORS headers. This can be conditional on web mode being enabled in the router config. **Security consideration:** `Access-Control-Allow-Origin: *` is safe here because: -- All XFTP commands require Ed25519 authentication (per-chunk keys from file description). +- All XFTP commands require Ed25519 authentication (per-packet keys from file description). - No cookies or browser credentials are involved. - File content is end-to-end encrypted. -### 6.3 Web Handshake with Server Identity Proof +### 6.3 Web Handshake with Router Identity Proof **Both SNI and web handshake are required.** They solve different problems: -1. **SNI certificate switching** is required because browsers reject self-signed certificates. The XFTP identity certificate is self-signed (CA chain with offline root), so the server must present a standard CA-issued web certificate (e.g., Let's Encrypt) when a browser connects. SNI is how the server detects this. +1. **SNI certificate switching** is required because browsers reject self-signed certificates. The XFTP identity certificate is self-signed (CA chain with offline root), so the router must present a standard CA-issued web certificate (e.g., Let's Encrypt) when a browser connects. SNI is how the router detects this. 2. 
**Web handshake with challenge-response** is required because browsers cannot access the TLS certificate fingerprint or the TLS-unique channel binding (`sessionId`). The native client validates XFTP identity by checking the certificate chain fingerprint against the known `keyHash` and binding it to the TLS session. The browser gets none of this — it only knows TLS succeeded with some CA-issued cert. So the XFTP identity must be proven at the protocol level. **Standard handshake (unchanged for native clients):** ``` -1. Client → empty POST body → Server -2. Server → padded { vRange, sessionId, CertChainPubKey } → Client -3. Client → padded { version, keyHash } → Server -4. Server → empty → Client +1. Client → empty POST body → Router +2. Router → padded { vRange, sessionId, CertChainPubKey } → Client +3. Client → padded { version, keyHash } → Router +4. Router → empty → Client ``` **Web handshake (new, when SNI is detected):** ``` -1. Client → padded { challenge: 32 random bytes } → Server -2. Server → padded { vRange, sessionId, CertChainPubKey } (header block) +1. Client → padded { challenge: 32 random bytes } → Router +2. Router → padded { vRange, sessionId, CertChainPubKey } (header block) + extended body { fullCertChain, signature(challenge ++ sessionId) } → Client 3. Client validates: - Certificate chain CA fingerprint matches known keyHash - Signature over (challenge ++ sessionId) is valid under cert's public key - - This proves: server controls XFTP identity key AND is live (not replay) -4. Client → padded { version, keyHash } → Server -5. Server → empty → Client + - This proves: router controls XFTP identity key AND is live (not replay) +4. Client → padded { version, keyHash } → Router +5. Router → empty → Client ``` -**Detection mechanism:** The server detects web clients by the `sniCredUsed` flag (already available from the TLS layer). 
When SNI is detected, the server expects a challenge in the first POST body (non-empty, unlike standard handshake where it is empty). No marker byte is needed — SNI presence is the discriminator. +**Detection mechanism:** The router detects web clients by the `sniCredUsed` flag (already available from the TLS layer). When SNI is detected, the router expects a challenge in the first POST body (non-empty, unlike standard handshake where it is empty). No marker byte is needed — SNI presence is the discriminator. -**Block size note:** The XFTP block size is 16384 bytes (`Protocol.hs:65`). The XFTP identity certificate chain fits within this block. The signed challenge response is sent as an extended body (streamed after the 16384-byte header block), same mechanism as file chunk data. +**Block size note:** The XFTP block size is 16384 bytes (`Protocol.hs:65`). The XFTP identity certificate chain fits within this block. The signed challenge response is sent as an extended body (streamed after the 16384-byte header block), same mechanism as data packet content. ### 6.4 Protocol Version and Handshake Extension @@ -373,11 +373,11 @@ The XFTP handshake is binary-encoded via the `Encoding` typeclass (`Transport.hs ### 6.5 Serving the Static Page -The XFTP server can optionally serve the static web page itself (similar to how SMP servers serve info pages). When a browser connects via SNI and sends a GET request (not POST), the server serves the HTML/JS/CSS bundle. +The XFTP router can optionally serve the static web page itself (similar to how SMP routers serve info pages). When a browser connects via SNI and sends a GET request (not POST), the router serves the HTML/JS/CSS bundle. -This can be implemented identically to the SMP server's static page serving (`apps/smp-server/web/Static.hs`), using Warp to handle HTTP requests on the same TLS connection. 
+This can be implemented identically to the SMP router's static page serving (`apps/smp-server/web/Static.hs`), using Warp to handle HTTP requests on the same TLS connection. -Alternatively, the page is hosted on a separate web server (e.g., `files.simplex.chat`). The XFTP servers only need to handle XFTP protocol requests (POST) with CORS headers. +Alternatively, the page is hosted on a separate web server (e.g., `files.simplex.chat`). The XFTP routers only need to handle XFTP protocol requests (POST) with CORS headers. ## 7. Security Analysis @@ -386,24 +386,24 @@ Alternatively, the page is hosted on a separate web server (e.g., `files.simplex | Threat | Mitigation | Residual Risk | |--------|-----------|---------------| | Page substitution (malicious JS) | HTTPS, CSP, SRI; IPFS hosting with fingerprints in multiple locations | If web server is compromised and IPFS is not used, all guarantees lost. Fundamental limitation of web-based E2E crypto, mitigated by IPFS. | -| MITM between browser and XFTP server | XFTP identity verification via challenge-response handshake | Attacker can relay traffic (see §7.2) but cannot read file content due to E2E encryption. | -| File description leakage | Hash fragment (`#`) is never sent to server | If browser extension or malware reads URL bar, description is exposed. | -| Server learns file content | File encrypted client-side before upload (XSalsa20-Poly1305) | Server sees encrypted chunks only. | +| MITM between browser and XFTP router | XFTP identity verification via challenge-response handshake | Attacker can relay traffic (see §7.2) but cannot read file content due to E2E encryption. | +| File description leakage | Hash fragment (`#`) is never sent to router | If browser extension or malware reads URL bar, description is exposed. | +| Router learns file content | File encrypted client-side before upload (XSalsa20-Poly1305) | Router sees encrypted data packets only. 
| | Traffic analysis | File size visible to network observers | Same as native XFTP client. | ### 7.2 Relay Attack Analysis -An attacker who controls the network could relay all traffic between the browser and the real XFTP server: +An attacker who controls the network could relay all traffic between the browser and the real XFTP router: -1. Browser sends challenge to "attacker's server" -2. Attacker relays to real server -3. Real server signs challenge + sessionId with XFTP identity key +1. Browser sends challenge to "attacker's router" +2. Attacker relays to real router +3. Real router signs challenge + sessionId with XFTP identity key 4. Attacker relays signed response to browser -5. Browser validates ✓ (signature is from the real server) +5. Browser validates ✓ (signature is from the real router) However, the attacker **cannot read file content** because: - File encryption key is in the hash fragment (never sent over network) -- Transit encryption uses DH key exchange (FGET) — attacker doesn't have server's DH private key +- Transit encryption uses DH key exchange (FGET) — attacker doesn't have router's DH private key - The attacker can observe transfer sizes and timing, but this is already visible via traffic analysis The relay attack is equivalent to a passive network observer, which is the same threat model as native XFTP. 
@@ -414,6 +414,7 @@ The relay attack is equivalent to a passive network observer, which is the same |----------|--------------|------------| | TLS certificate validation | XFTP identity cert via fingerprint pinning | Web CA cert via browser + XFTP identity via challenge-response | | Session binding | TLS-unique binds to XFTP identity cert | TLS-unique binds to web cert; challenge binds to XFTP identity | + | Code integrity | Binary signed/distributed via app stores | Served over HTTPS; SRI for subresources; IPFS hosting option; vulnerable to server compromise | | File encryption | XSalsa20-Poly1305 | Same | | Transit encryption | DH + XSalsa20-Poly1305 | Same | @@ -421,8 +422,8 @@ The relay attack is equivalent to a passive network observer, which is the same ### 7.4 Layman Security Summary (Displayed on Page) The web page should display a brief, non-technical security summary explaining to users: -- Files are encrypted in the browser before upload — the server never sees file contents. -- The file link (URL) contains the decryption key in the hash fragment, which the browser never sends to any server. +- Files are encrypted in the browser before upload — the router never sees file contents. +- The file link (URL) contains the decryption key in the hash fragment, which the browser never sends to any router. - Only someone with the exact link can download and decrypt the file. - The main risk is if the web page itself is tampered with (page substitution attack). IPFS hosting mitigates this. - For maximum security, use the SimpleX app instead. @@ -445,10 +446,10 @@ The web page should display a brief, non-technical security summary explaining t - Well-understood, readable, auditable by the community. - Rich crypto ecosystem (libsodium.js provides all needed NaCl primitives as WASM). - Direct access to browser APIs (fetch, File, ReadableStream, Blob). -- Testable in Node.js against Haskell XFTP server. +- Testable in Node.js against Haskell XFTP router. 
- Small bundle size (~200 KB with libsodium WASM). -**Risk:** Exact byte-level wire compatibility requires careful encoding implementation and thorough testing against the Haskell server. +**Risk:** Exact byte-level wire compatibility requires careful encoding implementation and thorough testing against the Haskell router. ### 8.3 Option 3: C to WASM @@ -476,14 +477,14 @@ The web page should display a brief, non-technical security summary explaining t 4. Handshake encoding/decoding (protocol/handshake.ts) — 18 tests 5. Identity proof verification (crypto/identity.ts) — 15 tests 6. File descriptions: types, YAML, validation (protocol/description.ts) — 13 tests -7. Chunk sizing: prepareChunkSizes, singleChunkSize, etc. (protocol/chunks.ts) — 4 tests +7. Data packet sizing: prepareChunkSizes, singleChunkSize, etc. (protocol/chunks.ts) — 4 tests 8. Transport crypto: cbAuthenticate/cbVerify, transit encrypt/decrypt (protocol/client.ts) — 10 tests -9. Server address parsing (protocol/address.ts) — 3 tests +9. Router address parsing (protocol/address.ts) — 3 tests 10. Download helpers: DH, transit-decrypt, file-decrypt (download.ts) — 11 tests -### Phase 2: XFTP Server Changes — DONE +### Phase 2: XFTP Router Changes — DONE -**Goal:** XFTP servers support web client connections. +**Goal:** XFTP routers support web client connections. **Completed** (7 Haskell integration tests passing): 1. SNI certificate switching — `TLSServerCredential` mechanism for XFTP @@ -493,20 +494,20 @@ The web page should display a brief, non-technical security summary explaining t ### Phase 3: HTTP/2 Client + Agent Orchestration -**Goal:** Complete XFTP client that can upload and download files against a real Haskell XFTP server. +**Goal:** Complete XFTP client that can upload and download files against a real Haskell XFTP router. 1. 
**`client.ts`** ← `Simplex.FileTransfer.Client` — HTTP/2 client via `fetch()` / `node:http2`: connect + handshake, sendCommand, createChunk, uploadChunk, downloadChunk, deleteChunk, ackChunk, ping. -2. **`agent.ts`** ← `Simplex.FileTransfer.Client.Main` — Upload orchestration (encrypt → chunk → register → upload → build description), download orchestration (parse → download → verify → decrypt → ack), URL encoding with DEFLATE compression (§4.1). +2. **`agent.ts`** ← `Simplex.FileTransfer.Client.Main` — Upload orchestration (encrypt → split into data packets → register → upload → build description), download orchestration (parse → download → verify → decrypt → ack), URL encoding with DEFLATE compression (§4.1). ### Phase 4: Integration Testing -**Goal:** Prove the TypeScript client is wire-compatible with the Haskell server. +**Goal:** Prove the TypeScript client is wire-compatible with the Haskell router. 1. **Test harness** — Haskell-driven tests in `XFTPWebTests.hs` (same pattern as per-function tests). -2. **Upload test** — TypeScript uploads file → Haskell client downloads it → verify contents match. -3. **Download test** — Haskell client uploads file → TypeScript downloads it → verify contents match. +2. **Upload test** — TypeScript uploads file → Haskell client downloads it → verify content matches. +3. **Download test** — Haskell client uploads file → TypeScript downloads it → verify content matches. 4. **Round-trip test** — TypeScript upload → TypeScript download → verify. -5. **Edge cases** — Single chunk, many chunks, exactly-sized chunks, redirect descriptions. +5. **Edge cases** — Single data packet, many data packets, exactly-sized data packets, redirect descriptions. ### Phase 5: Web Page @@ -517,11 +518,11 @@ The web page should display a brief, non-technical security summary explaining t 3. **Download UI** — Parse URL, show file info, download button, progress circle. 4. **App install CTA** — Banner/messaging promoting SimpleX app for larger files. 
-### Phase 6: Server-Hosted Page (Optional) +### Phase 6: Router-Hosted Page (Optional) -**Goal:** XFTP servers can optionally serve the web page themselves. +**Goal:** XFTP routers can optionally serve the web page themselves. -1. **Static file serving** — Similar to SMP server's `attachStaticFiles`. +1. **Static file serving** — Similar to SMP router's `attachStaticFiles`. 2. **GET handler** — When web client sends HTTP GET (not POST), serve HTML page. 3. **Page generation** — Embed page bundle at server build time. @@ -588,9 +589,9 @@ cabal test --ghc-options -O0 --test-option=--match="/XFTP Web Client/" **Random inputs:** Haskell tests can use QuickCheck to generate random inputs each run, not just hardcoded values. This catches edge cases that fixed test vectors miss. -### 10.2 Integration Tests (TS-driven, spawns Haskell server) +### 10.2 Integration Tests (TS-driven, spawns Haskell router) -**Only attempted after all per-function tests (§10.1) pass.** These are end-to-end tests that verify the full upload/download pipeline works against a real XFTP server. +**Only attempted after all per-function tests (§10.1) pass.** These are end-to-end tests that verify the full upload/download pipeline works against a real XFTP router. **Approach:** Node.js test (`xftp-web/test/integration.test.ts`) spawns `xftp-server` and `xftp` CLI as subprocesses. @@ -615,7 +616,7 @@ cabal test --ghc-options -O0 --test-option=--match="/XFTP Web Client/" 3. TypeScript upload + download round-trip. 4. Web handshake with challenge-response validation. 5. Redirect descriptions (large file → compressed description upload). -6. Multiple chunks across multiple servers. +6. Multiple data packets across multiple routers. 7. Error cases: expired file, auth failure, digest mismatch. ### 10.3 Browser Tests @@ -635,7 +636,7 @@ The per-function tests (§10.1) must pass before attempting integration tests ( 5. **Protocol encoding** — command/response encoding, transmission framing (§12.2, §12.3) 6. 
**Handshake** — handshake type encoding/decoding (§12.9) 7. **Description** — YAML serialization, validation (§12.12–§12.14) -8. **Chunk sizing** — `prepareChunkSizes`, `getChunkDigest` (§12.11) +8. **Data packet sizing** — `prepareChunkSizes`, `getChunkDigest` (§12.11) 9. **Transport client** — `sendCommand`, `createChunk`, `uploadChunk`, `downloadChunk` (§12.10) 10. **Integration** — full upload/download round-trips (§10.2) @@ -660,7 +661,7 @@ The TypeScript implementation must reimplement the exact streaming logic using l ### 11.3 Web Client Detection -Both SNI and web handshake are mandatory (see §6.3). SNI detection (`sniCredUsed` flag) is the discriminator — when SNI is detected, the server expects the web handshake variant. +Both SNI and web handshake are mandatory (see §6.3). SNI detection (`sniCredUsed` flag) is the discriminator — when SNI is detected, the router expects the web handshake variant. ### 11.4 URL Compression @@ -677,32 +678,32 @@ XSalsa20-Poly1305 streaming encryption/decryption is sequential — each 64KB bl **Upload flow:** 1. `File.stream()` → encrypt sequentially (state threading) → buffer encrypted output 2. Compute SHA-512 digest of encrypted data -3. Split into chunks, upload in parallel to 8 randomly selected servers (from 6 default servers in `Presets.hs`) +3. Split into data packets, upload in parallel to 8 randomly selected routers (from 6 default routers in `Presets.hs`) **Download flow:** -1. Download chunks in parallel from servers → buffer encrypted data +1. Download data packets in parallel from routers → buffer encrypted data 2. Decrypt sequentially (state threading) → verify auth tag 3. Trigger browser save Both directions buffer ~100 MB of encrypted data. The approach should be symmetric. -**Option A — Memory buffer:** Buffer encrypted data as `ArrayBuffer`. 100 MB peak memory is feasible on modern devices. Simple implementation, no Web Worker needed. Chunk slicing is zero-copy via `ArrayBuffer.slice()`. 
+**Option A — Memory buffer:** Buffer encrypted data as `ArrayBuffer`. 100 MB peak memory is feasible on modern devices. Simple implementation, no Web Worker needed. Data packet slicing is zero-copy via `ArrayBuffer.slice()`. **Option B — OPFS ([Origin Private File System](https://developer.mozilla.org/en-US/docs/Web/API/File_System_API/Origin_private_file_system)):** Write encrypted data to OPFS instead of holding in memory. OPFS storage quota is shared with IndexedDB/Cache API — typically hundreds of MB to several GB ([quota details](https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria)). The fast synchronous API (`createSyncAccessHandle()`) requires a [Web Worker](https://developer.mozilla.org/en-US/docs/Web/API/FileSystemFileHandle/createSyncAccessHandle) but is [3-4x faster than IndexedDB](https://web.dev/articles/origin-private-file-system). The async API (`createWritable()`) works on the main thread. **Decision:** Use OPFS with a Web Worker. While 100 MB fits in memory, OPFS future-proofs the implementation for raising the file size limit (250 MB, 500 MB, etc.) without code changes. The Web Worker also keeps the main thread responsive during encryption/decryption. The implementation cost is modest — a single worker that runs the sequential crypto pipeline, reading/writing OPFS files. -### 11.7 Server Page Hosting +### 11.7 Router Page Hosting -Excluded from initial implementation. Added at the very end (Phase 5) as optional feature. Initial deployment serves the page from a separate web host. +Excluded from initial implementation. Added at the very end (Phase 6) as an optional feature. Initial deployment serves the page from a separate web server. ### 11.8 File Expiry Communication -Hardcode 48 hours for standalone web page. Server-hosted page can use server-configurable TTL. The page should also display which XFTP servers were used for the upload. +Hardcode 48 hours for standalone web page.
Router-hosted page can use router-configurable TTL. The page should also display which XFTP routers were used for the upload. ### 11.9 Concurrent Operations -8 parallel operations in the browser. The Haskell CLI uses 16, but browsers have per-origin connection limits (6-8). Since chunks typically go to different servers (different origins), 8 provides good parallelism without hitting browser limits. +8 parallel operations in the browser. The Haskell CLI uses 16, but browsers have per-origin connection limits (6-8). Since data packets typically go to different routers (different origins), 8 provides good parallelism without hitting browser limits. ## 12. Haskell-to-TypeScript Function Mapping @@ -861,13 +862,13 @@ Note: `encryptFile` does NOT use `padLazy` or `sbEncryptTailTag`. It manually pr **`decryptChunks` algorithm** (lines 57-111) — two paths: -**Single chunk (one file, line 60):** Calls `sbDecryptTailTag(key, nonce, encSize - authTagSize, data)` directly. This internally decrypts, verifies auth tag, and strips the 8-byte length prefix + padding via `unPad`. Returns `(authOk, content)`. Then parses `FileHeader` from content. +**Single data packet (one file, line 60):** Calls `sbDecryptTailTag(key, nonce, encSize - authTagSize, data)` directly. This internally decrypts, verifies auth tag, and strips the 8-byte length prefix + padding via `unPad`. Returns `(authOk, content)`. Then parses `FileHeader` from content. -**Multi-chunk (line 67):** +**Multi-packet (line 67):** 1. `sbInit(key, nonce)` → init state -2. Decrypt first chunk file: `sbDecryptChunkLazy(state, chunk)` → `splitLen` extracts 8-byte `expectedLen` → parse `FileHeader` -3. Decrypt middle chunk files: `sbDecryptChunkLazy(state, chunk)` loop, write to output, accumulate `len` -4. Decrypt last chunk file: split off last 16 bytes as auth tag → `sbDecryptChunkLazy(state, remaining)` → truncate padding using `expectedLen` vs accumulated `len` → verify `sbAuth(finalState) == authTag` +2. 
Decrypt first data packet: `sbDecryptChunkLazy(state, chunk)` → `splitLen` extracts 8-byte `expectedLen` → parse `FileHeader` +3. Decrypt middle data packets: `sbDecryptChunkLazy(state, chunk)` loop, write to output, accumulate `len` +4. Decrypt last data packet: split off last 16 bytes as auth tag → `sbDecryptChunkLazy(state, remaining)` → truncate padding using `expectedLen` vs accumulated `len` → verify `sbAuth(finalState) == authTag` **`FileHeader`** (`Types.hs:35`): `{fileName :: String, fileExtra :: Maybe String}`, parsed via `smpP`. @@ -888,18 +889,18 @@ XFTP handshake types and encoding. ### 12.10 `protocol/client.ts` ← `Simplex/FileTransfer/Client.hs` (crypto primitives) — DONE -Transport-level crypto for command authentication and chunk encryption/decryption. +Transport-level crypto for command authentication and data packet encryption/decryption. | TypeScript function | Haskell function | Description | Status | |---|---|---|---| | `cbAuthenticate(peerPub, ownPriv, nonce, msg)` | `C.cbAuthenticate` | 80-byte crypto_box authenticator | ✓ | | `cbVerify(peerPub, ownPriv, nonce, auth, msg)` | `C.cbVerify` | Verify authenticator | ✓ | -| `encryptTransportChunk(dhSecret, nonce, plain)` | `sendEncFile` | Encrypt chunk (tag appended) | ✓ | -| `decryptTransportChunk(dhSecret, nonce, enc)` | `receiveEncFile` | Decrypt chunk (tag verified) | ✓ | +| `encryptTransportChunk(dhSecret, nonce, plain)` | `sendEncFile` | Encrypt data packet (tag appended) | ✓ | +| `decryptTransportChunk(dhSecret, nonce, enc)` | `receiveEncFile` | Decrypt data packet (tag verified) | ✓ | ### 12.11 `protocol/chunks.ts` ← `Simplex/FileTransfer/Chunks.hs` + `Client.hs` — DONE -Chunk size selection and file splitting. +Data packet size selection and file splitting. | TypeScript function/constant | Haskell equivalent | Status | |---|---|---| @@ -944,7 +945,7 @@ HTTP/2 XFTP client using `node:http2` (Node.js) or `fetch()` (browser). 
Transpil **XFTPClient state** (returned by `connectXFTP`): - HTTP/2 session (node: `ClientHttp2Session`, browser: base URL for fetch) - `thParams`: `{sessionId, blockSize, thVersion, thAuth}` from handshake -- Server address for reconnection +- Router address for reconnection **sendXFTPCommand wire format:** 1. `xftpEncodeAuthTransmission(thParams, pKey, (corrId, fId, cmd))` → padded 16KB block @@ -960,16 +961,16 @@ Upload/download orchestration and URL encoding. Combines what the RFC originally | TypeScript function | Haskell function | Line | Description | |---|---|---|---| -| `encryptFileForUpload(file, fileName)` | `encryptFileForUpload` | 264 | key/nonce → encrypt → digest → chunk specs | +| `encryptFileForUpload(file, fileName)` | `encryptFileForUpload` | 264 | key/nonce → encrypt → digest → data packet specs | | `uploadFile(client, chunkSpecs, servers, numRcps)` | `uploadFile` | 285 | Parallel upload (up to 16 concurrent) | -| `uploadFileChunk(client, chunkNo, spec, server)` | `uploadFileChunk` | 301 | FNEW + FPUT for one chunk | +| `uploadFileChunk(client, chunkNo, spec, server)` | `uploadFileChunk` | 301 | FNEW + FPUT for one data packet | | `createRcvFileDescriptions(fd, sentChunks)` | `createRcvFileDescriptions` | 329 | Build per-recipient descriptions | | `createSndFileDescription(fd, sentChunks)` | `createSndFileDescription` | 361 | Build sender (deletion) description | **Upload call sequence** (`cliSendFileOpts`, line 243): 1. `encryptFileForUpload` — `randomSbKey` + `randomCbNonce` → `encryptFile` → `sha512Hash` digest → `prepareChunkSpecs` -2. `uploadFile` — for each chunk: generate sender/recipient key pairs, `createXFTPChunk`, `uploadXFTPChunk` -3. `createRcvFileDescriptions` — assemble `FileDescription` per recipient from sent chunks +2. `uploadFile` — for each data packet: generate sender/recipient key pairs, `createXFTPChunk`, `uploadXFTPChunk` +3. `createRcvFileDescriptions` — assemble `FileDescription` per recipient from sent data packets 4. 
`createSndFileDescription` — assemble sender description with deletion keys **Download functions:** @@ -977,17 +978,17 @@ Upload/download orchestration and URL encoding. Combines what the RFC originally | TypeScript function | Haskell function | Line | Description | |---|---|---|---| | `downloadFile(description)` | `cliReceiveFile` | 388 | Full download: parse → download → verify → decrypt | -| `downloadFileChunk(client, chunk)` | `downloadFileChunk` | 418 | FGET + transit-decrypt one chunk | -| `ackFileChunk(client, chunk)` | `acknowledgeFileChunk` | 440 | FACK one chunk | -| `deleteFile(description)` | `cliDeleteFile` | 455 | FDEL for all chunks | +| `downloadFileChunk(client, chunk)` | `downloadFileChunk` | 418 | FGET + transit-decrypt one data packet | +| `ackFileChunk(client, chunk)` | `acknowledgeFileChunk` | 440 | FACK one data packet | +| `deleteFile(description)` | `cliDeleteFile` | 455 | FDEL for all data packets | **Download call sequence** (`cliReceiveFile`, line 388): 1. Parse and validate `FileDescription` from YAML -2. Group chunks by server -3. Parallel download: `downloadXFTPChunk` per chunk (up to 16 concurrent) -4. Verify file digest (SHA-512) over concatenated encrypted chunks +2. Group data packets by router +3. Parallel download: `downloadXFTPChunk` per data packet (up to 16 concurrent) +4. Verify file digest (SHA-512) over concatenated encrypted data packets 5. `decryptChunks` — file-level decrypt with auth tag verification -6. Parallel acknowledge: `ackXFTPChunk` per chunk +6. Parallel acknowledge: `ackXFTPChunk` per data packet **URL encoding (§4.1):** @@ -1004,7 +1005,7 @@ Upload/download orchestration and URL encoding. Combines what the RFC originally 2. Send `FGET(rcvDhPubKey)` → receive `FRFile(sndDhPubKey, cbNonce)` + encrypted body 3. Compute DH shared secret: `dh'(sndDhPubKey, rcvDhPrivKey)` (`Crypto.hs:1280`) 4. 
Transit-decrypt body via `receiveSbFile` (`Transport.hs:176`): `cbInit(dhSecret, cbNonce)` → `sbDecryptChunk` loop (`fileBlockSize` = 16384-byte blocks, `Transport/HTTP2/File.hs:14`) → `sbAuth` tag verification at end -5. Verify chunk digest (SHA-256): `getChunkDigest` (`Client.hs:346`) +5. Verify data packet digest (SHA-256): `getChunkDigest` (`Client.hs:346`) ### 12.18 Per-Function Testing: Haskell Drives Node diff --git a/rfcs/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md b/rfcs/done/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md similarity index 89% rename from rfcs/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md rename to rfcs/done/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md index a1a2f47d54..1a20c215a8 100644 --- a/rfcs/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-01-31-xftp-web-server-changes.md @@ -1,12 +1,12 @@ -# XFTP Server: SNI, CORS, and Web Support +# XFTP Router: SNI, CORS, and Web Support Implementation details for Phase 3 of `rfcs/2026-01-30-send-file-page.md` (sections 6.1-6.4). ## 1. Overview -The XFTP server is extended to support web browser clients by: +The XFTP router is extended to support web browser clients by: -1. **SNI-based TLS certificate switching** — Present a CA-issued web certificate (e.g., Let's Encrypt) to browsers, while continuing to present the self-signed XFTP identity certificate to native clients. +1. **SNI-based TLS certificate switching** — Present a CA-issued web certificate (e.g., Let's Encrypt) to browsers, while continuing to present the self-signed XFTP identity certificate to native XFTP clients. 2. **CORS headers** — Add CORS response headers on SNI connections so browsers allow cross-origin XFTP requests. 3. **Configuration** — `[WEB]` INI section for HTTPS cert/key paths; opt-in (commented out by default). 
@@ -16,11 +16,11 @@ Web handshake (challenge-response identity proof, §6.3 of parent RFC) is not ye ### 2.1 Reusing the SMP Pattern -The SMP server already implements SNI-based certificate switching via `TLSServerCredential` and `runTransportServerState_` (see `rfcs/2024-09-15-shared-port.md`). The XFTP server applies the same pattern with one key difference: both native and web XFTP clients use HTTP/2 transport, whereas SMP switches between raw SMP protocol and HTTP entirely. +The SMP router already implements SNI-based certificate switching via `TLSServerCredential` and `runTransportServerState_` (see `rfcs/2024-09-15-shared-port.md`). The XFTP router applies the same pattern with one key difference: both native and web XFTP clients use HTTP/2 transport, whereas SMP switches between raw SMP protocol and HTTP entirely. ### 2.2 Approach -When `httpServerCreds` is configured, the XFTP server bypasses `runHTTP2Server` and uses `runTransportServerState_` directly to obtain the per-connection `sniUsed` flag. It then sets up HTTP/2 manually on each TLS connection using `withHTTP2` (same internals as `runHTTP2ServerWith_`). The `sniUsed` flag is captured in the closure and shared by all HTTP/2 requests on that connection. +When `httpServerCreds` is configured, the XFTP router bypasses `runHTTP2Server` and uses `runTransportServerState_` directly to obtain the per-connection `sniUsed` flag. It then sets up HTTP/2 manually on each TLS connection using `withHTTP2` (same internals as `runHTTP2ServerWith_`). The `sniUsed` flag is captured in the closure and shared by all HTTP/2 requests on that connection. When `httpServerCreds` is absent, the existing `runHTTP2Server` path is unchanged. @@ -33,7 +33,7 @@ Browser client (SNI) ──TLS──> Web CA cert ──HTTP/2──> The web certificate file (e.g., `web.crt`) must contain the full chain: leaf certificate followed by the signing CA certificate. 
`loadServerCredential` uses `T.credentialLoadX509Chain` which reads all PEM blocks from the file. -The client validates the chain by comparing `idCert` fingerprint (the CA cert, second in the 2-cert chain) against the known `keyHash`. This is the same validation as for XFTP identity certificates — the CA that signed the web cert must match the XFTP server's identity. +The client validates the chain by comparing `idCert` fingerprint (the CA cert, second in the 2-cert chain) against the known `keyHash`. This is the same validation as for XFTP identity certificates — the CA that signed the web cert must match the XFTP router's identity. ## 3. CORS Support @@ -69,7 +69,7 @@ Access-Control-Max-Age: 86400 ### 3.4 Security `Access-Control-Allow-Origin: *` is safe because: -- All XFTP commands require Ed25519 authentication (per-chunk keys from file description). +- All XFTP commands require Ed25519 authentication (per-packet keys from file description). - No cookies or browser credentials are involved. - File content is end-to-end encrypted. @@ -87,9 +87,9 @@ Commented out by default — web support is opt-in. ### 4.2 Behavior -- `[WEB]` section not configured: silently ignored, server operates normally for native clients only. +- `[WEB]` section not configured: silently ignored, router operates normally for native clients only. - `[WEB]` section configured with valid cert/key paths: SNI + CORS enabled. -- `[WEB]` section configured with missing cert files: warning + continue (non-fatal, unlike SMP where it is fatal). +- `[WEB]` section configured with missing cert files: warning + continue (non-fatal, unlike SMP router where it is fatal). ## 5. Files Modified @@ -146,9 +146,9 @@ Added SNI and CORS tests as a subsection within `xftpServerTests` (6 tests): 3. **CORS headers** — SNI POST request includes `Access-Control-Allow-Origin: *` and `Access-Control-Expose-Headers: *`. 4. **OPTIONS preflight** — SNI OPTIONS request returns all CORS preflight headers. 5. 
**No CORS without SNI** — Non-SNI POST request has no CORS headers. -6. **File chunk delivery** — Full XFTP file chunk upload/download through SNI-enabled server verifying no regression. +6. **Data packet delivery** — Full XFTP data packet upload/download through SNI-enabled router verifying no regression. ## 6. Remaining Work -- **Web handshake** (§6.3 of parent RFC): Challenge-response identity proof for SNI connections. The server detects web clients via the `sniUsed` flag and expects a 32-byte challenge in the first POST body (non-empty, unlike standard handshake). Response includes full cert chain + signature over `(challenge ++ sessionId)`. +- **Web handshake** (§6.3 of parent RFC): Challenge-response identity proof for SNI connections. The router detects web clients via the `sniUsed` flag and expects a 32-byte challenge in the first POST body (non-empty, unlike standard handshake). Response includes full cert chain + signature over `(challenge ++ sessionId)`. - **Static page serving** (§6.5 of parent RFC): Optional serving of the web page HTML/JS bundle on GET requests. diff --git a/rfcs/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md b/rfcs/done/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md similarity index 97% rename from rfcs/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md index de23bbf8b4..14b8d70588 100644 --- a/rfcs/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-02-02-xftp-web-handshake.md @@ -1,6 +1,6 @@ # Web Handshake — Challenge-Response Identity Proof -RFC §6.3: Server proves XFTP identity to web clients independently of TLS CA infrastructure. +RFC §6.3: Router proves XFTP identity to web clients independently of TLS CA infrastructure. ## 1. Protocol @@ -29,7 +29,7 @@ Server → empty → Client **Detection**: `sniUsed` per-connection flag. 
Non-empty hello allowed only when `sniUsed`. Empty hello with SNI → standard handshake. -**Why both steps 3 and 4**: Native clients verify `signedPubKey` using the TLS peer certificate (`serverKey` from `getServerVerifyKey`), which is the XFTP identity cert in non-SNI connections — TLS provides this binding. Web clients cannot access TLS peer certificate data (browser API limitation; TLS presents the web CA cert but provides no API to extract it). So web clients must verify at the application layer using `authPubKey.certChain`, which always contains the XFTP identity chain regardless of which cert TLS used. Step 3 proves the server holds its identity key *right now* (freshness via random challenge). Step 4 proves the DH session key was signed by the identity key holder (prevents MITM key substitution). Together they give web clients some assurance native clients get from TLS, except channel binding for commands. +**Why both steps 3 and 4**: Native clients verify `signedPubKey` using the TLS peer certificate (`serverKey` from `getServerVerifyKey`), which is the XFTP identity cert in non-SNI connections — TLS provides this binding. Web clients cannot access TLS peer certificate data (browser API limitation; TLS presents the web CA cert but provides no API to extract it). So web clients must verify at the application layer using `authPubKey.certChain`, which always contains the XFTP identity chain regardless of which cert TLS used. Step 3 proves the router holds its identity key *right now* (freshness via random challenge). Step 4 proves the DH session key was signed by the identity key holder (prevents MITM key substitution). Together they give web clients the same assurance that native clients get from TLS, except channel binding for commands. ## 2. Type Changes — `src/Simplex/FileTransfer/Transport.hs` @@ -56,7 +56,7 @@ Same `Tail compat` pattern as server handshake. Both types use `(..)` export — new fields auto-exported. -## 3.
Server Changes — `src/Simplex/FileTransfer/Server.hs` +## 3. Router Changes — `src/Simplex/FileTransfer/Server.hs` ### `XFTPTransportRequest` (line 88) @@ -176,7 +176,7 @@ Remove `extractCertEd25519Key` (replaced by generic path). Keep `extractCertPubl ### 10.5 Tests — `tests/XFTPWebTests.hs` -**Integration test**: Switch from `withXFTPServerEd25519SNI` (Ed25519 fixtures) to `withXFTPServerSNI` (default Ed448 fixtures). Update fingerprint source from `tests/fixtures/ed25519/ca.crt` to `tests/fixtures/ca.crt`. +**Integration test**: Switch from `withXFTPServerEd25519SNI` (Ed25519 fixtures) to `withXFTPServerSNI` (default Ed448 fixtures). Update fingerprint source from `tests/fixtures/ed25519/ca.crt` to the default `tests/fixtures/ca.crt`. Optionally add a second integration test with Ed25519 to cover both paths, or rely on existing unit tests for Ed25519 coverage. diff --git a/rfcs/2026-01-30-send-file-page/2026-02-03-xftp-web-browser-tests.md b/rfcs/done/2026-01-30-send-file-page/2026-02-03-xftp-web-browser-tests.md similarity index 100% rename from rfcs/2026-01-30-send-file-page/2026-02-03-xftp-web-browser-tests.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-03-xftp-web-browser-tests.md diff --git a/rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md b/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md similarity index 100% rename from rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md diff --git a/rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md b/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md similarity index 99% rename from rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md index b69234de82..496b3f73d4 100644 --- a/rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md +++ 
b/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-page.md @@ -20,7 +20,7 @@ Build a static web page for browser-based XFTP file transfer (Phase 5 of master Two build variants: - **Local**: single test server at `localhost:7000` (development/testing) -- **Production**: 12 preset XFTP servers (6 SimpleX + 6 Flux) +- **Production**: 12 preset XFTP routers (6 SimpleX + 6 Flux) Uses Vite for bundling (already a dependency via vitest). No CSS framework — plain CSS per RFC spec. @@ -258,7 +258,7 @@ export function pickRandomServer(servers: XFTPServer[]): XFTPServer { ### 4.3 Assumption -Production XFTP servers must have `[WEB]` section configured with a CA-signed certificate for browser TLS. Without this, browsers will reject the self-signed XFTP identity cert. The local test server uses `tests/fixtures/` certs which Chromium accepts via `ignoreHTTPSErrors`. +Production XFTP routers must have `[WEB]` section configured with a CA-signed certificate for browser TLS. Without this, browsers will reject the self-signed XFTP identity cert. The local test router uses `tests/fixtures/` certs which Chromium accepts via `ignoreHTTPSErrors`. ## 5. Page Structure & UI @@ -293,7 +293,7 @@ Both upload-complete and download-ready states display a brief non-technical sec ### 5.5 File expiry -Display on upload-complete state: "Files are typically available for 48 hours." This is an approximation — actual expiry depends on each XFTP server's `[STORE_LOG]` retention configuration. The 48-hour figure matches the current preset server defaults. +Display on upload-complete state: "Files are typically available for 48 hours." This is an approximation — actual expiry depends on each XFTP router's `[STORE_LOG]` retention configuration. The 48-hour figure matches the current preset router defaults. 
### 5.6 Styling diff --git a/rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-persistent-connections.md b/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-persistent-connections.md similarity index 100% rename from rfcs/2026-01-30-send-file-page/2026-02-04-xftp-web-persistent-connections.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-persistent-connections.md diff --git a/rfcs/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md b/rfcs/done/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md similarity index 98% rename from rfcs/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md index 2dda76aeee..c48e5250ba 100644 --- a/rfcs/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-02-05-xftp-web-e2e-tests.md @@ -19,10 +19,10 @@ This document specifies comprehensive Playwright E2E tests for the XFTP web page - **Upload flow**: File selection (picker + drag-drop), validation, progress, cancellation, link sharing, error handling - **Download flow**: Invalid link handling, download button, progress, file save, error states -- **Edge cases**: Boundary file sizes, special characters, network failures, multi-chunk files with redirect, UI information display +- **Edge cases**: Boundary file sizes, special characters, network failures, multi-packet files with redirect, UI information display **Key constraints**: -- Tests run against a local XFTP server (started via `globalSetup.ts`) +- Tests run against a local XFTP router (started via `globalSetup.ts`) - Server port is dynamic (read from `/tmp/xftp-test-server.port`) - Browser uses `--ignore-certificate-errors` for self-signed certs - OPFS and Web Workers are required (Chromium supports both) @@ -50,7 +50,7 @@ xftp-web/ ### 2.2 Prerequisites -- `globalSetup.ts` starts the XFTP server and writes port to `PORT_FILE` +- `globalSetup.ts` starts the 
XFTP router and writes port to `PORT_FILE` - Tests must read the port dynamically: `readFileSync(PORT_FILE, 'utf-8').trim()` - Vite builds and serves the page at `http://localhost:4173` @@ -699,7 +699,7 @@ test('concurrent downloads from same link', async ({browser}) => { }) ``` -### 6.7 Redirect File Handling (Multi-chunk) +### 6.7 Redirect File Handling (Multi-packet) **Test ID**: `edge-redirect-file` @@ -786,7 +786,7 @@ test('download page shows file size and security note', async ({uploadPage, down ### Phase 7: Error Recovery and Advanced (Priority: Low) 22. `upload-error-retry` - Retry after error 23. `edge-concurrent-downloads` - Concurrent access -24. `edge-redirect-file` - Multi-chunk file with redirect (slow) +24. `edge-redirect-file` - Multi-packet file with redirect (slow) 25. `edge-ui-info` - Expiry message, security notes --- diff --git a/rfcs/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md b/rfcs/done/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md similarity index 94% rename from rfcs/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md index c46f38a46f..e9b8c18852 100644 --- a/rfcs/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-02-08-xftp-web-hello-header.md @@ -2,27 +2,27 @@ ## 1. Problem Statement -Browser HTTP/2 connection pooling reuses TLS connections across page navigations (same origin = same connection pool). The XFTP server maintains per-TLS-connection session state in `TMap SessionId Handshake` keyed by `tlsUniq tls`. When a browser navigates from the upload page to the download page (or reloads), the new page sends a fresh ClientHello on the reused HTTP/2 connection. 
The server is already in `HandshakeAccepted` state for that connection, so it routes the request to `processRequest`, which expects a 16384-byte command block but receives a 34-byte ClientHello → `ERR BLOCK`. +Browser HTTP/2 connection pooling reuses TLS connections across page navigations (same origin = same connection pool). The XFTP router maintains per-TLS-connection session state in `TMap SessionId Handshake` keyed by `tlsUniq tls`. When a browser navigates from the upload page to the download page (or reloads), the new page sends a fresh ClientHello on the reused HTTP/2 connection. The router is already in `HandshakeAccepted` state for that connection, so it routes the request to `processRequest`, which expects a 16384-byte command block but receives a 34-byte ClientHello → `ERR BLOCK`. -**Root cause**: The server cannot distinguish a ClientHello from a command on an already-handshaked connection because both arrive on the same HTTP/2 connection (same `tlsUniq`), and there is no content-level discriminator (ClientHello is unpadded, but the server never gets to parse it — the size check in `processRequest` rejects it first). +**Root cause**: The router cannot distinguish a ClientHello from a command on an already-handshaked connection because both arrive on the same HTTP/2 connection (same `tlsUniq`), and there is no content-level discriminator (ClientHello is unpadded, but the router never gets to parse it — the size check in `processRequest` rejects it first). **Browser limitation**: `fetch()` provides zero control over HTTP/2 connection pooling. There is no browser API to force a new connection or detect connection reuse before a request is sent. ## 2. Solution Summary -Add an HTTP header `xftp-web-hello` to web ClientHello requests. When the server sees this header on an already-handshaked connection (`HandshakeAccepted` state), it re-runs `processHello` **reusing the existing session keys** (same X25519 key pair from the original handshake).
The client then completes the normal handshake flow (sends ClientHandshake, receives ack) and proceeds with commands. +Add an HTTP header `xftp-web-hello` to web ClientHello requests. When the router sees this header on an already-handshaked connection (`HandshakeAccepted` state), it re-runs `processHello` **reusing the existing session keys** (same X25519 key pair from the original handshake). The client then completes the normal handshake flow (sends ClientHandshake, receives ack) and proceeds with commands. Key properties: -- Server reuses existing `serverPrivKey` — no new key material generated on re-handshake, so `thAuth` remains consistent with any in-flight commands on concurrent HTTP/2 streams. +- Router reuses existing `serverPrivKey` — no new key material generated on re-handshake, so `thAuth` remains consistent with any in-flight commands on concurrent HTTP/2 streams. - Header is only checked when `sniUsed` is true (web/browser connections). Native XFTP clients are unaffected. - CORS preflight already allows all headers (`Access-Control-Allow-Headers: *`). - Web clients always send this header on ClientHello — it's harmless on first connection (`Nothing` state) and enables re-handshake on reused connections (`HandshakeAccepted` state). ## 3. Detailed Technical Design -### 3.1 Server change: parameterize `processHello` (`src/Simplex/FileTransfer/Server.hs`) +### 3.1 Router change: parameterize `processHello` (`src/Simplex/FileTransfer/Server.hs`) -The entire server change is parameterizing the existing `processHello` with `Maybe C.PrivateKeyX25519`. Zero new functions. +The entire router change is parameterizing the existing `processHello` with `Maybe C.PrivateKeyX25519`. Zero new functions. #### Current code (lines 165-191): @@ -125,7 +125,7 @@ Add optional `headers?` parameter to `Transport.post()`, thread it through `fetc ### 3.5 Haskell test (`tests/XFTPServerTests.hs`) -Add `testWebReHandshake` next to the existing `testWebHandshake` (line 504). 
It reuses the same SNI + HTTP/2 setup pattern, performs a full handshake, then sends a second ClientHello with the `xftp-web-hello` header on the same connection and verifies the server responds with a valid ServerHandshake (same `sessionId`), then completes the second handshake. +Add `testWebReHandshake` next to the existing `testWebHandshake` (line 504). It reuses the same SNI + HTTP/2 setup pattern, performs a full handshake, then sends a second ClientHello with the `xftp-web-hello` header on the same connection and verifies the router responds with a valid ServerHandshake (same `sessionId`), then completes the second handshake. ```haskell -- Register in xftpServerTests (after line 86): @@ -170,7 +170,7 @@ The only difference from `testWebHandshake`: the second `helloReq2` passes `[("x ## 4. Implementation Plan -### Step 1: Server — parameterize `processHello` +### Step 1: Router — parameterize `processHello` Apply the diff from Section 3.1 to `src/Simplex/FileTransfer/Server.hs`. @@ -216,6 +216,6 @@ Tab A (upload) and Tab B (download) share the same HTTP/2 connection. ## 6. Security Considerations - **No new key material**: Re-handshake reuses existing `serverPrivKey`. No opportunity for key confusion or downgrade. -- **Identity re-verification**: Server re-signs the web challenge with its long-term signing key. Client verifies identity again. -- **Header cannot escalate privileges**: The header only triggers re-handshake (which the server was already capable of doing on first connection). It does not bypass any authentication. +- **Identity re-verification**: Router re-signs the web challenge with its long-term signing key. Client verifies identity again. +- **Header cannot escalate privileges**: The header only triggers re-handshake (which the router was already capable of doing on first connection). It does not bypass any authentication. - **Timing**: Re-handshake takes the same code path as initial handshake, so timing side-channels are unchanged. 
diff --git a/rfcs/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md b/rfcs/done/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md similarity index 81% rename from rfcs/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md index 2802f16a59..522eb070e6 100644 --- a/rfcs/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-02-11-xftp-web-error-handling.md @@ -2,13 +2,13 @@ ## 1. Problem Statement -The XFTP web client is fundamentally fragile: any transient error (browser opening a new HTTP/2 connection, network hiccup, server restart) causes an unrecoverable failure with a cryptic error message. There is no retry logic, no fetch timeout, no error categorization, and the upload uses a single server instead of distributing chunks across preset servers. This makes the app frustrating — it works most of the time but fails unpredictably, which is worse than being completely broken. +The XFTP web client is fundamentally fragile: any transient error (browser opening a new HTTP/2 connection, network hiccup, router restart) causes an unrecoverable failure with a cryptic error message. There is no retry logic, no fetch timeout, no error categorization, and the upload uses a single router instead of distributing data packets across preset routers. This makes the app frustrating — it works most of the time but fails unpredictably, which is worse than being completely broken. ### Confirmed root cause (from diagnostic logs) -When the browser opens a new HTTP/2 connection mid-operation, the new connection has a different TLS SessionId with no handshake state in the server's `TMap SessionId Handshake`. 
The server's `Nothing` branch in `xftpServerHandshakeV1` (Server.hs:169) unconditionally calls `processHello`, which tries to decode the command body as `XFTPClientHello`, fails, and sends a raw padded "HANDSHAKE" error string. The client cannot parse this as a proper transmission (first byte 'H' = 72 is read as batch count), producing `"expected batch count 1, got 72"`. +When the browser opens a new HTTP/2 connection mid-operation, the new connection has a different TLS SessionId with no handshake state in the router's `TMap SessionId Handshake`. The router's `Nothing` branch in `xftpServerHandshakeV1` (Server.hs:169) unconditionally calls `processHello`, which tries to decode the command body as `XFTPClientHello`, fails, and sends a raw padded "HANDSHAKE" error string. The client cannot parse this as a proper transmission (first byte 'H' = 72 is read as batch count), producing `"expected batch count 1, got 72"`. -Server log confirming the SessionId change: +Router log confirming the SessionId change: ``` DEBUG dispatch: Accepted+command sessId="ZSo1GGETgIvjbB7CWHbvGPpbMjx_b2IlC1eTI6aKfqc=" ...20 successful commands... @@ -17,32 +17,32 @@ DEBUG dispatch: Nothing sessId="mJC7Sck9xxW5UsXoPGoUWduuHghSVgf6CnD6ZC6SBhU=" we ### Why re-handshake is required (cannot be made optional) -1. **SessionId is baked into signed command data.** `encodeAuthTransmission` signs `concat(encode(sessionId), tInner)` with Ed25519. Server's `tDecodeServer` (Protocol.hs:2242) verifies `sessId == sessionId`. New connection = different sessionId = signature mismatch. -2. **Server generates per-session DH keys.** `processHello` creates fresh X25519 keypair stored in `HandshakeSent`. For SMP browser clients (future), `verifyCmdAuth` (Protocol.hs:1322) requires the matching `serverPrivKey` from `thAuth`. +1. **SessionId is baked into signed command data.** `encodeAuthTransmission` signs `concat(encode(sessionId), tInner)` with Ed25519. 
Router's `tDecodeServer` (Protocol.hs:2242) verifies `sessId == sessionId`. New connection = different sessionId = signature mismatch. +2. **Router generates per-session DH keys.** `processHello` creates fresh X25519 keypair stored in `HandshakeSent`. For SMP browser clients (future), `verifyCmdAuth` (Protocol.hs:1322) requires the matching `serverPrivKey` from `thAuth`. 3. **This applies to both XFTP and future SMP browser clients** — the session management approach is the same. -### Why multiple preset servers cannot work +### Why multiple preset routers cannot work -Upload (`agent.ts:105-157`) takes a single `server: XFTPServer` parameter and uploads ALL chunks to it. `web/upload.ts:133` calls `pickRandomServer(servers)` which selects ONE random server from all presets. The multi-server preset configuration is pointless — only one server is ever used per upload. The design intent (RFC section 11.6: "upload in parallel to 8 randomly selected servers") is not implemented. This must be fixed in Phase 2 (section 3.7). +Upload (`agent.ts:105-157`) takes a single `server: XFTPServer` parameter and uploads ALL data packets to it. `web/upload.ts:133` calls `pickRandomServer(servers)` which selects ONE random router from all presets. The multi-router preset configuration is pointless — only one router is ever used per upload. The design intent (RFC section 11.6: "upload in parallel to 8 randomly selected routers") is not implemented. This must be fixed in Phase 2 (section 3.7). ## 2. Solution Summary ### Phase 1: Error handling and connection resilience -1. **Server: strict dispatch for allowed protocol combinations** — reject all invalid combinations +1. **Router: strict dispatch for allowed protocol combinations** — reject all invalid combinations 2. **Client: automatic retry with re-handshake** on SESSION/HANDSHAKE errors 3. **Client: fetch timeout** with configurable duration 4. 
**UI: error categorization and retry** — auto-retry temporary, human-readable permanent -5. **Client: connection state with Promise-based lock and per-server queues** — `ServerConnection` with `client: Promise` + `queue: Promise` +5. **Client: connection state with Promise-based lock and per-router queues** — `ServerConnection` with `client: Promise` + `queue: Promise` 6. **Client: fix cache key** — include keyHash -### Phase 2: Multi-server upload (after Phase 1) +### Phase 2: Multi-router upload (after Phase 1) -7. **Multi-server upload with server selection and failover** — distribute chunks across servers, retry FNEW on different server if one fails +7. **Multi-router upload with router selection and failover** — distribute data packets across routers, retry FNEW on different router if one fails ## 3. Detailed Technical Design -### 3.1 Server: strict dispatch for allowed protocol combinations +### 3.1 Router: strict dispatch for allowed protocol combinations **Principle:** Everything not explicitly done by existing Haskell/TS clients is prohibited. It is better to fail on impossible combinations than to be permissive — permissiveness complicates debugging and creates attack vectors via unexpected behaviors. @@ -88,14 +88,14 @@ Nothing | `FRErr SESSION` | Temporary | Yes (auto) | "Session expired, reconnecting..." | | `FRErr HANDSHAKE` | Temporary | Yes (auto) | "Connection interrupted, reconnecting..." | | `fetch()` TypeError | Temporary | Yes (auto) | "Network error, retrying..." | -| AbortError (timeout) | Temporary | Yes (auto) | "Server timeout, retrying..." | +| AbortError (timeout) | Temporary | Yes (auto) | "Router timeout, retrying..." 
| | `FRErr AUTH` | Permanent | No | "File is invalid, expired, or has been removed" | | `FRErr NO_FILE` | Permanent | No | "File not found — it may have expired" | -| `FRErr SIZE` | Permanent | No | "File size exceeds server limit" | -| `FRErr QUOTA` | Permanent | No | "Server storage quota exceeded" | -| `FRErr BLOCKED` | Permanent | No | "File has been blocked by server" | +| `FRErr SIZE` | Permanent | No | "File size exceeds router limit" | +| `FRErr QUOTA` | Permanent | No | "Router storage quota exceeded" | +| `FRErr BLOCKED` | Permanent | No | "File has been blocked by router" | | `FRErr DIGEST` | Permanent | No | "File integrity check failed" | -| `FRErr INTERNAL` | Permanent | No | "Server internal error" | +| `FRErr INTERNAL` | Permanent | No | "Router internal error" | | `CMD *` | Permanent | No | "Protocol error" | **Retry behavior:** @@ -156,7 +156,7 @@ if (raw.length < 20) { 2. **FRErr classification** (replaces current unconditional throw): ```typescript -// After decodeResponse, instead of throw new Error("Server error: " + err.type): +// After decodeResponse, instead of throw new Error("Router error: " + err.type): if (response.type === "FRErr") { const err = response.err if (err.type === "SESSION" || err.type === "HANDSHAKE") { @@ -206,30 +206,30 @@ Default: 30s for production, 5s for tests. Threaded through `connectXFTP` → `c **Behavior (Option D):** -- **Temporary errors:** Auto-retry loop (3 attempts). After 3 failures, show human-readable diagnosis with manual retry button. Diagnosis examples: "Server timeout — the server may be temporarily unavailable", "Connection interrupted — your network may be unstable". +- **Temporary errors:** Auto-retry loop (3 attempts). After 3 failures, show human-readable diagnosis with manual retry button. Diagnosis examples: "Router timeout — the router may be temporarily unavailable", "Connection interrupted — your network may be unstable". 
- **Permanent errors:** Show human-readable error immediately, NO retry button. User can reload page if they want to retry. Examples: "File is invalid, expired, or has been removed" (AUTH), "File not found" (NO_FILE). **Current UI retry buttons:** - `upload.ts:73-75` — retry calls `startUpload(pendingFile)` from scratch - `download.ts:60` — retry calls `startDownload()` from scratch -**Improvement:** Track uploaded/downloaded chunk indices. On manual retry, skip completed chunks: +**Improvement:** Track uploaded/downloaded data packet indices. On manual retry, skip completed data packets: ```typescript -// Upload: track which chunks completed +// Upload: track which data packets completed const completedChunks: Set = new Set() for (let i = 0; i < specs.length; i++) { if (completedChunks.has(i)) continue - // ... create + upload chunk + // ... create + upload data packet completedChunks.add(i) } -// Download: already naturally resumable — each chunk is independent +// Download: already naturally resumable — each data packet is independent ``` -### 3.5 Client: connection state with Promise-based lock and per-server queues +### 3.5 Client: connection state with Promise-based lock and per-router queues -**Design:** Each server gets a `ServerConnection` record containing a `Promise` (the connection lock) and a `Promise` (the sequential command queue). The `XFTPClientAgent` maps server keys to these records. +**Design:** Each router gets a `ServerConnection` record containing a `Promise` (the connection lock) and a `Promise` (the sequential command queue). The `XFTPClientAgent` maps router keys to these records. The promise IS the lock — every consumer awaits the same promise. When reconnect is needed, the promise is replaced atomically. @@ -325,7 +325,7 @@ function removeStaleConnection( } ``` -**Per-server sequential queue:** `queue` is a `Promise` — the tail of the sequential operation chain. Each new operation `.then()`s onto it. 
It's `void` because callers hold their own typed promises; the queue only tracks completion order: +**Per-router sequential queue:** `queue` is a `Promise` — the tail of the sequential operation chain. Each new operation `.then()`s onto it. It's `void` because callers hold their own typed promises; the queue only tracks completion order: ```typescript async function enqueueCommand( @@ -348,9 +348,9 @@ async function enqueueCommand( } ``` -Commands to the same server execute one at a time via the queue. Commands to different servers execute concurrently because each has its own queue. `enqueueCommand` provides sequencing; `sendXFTPCommand` (called inside `fn` via command wrappers) provides retry. They compose as: `enqueueCommand` sequences calls to wrappers that internally use `sendXFTPCommand`. +Commands to the same router execute one at a time via the queue. Commands to different routers execute concurrently because each has its own queue. `enqueueCommand` provides sequencing; `sendXFTPCommand` (called inside `fn` via command wrappers) provides retry. They compose as: `enqueueCommand` sequences calls to wrappers that internally use `sendXFTPCommand`. -**Download change:** Group chunks by server, process each server's chunks sequentially, servers in parallel. Uses `for` loop for per-server sequencing (same pattern as Stage 2 upload). `enqueueCommand` is available for cases where different callers target the same server. +**Download change:** Group data packets by router, process each router's data packets sequentially, routers in parallel. Uses `for` loop for per-router sequencing (same pattern as Stage 2 upload). `enqueueCommand` is available for cases where different callers target the same router. 
```typescript const byServer = new Map() @@ -374,7 +374,7 @@ await Promise.all([...byServer.entries()].map(async ([srv, chunks]) => { ### 3.6 Fix cache key -**Bug:** `getXFTPServerClient` (client.ts:110) uses `"https://" + server.host + ":" + server.port` as cache key, ignoring `keyHash`. Two servers with same host:port but different keyHash share a cached connection, bypassing identity verification. +**Bug:** `getXFTPServerClient` (client.ts:110) uses `"https://" + server.host + ":" + server.port` as cache key, ignoring `keyHash`. Two routers with same host:port but different keyHash share a cached connection, bypassing identity verification. **Fix:** Use `formatXFTPServer(server)` as cache key (includes keyHash). Already available in `protocol/address.ts:52-54`. @@ -388,11 +388,11 @@ const key = formatXFTPServer(server) Note: With the redesign in 3.5, the cache key fix is inherent — the `connections` Map uses `formatXFTPServer(server)` everywhere. -### 3.7 Phase 2: Multi-server upload with server selection and failover +### 3.7 Phase 2: Multi-router upload with router selection and failover -**Problem:** Current upload (`agent.ts:105-157`) takes a single `server: XFTPServer` and uploads ALL chunks to it. The 12 preset servers (6 SimpleX + 6 Flux) are pointless — only one is ever used. +**Problem:** Current upload (`agent.ts:105-157`) takes a single `server: XFTPServer` and uploads ALL data packets to it. The 12 preset routers (6 SimpleX + 6 Flux) are pointless — only one is ever used. -**Design goal:** Distribute chunks across servers. Retry FNEW on a different server if one fails. Once working servers are found, prefer them (heuristic: server unlikely to fail mid-process, more likely to be broken initially due to maintenance/downtime). +**Design goal:** Distribute data packets across routers. Retry FNEW on a different router if one fails. 
Once working routers are found, prefer them (heuristic: router unlikely to fail mid-process, more likely to be broken initially due to maintenance/downtime). **Reference implementation:** Haskell `Agent.hs:457-486` (`createChunk` / `createWithNextSrv`) + `Client.hs:2335-2385` (`getNextServer_` / `withNextSrv`). @@ -400,13 +400,13 @@ Note: With the redesign in 3.5, the cache key fix is inherent — the `connectio Two-stage architecture: -1. **Allocate stage (serial per file in Haskell):** For each chunk, call FNEW on a randomly-selected server. If FNEW fails, pick a different server and retry. Track tried hosts to avoid retrying the same server. After all chunks are assigned to servers, spawn one upload worker per server. +1. **Allocate stage (serial per file in Haskell):** For each data packet, call FNEW on a randomly-selected router. If FNEW fails, pick a different router and retry. Track tried hosts to avoid retrying the same router. After all data packets are assigned to routers, spawn one upload worker per router. -2. **Upload stage (parallel per server):** Each server worker uploads its assigned chunks sequentially (FPUT). On FPUT failure, retry on the same server with backoff (because the chunk replica already exists on that server). No server failover for FPUT. +2. **Upload stage (parallel per router):** Each router worker uploads its assigned data packets sequentially (FPUT). On FPUT failure, retry on the same router with backoff (because the data packet replica already exists on that router). No router failover for FPUT. -Server selection constraints (hierarchical, `getNextServer_` Client.hs:2335-2350): -1. Prefer servers from unused operators (operator diversity) -2. Prefer servers with unused hosts (host diversity) +Router selection constraints (hierarchical, `getNextServer_` Client.hs:2335-2350): +1. Prefer routers from unused operators (operator diversity) +2. Prefer routers with unused hosts (host diversity) 3. 
Random pick from the most-constrained candidate set 4. If all exhausted, reset tried set and start over @@ -414,17 +414,17 @@ Server selection constraints (hierarchical, `getNextServer_` Client.hs:2335-2350 The web client doesn't have operators or a database. Simplified algorithm with two stages: -**Stage 1 — Allocate:** Create chunk records on servers (FNEW). Unlike Haskell which is serial here, web FNEW runs concurrently within a concurrency limit. FNEW is a small command — concurrent FNEW on the same connection is not a problem, and concurrent FNEW across servers improves upload startup time. +**Stage 1 — Allocate:** Create data packet records on routers (FNEW). Unlike Haskell which is serial here, web FNEW runs concurrently within a concurrency limit. FNEW is a small command — concurrent FNEW on the same connection is not a problem, and concurrent FNEW across routers improves upload startup time. -**Stage 2 — Upload:** Upload chunk data (FPUT). Parallel across servers, sequential per server (reuses per-server queues from 3.5). FPUT retries on the same server with backoff — no server rotation because the chunk replica already exists on that server. Stage 2 reads chunk data by offset (via `readChunk`), so `SentChunk` must be extended with `chunkOffset: number` (from ChunkSpec). +**Stage 2 — Upload:** Upload data packet content (FPUT). Parallel across routers, sequential per router (reuses per-router queues from 3.5). FPUT retries on the same router with backoff — no router rotation because the data packet replica already exists on that router. Stage 2 reads data packet content by offset (via `readChunk`), so `SentChunk` must be extended with `chunkOffset: number` (from ChunkSpec). 
```typescript interface UploadState { - untriedServers: XFTPServer[] // servers not yet attempted — initially all servers - workingServers: XFTPServer[] // servers that succeeded FNEW + untriedServers: XFTPServer[] // routers not yet attempted — initially all routers + workingServers: XFTPServer[] // routers that succeeded FNEW } -const MAX_FNEW_ATTEMPTS = 5 // per chunk: try up to 5 different servers +const MAX_FNEW_ATTEMPTS = 5 // per data packet: try up to 5 different routers async function uploadFile( agent: XFTPClientAgent, @@ -455,7 +455,7 @@ async function uploadFile( ) await Promise.all(allocateWorkers) - // Stage 2: Upload — parallel across servers, sequential per server + // Stage 2: Upload — parallel across routers, sequential per router // readChunk reads from the encrypted file by offset (same as Phase 1 uploadFile) let uploaded = 0 const total = encrypted.chunkSizes.reduce((a, b) => a + b, 0) @@ -473,7 +473,7 @@ async function uploadFile( } ``` -**`createChunkWithFailover`** — server selection with per-chunk retry limit: +**`createChunkWithFailover`** — router selection with per-data-packet retry limit: ```typescript async function createChunkWithFailover( @@ -515,7 +515,7 @@ function pickServer( state: UploadState, concurrency: number ): XFTPServer { - // Once enough working servers found, only use those + // Once enough working routers found, only use those if (state.workingServers.length >= concurrency) { return randomPick(state.workingServers) } @@ -524,7 +524,7 @@ function pickServer( const idx = Math.floor(Math.random() * state.untriedServers.length) return state.untriedServers.splice(idx, 1)[0] // remove from untried } - // All tried — reset untried to non-working servers and retry + // All tried — reset untried to non-working routers and retry state.untriedServers = allServers.filter( s => !state.workingServers.some(w => formatXFTPServer(w) === formatXFTPServer(s)) ) @@ -532,22 +532,22 @@ function pickServer( const idx = 
Math.floor(Math.random() * state.untriedServers.length) return state.untriedServers.splice(idx, 1)[0] } - // Every server is working — pick any working + // Every router is working — pick any working return randomPick(state.workingServers) } ``` -**Algorithm:** Two lists — `untriedServers` (initially all) and `workingServers` (initially empty). When `workingServers.length < concurrency`, pick from `untriedServers` (removing on pick). On FNEW success, add to `workingServers`. On FNEW failure, server is already removed from `untriedServers`; remove from `workingServers` if present. When `untriedServers` is empty, reset it to all non-working servers. Once `workingServers.length >= concurrency`, pick randomly only from `workingServers`. +**Algorithm:** Two lists — `untriedServers` (initially all) and `workingServers` (initially empty). When `workingServers.length < concurrency`, pick from `untriedServers` (removing on pick). On FNEW success, add to `workingServers`. On FNEW failure, router is already removed from `untriedServers`; remove from `workingServers` if present. When `untriedServers` is empty, reset it to all non-working routers. Once `workingServers.length >= concurrency`, pick randomly only from `workingServers`. -**Termination condition:** Each chunk tries at most `min(serverCount, 5)` different servers. If all attempts fail, the chunk fails and the upload fails with the last error. Rationale: if 5 out of 12 servers are down, something systemic is wrong and continuing is unlikely to help. Timeouts count as failures — the timed-out server is removed from working and a different server is picked next. +**Termination condition:** Each data packet tries at most `min(routerCount, 5)` different routers. If all attempts fail, the data packet fails and the upload fails with the last error. Rationale: if 5 out of 12 routers are down, something systemic is wrong and continuing is unlikely to help. 
Timeouts count as failures — the timed-out router is removed from working and a different router is picked next. **Key differences from Haskell:** - No operator concept — just host diversity via random selection - No database — state tracked in-memory during upload - FNEW runs concurrently (Haskell is serial) — improves startup time -- FNEW is cheap and retried with server rotation; FPUT retries on same server +- FNEW is cheap and retried with router rotation; FPUT retries on same router -**Download changes (also Phase 2):** Default concurrency should be 4 (matching Haskell). Download already groups by server in 3.5. If `replicas[0]` download fails, try `replicas[1]`, `replicas[2]`, etc. (fallback across replicas). +**Download changes (also Phase 2):** Default concurrency should be 4 (matching Haskell). Download already groups by router in 3.5. If `replicas[0]` download fails, try `replicas[1]`, `replicas[2]`, etc. (fallback across replicas). ## 4. Implementation Plan @@ -560,7 +560,7 @@ Steps are ordered by dependency and should be implemented one by one. - Add import for `formatXFTPServer` - Run existing tests to verify no regression -#### Step 2: Typed error detection for padded server errors (3.2 client-side) +#### Step 2: Typed error detection for padded router errors (3.2 client-side) - Add `XFTPRetriableError` class - In `sendXFTPCommand`, detect padded error strings before `decodeTransmission` - Classify `FRErr` responses as retriable or permanent with human-readable messages @@ -573,16 +573,16 @@ Steps are ordered by dependency and should be implemented one by one. 
- Add vitest test: timeout triggers after configured duration - Run existing tests -#### Step 4: Connection state with Promise-based lock and per-server queues (3.5) +#### Step 4: Connection state with Promise-based lock and per-router queues (3.5) - Introduce `ServerConnection` record: `{client: Promise, queue: Promise}` - Replace `XFTPClientAgent.clients: Map` with `connections: Map` - Implement `reconnectClient` — replaces `conn.client` with new promise, preserves queue -- Implement `enqueueCommand` — chains operation onto server's queue +- Implement `enqueueCommand` — chains operation onto router's queue - Implement `removeStaleConnection` — removes entry only if current promise is the failed one - Auto-cleanup: `p.catch(() => delete)` removes failed connections so next caller starts fresh - Adapt `closeXFTPServerClient` and `closeXFTPAgent` - Add vitest tests: - - Concurrent calls to same server produce single connection + - Concurrent calls to same router produce single connection - Failed promise is cleaned up, next caller gets fresh connection #### Step 5: Automatic retry in sendXFTPCommand (3.2) @@ -594,61 +594,61 @@ Steps are ordered by dependency and should be implemented one by one. - Max 3 retries for retriable errors, immediate throw for permanent - On retriable error: call `reconnectClient` and retry. On retriable error exhausted: call `removeStaleConnection` to clean up. 
On permanent error: throw immediately without touching connection - Add vitest tests: - - Server started with delay → first attempt fails, retry succeeds + - Router started with delay → first attempt fails, retry succeeds - 3 retries exhausted → error propagates with human-readable message - Non-retriable error (AUTH) → no retry, immediate failure -#### Step 6: Server-side stale session handling (3.1) +#### Step 6: Router-side stale session handling (3.1) - Add one guard to `Nothing` branch: `sniUsed && not webHello -> throwE SESSION` - Remove debug `hPutStrLn stderr` lines (all 6 occurrences in dispatch) - All other branches unchanged - Run Haskell tests + Playwright tests -#### Step 7: Download with per-server grouping -- Modify `downloadFileRaw` to group chunks by server, sequential within each server (`for` loop), parallel across servers (`Promise.all`) -- Add vitest test: concurrent downloads from different servers run in parallel +#### Step 7: Download with per-router grouping +- Modify `downloadFileRaw` to group data packets by router, sequential within each router (`for` loop), parallel across routers (`Promise.all`) +- Add vitest test: concurrent downloads from different routers run in parallel #### Step 8: UI error improvements (3.4) - Temporary errors: auto-retry loop (3 attempts), then show human-readable diagnosis + manual retry button - Permanent errors: show human-readable error, NO retry button -- Manual retry resumes from last successful chunk (not full restart) +- Manual retry resumes from last successful data packet (not full restart) #### Step 9: Remove debug logging - Remove all `console.log('[DEBUG ...]')` and `hPutStrLn stderr "DEBUG ..."` lines - Keep `console.error('[XFTP] ...')` error logging -### Phase 2: Multi-server upload +### Phase 2: Multi-router upload Implement after Phase 1 is complete and tested. 
-#### Step 10: Multi-server upload with failover (3.7) -- Extend `SentChunk` with `chunkOffset: number` (from ChunkSpec) and `server: XFTPServer` (assigned during allocate) — Stage 2 reads data by offset and groups chunks by server +#### Step 10: Multi-router upload with failover (3.7) +- Extend `SentChunk` with `chunkOffset: number` (from ChunkSpec) and `server: XFTPServer` (assigned during allocate) — Stage 2 reads data by offset and groups data packets by router - Change `uploadFile` signature: takes `allServers: XFTPServer[]` instead of single `server` - Implement `UploadState` with `untriedServers` and `workingServers` -- Implement `createChunkWithFailover` and `pickServer`: two-list selection (untried → working once enough found), max `min(serverCount, 5)` attempts per chunk +- Implement `createChunkWithFailover` and `pickServer`: two-list selection (untried → working once enough found), max `min(routerCount, 5)` attempts per data packet - Allocate stage: concurrent FNEW within concurrency limit (default 4) -- Upload stage: parallel across servers, sequential per server (reuse queue from Step 7) +- Upload stage: parallel across routers, sequential per router (reuse queue from Step 7) - Update `web/upload.ts`: pass `getServers()` instead of `pickRandomServer(getServers())` -- Update description building: each chunk references its actual server +- Update description building: each data packet references its actual router - Add vitest tests: - - File split across N servers (verify different servers in description) - - One server down → chunks redistributed to others - - All servers down → error after exhausting 5 attempts per chunk + - File split across N routers (verify different routers in description) + - One router down → data packets redistributed to others + - All routers down → error after exhausting 5 attempts per data packet #### Step 11: Download concurrency and replica fallback - Change default download concurrency from 1 to 4 - If `replicas[0]` 
download fails, try `replicas[1]`, `replicas[2]`, etc. -- Uses per-server queues from Step 7 +- Uses per-router queues from Step 7 ## 5. Testing Plan ### Principle -Prefer low-level vitest tests over Playwright E2E. Each new function gets one focused test. Pure functions tested without mocks; connection management tested with mock `connectXFTP`; server behavior tested with real server. Total: 13 tests across 4 files. +Prefer low-level vitest tests over Playwright E2E. Each new function gets one focused test. Pure functions tested without mocks; connection management tested with mock `connectXFTP`; router behavior tested with real router. Total: 13 tests across 4 files. -Tests A-C run in browser context (`@vitest/browser` with Chromium headless), configured in `vitest.config.ts`. Test D (integration) requires a separate Node.js vitest config since it uses `node:http2`. Existing `globalSetup.ts` provides a real XFTP server for integration tests. +Tests A-C run in browser context (`@vitest/browser` with Chromium headless), configured in `vitest.config.ts`. Test D (integration) requires a separate Node.js vitest config since it uses `node:http2`. Existing `globalSetup.ts` provides a real XFTP router for integration tests. -### Test file A: `test/errors.test.ts` — pure, no server +### Test file A: `test/errors.test.ts` — pure, no router Tests error classification and padded error detection (Steps 2, 5). @@ -682,7 +682,7 @@ expect(re.message).toContain("expired") // "Session expired, reconnecting..." **T3. 
Padded error detection extracts error string from padded block** ```typescript import {blockPad, blockUnpad} from '../src/protocol/transmission.js' -// Simulate server sending padded "SESSION" +// Simulate router sending padded "SESSION" const padded = blockPad(new TextEncoder().encode("SESSION")) const raw = blockUnpad(padded) expect(raw.length).toBeLessThan(20) @@ -694,7 +694,7 @@ const normalRaw = blockUnpad(normalBlock) expect(normalRaw.length).toBeGreaterThan(20) // not mistaken for padded error ``` -### Test file B: `test/connection.test.ts` — mock connectXFTP, no server +### Test file B: `test/connection.test.ts` — mock connectXFTP, no router Tests connection management functions (Steps 4, 5). Uses `vi.mock` to replace `connectXFTP` with a controllable promise factory. @@ -800,7 +800,7 @@ await expect(sendXFTPCommand(agent3, server, dummyKey, dummyId, encodePING())) expect(vi.mocked(connectXFTP)).toHaveBeenCalledTimes(1) // initial only, no reconnect ``` -### Test file C: `test/server-selection.test.ts` — pure, no server +### Test file C: `test/server-selection.test.ts` — pure, no router Tests `pickServer` state machine (Step 10). Determinism: seed `Math.random` or test invariants not specific picks. @@ -833,12 +833,12 @@ const state: UploadState = { workingServers: [s1, s2] // only 2 working, concurrency=4 } const picked = pickServer(servers, state, 4) -// Should have reset untried to non-working servers and picked from them +// Should have reset untried to non-working routers and picked from them expect([s3, s4, s5]).toContainEqual(picked) expect(state.untriedServers.length).toBe(2) // 3 non-working minus 1 picked ``` -### Test file D: `test/integration.test.ts` — real server, Node.js mode +### Test file D: `test/integration.test.ts` — real router, Node.js mode Requires separate vitest config with `browser: {enabled: false}` since these tests use `node:http2` directly. 
Alternatively, add `test/vitest.node.config.ts` that includes only `test/integration.test.ts` and runs in Node.js. @@ -847,10 +847,10 @@ Requires separate vitest config with `browser: {enabled: false}` since these tes import http2 from 'node:http2' // Connect and handshake normally via the client const client = await connectXFTP(server) -// Create a raw HTTP/2 session (new TLS SessionId, no handshake state on server) +// Create a raw HTTP/2 session (new TLS SessionId, no handshake state on router) const session = http2.connect(client.baseUrl, {rejectUnauthorized: false}) // Build a dummy command block using the old client's sessionId. -// Content doesn't matter — server detects stale session before parsing command. +// Content doesn't matter — router detects stale session before parsing command. const dummyKey = new Uint8Array(64) // Ed25519 private key (dummy) const dummyId = new Uint8Array(24) // entity ID (dummy) const cmdBlock = encodeAuthTransmission(client.sessionId, new Uint8Array(0), dummyId, encodePING(), dummyKey) @@ -862,7 +862,7 @@ const resp = await new Promise((resolve, reject) => { req.on("error", reject) req.end(Buffer.from(cmdBlock)) }) -// Server should return padded "SESSION" (not crash, not "HANDSHAKE") +// Router should return padded "SESSION" (not crash, not "HANDSHAKE") const raw = blockUnpad(resp.subarray(0, XFTP_BLOCK_SIZE)) expect(new TextDecoder().decode(raw)).toBe("SESSION") session.close() @@ -885,7 +885,7 @@ await expect( | Cache key fix (Step 1) | Existing round-trip test — uses `formatXFTPServer` after refactor | | Basic upload/download | 24 Playwright tests + 1 vitest browser test | | File size limits, unicode filenames | Playwright edge case tests | -| Server startup/teardown | `globalSetup.ts` / `globalTeardown.ts` | +| Router startup/teardown | `globalSetup.ts` / `globalTeardown.ts` | | Handshake + identity verification | `connectXFTP` in existing round-trip test | ### Test ordering @@ -895,7 +895,7 @@ Tests must be added 
alongside their implementation step: - **Step 3**: Add T13 (test/integration.test.ts) — requires Node.js vitest config - **Step 4**: Add T4, T5, T6, T7 (test/connection.test.ts) - **Step 5**: Add T8 (test/connection.test.ts) -- **Step 6**: Add T12 (test/integration.test.ts) — requires server change + Node.js vitest config +- **Step 6**: Add T12 (test/integration.test.ts) — requires router change + Node.js vitest config - **Step 10**: Add T9, T10, T11 (test/server-selection.test.ts) ## 6. Context for Implementation Sessions @@ -914,30 +914,30 @@ Tests must be added alongside their implementation step: - `web/servers.ts` — `getServers`, `pickRandomServer` **TypeScript (xftp-web/test/):** -- `browser.test.ts` — vitest Node.js test template (uses real Haskell server) -- `globalSetup.ts` — server startup, config generation, port file +- `browser.test.ts` — vitest Node.js test template (uses real Haskell router) +- `globalSetup.ts` — router startup, config generation, port file - `page.spec.ts` — Playwright page tests -**Haskell (reference for multi-server):** -- `src/Simplex/FileTransfer/Agent.hs` — `createChunk` (lines 457-486, allocate stage), `runXFTPSndPrepareWorker` (lines 391-430, serial allocate in Haskell), `runXFTPSndWorker` (lines 494-548, per-server upload worker) +**Haskell (reference for multi-router):** +- `src/Simplex/FileTransfer/Agent.hs` — `createChunk` (lines 457-486, allocate stage), `runXFTPSndPrepareWorker` (lines 391-430, serial allocate in Haskell), `runXFTPSndWorker` (lines 494-548, per-router upload worker) - `src/Simplex/Messaging/Agent/Client.hs` — `getNextServer_` (lines 2335-2350), `withNextSrv` (lines 2366-2385), `pickServer` (lines 2309-2314) -**Haskell (server):** +**Haskell (router):** - `src/Simplex/FileTransfer/Server.hs` — `xftpServerHandshakeV1` (lines 165-244), `processRequest` (lines 403-435) - `src/Simplex/Messaging/Protocol.hs` — `tDecodeServer` (lines 2239-2265) — sessionId verification at line 2242 ### Key design constraints 
1. `tDecodeServer` (Protocol.hs:2242) verifies `sessId == sessionId` — commands signed with old sessionId WILL fail on new connection -2. Server generates per-session DH key in `processHello` (Server.hs:207) — cannot be shared across sessions +2. Router generates per-session DH key in `processHello` (Server.hs:207) — cannot be shared across sessions 3. `fetch()` provides zero control over HTTP/2 connection reuse — browser decides 4. `xftp-web-hello` header is only checked in dispatch (Server.hs:192), NOT inside `processHello` 5. Handshake-phase errors are raw padded strings; command-phase errors are proper ERR transmissions 6. Ed25519 signature verification (`TASignature` path, Protocol.hs:1314) does NOT use `thAuth` — but SMP will -7. Reconnect must re-handshake to get new sessionId AND new server DH key +7. Reconnect must re-handshake to get new sessionId AND new router DH key 8. The new `throwE SESSION` guard (Step 6) sends a raw padded "SESSION" string — no sessionId framing. Client detects this via padded error heuristic (section 3.2), not via sessionId mismatch -9. FNEW is cheap (creates chunk record on server) — retry with different server on failure -10. FPUT retries on same server (chunk replica already exists there) — close connection + backoff +9. FNEW is cheap (creates data packet record on router) — retry with different router on failure +10. FPUT retries on same router (data packet replica already exists there) — close connection + backoff ## 7. 
Plan Maintenance diff --git a/rfcs/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md b/rfcs/done/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md similarity index 87% rename from rfcs/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md rename to rfcs/done/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md index 14b7187e90..2fa4ca7917 100644 --- a/rfcs/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-02-12-xftp-cli-web-link-compat.md @@ -12,8 +12,8 @@ Make CLI produce and consume web-compatible links so that: - CLI `recv` accepts a web link URL as input (alternative to `.xftp` file path) - Browser can download files uploaded by CLI and vice versa -The web page host is derived from the XFTP server address - the server that hosts the file -also hosts the download page. Making XFTP servers actually serve the web page is a separate +The web page host is derived from the XFTP router address - the router that hosts the file +also hosts the download page. Making XFTP routers actually serve the web page is a separate concern (not covered here), but the link format anticipates it. The YAML file description format is already identical between CLI and web. @@ -33,7 +33,7 @@ Encoding chain (agent.ts:64-68): 3. `pako.deflateRaw(bytes)` -> compressed 4. `base64urlEncode(compressed)` -> URI fragment (no `#`) -For multi-chunk files exceeding ~400 chars in URI, a redirect description is uploaded: +For multi-packet files exceeding ~400 chars in URI, a redirect description is uploaded: the real file description is encrypted, uploaded as a separate XFTP file, and a smaller "redirect" description (pointing to it) is put in the URI. 
@@ -111,7 +111,7 @@ Extracts the actual filename from the path and embeds it in the encrypted header #### CLI download: uses filename from header (ok) -`Crypto.hs:62-66` (single chunk) / `Crypto.hs:72-74` (multi-chunk): +`Crypto.hs:62-66` (single data packet) / `Crypto.hs:72-74` (multi-packet): ```haskell (FileHeader {fileName}, rest) <- parseFileHeader decryptedContent destFile <- withExceptT FTCEFileIOError $ getDestFile fileName @@ -163,19 +163,19 @@ The CLI should consider adding filename sanitization similar to the web client f ### 2. Web Link Host Derivation -The web page URL domain comes from the XFTP server address, not from a CLI flag: +The web page URL domain comes from the XFTP router address, not from a CLI flag: -- **Non-redirected description**: use the server host of the first chunk's first replica. +- **Non-redirected description**: use the router host of the first data packet's first replica. E.g., `xftp://abc=@xftp1.simplex.im` -> `https://xftp1.simplex.im/#<uriFragment>` -- **Redirected description**: use the server host of the redirect chunk (the outer description's - chunk that stores the encrypted inner description). +- **Redirected description**: use the router host of the redirect data packet (the outer description's + data packet that stores the encrypted inner description). -The server address format is `xftp://<fingerprint>@<host1>[,<host2>,...][:<port>]`. +The router address format is `xftp://<fingerprint>@<host1>[,<host2>,...][:<port>]`. The web link uses `https://` (port 443 implied). -This means the CLI does not need a `--web-url` flag - the server address fully determines -the link. The XFTP server serving the web page is a separate deployment concern. +This means the CLI does not need a `--web-url` flag - the router address fully determines +the link. The XFTP router serving the web page is a separate deployment concern. ### 3. Web URI Encoding/Decoding in Haskell @@ -196,7 +196,7 @@ decodeWebURI :: ByteString -> Either String (ValidFileDescription 'FRecipient) -- 4.
validateFileDescription -- Build full web link from file description --- Extracts server host from first chunk replica (or redirect chunk) +-- Extracts router host from first data packet replica (or redirect data packet) fileWebLink :: FileDescription 'FRecipient -> (String, ByteString) -- Returns (webHost, uriFragment) -- Caller assembles: "https://" <> webHost <> "/#" <> uriFragment @@ -210,20 +210,20 @@ The `zlib` Haskell package provides `Codec.Compression.Zlib.Raw` for raw DEFLATE ### 4. Redirect Description Support -The CLI currently does NOT create redirect descriptions. For single-server single-recipient -uploads, most file descriptions fit in a reasonable URI even for multi-chunk files. But for -large files (many chunks x long server hostnames), the URI can exceed practical limits. +The CLI currently does NOT create redirect descriptions. For single-router single-recipient +uploads, most file descriptions fit in a reasonable URI even for multi-packet files. But for +large files (many data packets x long router hostnames), the URI can exceed practical limits. **Approach**: Match the web client threshold. -- After encoding the URI, if `length > 400` and chunks > 1, upload a redirect description. +- After encoding the URI, if `length > 400` and data packets > 1, upload a redirect description. - The redirect upload uses the same XFTP upload flow: encrypt YAML -> upload as file -> create outer description pointing to it. - This matches `agent.ts:152-155` exactly. -- The redirect chunk's server becomes the web link host. +- The redirect data packet's router becomes the web link host. For CLI download from a redirect URI, the existing `cliReceiveFile` needs extension: - After decoding the file description, check `redirect` field. 
-- If present: download and decrypt the redirect chunks first to get the inner description, +- If present: download and decrypt the redirect data packets first to get the inner description, then download the actual file using the inner description. - The web client already does this (`resolveRedirect` in agent.ts:320-346). @@ -281,16 +281,16 @@ Already identical. The web `description.ts` explicitly matches Haskell `Data.Yam Adding a cross-client test (CLI upload -> web download, or web upload -> CLI download) would validate interop end-to-end. -### 7. Server Compatibility +### 7. Router Compatibility -No server changes needed. Both clients use the same XFTP protocol (FGET, FPUT, FNEW, FACK, FDEL). +No router changes needed. Both clients use the same XFTP protocol (FGET, FPUT, FNEW, FACK, FDEL). The web client adds `xftp-web-hello: 1` header for the hello handshake, but the actual file operations are identical wire-format. The only consideration: CLI uses native HTTP/2 (via `http2` Haskell package), web uses browser `fetch()` API over HTTP/2. Both produce identical XFTP protocol frames. -**Note**: Making XFTP servers actually serve the web download page at `https://<host>/` is a +**Note**: Making XFTP routers actually serve the web download page at `https://<host>/` is a separate deployment/infrastructure task. This plan only establishes the link format convention so that links are ready to work once servers serve the page. @@ -301,7 +301,7 @@ so that links are ready to work once servers serve the page. 1. Add `zlib` dependency to `simplexmq.cabal` 2. Add `encodeWebURI` / `decodeWebURI` / `fileWebLink` to `Simplex.FileTransfer.Description` (or a new `Simplex.FileTransfer.Description.WebURI` module) -3. `fileWebLink` extracts host from first chunk's first replica server address +3. `fileWebLink` extracts host from first data packet's first replica router address 4. Add unit tests: encode a known FileDescription, verify output matches web client encoding 5.
Add round-trip test: encode -> decode -> compare @@ -309,7 +309,7 @@ so that links are ready to work once servers serve the page. 1. Modify `ReceiveOptions` to accept `Either FilePath WebURL` for `fileDescription` 2. In `cliReceiveFile`: if URL, extract fragment after `#`, call `decodeWebURI` -3. Add redirect resolution: if `redirect /= Nothing`, download redirect chunks, +3. Add redirect resolution: if `redirect /= Nothing`, download redirect data packets, decrypt, parse inner description, then proceed with download 4. Test: upload via web page -> copy link -> `xftp recv ` diff --git a/rfcs/2026-02-17-fix-subq-deadlock.md b/rfcs/done/2026-02-17-fix-subq-deadlock.md similarity index 100% rename from rfcs/2026-02-17-fix-subq-deadlock.md rename to rfcs/done/2026-02-17-fix-subq-deadlock.md diff --git a/rfcs/done/2022-07-22-access-via-tor.md b/rfcs/standard/2026-03-09-access-via-tor.md similarity index 94% rename from rfcs/done/2022-07-22-access-via-tor.md rename to rfcs/standard/2026-03-09-access-via-tor.md index b4517d4036..16ed1a072f 100644 --- a/rfcs/done/2022-07-22-access-via-tor.md +++ b/rfcs/standard/2026-03-09-access-via-tor.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-07-22 +Implemented: ~2022-08 +Standardized: 2026-03-09 +Protocol: simplex-messaging +--- + # Accessing SMP servers via Tor ## Problem diff --git a/rfcs/done/2021-01-26-crypto.md b/rfcs/standard/2026-03-09-crypto.md similarity index 96% rename from rfcs/done/2021-01-26-crypto.md rename to rfcs/standard/2026-03-09-crypto.md index 39ca6eb709..9bf06c0d2f 100644 --- a/rfcs/done/2021-01-26-crypto.md +++ b/rfcs/standard/2026-03-09-crypto.md @@ -1,3 +1,12 @@ +--- +Proposed: 2021-01-26 +Implemented: ~2022 +Standardized: 2026-03-09 +Protocol: simplex-messaging v1, evolved through v7 +--- + +> **Implementation note:** All cryptographic primitives changed from this proposal. Transport: TLS 1.2/1.3 replaced the custom RSA handshake. E2E: Double ratchet with AES-GCM replaced per-message RSA-OAEP encryption. 
Auth: Ed25519/X25519 DH-based authenticated encryption (SMP v7) replaced RSA-PSS signatures. The transmission format (signature CRLF signed) was implemented as proposed. + # SMP agent: cryptography 3 main directions of work to enable basic level of security for communication via SMP agents and servers at the current stage of the project: diff --git a/rfcs/done/2022-06-13-db-sync.md b/rfcs/standard/2026-03-09-db-sync.md similarity index 98% rename from rfcs/done/2022-06-13-db-sync.md rename to rfcs/standard/2026-03-09-db-sync.md index dc375e20ff..a7e32ad8d0 100644 --- a/rfcs/done/2022-06-13-db-sync.md +++ b/rfcs/standard/2026-03-09-db-sync.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-06-13 +Implemented: ~2022-06 +Standardized: 2026-03-09 +Protocol: agent-protocol +--- + # DB access and processing messages for iOS notification service extension ## Problem diff --git a/rfcs/done/2022-06-13-db-sync.mmd b/rfcs/standard/2026-03-09-db-sync.mmd similarity index 95% rename from rfcs/done/2022-06-13-db-sync.mmd rename to rfcs/standard/2026-03-09-db-sync.mmd index 022f57d65b..8eefc0ead4 100644 --- a/rfcs/done/2022-06-13-db-sync.mmd +++ b/rfcs/standard/2026-03-09-db-sync.mmd @@ -1,3 +1,10 @@ +--- +Proposed: 2022-06-13 +Implemented: ~2022-06 +Standardized: 2026-03-09 +Protocol: agent-protocol +--- + sequenceDiagram participant M as iOS message
notification participant S as iOS system diff --git a/rfcs/done/2023-05-03-delivery-receipts.md b/rfcs/standard/2026-03-09-delivery-receipts.md similarity index 99% rename from rfcs/done/2023-05-03-delivery-receipts.md rename to rfcs/standard/2026-03-09-delivery-receipts.md index bc5658f965..e3887f8630 100644 --- a/rfcs/done/2023-05-03-delivery-receipts.md +++ b/rfcs/standard/2026-03-09-delivery-receipts.md @@ -1,3 +1,10 @@ +--- +Proposed: 2023-05-03 +Implemented: 2023-07-13 +Standardized: 2026-03-09 +Protocol: agent-protocol v4 +--- + # Delivery receipts ## Problems diff --git a/rfcs/done/2024-02-03-deniability.md b/rfcs/standard/2026-03-09-deniability.md similarity index 98% rename from rfcs/done/2024-02-03-deniability.md rename to rfcs/standard/2026-03-09-deniability.md index b7bd3f7c51..22993263d3 100644 --- a/rfcs/done/2024-02-03-deniability.md +++ b/rfcs/standard/2026-03-09-deniability.md @@ -1,3 +1,10 @@ +--- +Proposed: 2024-02-03 +Implemented: 2024-04-30 +Standardized: 2026-03-09 +Protocol: simplex-messaging v7 +--- + # Repudiation for message senders ## Problem diff --git a/rfcs/done/2024-06-14-fast-connection.md b/rfcs/standard/2026-03-09-fast-connection.md similarity index 96% rename from rfcs/done/2024-06-14-fast-connection.md rename to rfcs/standard/2026-03-09-fast-connection.md index 000f0ef107..81bd04cb26 100644 --- a/rfcs/done/2024-06-14-fast-connection.md +++ b/rfcs/standard/2026-03-09-fast-connection.md @@ -1,3 +1,10 @@ +--- +Proposed: 2024-06-14 +Implemented: 2024-06-30 +Standardized: 2026-03-09 +Protocol: simplex-messaging v9, agent-protocol v6 +--- + # Faster connection establishment ## Problem diff --git a/rfcs/done/2024-01-26-file-links.md b/rfcs/standard/2026-03-09-file-links.md similarity index 98% rename from rfcs/done/2024-01-26-file-links.md rename to rfcs/standard/2026-03-09-file-links.md index 3ff2f430eb..8de727b350 100644 --- a/rfcs/done/2024-01-26-file-links.md +++ b/rfcs/standard/2026-03-09-file-links.md @@ -1,3 +1,10 @@ +--- 
+Proposed: 2024-01-26 +Implemented: ~2024-01 +Standardized: 2026-03-09 +Protocol: xftp +--- + # Sending large file descriptions It is desirable to provide a QR code/URI from which a file can be downloaded. This way files may be addressed outside a chat client. diff --git a/rfcs/done/2021-01-20-logging.md b/rfcs/standard/2026-03-09-logging.md similarity index 85% rename from rfcs/done/2021-01-20-logging.md rename to rfcs/standard/2026-03-09-logging.md index ae84f8e697..9d23419973 100644 --- a/rfcs/done/2021-01-20-logging.md +++ b/rfcs/standard/2026-03-09-logging.md @@ -1,3 +1,12 @@ +--- +Proposed: 2021-01-20 +Implemented: ~2021 +Standardized: 2026-03-09 +Protocol: agent-protocol +--- + +> **Implementation note:** Logging infrastructure exists but the format evolved from the proposed ASCII art format to structured server statistics, TLS error logging, and Prometheus metrics. + # SMP agent logging ## Problem and proposed solution. diff --git a/rfcs/done/2021-01-26-messages.md b/rfcs/standard/2026-03-09-messages.md similarity index 89% rename from rfcs/done/2021-01-26-messages.md rename to rfcs/standard/2026-03-09-messages.md index 71db024082..bcfb43c9a8 100644 --- a/rfcs/done/2021-01-26-messages.md +++ b/rfcs/standard/2026-03-09-messages.md @@ -1,3 +1,12 @@ +--- +Proposed: 2021-01-26 +Implemented: ~2022 +Standardized: 2026-03-09 +Protocol: agent-protocol, simplex-messaging v2 +--- + +> **Implementation note:** Phase 1 (agent auto-ACK, store in DB, forward to client on SUB) is implemented. The GET command was added in SMP v2 for iOS NSE message retrieval. Phases 2 and 3 (fine-grained MGET/MDEL/MACK commands and autonomous agent with background polling) were not implemented. + # SMP Agent: message management The proposal is to change the way SMP agent manages the messages from the SMP servers. 
diff --git a/rfcs/done/2022-03-22-nofication-server.md b/rfcs/standard/2026-03-09-nofication-server.md similarity index 96% rename from rfcs/done/2022-03-22-nofication-server.md rename to rfcs/standard/2026-03-09-nofication-server.md index eebb94861e..eee122ca07 100644 --- a/rfcs/done/2022-03-22-nofication-server.md +++ b/rfcs/standard/2026-03-09-nofication-server.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-03-22 +Implemented: ~2022 +Standardized: 2026-03-09 +Protocol: push-notifications v1 +--- + # Notification server ## Background and motivation diff --git a/rfcs/done/2021-05-17-open-connection.md b/rfcs/standard/2026-03-09-open-connection.md similarity index 95% rename from rfcs/done/2021-05-17-open-connection.md rename to rfcs/standard/2026-03-09-open-connection.md index 02eec21f47..090a5c17b8 100644 --- a/rfcs/done/2021-05-17-open-connection.md +++ b/rfcs/standard/2026-03-09-open-connection.md @@ -1,3 +1,10 @@ +--- +Proposed: 2021-05-17 +Implemented: ~2021 +Standardized: 2026-03-09 +Protocol: agent-protocol v1 +--- + # Open connections ## Problem diff --git a/rfcs/done/2024-03-03-pqdr-version.md b/rfcs/standard/2026-03-09-pqdr-version.md similarity index 94% rename from rfcs/done/2024-03-03-pqdr-version.md rename to rfcs/standard/2026-03-09-pqdr-version.md index 5db9f23a5b..c05051c7a3 100644 --- a/rfcs/done/2024-03-03-pqdr-version.md +++ b/rfcs/standard/2026-03-09-pqdr-version.md @@ -1,3 +1,12 @@ +--- +Proposed: 2024-03-03 +Implemented: 2024-03-14 +Standardized: 2026-03-09 +Protocol: agent-protocol v5 +--- + +> **Implementation note:** PQ version negotiation and per-connection PQ mode are implemented. The proposed `RatchetVR` and `EncodingV` type class names were not adopted; the functionality was integrated through existing version range types, PQ-dependent size constants (`e2eEncConnInfoLength`, `e2eEncAgentMsgLength`), and the `pqdrSMPAgentVersion` constant. 
+ # Migrating existing connections to post-quantum double ratchet algorithm ## Problem diff --git a/rfcs/done/2023-12-29-pqdr.md b/rfcs/standard/2026-03-09-pqdr.md similarity index 96% rename from rfcs/done/2023-12-29-pqdr.md rename to rfcs/standard/2026-03-09-pqdr.md index 7fd88ffbb7..4c478da2f4 100644 --- a/rfcs/done/2023-12-29-pqdr.md +++ b/rfcs/standard/2026-03-09-pqdr.md @@ -1,3 +1,10 @@ +--- +Proposed: 2023-12-29 +Implemented: 2024-03-14 +Standardized: 2026-03-09 +Protocol: pqdr v1, agent-protocol v5 +--- + # Post-quantum double ratchet implementation See [the previous doc](https://github.com/simplex-chat/simplex-chat/blob/stable/docs/rfcs/2023-09-30-pq-double-ratchet.md). diff --git a/rfcs/done/2022-12-27-queue-quota.md b/rfcs/standard/2026-03-09-queue-quota.md similarity index 94% rename from rfcs/done/2022-12-27-queue-quota.md rename to rfcs/standard/2026-03-09-queue-quota.md index 2337f84f29..b2e666d439 100644 --- a/rfcs/done/2022-12-27-queue-quota.md +++ b/rfcs/standard/2026-03-09-queue-quota.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-12-27 +Implemented: ~2023 +Standardized: 2026-03-09 +Protocol: simplex-messaging, agent-protocol +--- + # SMP and SMP agent protocol extensions to manage queue quotas ## Problem diff --git a/rfcs/done/2022-08-14-queue-rotation.md b/rfcs/standard/2026-03-09-queue-rotation.md similarity index 96% rename from rfcs/done/2022-08-14-queue-rotation.md rename to rfcs/standard/2026-03-09-queue-rotation.md index fa40fda7e3..18fabc5602 100644 --- a/rfcs/done/2022-08-14-queue-rotation.md +++ b/rfcs/standard/2026-03-09-queue-rotation.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-08-14 +Implemented: ~2022 +Standardized: 2026-03-09 +Protocol: agent-protocol v2 +--- + # SMP queue rotation and redundancy ## Problem diff --git a/rfcs/done/2023-10-25-remote-control.md b/rfcs/standard/2026-03-09-remote-control.md similarity index 99% rename from rfcs/done/2023-10-25-remote-control.md rename to rfcs/standard/2026-03-09-remote-control.md index 
8507dba495..854b708661 100644 --- a/rfcs/done/2023-10-25-remote-control.md +++ b/rfcs/standard/2026-03-09-remote-control.md @@ -1,3 +1,10 @@ +--- +Proposed: 2023-10-25 +Implemented: ~2024 +Standardized: 2026-03-09 +Protocol: xrcp v1 +--- + # SimpleX Remote Control protocol Using profiles in SimpleX Chat mobile app from desktop app with minimal risk to the security/threat model of SimpleX protocols. diff --git a/rfcs/done/2023-05-02-resync-ratchets.md b/rfcs/standard/2026-03-09-resync-ratchets-design.md similarity index 92% rename from rfcs/done/2023-05-02-resync-ratchets.md rename to rfcs/standard/2026-03-09-resync-ratchets-design.md index a80a481361..6f8a4066d9 100644 --- a/rfcs/done/2023-05-02-resync-ratchets.md +++ b/rfcs/standard/2026-03-09-resync-ratchets-design.md @@ -1,3 +1,12 @@ +--- +Proposed: 2023-05-02 +Implemented: 2023-06-30 +Standardized: 2026-03-09 +Protocol: agent-protocol v3 +--- + +> **Implementation note:** Early brainstorm document. The implementation followed the more detailed RFC 2023-06-08-resync-ratchets, which refined the state machine to use a single RatchetSyncState (RSOk/RSAllowed/RSRequired/RSStarted/RSAgreed) and defined the AgentRatchetKey envelope type. + # Re-sync encryption ratchets, queue rotation, message delivery receipts This is very unfocussed doc outlining several problems that seem somewhat related, and some possible solution approaches. 
diff --git a/rfcs/done/2023-06-08-resync-ratchets.md b/rfcs/standard/2026-03-09-resync-ratchets.md similarity index 99% rename from rfcs/done/2023-06-08-resync-ratchets.md rename to rfcs/standard/2026-03-09-resync-ratchets.md index fc4572eec2..2c9a1d78da 100644 --- a/rfcs/done/2023-06-08-resync-ratchets.md +++ b/rfcs/standard/2026-03-09-resync-ratchets.md @@ -1,3 +1,10 @@ +--- +Proposed: 2023-06-08 +Implemented: 2023-06-30 +Standardized: 2026-03-09 +Protocol: agent-protocol v3 +--- + # Re-sync encryption ratchets ## Problem diff --git a/rfcs/done/2023-09-12-second-relays.md b/rfcs/standard/2026-03-09-second-relays.md similarity index 99% rename from rfcs/done/2023-09-12-second-relays.md rename to rfcs/standard/2026-03-09-second-relays.md index cad6c4a920..94137df914 100644 --- a/rfcs/done/2023-09-12-second-relays.md +++ b/rfcs/standard/2026-03-09-second-relays.md @@ -1,3 +1,10 @@ +--- +Proposed: 2023-09-12 +Implemented: 2024-06-21 +Standardized: 2026-03-09 +Protocol: simplex-messaging v8 +--- + # Protecting IP addresses of the users from their contacts ## Problem diff --git a/rfcs/done/2022-12-26-simplex-file-transfer.md b/rfcs/standard/2026-03-09-simplex-file-transfer.md similarity index 98% rename from rfcs/done/2022-12-26-simplex-file-transfer.md rename to rfcs/standard/2026-03-09-simplex-file-transfer.md index a833505f1d..e0c4258eec 100644 --- a/rfcs/done/2022-12-26-simplex-file-transfer.md +++ b/rfcs/standard/2026-03-09-simplex-file-transfer.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-12-26 +Implemented: ~2023 +Standardized: 2026-03-09 +Protocol: xftp v1 +--- + # SimpleX File Transfer protocol ## Problem diff --git a/rfcs/done/2022-11-11-smp-basic-auth.md b/rfcs/standard/2026-03-09-smp-basic-auth.md similarity index 95% rename from rfcs/done/2022-11-11-smp-basic-auth.md rename to rfcs/standard/2026-03-09-smp-basic-auth.md index f0bfbd97aa..ee4d9cea81 100644 --- a/rfcs/done/2022-11-11-smp-basic-auth.md +++ b/rfcs/standard/2026-03-09-smp-basic-auth.md @@ -1,3 +1,10 
@@ +--- +Proposed: 2022-11-11 +Implemented: 2022-11-12 +Standardized: 2026-03-09 +Protocol: simplex-messaging v5 +--- + # SMP Basic Auth ## Problem diff --git a/rfcs/done/2023-05-24-smp-delivery-proxy.md b/rfcs/standard/2026-03-09-smp-delivery-proxy.md similarity index 86% rename from rfcs/done/2023-05-24-smp-delivery-proxy.md rename to rfcs/standard/2026-03-09-smp-delivery-proxy.md index 73bdd034b2..dad0624b4b 100644 --- a/rfcs/done/2023-05-24-smp-delivery-proxy.md +++ b/rfcs/standard/2026-03-09-smp-delivery-proxy.md @@ -1,3 +1,12 @@ +--- +Proposed: 2023-05-24 +Implemented: 2024-06-21 +Standardized: 2026-03-09 +Protocol: simplex-messaging v8 +--- + +> **Implementation note:** Short conceptual proposal. The full design evolved into the two-hop onion routing architecture described in RFC 2023-09-12-second-relays, implemented as SMP v8 with PRXY/PKEY/PFWD/RFWD/RRES/PRES commands. + # SMP and XFTP delivery relays ## Problem diff --git a/rfcs/done/2022-06-05-smp-notifications.md b/rfcs/standard/2026-03-09-smp-notifications.md similarity index 97% rename from rfcs/done/2022-06-05-smp-notifications.md rename to rfcs/standard/2026-03-09-smp-notifications.md index 4e58189b7b..8c1808552e 100644 --- a/rfcs/done/2022-06-05-smp-notifications.md +++ b/rfcs/standard/2026-03-09-smp-notifications.md @@ -1,3 +1,10 @@ +--- +Proposed: 2022-06-05 +Implemented: 2022-06-06 +Standardized: 2026-03-09 +Protocol: simplex-messaging v2 +--- + # SMP protocol changes to support push notifications on iOS ## Problem diff --git a/rfcs/done/2024-03-28-xftp-version.md b/rfcs/standard/2026-03-09-xftp-version.md similarity index 98% rename from rfcs/done/2024-03-28-xftp-version.md rename to rfcs/standard/2026-03-09-xftp-version.md index c46810bb9e..785b62340b 100644 --- a/rfcs/done/2024-03-28-xftp-version.md +++ b/rfcs/standard/2026-03-09-xftp-version.md @@ -1,3 +1,10 @@ +--- +Proposed: 2024-03-28 +Implemented: ~2024 +Standardized: 2026-03-09 +Protocol: xftp v2 +--- + # XFTP version agreement ## 
Problem From f745ce5ab630ee83325b831204f2f79ee981fa15 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Tue, 10 Mar 2026 08:15:53 +0000 Subject: [PATCH 19/91] docs: fix minor issues in protocols --- protocol/agent-protocol.md | 20 ++++++++++---------- protocol/pqdr.md | 8 +++----- protocol/push-notifications.md | 11 ++++++----- protocol/simplex-messaging.md | 26 +++++++++++++------------- protocol/xftp.md | 12 +++++------- protocol/xrcp.md | 26 ++++++++++---------------- src/Simplex/Messaging/Protocol.hs | 2 +- 7 files changed, 48 insertions(+), 57 deletions(-) diff --git a/protocol/agent-protocol.md b/protocol/agent-protocol.md index a105ce795c..2c88d6521c 100644 --- a/protocol/agent-protocol.md +++ b/protocol/agent-protocol.md @@ -94,22 +94,22 @@ The procedure of establishing a duplex connection is explained on the example of 4. Bob uses agent `joinConnection` api function with the connection link as a parameter to agent B to accept the connection. 5. Agent B creates Bob's SMP reply queue with SMP router `NEW` command. 6. Agent B confirms the connection: sends an "SMP confirmation" with SMP router `SEND` command to the SMP queue specified in the connection link - SMP confirmation is an unauthenticated message with an ephemeral key that will be used to authenticate Bob's commands to the queue, as described in SMP protocol, and Bob's info (profile, public key for E2E encryption, and the connection link to this 2nd queue to Agent A - this connection link SHOULD use "simplex" URI scheme). This message is encrypted using key passed in the connection link (or with the derived shared secret, in which case public key for key derivation should be sent in clear text). -6. Alice confirms and continues the connection: +7. Alice confirms and continues the connection: - Agent A receives the SMP confirmation containing Bob's key, reply queue and info as SMP router `MSG`. 
- Agent A notifies Alice sending `CONF` notification with Bob's info. - Alice allows connection to continue with agent `allowConnection` api function. - Agent A secures the queue with SMP router `KEY` command. - Agent A sends SMP confirmation with ephemeral sender key, ephemeral public encryption key and profile (but without reply queue). -7. Agent B confirms the connection: +8. Agent B confirms the connection: - receives the confirmation. - sends the notification `INFO` with Alice's information to Bob. - secures SMP queue that it sent to Alice in the first confirmation with SMP `KEY` command . - sends `HELLO` message via SMP `SEND` command. This confirms that the reply queue is secured and also validates that Agent A secured the first SMP queue -8. Agent A notifies Alice. +9. Agent A notifies Alice. - receives `HELLO` message from Agent B. - sends `HELLO` message to Agent B via SMP `SEND` command. - sends `CON` notification to Alice, confirming that the connection is established. -9. Agent B notifies Bob. +10. Agent B notifies Bob. - Once Agent B receives `HELLO` from Agent A, it sends to Bob `CON` notification as well. At this point the duplex connection between Alice and Bob is established, they can use `SEND` command to send messages. The diagram also shows how the connection status changes for both parties, where the first part is the status of the SMP queue to receive messages, and the second part - the status of the queue to send messages. @@ -119,7 +119,7 @@ The most communication happens between the agents and routers, from the point of 1. Alice requests a new connection with `createConnection` agent API function and receives the connection link. 2. Alice passes connection link out-of-band to Bob. 3. Bob accepts the connection with `joinConnection` agent API function with the connection link to his agent. -4. Alice accepts the connection with `ACPT` agent API function. +4. Alice accepts the connection with `allowConnection` agent API function. 5. 
Both parties receive `CON` notification once duplex connection is established. Clients SHOULD support establishing duplex connection asynchronously (when parties are intermittently offline) by persisting intermediate states and resuming SMP queue subscriptions. @@ -382,9 +382,9 @@ Connection links are generated by SMP agent in response to `createConnection` ap ``` connectionLink = connectionScheme "/" connLinkType "#/?v=" versionRange "&smp=" smpQueues ["&e2e=" e2eEncryption] ["&data=" clientData] connLinkType = %s"invitation" / %s"contact" -connectionScheme = (%s"https://" clientAppServer) | %s"simplex:" +connectionScheme = (%s"https://" clientAppServer) / %s"simplex:" clientAppServer = hostname [ ":" port ] -; client app router, e.g. simplex.chat +; client app server, e.g. simplex.chat versionRange = 1*DIGIT / 1*DIGIT "-" 1*DIGIT ; agent version range e2eEncryption = smpQueues = smpQueue *(";" smpQueue) ; SMP queues for the connection (semicolon-separated) @@ -479,14 +479,14 @@ linkEntityId = shortString userData = invitationLinkData / contactLinkData invitationLinkData = %s"I" agentVersionRange userLinkData contactLinkData = %s"C" agentVersionRange userContactData -userLinkData = *OCTET ; opaque application data (e.g., user profile) +userLinkData = shortString / (%xFF largeString) ; opaque application data (e.g., user profile) userContactData = direct ownersList relaysList userLinkData direct = %s"T" / %s"F" ; whether direct connection via connReq is allowed ownersList = length *ownerAuth ownerAuth = shortString ; length-prefixed encoding of (ownerId ownerKey authOwnerSig) ownerId = shortString ; application-specific owner ID (e.g., MemberId) ownerKey = length x509encoded ; Ed25519 public key -authOwnerSig = 64*64 OCTET ; Ed25519 signature of (ownerId || ownerKey) by previous owner +authOwnerSig = length 64*64 OCTET ; Ed25519 signature of (ownerId || ownerKey) by previous owner relaysList = length *connShortLink ; alternative relay short links largeString = 2*2 
OCTET *OCTET ; Word16 length prefix length = 1*1 OCTET @@ -537,7 +537,7 @@ The list of some of the API functions and events below is supported by the refer The list of APIs below is not exhaustive and provided for information only. Please consult the source code for more information. -#### Create conection +#### Create connection `createConnection` api is used to create a connection - it returns the connection link that should be sent out-of-band to another protocol user (the joining party). It should be used by the client of the agent that initiates creating a duplex connection (the initiating party). diff --git a/protocol/pqdr.md b/protocol/pqdr.md index 27f7082c83..e037f87968 100644 --- a/protocol/pqdr.md +++ b/protocol/pqdr.md @@ -71,11 +71,10 @@ def RatchetInitAlicePQ2HE(state, SK, bob_dh_public_key, shared_hka, shared_nhkb, // below added for post-quantum KEM state.PQRs = GENERATE_PQKEM() state.PQRr = bob_pq_kem_encapsulation_key - state.PQRss = random // shared secret for KEM - state.PQRct = PQKEM-ENC(state.PQRr, state.PQRss) // encapsulated additional shared secret + state.PQRct, state.PQRss = PQKEM-ENC(state.PQRr) // encapsulate: generates shared secret and ciphertext // above added for KEM // the next line augments DH key agreement with PQ shared secret - state.RK, state.CKs, state.NHKs = KDF_RK_HE(SK, DH(state.DHRs, state.DHRr) || state.PQRss) + state.RK, state.CKs, state.NHKs = KDF_RK_HE(SK, DH(state.DHRs, state.DHRr) || state.PQRss) state.CKr = None state.Ns = 0 state.Nr = 0 @@ -176,8 +175,7 @@ def DHRatchetPQ2HE(state, header): state.DHRs = GENERATE_DH() // below is added for KEM state.PQRs = GENERATE_PQKEM() // generate new PQ key pair - state.PQRss = random // shared secret for KEM - state.PQRct = PQKEM-ENC(state.PQRr, state.PQRss) // encapsulated additional shared secret KEM #1 + state.PQRct, state.PQRss = PQKEM-ENC(state.PQRr) // encapsulate: generates shared secret and ciphertext KEM #1 // above is added for KEM // use new shared secret with 
sending ratchet state.RK, state.CKs, state.NHKs = KDF_RK_HE(state.RK, DH(state.DHRs, state.DHRr) || state.PQRss) diff --git a/protocol/push-notifications.md b/protocol/push-notifications.md index 4a68a5236b..5113157afd 100644 --- a/protocol/push-notifications.md +++ b/protocol/push-notifications.md @@ -118,9 +118,10 @@ The command syntax: newTokenCmd = %s"TNEW" SP newToken newToken = %s"T" deviceToken authPubKey clientDhPubKey deviceToken = pushProvider tokenString -pushProvider = apnsDev / apnsProd / apnsNull +pushProvider = apnsDev / apnsProd / apnsTest / apnsNull apnsDev = "AD" ; APNS token for development environment apnsProd = "AP" ; APNS token for production environment +apnsTest = "AT" ; APNS token for test environment (mock server) apnsNull = "AN" ; token that does not trigger any notification delivery - used for router testing tokenString = shortString authPubKey = length x509encoded ; Ed25519 key used to verify clients commands @@ -226,10 +227,10 @@ The interval for periodic notifications is set in minutes, with the minimum of 2 This command makes notification router subscribe to message notifications from SMP router and to deliver them to push provider: ```abnf -newSubCmd = %s"SNEW" newSub -newSub = %s "S" tokenId smpRouter notifierId notifierKey +newSubCmd = %s"SNEW" SP newSub +newSub = %s"S" tokenId smpRouter notifierId notifierKey tokenId = shortString ; returned in response to `TNEW` command -smpRouter = smpRouter = hosts port fingerprint +smpRouter = hosts port fingerprint hosts = length 1*host host = shortString port = shortString @@ -259,7 +260,7 @@ subStatusResp = %s"SUB" SP subStatus subStatus = %s"NEW" / %s"PENDING" / ; e.g., after SMP router disconnect/timeout while ntf router is retrying to connect %s"ACTIVE" / %s"INACTIVE" / %s"END" / ; if another router subscribed to notifications %s"AUTH" / %s"DELETED" / %s"SERVICE" / subErrStatus -subErrStatus = %s"ERR" SP shortString +subErrStatus = %s"ERR" SP *OCTET ``` ### Delete notification 
subscription diff --git a/protocol/simplex-messaging.md b/protocol/simplex-messaging.md index 4d2d8e141c..d52a6e75f1 100644 --- a/protocol/simplex-messaging.md +++ b/protocol/simplex-messaging.md @@ -254,7 +254,7 @@ To create and start using a simplex queue Alice and Bob follow these steps: 3. Anybody can send the message to the queue with ID `SID` before it is secured (e.g. if communication is compromised), so it's a "race" to secure the queue. Optionally, in the client application, Alice may identify Bob using the information provided, but it is out of scope of SMP protocol. -5. Alice secures the queue `RID` with `"KEY"` command so only Bob can send messages to it (see [Secure queue command](#secure-queue-command)): +5. Alice secures the queue `RID` with `"KEY"` command so only Bob can send messages to it (see [Secure queue by recipient](#secure-queue-by-recipient)): 1. She sends the `KEY` command with `RID` signed with "private" key `RK` to update the queue to only accept requests authorized by "private" key `SK` provided by Bob. This command contains unique "public" key `SK` previously generated by Bob. @@ -581,7 +581,7 @@ Sending any of the commands in this section (other than `create`, that is sent w This command is sent by the recipient to the SMP router to create a new queue. -Routers SHOULD support basic auth with this command, to allow only router owners and trusted users to create queues on the destiation routers. +Routers SHOULD support basic auth with this command, to allow only router owners and trusted users to create queues on the destination routers. The syntax is: @@ -670,7 +670,7 @@ The count and idsHash allow the router to detect subscription drift. The router #### Secure queue by recipient -This command is only used until v8 of SMP protocol. V9 uses [SKEY](#secure-queue-by-sender). +This command was used before v9 of SMP protocol. V9+ uses [SKEY](#secure-queue-by-sender). KEY is still supported for backwards compatibility. 
This command is sent by the recipient to the router to add sender's key to the queue: @@ -859,11 +859,11 @@ The format of queue information is implementation specific, and is not part of t ### Sender commands -Currently SMP defines only one command that can be used by senders - `send` message. This command must be used with sender's ID, if recipient's ID is used the router must respond with `"ERR AUTH"` response (see [Error responses](#error-responses)). +SMP defines two commands that can be used by senders - `sndSecure` and `send` message. These commands must be used with sender's ID; if recipient's ID is used, the router must respond with `"ERR AUTH"` response (see [Error responses](#error-responses)). #### Secure queue by sender -This command is used from v8 of SMP protocol. V8 and earlier uses [KEY](#secure-queue-by-recipient). +This command is used from v9 of SMP protocol. V8 and earlier use [KEY](#secure-queue-by-recipient). This command is sent by the sender to the router to add sender's key to the queue: @@ -1056,16 +1056,16 @@ Sequence diagram for sending the message and `SKEY` commands via SMP proxy: 2. The client encrypts the transmission (`SKEY` or `SEND`) to the destination router using the shared secret computed from per-command random key and router's session key and sends it to proxying router in `PFWD` command. -3. Proxy additionally encrypts the body to prevent correlation by ciphertext (in case TLS is compromised) and forwards it to proxy in `RFWD` command. +3. Proxy additionally encrypts the body to prevent correlation by ciphertext (in case TLS is compromised) and forwards it to the destination router in `RFWD` command. 4. Proxy receives the double-encrypted response from the destination router, removes one encryption layer and forwards it to the client.
The diagram below shows the encryption layers for `PFWD`/`RFWD` commands and `RRES`/`PRES` responses: -- s2r - encryption between client and SMP relay, with relay key returned in relay handshake, with MITM by proxy mitigated by verifying the certificate fingerprint included in the relay address. This encryption prevents proxy router from observing commands and responses - proxy does not know how many different queues a connected client sends messages and commands to. +- s2r - encryption between client and SMP router, with router key returned in router handshake, with MITM by proxy mitigated by verifying the certificate fingerprint included in the router address. This encryption prevents proxy router from observing commands and responses - proxy does not know how many different queues a connected client sends messages and commands to. - e2e - end-to-end encryption per SMP queue, with additional client encryption inside it. -- p2r - additional encryption between proxy and SMP relay with the shared secret agreed in the handshake, to mitigate traffic correlation inside TLS. -- r2c - additional encryption between SMP relay and client to prevent traffic correlation inside TLS. +- p2r - additional encryption between proxy and SMP router with the shared secret agreed in the handshake, to mitigate traffic correlation inside TLS. +- r2c - additional encryption between SMP router and client to prevent traffic correlation inside TLS. ``` ----------------- ----------------- -- TLS -- ----------------- ----------------- @@ -1083,7 +1083,7 @@ SMP proxy is not another type of the router, it is a role that any SMP router ca #### Request proxied session -The sender uses this command to request the session with the destination proxy. +The sender uses this command to request the session with the destination router. Routers SHOULD support basic auth with this command, to allow only router owners and trusted users to proxy commands to the destination routers. 
@@ -1140,7 +1140,7 @@ proxyResponse = %s"PRES" SP Having received `PFWD` command from the client, the router should additionally encrypt it (without padding, as the received transmission is already encrypted by the client and padded to a fixed size) together with the correlation ID, sender command key, and protocol version, and forward it to the destination router as `RFWD` command: -Transmission forwarded to relay uses empty entity ID and its unique random correlation ID is used as a nonce to encrypt forwarded transmission. Correlation ID increased by 1 is used by the destination router as a nonce to encrypt responses. +Transmission forwarded to the destination router uses empty entity ID and its unique random correlation ID is used as a nonce to encrypt forwarded transmission. Correlation ID increased by 1 is used by the destination router as a nonce to encrypt responses. ```abnf relayCommand = %s"RFWD" SP @@ -1380,7 +1380,7 @@ deleted = %s"DELD" - crypto error (`CRYPTO`) - cryptographic operation failed. - message queue quota exceeded error (`QUOTA`) - too many messages were sent to the message queue. Further messages can only be sent after the recipient retrieves the messages. - store error (`STORE`) - router storage error with error message. -- relay public key expired (`EXPIRED`) - relay public key has expired. +- router public key expired (`EXPIRED`) - router public key has expired. - no message (`NO_MSG`) - no message available or message ID mismatch. - sent message is too large (> maxMessageLength) to be delivered (`LARGE_MSG`). - internal router error (`INTERNAL`).
@@ -1429,7 +1429,7 @@ Both the recipient and the sender can use TCP or some other, possibly higher lev The transport protocol should provide the following: -- server authentication (by matching router certificate hash with `routerIdentity`), +- router authentication (by matching router certificate hash with `routerIdentity`), - forward secrecy (by encrypting the traffic using ephemeral keys agreed during transport handshake), - integrity (preventing data modification by the attacker without detection), - unique channel binding (`sessionIdentifier`) to include in the signed part of SMP transmissions. diff --git a/protocol/xftp.md b/protocol/xftp.md index 855d3f62e8..a5e25dd735 100644 --- a/protocol/xftp.md +++ b/protocol/xftp.md @@ -135,7 +135,7 @@ The sending client combines addresses of all packets and other information into - list of packet descriptions; information for each packet: - private Ed25519 key to sign commands for file transfer router. - packet address (router host and packet ID). - - packet sha512 digest. + - packet sha256 digest. To reduce the size of file description, packets are grouped by the router host. @@ -176,14 +176,14 @@ It includes these fields: - `digest` - SHA512 hash of encrypted file, base64url encoded string. - `key` - symmetric encryption key to decrypt the file, base64url encoded string. - `nonce` - nonce to decrypt the file, base64url encoded string. -- `packetSize` - default packet size, see `fileSize` syntax below. +- `chunkSize` - default packet size, see `fileSize` syntax below. - `replicas` - the array of data packet replicas descriptions. - `redirect` - optional property for redirect information indicating that the file is itself a description to another file, allowing to use file description as a short URI. Each replica description is an object with 2 fields: -- `packets` - and array of packet replica descriptions stored on one router. -- `router` - [router address](#xftp-router-uri) where the packets can be downloaded from. 
+- `chunks` - an array of packet replica descriptions stored on one server. +- `server` - [router address](#xftp-router-uri) where the packets can be downloaded from. Each router replica description is a string with this syntax: @@ -234,7 +234,7 @@ File description URI syntax: ```abnf fileDescriptionURI = serviceScheme "/file" "#/?desc=" description [ "&data=" userData ] -serviceScheme = (%s"https://" clientAppServer) | %s"simplex:" +serviceScheme = (%s"https://" clientAppServer) / %s"simplex:" clientAppServer = hostname [ ":" port ] ; client app server, e.g. simplex.chat description = @@ -415,8 +415,6 @@ ping = %s"PING" This command is always sent unsigned. - data FileResponse = ... | FRPong | ... - ```abnf pong = %s"PONG" ``` diff --git a/protocol/xrcp.md b/protocol/xrcp.md index b3a304572b..2ffff5365e 100644 --- a/protocol/xrcp.md +++ b/protocol/xrcp.md @@ -10,7 +10,7 @@ Version 1, 2024-06-22 - [Session invitation](#session-invitation) - [Establishing TLS connection](#establishing-tls-connection) - [Session verification and protocol negotiation](#session-verification-and-protocol-negotiation) - - [Controller/host session operation](#сontrollerhost-session-operation) + - [Controller/host session operation](#controllerhost-session-operation) - [Key agreement for announcement packet and for session](#key-agreement-for-announcement-packet-and-for-session) - [Threat model](#threat-model) @@ -104,12 +104,11 @@ Multicast session announcement is a binary encoded packet with this syntax: ```abnf sessionAddressPacket = dhPubKey nonce encrypted(unpaddedSize sessionAddress packetPad) dhPubKey = length x509encoded ; same as announced -nonce = length *OCTET +nonce = 24*24 OCTET ; NaCl 192-bit nonce, no length prefix sessionAddress = largeLength sessionAddressUri ; as above length = 1*1 OCTET ; for binary data up to 255 bytes largeLength = 2*2 OCTET ; for binary data up to 65535 bytes -packetPad = ; possibly, we may need to move KEM agreement one step later, -; with 
encapsulation key in HELLO block and KEM ciphertext in reply to HELLO. +packetPad = ``` ### Establishing TLS connection @@ -157,10 +156,7 @@ The controller decrypts (including the first session) and validates the received { "definitions": { "version": { - "type": "string", - "metadata": { - "format": "[0-9]+" - } + "type": "uint16" }, "base64url": { "type": "string", @@ -172,9 +168,7 @@ The controller decrypts (including the first session) and validates the received "properties": { "v": {"ref": "version"}, "ca": {"ref": "base64url"}, - "kem": {"ref": "base64url"} - }, - "optionalProperties": { + "kem": {"ref": "base64url"}, "app": {"properties": {}, "additionalProperties": true} }, "additionalProperties": true @@ -206,7 +200,7 @@ JTD schema for the encrypted part of controller HELLO block `ctrlHelloJSON`: } ``` -Controller `hello` block and all subsequent protocol messages are encrypted with the chain keys derived from the hybrid key (see key exchange below) - that is why conntroller hello block does not include nonce. That provides forward secrecy within the XRCP session. Receiving this `hello` block allows host to compute the same hybrid keys and to derive the same chain keys. +Controller `hello` block and all subsequent protocol messages are encrypted with the chain keys derived from the hybrid key (see key exchange below) - that is why controller hello block does not include nonce. That provides forward secrecy within the XRCP session. Receiving this `hello` block allows host to compute the same hybrid keys and to derive the same chain keys. Once the controller replies HELLO to the valid host HELLO block, it should stop accepting new TCP connections. 
@@ -261,7 +255,7 @@ kemCiphertext(1) = enc(kemSecret(1), kemEncKey(1)) kemSecret(1) = dec(kemCiphertext(1), kemDecKey(1)) // multicast announcement for session n -announcementSecret(n) = sha256(dhSecret(n')) +announcementSecret(n) = dhSecret(n') dhSecret(n') = dh(hostHelloDhKey(n - 1), controllerDhKey(n)) // session n @@ -277,11 +271,11 @@ If controller fails to store the new host DH key after receiving HELLO block, th To decrypt a multicast announcement, the host should try to decrypt it using the keys of all known (paired) remote controllers. -Once kemSecret is agreed for the session, it is used to derive two chain keys, to receive and to send messages: +Once sessionSecret is agreed for the session, it is used to derive two chain keys, to receive and to send messages: ``` -host: sndKey, rcvKey = HKDF(kemSecret, "SimpleXSbChainInit", 64) -controller: rcvKey, sndKey = HKDF(kemSecret, "SimpleXSbChainInit", 64) +controller: sndKey, rcvKey = HKDF(sessionSecret, "SimpleXSbChainInit", 64) +host: rcvKey, sndKey = HKDF(sessionSecret, "SimpleXSbChainInit", 64) ``` where HKDF is based on SHA512, with empty salt. diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index 8961f07f44..fa58d88439 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -613,7 +613,7 @@ data NewQueueReq = NewQueueReq data SubscriptionMode = SMSubscribe | SMOnlyCreate deriving (Eq, Show) --- SenderId must be computed client-side as `sha3-256(corr_id)`, `corr_id` - a random transmission ID. +-- SenderId must be computed client-side as the first 24 bytes of `sha3-384(corr_id)`, `corr_id` - a random transmission ID. -- The server must verify and reject it if it does not match (and in case of collision). -- This allows to include SenderId in FixedDataBytes in full connection request, -- and at the same time prevents the possibility of checking whether a queue with a known ID exists. 
From 01785d5de88fd986d3517ea3d3399bc0ca2c05cb Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Tue, 10 Mar 2026 09:04:21 +0000 Subject: [PATCH 20/91] docs: add e2e encrypted message wire encoding to PQDR spec --- protocol/pqdr.md | 79 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/protocol/pqdr.md b/protocol/pqdr.md index e037f87968..d3d3e2b486 100644 --- a/protocol/pqdr.md +++ b/protocol/pqdr.md @@ -13,6 +13,11 @@ Version 1, 2024-06-22 - [Initialization](#initialization) - [Encrypting messages](#encrypting-messages) - [Decrypting messages](#decrypting-messages) +- [Ratchet message wire format](#ratchet-message-wire-format) + - [Encrypted ratchet message](#encrypted-ratchet-message) + - [Encrypted message header](#encrypted-message-header) + - [Plaintext message header](#plaintext-message-header) + - [KEM state machine](#kem-state-machine) - [Implementation considerations](#implementation-considerations) - [Chosen KEM algorithm](#chosen-kem-algorithm) - [Summary](#summary) @@ -189,6 +194,80 @@ Other than augmenting DH key agreements with the shared secrets from KEM, the ab It is worth noting that while DH agreements work as ping-pong, when the new received DH key is used for both DH agreements (and only the sent DH key is updated for the second DH key agreement), PQ KEM agreements in the proposed scheme work as a "parallel ping-pong", with two balls in play all the time (two KEM agreements run in parallel). +## Ratchet message wire format + +The pseudocode above describes the algorithm. This section specifies the actual binary encoding used in SimpleX implementation with Curve448 DH keys, sntrup761 KEM and AES-256-GCM AEAD. + +The ratchet-encrypted message has three encoding layers, from outermost to innermost: + +1. 
**Encrypted ratchet message** — the complete ratchet message envelope, referenced as an opaque encrypted body in [agent protocol](./agent-protocol.md). +2. **Encrypted message header** — the encrypted header within the ratchet message, used as associated data for message body encryption. +3. **Plaintext message header** — the DH and KEM ratchet keys and counters. + +### Encrypted ratchet message + +The outer envelope contains the encrypted header (used as associated data for body authentication), the body authentication tag, and the encrypted message body. + +The message body is encrypted with AES-256-GCM using the message key derived from the sending chain key (`KDF_CK`). The associated data for body encryption is the concatenation of the ratchet associated data and the encoded encrypted header. + +```abnf +encRatchetMessage = versionedLength encMessageHeader msgAuthTag encMsgBody +; encMessageHeader is used as associated data for body decryption: AD = rcAD || encMessageHeader +msgAuthTag = 16*16 OCTET ; AES-256-GCM authentication tag for the message body +encMsgBody = *OCTET ; AES-256-GCM encrypted padded message body (remaining bytes) +``` + +### Encrypted message header + +The encrypted header wraps the current ratchet e2e encryption version, an initialization vector, an authentication tag, and the encrypted padded header body. + +The header body is encrypted with AES-256-GCM using the header key (`HKs`). The associated data for header encryption is the ratchet associated data. The header is padded before encryption to a fixed size to prevent leaking information about the KEM state. 
+ +```abnf +encMessageHeader = currentVersion headerIV headerAuthTag versionedLength encHeaderBody +currentVersion = 2*2 OCTET ; Word16, current ratchet e2e encryption version +headerIV = 16*16 OCTET ; AES-256 initialization vector for header encryption +headerAuthTag = 16*16 OCTET ; AES-256-GCM authentication tag for the header +encHeaderBody = *OCTET ; AES-256-GCM encrypted padded header (see plaintext format below) +``` + +`versionedLength` uses a 2-byte length prefix (Word16) when the current e2e version supports PQ encryption, or a 1-byte length prefix otherwise. The parser distinguishes the two encodings by peeking at the first byte: values below 32 indicate a 2-byte prefix (as the header is always at least 69 bytes). + +```abnf +versionedLength = largeLength / length ; 2-byte for PQ versions, 1-byte for pre-PQ versions +``` + +The padded header sizes before encryption are: 2310 bytes when PQ is supported, 88 bytes when PQ is not supported. Padding uses a 2-byte big-endian length prefix followed by the plaintext header and `#` fill bytes. 
+ +### Plaintext message header + +```abnf +msgHeader = maxVersion dhPublicKey [kemParams] prevMsgCount msgCount +maxVersion = 2*2 OCTET ; Word16, max supported e2e encryption version +dhPublicKey = length x509encoded ; Curve448 public DH ratchet key +kemParams = noKEM / proposedKEM / acceptedKEM + ; present only when current ratchet version >= pqRatchetE2EEncryptVersion +noKEM = %x30 ; "0" - no KEM parameters +proposedKEM = %x31 %s"P" kemEncapsulationKey ; KEM proposed, not yet accepted +acceptedKEM = %x31 %s"A" kemCiphertext kemEncapsulationKey ; KEM accepted +kemEncapsulationKey = largeLength 1158*1158 OCTET ; sntrup761 encapsulation key +kemCiphertext = largeLength 1039*1039 OCTET ; sntrup761 ciphertext +prevMsgCount = 4*4 OCTET ; Word32, number of messages in previous sending chain +msgCount = 4*4 OCTET ; Word32, message number in current sending chain +length = 1*1 OCTET +largeLength = 2*2 OCTET ; Word16 +``` + +### KEM state machine + +PQ encryption can be enabled or disabled during a connection's lifetime. The KEM parameters in the header reflect three states: + +- **No KEM** (`noKEM`): PQ encryption is not active. The header contains only the DH key, as in the original double ratchet. +- **Proposed** (`proposedKEM`): One party generated a KEM key pair and includes the encapsulation key in the header, proposing PQ encryption. No ciphertext is included because the other party has not yet sent its encapsulation key. +- **Accepted** (`acceptedKEM`): The party received the other's encapsulation key, performed encapsulation (KEM #1), and includes both the ciphertext and its own new encapsulation key (for KEM #2). This is the steady state for active PQ encryption. + +The transition from Proposed to Accepted happens when a party receives a message containing KEM parameters (either Proposed or Accepted) and responds with its own Accepted parameters. 
Once both parties are in Accepted state, the double PQ KEM augmentation described in the algorithm above operates in each DH ratchet step. + ## Implementation considerations for SimpleX Messaging Protocol As SimpleX Messaging Protocol pads messages to a fixed size, using 16kb transport blocks, the size increase introduced by this scheme can be compensated for by using ZSTD compression of JSON bodies and image previews encoded as base64. While there may be some rare cases of random texts that would fail to compress, in all real scenarios it would not cause the message size reduction. From 98351cf09700073a14c90c3971d4f150503ac6ae Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:36:24 +0000 Subject: [PATCH 21/91] docs: add missing encodings and other protocol corrections --- protocol/agent-protocol.md | 60 +++++++++++++++++++++++++++------- protocol/push-notifications.md | 5 +-- protocol/security.md | 2 +- protocol/simplex-messaging.md | 45 ++++++++++++------------- protocol/xftp.md | 2 +- protocol/xrcp.md | 2 +- 6 files changed, 78 insertions(+), 38 deletions(-) diff --git a/protocol/agent-protocol.md b/protocol/agent-protocol.md index 2c88d6521c..1a047ee136 100644 --- a/protocol/agent-protocol.md +++ b/protocol/agent-protocol.md @@ -24,12 +24,11 @@ Version 7, 2025-01-24 - [Full connection link syntax](#full-connection-link-syntax) - [Short connection link syntax](#short-connection-link-syntax) - [Short links](#short-links) - - [Short link structure](#short-link-structure) - [Link key derivation](#link-key-derivation) - [Link data encryption](#link-data-encryption) - [Short link resolution](#short-link-resolution) - [Link data management](#link-data-management) -- [Appendix A: SMP agent API](#smp-agent-api) +- [Appendix A: SMP agent API](#appendix-a-smp-agent-api) - [API functions](#api-functions) - [API events](#api-events) @@ -177,7 +176,7 @@ These messages are encrypted with
per-queue shared secret using NaCL crypto_box decryptedSMPClientMessage = agentConfirmation / agentMsgEnvelope / agentInvitation / agentRatchetKey agentConfirmation = agentVersion %s"C" ("0" / "1" sndE2EEncryptionParams) encConnInfo agentVersion = 2*2 OCTET -sndE2EEncryptionParams = +sndE2EEncryptionParams = encConnInfo = doubleRatchetEncryptedMessage agentMsgEnvelope = agentVersion %s"M" encAgentMessage @@ -185,11 +184,14 @@ encAgentMessage = doubleRatchetEncryptedMessage agentInvitation = agentVersion %s"I" connReqLength connReq connInfo connReqLength = 2*2 OCTET ; Word16 +connReq = *OCTET ; URI text encoding of connection link, length given by connReqLength +connInfo = *OCTET ; opaque connection information (remaining bytes) -agentRatchetKey = agentVersion %s"R" rcvE2EEncryptionParams agentRatchetInfo -rcvE2EEncryptionParams = +agentRatchetKey = agentVersion %s"R" rcvE2EEncryptionParams ratchetKeyInfo +rcvE2EEncryptionParams = +ratchetKeyInfo = *OCTET ; additional ratchet renegotiation info (remaining bytes) -doubleRatchetEncryptedMessage = +doubleRatchetEncryptedMessage = ``` The maximum size of the encrypted connection info and agent message depends on whether post-quantum key exchange is used: @@ -217,7 +219,8 @@ Decrypted SMP message client body can be one of 4 types: - to confirm that the new double ratchet encryption is agreed (`EREADY`). - to notify another party that it can continue sending messages after queue capacity was exceeded (`A_QCONT`). - to manage SMP queue rotation (`QADD`, `QKEY`, `QUSE`, `QTEST`). -- `msgPadding` - an optional message padding to make all SMP messages have constant size, to prevent routers from observing the actual message size. The only case the message padding can be absent is when the message has exactly the maximum size, in all other cases the message MUST be padded to a fixed size.
+ +The encoded `agentMessage` is padded to a fixed size by the double ratchet encryption layer (see [ratchet message wire format](./pqdr.md#ratchet-message-wire-format)) to make all SMP messages have constant size, preventing routers from observing the actual message size. ### Messages between SMP agents @@ -232,7 +235,7 @@ smpQueues = length 1*newQueueInfo ; NonEmpty list of reply queues agentRatchetInfo = %s"R" ratchetInfo ratchetInfo = *OCTET -agentMessage = %s"M" agentMsgHeader aMessage msgPadding +agentMessage = %s"M" agentMsgHeader aMessage agentMsgHeader = agentMsgId prevMsgHash agentMsgId = 8*8 OCTET ; Int64 prevMsgHash = shortString @@ -243,12 +246,13 @@ aMessage = HELLO / A_MSG / A_RCVD / EREADY / A_QCONT / HELLO = %s"H" A_MSG = %s"M" userMsgBody -userMsgBody = *OCTET +userMsgBody = *OCTET ; remaining bytes A_RCVD = %s"V" msgReceipts msgReceipts = length 1*msgReceipt ; NonEmpty list msgReceipt = agentMsgId msgHash rcptLength rcptInfo msgHash = shortString +rcptInfo = *OCTET ; opaque receipt info, length given by rcptLength (Word16) EREADY = %s"E" agentMsgId @@ -284,7 +288,6 @@ senderId = shortString clientVRange = version version version = 2*2 OCTET -msgPadding = *OCTET rcptLength = 2*2 OCTET shortString = length *OCTET length = 1*1 OCTET @@ -474,12 +477,15 @@ fixedData = agentVersionRange rootKey linkConnReq [linkEntityId] agentVersionRange = version version ; min and max agent protocol version version = 2*2 OCTET rootKey = length x509encoded ; Ed25519 public key -linkConnReq = connectionRequestUri ; see full connection link syntax above +linkConnReq = invitationConnReq / contactConnReq ; binary encoding of connection request +invitationConnReq = %s"I" connReqData e2eRatchetParams +contactConnReq = %s"C" connReqData linkEntityId = shortString userData = invitationLinkData / contactLinkData invitationLinkData = %s"I" agentVersionRange userLinkData contactLinkData = %s"C" agentVersionRange userContactData userLinkData = shortString / (%xFF largeString) ; 
opaque application data (e.g., user profile) + ; shortString length byte 0x00-0xFE (max 254 bytes); 0xFF is reserved as largeString sentinel userContactData = direct ownersList relaysList userLinkData direct = %s"T" / %s"F" ; whether direct connection via connReq is allowed ownersList = length *ownerAuth @@ -488,6 +494,38 @@ ownerId = shortString ; application-specific owner ID (e.g., MemberId) ownerKey = length x509encoded ; Ed25519 public key authOwnerSig = length 64*64 OCTET ; Ed25519 signature of (ownerId || ownerKey) by previous owner relaysList = length *connShortLink ; alternative relay short links + +; Binary encoding of connection request (used in linkConnReq) +connReqData = agentVersionRange smpQueueUris clientData +smpQueueUris = length 1*smpQueueUri +clientData = %s"0" / (%s"1" largeString) ; Maybe (Large ByteString) +smpQueueUri = smpClientVersionRange smpServer senderId smpDhPublicKey [queueMode] +smpClientVersionRange = version version ; min and max SMP client versions +smpServer = hosts port serverKeyHash +hosts = length 1*host +host = shortString ; text-encoded hostname or IP address +port = shortString ; text-encoded port number +serverKeyHash = shortString ; CA certificate fingerprint +senderId = shortString ; queue sender ID +smpDhPublicKey = length x509encoded ; X25519 DH public key +queueMode = %s"M" / %s"C" ; messaging or contact (version-dependent trailing field) +e2eRatchetParams = e2eVersionRange e2eDhKey e2eDhKey kemParams +e2eVersionRange = version version ; min and max e2e encryption versions +e2eDhKey = length x509encoded ; X448 DH public key +kemParams = %s"0" / (%s"1" ratchetKEMParams) +ratchetKEMParams = %s"P" kemPublicKey / %s"A" kemCiphertext kemPublicKey +kemPublicKey = largeString ; sntrup761 public key +kemCiphertext = largeString ; sntrup761 ciphertext + +; Binary encoding of short link (used in relaysList) +connShortLink = invShortLink / contactShortLink +invShortLink = %s"I" smpServer linkId linkKey +contactShortLink = %s"C" 
contactConnType smpServer linkKey +contactConnType = %s"A" / %s"C" / %s"G" / %s"R" ; contact / channel / group / relay +linkId = shortString +linkKey = shortString + +x509encoded = *OCTET ; DER-encoded X.509 SubjectPublicKeyInfo largeString = 2*2 OCTET *OCTET ; Word16 length prefix length = 1*1 OCTET shortString = length *OCTET diff --git a/protocol/push-notifications.md b/protocol/push-notifications.md index 5113157afd..88645c4c22 100644 --- a/protocol/push-notifications.md +++ b/protocol/push-notifications.md @@ -100,8 +100,9 @@ The client and router use ALPN extension with `ntf/1` protocol name to agree han Protocol commands have this syntax: -``` -ntfRouterTransmission = +```abnf +ntfRouterTransmission = authorization corrId entityId ntfRouterCmd + ; same transmission structure as SMP, see simplex-messaging.md ntfRouterCmd = newTokenCmd / verifyTokenCmd / checkTokenCmd / replaceTokenCmd / deleteTokenCmd / cronCmd / newSubCmd / checkSubCmd / deleteSubCmd / pingCmd diff --git a/protocol/security.md b/protocol/security.md index 3c84fa5ca1..ea236daf4e 100644 --- a/protocol/security.md +++ b/protocol/security.md @@ -172,7 +172,7 @@ This document describes the cryptographic primitives and threat model for the Si - undetectably communicate at the same time as Alice with her contacts. Doing so would result in the contact getting different messages with repeated IDs. -- undetectably monitor message queues in realtime without alerting the user they are doing so, as a second subscription request unsubscribes the first and notifies the second. +- undetectably monitor message queues in realtime without alerting the user they are doing so, as a second subscription request unsubscribes the first and notifies the first. 
### A user's contact diff --git a/protocol/simplex-messaging.md b/protocol/simplex-messaging.md index d52a6e75f1..f1d1f77ce4 100644 --- a/protocol/simplex-messaging.md +++ b/protocol/simplex-messaging.md @@ -466,9 +466,9 @@ To send service commands, services should authenticate themselves to SMP routers Service certificates are included in the client handshake and verified by the router. The service receives a service ID in the handshake response, which is then used as entity ID in service transmissions. ```abnf -clientHandshakeService = serviceRole serviceCertKey +clientService = serviceRole serviceCertKey serviceRole = %s"M" / %s"N" / %s"P" ; Messaging / Notifier / Proxy -serviceCertKey = certChainPubKey +serviceCertKey = certChain signedServiceKey ``` ### Service subscriptions @@ -537,14 +537,14 @@ Commands syntax below is provided using [ABNF][8] with [case-sensitive strings e smpCommand = ping / recipientCmd / senderCommand / proxyCommand / notifierCommand / linkCommand / routerMsg recipientCmd = create / subscribe / subscribeMultiple / rcvSecure / recipientKeys / - enableNotifications / disableNotifications / getMessage + enableNotifications / disableNotifications / getMessage / acknowledge / suspend / delete / getQueueInfo / setShortLink / deleteShortLink senderCommand = send / sndSecure linkCommand = setLinkKey / getLinkData proxyCommand = proxySession / proxyForward / relayForward notifierCommand = subscribeNotifications / subscribeNotificationsMultiple routerMsg = queueIds / linkResponse / serviceOk / serviceOkMultiple / - message / allReceived / notifierIdResp / messageNotification / + message / allReceived / notifierIdResponse / messageNotification / proxySessionKey / proxyResponse / relayResponse / unsubscribed / serviceUnsubscribed / deleted / queueInfo / ok / error / pong @@ -663,7 +663,7 @@ This command is used by recipient services to subscribe to multiple queues at on ```abnf subscribeMultiple = %s"SUBS " count idsHash count = 8*8 OCTET ; Int64, 
network byte order (big-endian) -idsHash = 16*16 OCTET ; XOR of MD5 hashes of all queue IDs +idsHash = length 16*16 OCTET ; XOR of MD5 hashes of all queue IDs ``` The count and idsHash allow the router to detect subscription drift. The router responds with `serviceOkMultiple` (`SOKS`) response. @@ -883,19 +883,19 @@ This command is sent to the router by the sender both to confirm the queue after ```abnf send = %s"SEND " msgFlags SP smpEncMessage -msgFlags = notificationFlag reserved +msgFlags = notificationFlag notificationFlag = %s"T" / %s"F" smpEncMessage = smpEncClientMessage / smpEncConfirmation ; message up to 16048 bytes (v11+) smpEncClientMessage = smpPubHeaderNoKey msgNonce sentClientMsgBody ; message up to maxMessageLength bytes smpPubHeaderNoKey = smpClientVersion "0" -sentClientMsgBody = 16000*16000 OCTET ; = maxMessageLength(v11+) - 48 = 16048 - 48 +sentClientMsgBody = 16016*16016 OCTET ; = e2eEncMessageLength(16000) + authTagSize(16) smpEncConfirmation = smpPubHeaderWithKey msgNonce sentConfirmationBody smpPubHeaderWithKey = smpClientVersion "1" senderPublicDhKey ; sender's Curve25519 public key to agree DH secret for E2E encryption in this queue ; it is only sent in confirmation message -sentConfirmationBody = 15904*15904 OCTET ; E2E-encrypted smpClientMessage padded to e2eEncMessageLength before encryption +sentConfirmationBody = 15920*15920 OCTET ; E2E-encrypted smpConfirmation padded to e2eEncConfirmationLength(15904), + authTagSize(16) senderPublicDhKey = length x509encoded smpClientVersion = word16 @@ -1123,7 +1123,7 @@ Transmission sent to proxy router should use session ID as entity ID and use a r Encrypted transmission should use the received session ID from the connection between proxy router and destination router in the authorized body. 
```abnf -proxyCommand = %s"PFWD" SP smpVersion commandKey +proxyForward = %s"PFWD" SP smpVersion commandKey smpVersion = 2*2 OCTET commandKey = length x509encoded ``` @@ -1134,6 +1134,7 @@ Having received the `RRES` response from the destination router, proxy router wi ```abnf proxyResponse = %s"PRES" SP +forwardedResponse = *OCTET ; client-encrypted SMP response, decrypted by client using per-command DH secret ``` #### Forward command to destination router @@ -1143,7 +1144,7 @@ Having received `PFWD` command from the client, the router should additionally e Transmission forwarded to destination router uses empty entity ID and its unique random correlation ID is used as a nonce to encrypt forwarded transmission. Correlation ID increased by 1 is used by the destination router as a nonce to encrypt responses. ```abnf -relayCommand = %s"RFWD" SP +relayForward = %s"RFWD" SP forwardedTransmission = fwdCorrId fwdSmpVersion fwdCommandKey transmission fwdCorrId = length 24*24 OCTET ; `fwdCorrId` - correlation ID used in `PFWD` command transmission - it is used as a nonce for client encryption, @@ -1160,6 +1161,8 @@ The shared secret for encrypting transmission bodies between proxy router and de ```abnf relayResponse = %s"RRES" SP +responseTransmission = fwdCorrId forwardedResponse + ; fwdCorrId and forwardedResponse defined above in RFWD section ``` ### Short link commands @@ -1212,7 +1215,7 @@ This command is used by notifier services to subscribe to multiple queues at onc ```abnf subscribeNotificationsMultiple = %s"NSUBS " count idsHash count = 8*8 OCTET ; Int64, network byte order (big-endian) -idsHash = 16*16 OCTET ; XOR of MD5 hashes of all queue IDs +idsHash = length 16*16 OCTET ; XOR of MD5 hashes of all queue IDs ``` The router responds with `serviceOkMultiple` (`SOKS`) response. 
@@ -1253,7 +1256,7 @@ Sent in response to `SUBS` or `NSUBS` commands: ```abnf serviceOkMultiple = %s"SOKS " count idsHash count = 8*8 OCTET ; Int64, network byte order (big-endian) -idsHash = 16*16 OCTET ; XOR of MD5 hashes of all subscribed queue IDs +idsHash = length 16*16 OCTET ; XOR of MD5 hashes of all subscribed queue IDs ``` #### All service messages received @@ -1297,15 +1300,13 @@ The router must deliver message notifications to all simplex queues that were su ```abnf messageNotification = %s"NMSG " nmsgNonce encryptedNMsgMeta - -encryptedNMsgMeta = -; metadata E2E encrypted between router and recipient containing router's message ID and timestamp (allows extension), -; to be passed to the recipient by the notifier for them to decrypt -; with key negotiated in NKEY and NID commands using nmsgNonce - -nmsgNonce = -; nonce used by the router for encryption of message metadata, to be passed to the recipient by the notifier -; for them to use in decryption of E2E encrypted metadata +nmsgNonce = 24*24 OCTET ; 192-bit NaCl crypto_box nonce +encryptedNMsgMeta = shortString + ; NaCl crypto_box encrypted padded(nmsgMeta, 128): 128 + 16 (auth tag) = 144 bytes + ; metadata E2E encrypted between router and recipient, + ; to be passed to the recipient by the notifier for them to decrypt + ; with key negotiated in NKEY and NID commands using nmsgNonce +nmsgMeta = msgId timestamp ; message ID and timestamp, allows future extension ``` Message notification does not contain any message data or non E2E encrypted metadata. 
@@ -1327,7 +1328,7 @@ Sent when service subscription is terminated (can be sent when service re-connec ```abnf serviceUnsubscribed = %s"ENDS " count idsHash count = 8*8 OCTET ; Int64, network byte order (big-endian) -idsHash = 16*16 OCTET ; XOR of MD5 hashes of terminated queue IDs +idsHash = length 16*16 OCTET ; XOR of MD5 hashes of terminated queue IDs ``` #### Queue deleted notification diff --git a/protocol/xftp.md b/protocol/xftp.md index a5e25dd735..4180e652b6 100644 --- a/protocol/xftp.md +++ b/protocol/xftp.md @@ -421,7 +421,7 @@ pong = %s"PONG" ### File sender commands -Sending any of the commands in this section (other than `register`, that is sent without data packet ID) is only allowed with sender's ID. +Sending any of the commands in this section (other than `register`, that is sent without data packet ID) is only allowed with sender's ID. The `register` command must be signed (using `sndKey` included in `fileInfo` for verification) but must NOT include a data packet ID. #### Register new data packet diff --git a/protocol/xrcp.md b/protocol/xrcp.md index 2ffff5365e..1b2c320a75 100644 --- a/protocol/xrcp.md +++ b/protocol/xrcp.md @@ -105,7 +105,7 @@ Multicast session announcement is a binary encoded packet with this syntax: sessionAddressPacket = dhPubKey nonce encrypted(unpaddedSize sessionAddress packetPad) dhPubKey = length x509encoded ; same as announced nonce = 24*24 OCTET ; NaCl 192-bit nonce, no length prefix -sessionAddress = largeLength sessionAddressUri ; as above +sessionAddress = sessionAddressUri ; length given by unpaddedSize length = 1*1 OCTET ; for binary data up to 255 bytes largeLength = 2*2 OCTET ; for binary data up to 65535 bytes packetPad = From b81670c414958f39cc2207a25dd3cffcc8aae6d8 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Tue, 10 Mar 2026 19:18:34 +0000 Subject: [PATCH 22/91] docs: move implemented rfcs --- {rfcs => 
plans}/done/2024-09-01-smp-message-storage.md | 1 + {rfcs => plans}/done/2024-09-15-shared-port.md | 3 +-- {rfcs => plans}/done/2024-11-25-journal-expiration.md | 1 + {rfcs => plans}/done/2026-02-17-fix-subq-deadlock.md | 1 + rfcs/README.md | 4 ++-- rfcs/done/2026-01-30-send-file-page.md | 1 + .../2026-02-04-xftp-web-browser-transport.md | 1 + .../2026-03-10-agent-protocol.md} | 9 +++++++++ .../2026-03-10-client-certificates.md} | 9 +++++++++ .../2026-03-10-encryption.md} | 9 +++++++++ .../2026-03-10-server-metadata.md} | 9 +++++++++ .../2026-03-10-short-links.md} | 9 +++++++++ .../2026-03-10-smp-blobs.md} | 9 +++++++++ .../2026-03-10-smp-queues.md} | 9 +++++++++ 14 files changed, 71 insertions(+), 4 deletions(-) rename {rfcs => plans}/done/2024-09-01-smp-message-storage.md (99%) rename {rfcs => plans}/done/2024-09-15-shared-port.md (99%) rename {rfcs => plans}/done/2024-11-25-journal-expiration.md (99%) rename {rfcs => plans}/done/2026-02-17-fix-subq-deadlock.md (99%) rename rfcs/{done/2024-06-01-agent-protocol.md => standard/2026-03-10-agent-protocol.md} (63%) rename rfcs/{done/2025-05-05-client-certificates.md => standard/2026-03-10-client-certificates.md} (95%) rename rfcs/{done/2024-02-12-encryption.md => standard/2026-03-10-encryption.md} (85%) rename rfcs/{done/2024-03-20-server-metadata.md => standard/2026-03-10-server-metadata.md} (94%) rename rfcs/{done/2024-06-21-short-links.md => standard/2026-03-10-short-links.md} (94%) rename rfcs/{done/2024-09-09-smp-blobs.md => standard/2026-03-10-smp-blobs.md} (89%) rename rfcs/{done/2025-03-16-smp-queues.md => standard/2026-03-10-smp-queues.md} (97%) diff --git a/rfcs/done/2024-09-01-smp-message-storage.md b/plans/done/2024-09-01-smp-message-storage.md similarity index 99% rename from rfcs/done/2024-09-01-smp-message-storage.md rename to plans/done/2024-09-01-smp-message-storage.md index 1acc32f3b8..17e7c9d4ae 100644 --- a/rfcs/done/2024-09-01-smp-message-storage.md +++ 
b/plans/done/2024-09-01-smp-message-storage.md @@ -1,3 +1,4 @@ + # SMP router message storage ## Problem diff --git a/rfcs/done/2024-09-15-shared-port.md b/plans/done/2024-09-15-shared-port.md similarity index 99% rename from rfcs/done/2024-09-15-shared-port.md rename to plans/done/2024-09-15-shared-port.md index 390e9d6c03..0e75def636 100644 --- a/rfcs/done/2024-09-15-shared-port.md +++ b/plans/done/2024-09-15-shared-port.md @@ -1,3 +1,4 @@ + # Sharing protocol ports with HTTPS Some networks block all ports other than web ports, including port 5223 used for SMP protocol by default. Running SMP routers on a common web port 443 would allow them to work on more networks. The routers would need to provide an HTTPS page for browsers (and probes). @@ -65,8 +66,6 @@ The implementation relies on a few modification to upstream code: - `warp`: Only the re-export of `serveConnection` is needed. Unfortunately the most recent `warp` version can't be used right away due to dependency cascade around `http-5` and `auto-update-2`. So a fork containing the backported re-export has to be used until the dependencies are refreshed. 
- - ### TLS.ServerParams When a router has port sharing enabled, a new set of TLS params is loaded and combined with transport params: diff --git a/rfcs/done/2024-11-25-journal-expiration.md b/plans/done/2024-11-25-journal-expiration.md similarity index 99% rename from rfcs/done/2024-11-25-journal-expiration.md rename to plans/done/2024-11-25-journal-expiration.md index 0445122417..ffdb7f5283 100644 --- a/rfcs/done/2024-11-25-journal-expiration.md +++ b/plans/done/2024-11-25-journal-expiration.md @@ -1,3 +1,4 @@ + # Expiring messages in journal storage ## Problem diff --git a/rfcs/done/2026-02-17-fix-subq-deadlock.md b/plans/done/2026-02-17-fix-subq-deadlock.md similarity index 99% rename from rfcs/done/2026-02-17-fix-subq-deadlock.md rename to plans/done/2026-02-17-fix-subq-deadlock.md index 9c38e6721c..07e4e36431 100644 --- a/rfcs/done/2026-02-17-fix-subq-deadlock.md +++ b/plans/done/2026-02-17-fix-subq-deadlock.md @@ -1,3 +1,4 @@ + # Fix subQ deadlock: blocking writeTBQueue inside connLock ## Problem diff --git a/rfcs/README.md b/rfcs/README.md index 51a450ab07..cf25e06527 100644 --- a/rfcs/README.md +++ b/rfcs/README.md @@ -145,6 +145,6 @@ As more protocols are designated as Core IP, development naturally transitions t |----------|----------|-------| | `protocol/` | Consolidated specs (SMP v19, Agent v7, XFTP v3, XRCP v1, NTF v3, PQDR v1) | 6 specs + overview | | `rfcs/` root | Active draft proposals | 10 | -| `rfcs/done/` | Implemented, not yet verified | 12 | -| `rfcs/standard/` | Verified against implementation | 25 | +| `rfcs/done/` | Implemented, not yet verified | 1 (+10 sub-RFCs) | +| `rfcs/standard/` | Verified against implementation | 31 | | `rfcs/rejected/` | Draft proposals not accepted | 7 | diff --git a/rfcs/done/2026-01-30-send-file-page.md b/rfcs/done/2026-01-30-send-file-page.md index 00ac027aec..9080a784c7 100644 --- a/rfcs/done/2026-01-30-send-file-page.md +++ b/rfcs/done/2026-01-30-send-file-page.md @@ -1,3 +1,4 @@ + # Send File Page — 
Web-based XFTP File Transfer ## 1. Problem & Business Case diff --git a/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md b/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md index 41915bf64d..784627a5a9 100644 --- a/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md +++ b/rfcs/done/2026-01-30-send-file-page/2026-02-04-xftp-web-browser-transport.md @@ -1,3 +1,4 @@ + # Browser Transport & Web Worker Architecture ## TOC diff --git a/rfcs/done/2024-06-01-agent-protocol.md b/rfcs/standard/2026-03-10-agent-protocol.md similarity index 63% rename from rfcs/done/2024-06-01-agent-protocol.md rename to rfcs/standard/2026-03-10-agent-protocol.md index 616aed33fa..896ae825a0 100644 --- a/rfcs/done/2024-06-01-agent-protocol.md +++ b/rfcs/standard/2026-03-10-agent-protocol.md @@ -1,3 +1,12 @@ +--- +Proposed: 2024-06-01 +Implemented: ~2024 +Standardized: 2026-03-10 +Protocol: agent-protocol +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. 
+ # Evolving agent API ## Problem diff --git a/rfcs/done/2025-05-05-client-certificates.md b/rfcs/standard/2026-03-10-client-certificates.md similarity index 95% rename from rfcs/done/2025-05-05-client-certificates.md rename to rfcs/standard/2026-03-10-client-certificates.md index 81455fb577..68321256d7 100644 --- a/rfcs/done/2025-05-05-client-certificates.md +++ b/rfcs/standard/2026-03-10-client-certificates.md @@ -1,3 +1,12 @@ +--- +Proposed: 2025-05-05 +Implemented: ~2025 (SMP v16) +Standardized: 2026-03-10 +Protocol: simplex-messaging +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. + # Service certificates for high volume routers and services connecting to SMP routers ## Problem diff --git a/rfcs/done/2024-02-12-encryption.md b/rfcs/standard/2026-03-10-encryption.md similarity index 85% rename from rfcs/done/2024-02-12-encryption.md rename to rfcs/standard/2026-03-10-encryption.md index 8ecfcadfa8..483da6db88 100644 --- a/rfcs/done/2024-02-12-encryption.md +++ b/rfcs/standard/2026-03-10-encryption.md @@ -1,3 +1,12 @@ +--- +Proposed: 2024-02-12 +Implemented: ~2024 (SMP v11) +Standardized: 2026-03-10 +Protocol: simplex-messaging +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. 
+ # Transmission encryption ## Problems diff --git a/rfcs/done/2024-03-20-server-metadata.md b/rfcs/standard/2026-03-10-server-metadata.md similarity index 94% rename from rfcs/done/2024-03-20-server-metadata.md rename to rfcs/standard/2026-03-10-server-metadata.md index 3e696a8fb6..0506c01bd9 100644 --- a/rfcs/done/2024-03-20-server-metadata.md +++ b/rfcs/standard/2026-03-10-server-metadata.md @@ -1,3 +1,12 @@ +--- +Proposed: 2024-03-20 +Implemented: ~2024 +Standardized: 2026-03-10 +Protocol: simplex-messaging +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. + # Relay metadata and SimpleX network decentralization ## Problem diff --git a/rfcs/done/2024-06-21-short-links.md b/rfcs/standard/2026-03-10-short-links.md similarity index 94% rename from rfcs/done/2024-06-21-short-links.md rename to rfcs/standard/2026-03-10-short-links.md index 1dd3bf9105..3e3e7bf8c1 100644 --- a/rfcs/done/2024-06-21-short-links.md +++ b/rfcs/standard/2026-03-10-short-links.md @@ -1,3 +1,12 @@ +--- +Proposed: 2024-06-21 +Implemented: ~2025 (SMP v15) +Standardized: 2026-03-10 +Protocol: simplex-messaging + agent-protocol +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. 
+ # Short invitation links ## Problem diff --git a/rfcs/done/2024-09-09-smp-blobs.md b/rfcs/standard/2026-03-10-smp-blobs.md similarity index 89% rename from rfcs/done/2024-09-09-smp-blobs.md rename to rfcs/standard/2026-03-10-smp-blobs.md index b205d36563..5d81c1ade3 100644 --- a/rfcs/done/2024-09-09-smp-blobs.md +++ b/rfcs/standard/2026-03-10-smp-blobs.md @@ -1,3 +1,12 @@ +--- +Proposed: 2024-09-09 +Implemented: ~2025 (SMP v15) +Standardized: 2026-03-10 +Protocol: simplex-messaging + agent-protocol +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. + # Blob extensions for SMP queues Evolution of the design for short links, see [here](./2024-06-21-short-links.md) and [here](./2024-09-05-queue-storage.md). diff --git a/rfcs/done/2025-03-16-smp-queues.md b/rfcs/standard/2026-03-10-smp-queues.md similarity index 97% rename from rfcs/done/2025-03-16-smp-queues.md rename to rfcs/standard/2026-03-10-smp-queues.md index 761d624baf..a9afa23d4a 100644 --- a/rfcs/done/2025-03-16-smp-queues.md +++ b/rfcs/standard/2026-03-10-smp-queues.md @@ -1,3 +1,12 @@ +--- +Proposed: 2025-03-16 +Implemented: ~2025 (SMP v15) +Standardized: 2026-03-10 +Protocol: simplex-messaging + agent-protocol +--- + +> **Implementation note:** This RFC was promoted from done/ to standard/ based on verification that the described feature exists in the codebase. The RFC text reflects the original proposal and may not match the actual implementation in all details. The consolidated protocol specifications in `protocol/` are the authoritative reference for current behavior. 
+ # Protocol changes for creating and connecting to SMP queues ## Problems From 0bba2efc4578fcabce00488da4b009c6aed29775 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Tue, 10 Mar 2026 22:29:51 +0000 Subject: [PATCH 23/91] add rcv services spec --- spec/README.md | 3 + spec/rcv-services.md | 741 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 744 insertions(+) create mode 100644 spec/rcv-services.md diff --git a/spec/README.md b/spec/README.md index 83ce5097c9..7154aa957a 100644 --- a/spec/README.md +++ b/spec/README.md @@ -60,5 +60,8 @@ Function documentation format: - [remote-control.md](remote-control.md) — XRCP implementation - [compression.md](compression.md) — Zstd compression +### Cross-cutting Features +- [rcv-services.md](rcv-services.md) — Service certificates for high-volume SMP clients (bulk subscription) + ### Security - [security-invariants.md](security-invariants.md) — All security invariants diff --git a/spec/rcv-services.md b/spec/rcv-services.md new file mode 100644 index 0000000000..b0d97d9f74 --- /dev/null +++ b/spec/rcv-services.md @@ -0,0 +1,741 @@ +# Receive Services (Service Certificates) + +> Cross-cutting specification for the rcv-services feature: service certificates enabling high-volume SMP clients (notification routers, chat relays, directory services) to bulk-subscribe to queues. + +**Source branch**: `rcv-services` +**Protocol reference**: [`protocol/simplex-messaging.md`](../protocol/simplex-messaging.md) +**Phase**: 3.0a (Protocol + Transport + Server), 3.0b (Client + Agent + Store + NTF) + +## Overview + +A **service client** is a high-volume SMP client that presents a TLS client certificate during handshake. The server assigns it a persistent `ServiceId` derived from the certificate fingerprint. Individual queues are then **associated** with this ServiceId via per-queue `SUB` commands carrying a service signature. 
Once associated, the service client can **bulk-subscribe** all its queues in a single `SUBS` command instead of issuing per-queue `SUB` commands on each reconnection. + +This matters for notification servers, chat relays, and directory services that manage thousands to millions of queues per SMP server. Without service certificates, reconnection requires O(n) SUB commands; with them, it requires O(1) SUBS. + +### Design summary + +``` +Service client SMP Server + | | + |---- TLS + service cert --------->| Three-way handshake + |<--- ServiceId -------------------| (Transport layer) + | | + |---- SUB + service sig ---------->| Per-queue association + |<--- SOK(ServiceId) --------------| (Protocol layer, one-time) + | ...repeat per queue... | + | | + |---- SUBS count idsHash --------->| Bulk subscribe + |<--- SOKS count' idsHash' --------| (count/hash from server) + |<--- MSG ... MSG ... MSG ---------| Buffered messages + |<--- ALLS ------------------------| All delivered + | | +``` + +## Version gates + +| Constant | Value | Gate | Source | +|----------|-------|------|--------| +| `serviceCertsSMPVersion` | 16 | Service handshake, `SOK`, `useServiceAuth` | Transport.hs:214 | +| `rcvServiceSMPVersion` | 19 | `SUBS`/`NSUBS` parameters, `SOKS`/`ENDS` idsHash, messaging service role in handshake | Transport.hs:223 | + +The two-version split means: +- v16-18 servers accept service certificates and per-queue `SUB` with service auth, but `SUBS`/`NSUBS` are sent as a bare command tag, with no count/hash parameters. +- v19+ servers send and receive full count + idsHash with `SUBS`/`NSUBS`/`SOKS`/`ENDS`. +- Messaging services (`SRMessaging`) are only included in the client handshake at v >= 19. Notifier services (`SRNotifier`) are included at v >= 16. + +## Types + +### ServiceId + +`ServiceId` is an `EntityId` (24-byte base64url-encoded identifier) assigned by the server during the three-way handshake.
It is derived from the service certificate fingerprint via `getCreateService` in QueueStore. + +### SMPServiceRole + +```haskell +data SMPServiceRole = SRMessaging | SRNotifier | SRProxy +-- Wire: "M" | "N" | "P" +``` +Source: Transport.hs:594 + +### Party (service-related constructors) + +```haskell +data Party = ... | RecipientService | NotifierService | ... +``` +Source: Protocol.hs:335-346 + +The `ServiceParty` type family constrains to `RecipientService | NotifierService` only: +```haskell +type family ServiceParty (p :: Party) :: Constraint where + ServiceParty RecipientService = () + ServiceParty NotifierService = () + ServiceParty p = (Int ~ Bool, TypeError ...) -- compile-time error +``` +Source: Protocol.hs:430-434 + +### IdsHash + +16-byte XOR of MD5 hashes, used for drift detection between client and server subscription state. + +```haskell +newtype IdsHash = IdsHash {unIdsHash :: BS.ByteString} + +instance Semigroup IdsHash where + (IdsHash s1) <> (IdsHash s2) = IdsHash $! BS.pack $ BS.zipWith xor s1 s2 + +instance Monoid IdsHash where + mempty = IdsHash $ BS.replicate 16 0 + +queueIdHash :: QueueId -> IdsHash +queueIdHash = IdsHash . C.md5Hash . 
unEntityId +``` +Source: Protocol.hs:1501-1526 + +**Key property**: XOR is self-inverse, so `addServiceSubs` and `subtractServiceSubs` both use `<>` (XOR) for the hash component: +```haskell +addServiceSubs (n', idsHash') (n, idsHash) = (n + n', idsHash <> idsHash') +subtractServiceSubs (n', idsHash') (n, idsHash) + | n > n' = (n - n', idsHash <> idsHash') + | otherwise = (0, mempty) +``` +Source: Protocol.hs:1528-1534 + +### ServiceSub / ServiceSubResult / ServiceSubError + +Client-side types for comparing expected vs actual subscription state: +```haskell +data ServiceSub = ServiceSub + { smpServiceId :: ServiceId, + smpQueueCount :: Int64, + smpQueueIdsHash :: IdsHash } + +data ServiceSubResult = ServiceSubResult (Maybe ServiceSubError) ServiceSub + +data ServiceSubError + = SSErrorServiceId {expectedServiceId, subscribedServiceId :: ServiceId} + | SSErrorQueueCount {expectedQueueCount, subscribedQueueCount :: Int64} + | SSErrorQueueIdsHash {expectedQueueIdsHash, subscribedQueueIdsHash :: IdsHash} +``` +Source: Protocol.hs:1476-1499 + +`serviceSubResult` compares expected vs actual, returning the first mismatch (priority: serviceId > count > idsHash). + +### STMService (QueueStore) + +```haskell +data STMService = STMService + { serviceRec :: ServiceRec, + serviceRcvQueues :: TVar (Set RecipientId, IdsHash), + serviceNtfQueues :: TVar (Set NotifierId, IdsHash) } +``` +Source: QueueStore/STM.hs:64-68 + +Tracks the set of queue IDs and their cumulative XOR hash per service, per role (receive vs notify). + +## Transport layer: service handshake + +### Three-way handshake + +Standard SMP handshake is two messages: server sends `SMPServerHandshake`, client sends `SMPClientHandshake`. Service clients extend this to three messages: + +1. **Server -> Client**: `SMPServerHandshake` (standard, with session ID and auth key) +2. **Client -> Server**: `SMPClientHandshake` with `clientService :: Maybe SMPClientHandshakeService` +3. 
**Server -> Client**: `SMPServerHandshakeResponse {serviceId}` or `SMPServerHandshakeError {handshakeError}` + +Source: Transport.hs:752-791 (server), Transport.hs:796-848 (client) + +### SMPClientHandshakeService + +```haskell +data SMPClientHandshakeService = SMPClientHandshakeService + { serviceRole :: SMPServiceRole, + serviceCertKey :: CertChainPubKey } +``` +Source: Transport.hs:582-585 + +The `serviceCertKey` contains the TLS client certificate chain and a proof-of-possession: the service's Ed25519 session key signed by the service's X.509 signing key (`C.signX509 serviceSignKey $ C.publicToX509 k`). + +### Server-side validation (`getClientService`) + +1. Verify certificate chain matches TLS peer certificate: `getPeerCertChain c == cc` +2. Extract identity certificate and service key from chain +3. Verify signed session key: `C.verifyX509 serviceCertKey exact` +4. Compute fingerprint: `XV.getFingerprint idCert X.HashSHA256` +5. Call `getService` callback (QueueStore.getCreateService) to get/create ServiceId +6. Send `SMPServerHandshakeResponse {serviceId}` back to client + +Source: Transport.hs:775-791 + +### Client-side reception (`getClientService`) + +Client receives either `SMPServerHandshakeResponse {serviceId}` (success) or `SMPServerHandshakeError {handshakeError}` (failure). On success, stores `THClientService {serviceId, serviceRole, serviceCertHash, serviceKey}`. + +Source: Transport.hs:843-847 + +### Version-gated service role filtering (`mkClientService`) + +```haskell +mkClientService v (ServiceCredentials {serviceRole, ...}, (k, _)) + | serviceRole == SRMessaging && v < rcvServiceSMPVersion = Nothing + | otherwise = Just SMPClientHandshakeService {..} +``` +Source: Transport.hs:838-842 + +Messaging services are suppressed below v19. Notifier services are sent at v16+. 
+ +### ServiceCredentials (client-side persistent state) + +```haskell +data ServiceCredentials = ServiceCredentials + { serviceRole :: SMPServiceRole, + serviceCreds :: T.Credential, -- TLS certificate + private key + serviceCertHash :: XV.Fingerprint, + serviceSignKey :: C.APrivateSignKey } +``` +Source: Transport.hs:587-592 + +## Protocol layer: commands and messages + +### Commands + +| Command | Party | Entity | Auth | Description | +|---------|-------|--------|------|-------------| +| `SUB` | Recipient | QueueId | Queue key + optional service sig | Subscribe single queue; if service sig present, associates queue with service | +| `NSUB` | Notifier | NotifierId | Queue key + optional service sig | Subscribe single notifier; if service sig present, associates with service | +| `NEW` | Creator | NoEntity | Queue key + optional service sig | Create queue; if service sig present, associates at creation | +| `SUBS count idsHash` | RecipientService | ServiceId | Service session key | Bulk-subscribe all associated receive queues | +| `NSUBS count idsHash` | NotifierService | ServiceId | Service session key | Bulk-subscribe all associated notifier queues | + +### Double authenticator (`useServiceAuth`) + +Only `NEW`, `SUB`, and `NSUB` carry a service signature (when sent from a service connection): +```haskell +useServiceAuth = \case + Cmd _ (NEW _) -> True + Cmd _ SUB -> True + Cmd _ NSUB -> True + _ -> False +``` +Source: Protocol.hs:1737-1742 + +For these commands, `tEncodeAuth` appends both the primary queue key signature and an optional service Ed25519 signature. `SUBS`/`NSUBS` use the ServiceId as entity and are signed only by the service session key. 
+ +### Broker messages (responses) + +| Message | Fields | Description | +|---------|--------|-------------| +| `SOK` | `Maybe ServiceId` | Per-queue subscription success; `Just serviceId` when queue was associated with service | +| `SOKS` | `Int64, IdsHash` | Bulk subscription success; server's actual count and hash | +| `ALLS` | (none) | Marker: all buffered messages for this SUBS have been delivered | +| `END` | (none) | Per-queue subscription ended (another client subscribed) | +| `ENDS` | `Int64, IdsHash` | Service subscription ended (another service client took over); server's count and hash at takeover time | + +### Wire encoding (version-dependent) + +**SUBS/NSUBS encoding:** +``` +v >= 19: tag SP count idsHash +v < 19: tag (bare, no parameters) +``` +Source: Protocol.hs:1769-1771, 1787-1789 + +**SOKS/ENDS encoding:** +``` +v >= 19: tag SP count idsHash +v < 19: tag SP count (no idsHash) +``` +Source: Protocol.hs:1951-1953 + +**SOKS/ENDS decoding:** +``` +v >= 19: tag -> resp <$> _smpP <*> smpP (count + idsHash) +v < 19: tag -> resp <$> _smpP <*> pure mempty (count only, mempty hash) +``` +Source: Protocol.hs:1996-1998 + +## Server layer + +### Client state (Env/STM.hs) + +Each connected client tracks: +```haskell +data Client s = Client + { ... + serviceSubscribed :: TVar Bool, -- has SUBS been received? + ntfServiceSubscribed :: TVar Bool, -- has NSUBS been received? + serviceSubsCount :: TVar (Int64, IdsHash), -- running (count, hash) for receive queues + ntfServiceSubsCount :: TVar (Int64, IdsHash), -- running (count, hash) for notifier queues + ... 
} +``` +Source: Env/STM.hs:437-456 + +Server-global state: +```haskell +data ServerSubscribers s = ServerSubscribers + { subQ :: TQueue (ClientSub, ClientId), + queueSubscribers :: SubscribedClients s, -- per-queue lookup + serviceSubscribers :: SubscribedClients s, -- per-service lookup + totalServiceSubs :: TVar (Int64, IdsHash), -- global service sub count + subClients :: TVar IntSet, + pendingEvents :: TVar (IntMap (NonEmpty (EntityId, BrokerMsg))) } +``` +Source: Env/STM.hs:362-369 + +### ClientSub events + +```haskell +data ClientSub + = CSClient QueueId (Maybe ServiceId) (Maybe ServiceId) -- prev and new service IDs + | CSDeleted QueueId (Maybe ServiceId) -- prev service ID + | CSService ServiceId (Int64, IdsHash) -- service subscription change +``` +Source: Env/STM.hs:426-429 + +These are enqueued into `subQ` and processed by `serverThread` (the subscription event loop). + +### SUBS command flow + +``` +Client sends SUBS count idsHash + | + v +subscribeServiceMessages(serviceId, (count, idsHash)) Server.hs:1800 + | + +-- sharedSubscribeService(SRecipientService, ...) 
Server.hs:1849 + | | + | +-- If already subscribed: return cached (count, hash) + | | + | +-- First time: + | +-- getServiceQueueCountHash(party, serviceId) QueueStore + | | -> returns server's actual (count', idsHash') + | | + | +-- atomically: + | | writeTVar clientServiceSubscribed True + | | writeTVar clientServiceSubs (count', idsHash') + | | + | +-- Compute drift stats: + | | count == -1 && match -> srvSubOk++ (old NTF server) + | | diff > 0 -> srvSubMore++ (server has more) + | | diff < 0 -> srvSubFewer++ (server has fewer) + | | otherwise -> srvSubDiff++ (count match, hash mismatch) + | | + | +-- Enqueue CSService event to subQ + | + +-- If not already subscribed: + | fork "deliverServiceMessages" Server.hs:1806 + | | + | +-- foldRcvServiceMessages(serviceId, deliverQueueMsg, acc) + | | MsgStore + | +-- For each queue in service: + | | +-- Read queue record + first pending message + | | +-- Call deliverQueueMsg(acc, rId, result) Server.hs:1822 + | | | + | | +-- Error -> accumulate ERR + | | +-- No message -> skip + | | +-- Has message: + | | +-- getSubscription(rId) Server.hs:1835 + | | | If sub exists -> Nothing (skip, already delivering) + | | | Else -> create new Sub, insert in subscriptions + | | +-- setDelivered sub msg + | | +-- writeTBQueue msgQ [(corrId, rId, MSG ...)] + | | + | +-- After fold: write ALLS to msgQ + | + +-- Return SOKS count' idsHash' +``` + +### Per-queue SUB with service association + +`sharedSubscribeQueue` handles four cases (Server.hs:1738-1798): + +**Case 1: Service client, queue already associated with this service** (`queueServiceId == Just serviceId`) +- Duplicate association (retry after timeout/error) +- If no service sub exists yet, increment service queue count and enqueue CSClient +- Stats: `srvAssocDuplicate++` + +**Case 2: Service client, queue not yet associated** (new or different service) +- Call `setQueueService(queue, party, Just serviceId)` to update QueueStore +- Increment client's `serviceSubsCount` by `(1, 
queueIdHash rId)` +- Enqueue CSClient event +- Stats: `srvAssocNew++` or `srvAssocUpdated++` + +**Case 3: Non-service client, queue has service association** (downgrade) +- Call `setQueueService(queue, party, Nothing)` to remove association +- Stats: `srvAssocRemoved++` +- Create normal per-queue subscription + +**Case 4: Non-service client, no service association** (standard SUB) +- Create/return per-queue subscription as normal + +### Message delivery for service queues + +When a new message arrives for a queue (`tryDeliverMessage`, Server.hs:1985-2024): + +```haskell +getSubscribed = case rcvServiceId qr of + Just serviceId -> getSubscribedClient serviceId $ serviceSubscribers subscribers + Nothing -> getSubscribedClient rId $ queueSubscribers subscribers +``` + +If the queue has `rcvServiceId`, the server looks up the subscriber in `serviceSubscribers` (by ServiceId) rather than `queueSubscribers` (by QueueId). + +**On-demand Sub creation** (`newServiceDeliverySub`, Server.hs:2019-2024): When a message arrives for a service queue but no `Sub` exists in the client's `subscriptions` TMap, one is created on the fly. This handles messages arriving after SUBS but before the fold reaches that queue. 
+ +### serverThread subscription event loop + +`serverThread` (Server.hs:250-351) processes `ClientSub` events from `subQ`: + +**CSClient** (per-queue subscription): +- If service association changed: end previous service subscription for that queue +- If new service: increment `totalServiceSubs`, end any per-queue subscriber, cancel previous service subscriber +- If no service: standard per-queue upsert + +**CSDeleted** (queue deletion): +- End both queue and service subscriptions + +**CSService** (bulk SUBS): +- Subtract changed subs from `totalServiceSubs` (because the client already has them counted) +- Cancel previous service subscriber for this ServiceId (sends ENDS to old client) + +**Service takeover** (`cancelServiceSubs`, Server.hs:317-321): +When a new service client subscribes (same ServiceId), the previous client's service subs are zeroed out: +```haskell +cancelServiceSubs serviceId = checkAnotherClient $ \c -> do + changedSubs@(n, idsHash) <- swapTVar (clientServiceSubs c) (0, mempty) + pure [(c, CSADecreaseSubs changedSubs, (serviceId, ENDS n idsHash))] +``` +The previous client receives `ENDS count idsHash`. + +### Client disconnect cleanup + +`clientDisconnected` (Server.hs:1090-1121): +1. Set `connected = False` +2. Swap out all subscriptions and ntf subscriptions (clear TMap) +3. Cancel per-queue Subs +4. Update `queueSubscribers` (delete per-queue entries) and `serviceSubscribers` (delete service entry) +5. Subtract client's `serviceSubsCount` from `totalServiceSubs` +6. Kill delivery threads + +**Queue-service associations persist**: Only live subscription state is cleaned up. The `rcvServiceId` field on `QueueRec` and the `STMService` queue sets survive disconnect. On reconnection, `SUBS` resubscribes without re-associating. + +### Notification service subscription (`NSUBS`) + +`subscribeServiceNotifications` (Server.hs:1845-1847) is a thin wrapper around `sharedSubscribeService` with `SNotifierService` party.
Unlike `SUBS`, it does NOT fork a delivery thread -- notification delivery is handled by the separate `deliverNtfsThread`. + +`deliverNtfsThread` (Server.hs:353) periodically scans `subClients` (which includes service subscribers) and delivers pending notifications. + +## QueueStore layer + +### getCreateService + +Lookup by certificate fingerprint; create if not found (Server/QueueStore/STM.hs:284-310): +1. `TM.lookup fp serviceCerts` -- fast IO lookup +2. If miss: STM transaction to double-check and create +3. If hit: verify service role matches; error `SERVICE` on role mismatch +4. On new service: log via store log + +### setQueueService + +Updates the `rcvServiceId` (or `ntfServiceId`) field on a `QueueRec` and maintains the service's queue set (Server/QueueStore/STM.hs:312-338): +1. Read queue record +2. If same service -> no-op +3. If different: `removeServiceQueue` from old, `addServiceQueue` to new +4. Update `QueueRec` in-place + +### addServiceQueue / removeServiceQueue + +Both use `setServiceQueues_` which XORs the queue's `queueIdHash` into the service's running hash (Server/QueueStore/STM.hs:383-398): +```haskell +update (s, idsHash) = + let !s' = updateSet qId s -- Set insert/delete + !idsHash' = queueIdHash qId <> idsHash -- XOR (self-inverse) + in (s', idsHash') +``` + +## Test coverage + +### Existing tests (ServerTests.hs) + +| Test | Lines | What it covers | +|------|-------|----------------| +| `testServiceDeliverSubscribe` | 682-742 | Create queue as service, reconnect, SUBS, message delivery, ALLS | +| `testServiceUpgradeAndDowngrade` | 744-859 | Regular SUB -> service SUB -> SUBS -> downgrade back to regular SUB | +| `testMessageServiceNotifications` | 1313-1388 | NSUB with service, service takeover (ENDS), NSUBS bulk subscribe | +| `testServiceNotificationsTwoRestarts` | 1390-1434 | NSUBS persistence across two server restarts | + +### Test gaps + +| Gap | Severity | Description | +|-----|----------|-------------| +| **TG-SVC-01** | High | 
No concurrent SUBS + regular SUB on same queue -- race between fold delivery and per-queue subscription | +| **TG-SVC-02** | High | No queue deletion during SUBS fold -- what happens when a queue is deleted mid-fold? | +| **TG-SVC-03** | Medium | No duplicate SUBS test -- what if client sends SUBS twice? (code returns cached count) | +| **TG-SVC-04** | Medium | No drift detection verification -- no test checks that stats are actually logged on count/hash mismatch | +| **TG-SVC-05** | Medium | No SUBS with 0 queues -- edge case where service has no associated queues | +| **TG-SVC-06** | Medium | No concurrent message delivery during fold -- messages sent while fold is in progress | +| **TG-SVC-07** | Low | No large-scale test -- fold performance with 10k+ queues | +| **TG-SVC-08** | Low | No test for `subtractServiceSubs` underflow (`n <= n'` -> `(0, mempty)`) | + +## Security invariants + +| ID | Invariant | Enforced by | Test | +|----|-----------|-------------|------| +| **SI-SVC-01** | Service certificate must match TLS peer certificate | `getClientService`: `getPeerCertChain c == cc` | Implicit in all service tests | +| **SI-SVC-02** | Service session key proof-of-possession: signed by X.509 key | `C.verifyX509 serviceCertKey exact` in `getClientService` | Implicit | +| **SI-SVC-03** | Only NEW, SUB, NSUB carry service signature | `useServiceAuth` pattern match | testServiceDeliverSubscribe (ERR SERVICE on unsigned) | +| **SI-SVC-04** | SUBS/NSUBS require service session key, not queue key | Entity is ServiceId, auth is service key | testServiceDeliverSubscribe (ERR CMD NO_AUTH on wrong key) | +| **SI-SVC-05** | Service role mismatch rejected | `getCreateService`: role check -> `Left SERVICE` | testServiceDeliverSubscribe (ERR SERVICE on wrong role) | +| **SI-SVC-06** | Non-service client cannot send SUBS | `ERR SERVICE` when no service handshake | testServiceUpgradeAndDowngrade (ERR SERVICE on plain client) | +| **SI-SVC-07** | Queue-service associations 
persist across disconnect | `clientDisconnected` only clears live state | testServiceNotificationsTwoRestarts | +| **SI-SVC-08** | Service takeover sends ENDS to previous client | `cancelServiceSubs` -> ENDS | testMessageServiceNotifications | +| **SI-SVC-09** | Drift is informational only -- server never rejects | `sharedSubscribeService` logs stats, always returns subs | No direct test (TG-SVC-04) | + +## Identified risks + +| ID | Risk | Severity | Description | +|----|------|----------|-------------| +| **R-SVC-01** | Postgres fold full table scan | High | `foldRcvServiceMessages` (Postgres.hs:127-139) uses `ROW_NUMBER() OVER (PARTITION BY recipient_id ORDER BY message_id ASC)` as a subquery joined to `msg_queues`. This window function scans the **entire `messages` table** before filtering. For a service with 100k+ queues and millions of messages, this query can be very slow. The STM backend iterates an in-memory Set (fast), and the Journal backend uses per-queue file locks (moderate). Only the Postgres path has this scaling problem. Consider rewriting to use a lateral join or per-queue subquery to avoid the full-table window. | +| **R-SVC-02** | `totalServiceSubs` accounting drift | Low | `totalServiceSubs` is incremented by `serverThread` when processing CSClient events (line 281), but `clientDisconnected` subtracts the full `clientServiceSubs` (line 1120) which was eagerly updated by `sharedSubscribeQueue`. If CSClient events are still pending in `subQ` at disconnect time, `totalServiceSubs` is decremented for increments that never happened, causing negative drift. `totalServiceSubs` is never read for any decision (only written), so this is cosmetic. Resets on server restart. Consider periodic reconciliation or removing the counter if unused. | +| **R-SVC-03** | Fold thread continues after service takeover | Needs analysis | When a second service client connects (same cert), `cancelServiceSubs` sends ENDS to the old client. 
But the old client's `deliverServiceMessages` fold thread (forked via `forkClient`, tracked in `endThreads`) keeps running -- it writes MSG to the old client's `msgQ` (captured in closure). The old client receives and can ACK these messages. After ALLS the thread exits. New messages route to the new client via `tryDeliverMessage`. Questions: (1) Can the old client's ACKs interfere with the new client's subscription state? (2) If the old client disconnects mid-fold, `clientDisconnected` kills the fold thread (line 1111) -- are partially-delivered Subs cleaned up correctly? (3) Could the fold's `getSubscription` (which inserts into old client's `subscriptions`) conflict with the old client's subscription TMap being swapped out by `clientDisconnected`? | +| **R-SVC-04** | Cert rotation = full re-association | Medium (operational) | `getCreateService` maps cert fingerprint -> ServiceId. A new cert = new fingerprint = new ServiceId. All existing queue associations remain on the old ServiceId. The service must re-SUB every queue with the new service signature -- O(n), exactly the cost SUBS was designed to avoid. Old fingerprint->ServiceId mappings remain in memory/DB (no GC). For a notification server with millions of queues, cert rotation means a full re-association storm. | +| **R-SVC-05** | Fold blocking | Low | `foldRcvServiceMessages` iterates all service queues sequentially, reading queue records and first messages. For services with many queues, this could take significant time. It runs in a forked thread, so it doesn't block the client's command processing, but the ALLS marker is delayed. No progress signal between SOKS and ALLS -- client doesn't know how many messages to expect. | +| **R-SVC-06** | XOR hash collision | Very Low | IdsHash uses XOR of MD5 hashes. XOR is commutative and associative, so different queue sets with the same XOR-combined hash would not be detected. 
Given 16-byte hashes, collision probability is negligible for realistic queue counts, but the hash provides no ordering information. | +| **R-SVC-07** | Count underflow in subtractServiceSubs | Very Low | If `n <= n'`, the function returns `(0, mempty)` -- a full reset. This is a defensive fallback but could mask accounting errors. | + +### Considered and dismissed + +- **Fold-delivery race**: Both the fold's `getSubscription` (Server.hs:1828) and `newServiceDeliverySub` (Server.hs:1999-2023) operate on the same `subscriptions clnt` TMap within `atomically` blocks. STM serialization ensures at most one creates the Sub; the other sees it and skips. No race exists. +- **Sub accumulation during fold**: Each service queue with a pending message gets a Sub created in the client's `subscriptions` TMap. This is necessary and correct -- the Sub holds the `delivered` TVar for ACK verification and `subThread` for delivery state. Without per-queue Subs the server cannot track what was delivered or verify ACKs. Subs are cleaned on ACK or disconnect. +- **Store log replay ordering**: `writeQueueStore` writes all services before queues. `addQueue_` (QueueStore/STM.hs:119-132) calls `addServiceQueue` when `rcvServiceId` is present in QueueRec, so snapshot replay correctly rebuilds STMService queue sets. Incremental `QueueService` log entries are always preceded by `NewService` because the handshake (which creates the service) happens before SUB (which associates queues). No ordering issue. 
+ +--- + +## SMP Client layer (Client.hs) + +### Service subscription command + +```haskell +subscribeService :: (PartyI p, ServiceParty p) => SMPClient -> SParty p -> Int64 -> IdsHash -> ExceptT SMPClientError IO ServiceSub +subscribeService c party n idsHash = case smpClientService c of + Just THClientService {serviceId, serviceKey} -> do + sendSMPCommand c NRMBackground (Just (C.APrivateAuthKey C.SEd25519 serviceKey)) serviceId subCmd >>= \case + SOKS n' idsHash' -> pure $ ServiceSub serviceId n' idsHash' + r -> throwE $ unexpectedResponse r + where subCmd = case party of + SRecipientService -> SUBS n idsHash + SNotifierService -> NSUBS n idsHash + Nothing -> throwE PCEServiceUnavailable +``` +Source: Client.hs:921-934 + +Entity is `serviceId`, auth key is the service session key (Ed25519). The client passes its expected count and hash; the server returns its own. + +### Per-queue SUB with service + +`subscribeSMPQueue` (Client.hs:843-846) and `subscribeSMPQueues` (Client.hs:850-855) send `SUB` commands. The response handler `processSUBResponse_` (Client.hs:867-872) accepts both `OK` (no service) and `SOK serviceId_` (service-associated). + +`nsubResponse_` (Client.hs:914-918) does the same for `NSUB`. + +### Dual signature scheme (`authTransmission`) + +When `serviceAuth = True` and `useServiceAuth` returns True for the command (Client.hs:1385-1403): + +1. The entity key signs over `serviceCertHash || transmission` (not just transmission) +2. The service key signs over `transmission` alone + +This prevents MITM service substitution inside TLS: an attacker cannot replace the service certificate hash without invalidating the entity key signature. 
+ +```haskell +(t', serviceSig) = case clientService =<< thAuth of + Just THClientService {serviceCertHash = XV.Fingerprint fp, serviceKey} | serviceAuth -> + (fp <> t, Just $ C.sign' serviceKey t) + _ -> (t, Nothing) +``` +Source: Client.hs:1398-1401 + +### Service runtime accessors + +```haskell +smpClientService :: SMPClient -> Maybe THClientService +smpClientService = thAuth . thParams >=> clientService + +smpClientServiceId :: SMPClient -> Maybe ServiceId +smpClientServiceId = fmap (\THClientService {serviceId} -> serviceId) . smpClientService +``` +Source: Client.hs:936-942 + +### Configuration + +`ProtocolClientConfig` (Client.hs:466-483) carries `serviceCredentials :: Maybe ServiceCredentials`. On handshake, the client generates a fresh Ed25519 key pair per connection and signs it with the service's X.509 key (via `mkClientService`). + +`serviceAuth` flag is set to `thVersion >= serviceCertsSMPVersion` (Client.hs:230), enabling dual signatures on v16+ connections for the commands where `useServiceAuth` returns True. + +## Agent layer + +### Agent events + +Four service-specific events (Agent/Protocol.hs:401-404): + +| Event | Payload | When | +|-------|---------|------| +| `SERVICE_UP` | `SMPServer, ServiceSubResult` | SUBS succeeded; carries drift info | +| `SERVICE_DOWN` | `SMPServer, ServiceSub` | Server disconnected while service was subscribed | +| `SERVICE_ALL` | `SMPServer` | ALLS received — all buffered messages delivered | +| `SERVICE_END` | `SMPServer, ServiceSub` | ENDS received — another service client took over | + +### Service subscription flow (`Agent/Client.hs`) + +``` +subscribeClientService(c, withEvent, userId, srv, serviceSub) Client.hs:1743 + | + +-- withServiceClient(c, tSess, ...) 
Client.hs:1752 + | | + | +-- Get SMPClient for tSess + | +-- Check smpClientServiceId is Just -> smpServiceId + | + +-- setPendingServiceSub(tSess, serviceSub, currentSubs) TSessionSubs + | + +-- subscribeClientService_(c, withEvent, tSess, smp, serviceSub) Client.hs:1760 + | + +-- subscribeService smp SRecipientService n idsHash -> ServiceSub + +-- serviceSubResult expected subscribed -> ServiceSubResult + +-- atomically: setActiveServiceSub(tSess, sessId, subscribed) + +-- if withEvent: notify SERVICE_UP srv result +``` + +### Reconnection / resubscription (`Agent/Client.hs:1727-1740`) + +On service subscription failure during resubscription: +- `SSErrorServiceId` (server returned different ServiceId): fall back to `unassocSubscribeQueues` — removes all service associations for this server and resubscribes queues individually +- `clientServiceError`: same fallback +- Other errors: propagated + +### Startup subscription (`Agent.hs:1622-1641`) + +At agent startup, `subscribeService` is called in parallel per server. On `SSErrorServiceId` or `SSErrorQueueCount {n > 0, n' == 0}` (service exists but has no queues): falls back to unassociating queues and resubscribing individually. + +### Server disconnection (`Agent/Client.hs:787-800`) + +`serverDown` emits `SERVICE_DOWN`, then resubscribes: +- If session mode matches: full `resubscribeSMPSession` +- Otherwise: `resubscribeClientService` for service, then `subscribeQueues` for individual queues + +## TSessionSubs (Agent/TSessionSubs.hs) + +Per-session subscription state tracking, ~264 lines. 
+ +```haskell +data SessSubs = SessSubs + { subsSessId :: TVar (Maybe SessionId), + activeSubs :: TMap RecipientId RcvQueueSub, + pendingSubs :: TMap RecipientId RcvQueueSub, + activeServiceSub :: TVar (Maybe ServiceSub), + pendingServiceSub :: TVar (Maybe ServiceSub) } +``` +Source: TSessionSubs.hs:59-65 + +Key operations: +- `setPendingServiceSub`: stores expected ServiceSub before SUBS is sent +- `setActiveServiceSub`: promotes to active after SOKS, validates session ID +- `updateActiveService`: increments count/hash when per-queue SUBs with service signature succeed (used by `Client/Agent.hs` when individual SUBs return `SOK(Just serviceId)`) +- `deleteServiceSub`: clears both active and pending (on ENDS) + +## Agent Store (AgentStore.hs) + +### `client_services` table + +```sql +CREATE TABLE client_services( + user_id INTEGER NOT NULL REFERENCES users ON DELETE CASCADE, + host TEXT NOT NULL, port TEXT NOT NULL, + server_key_hash BLOB, + service_cert BLOB NOT NULL, + service_cert_hash BLOB NOT NULL, + service_priv_key BLOB NOT NULL, + service_id BLOB, -- assigned by server, NULL until first handshake + service_queue_count INTEGER NOT NULL DEFAULT 0, + service_queue_ids_hash BLOB NOT NULL DEFAULT x'00000000000000000000000000000000' +); +``` +Source: Agent/Store/SQLite/Migrations/M20260115_service_certs.hs:11-23 + +### `rcv_queues.rcv_service_assoc` + +Boolean column added to `rcv_queues`. When set, the queue is associated with the service for this server. SQLite triggers automatically maintain `service_queue_count` and `service_queue_ids_hash` on insert/delete/update of `rcv_queues` rows. + +Triggers: `tr_rcv_queue_insert`, `tr_rcv_queue_delete`, `tr_rcv_queue_update_remove`, `tr_rcv_queue_update_add` (same migration file, lines 30-76). All use `simplex_xor_md5_combine` — the SQLite equivalent of Haskell's `queueIdHash <>`. 
+ +### Key CRUD operations + +| Function | What it does | +|----------|--------------| +| `getClientServiceCredentials` | Load cert + key for a server; returns `Maybe ((KeyHash, TLS.Credential), Maybe ServiceId)` | +| `getSubscriptionService` | Load `ServiceSub` (serviceId, count, hash) for reconnection | +| `setClientServiceId` | Store ServiceId after first handshake | +| `setRcvServiceAssocs` | Mark queues as service-associated (sets `rcv_service_assoc = 1`) | +| `removeRcvServiceAssocs` | Remove service association for all queues on a server | +| `unassocUserServerRcvQueueSubs` | Remove association and return queues for re-subscription | + +Source: AgentStore.hs:419-494, 2378-2414 + +### Service ID nullification on cert change + +`INSERT ... ON CONFLICT DO UPDATE SET ... service_id = NULL` (AgentStore.hs:429) — when service credentials are updated (new cert), the stored `service_id` is cleared, forcing a new handshake to get a fresh ServiceId. + +## Notification server (Notifications/Server.hs) + +The NTF server is the primary consumer of service certificates for `SRNotifier` role. + +### Configuration + +`NtfServerConfig.useServiceCreds :: Bool` (Env.hs:80) — controls whether the NTF server uses service certificates for SMP subscriptions. + +### Credential generation + +On first use per SMP server, `mkDbService` (Env.hs:126-142) generates a self-signed TLS certificate (valid ~2400 days) and stores it in the `smp_servers` table. The cert is reused across connections to the same SMP server. + +### Startup subscription + +`subscribeSrvSubs` (Server.hs:460-481): +1. If service credentials exist: send NSUBS first (one command for all associated queues) +2. 
Then subscribe remaining individual queues in batches via `subscribeQueuesNtfs` + +### Event handling + +| Event | Handler | +|-------|---------| +| `CAServiceSubscribed` | Log count/hash match or mismatch | +| `CAServiceDisconnected` | Log disconnection | +| `CAServiceSubError` | Log error (non-fatal; fatal errors go to `CAServiceUnavailable`) | +| `CAServiceUnavailable` | **Critical recovery path**: calls `removeServiceAndAssociations`, wipes service creds, resubscribes all queues individually | + +Source: Server.hs:567-602 + +### `removeServiceAndAssociations` (Store/Postgres.hs:620-652) + +Nuclear recovery: clears `ntf_service_id`, `ntf_service_cert*`, resets `smp_notifier_count`/`smp_notifier_ids_hash`, and removes all `ntf_service_assoc` flags from subscriptions. Used when the service subscription is irrecoverably broken (e.g., ServiceId mismatch after cert rotation). + +### NTF Postgres schema + +The `smp_servers` table stores per-SMP-server state: +- `ntf_service_id`, `ntf_service_cert`, `ntf_service_cert_hash`, `ntf_service_priv_key` — service identity +- `smp_notifier_count`, `smp_notifier_ids_hash` — maintained by Postgres triggers on the `subscriptions` table + +Triggers use `xor_combine` (Postgres equivalent of XOR hash combine) and fire on `ntf_service_assoc` changes. 
+ +## Agent test coverage + +### Existing tests + +| Test | File | What it covers | +|------|------|----------------| +| `testMigrateToServiceSubscriptions` | AgentTests/NotificationTests.hs:930-1016 | Full lifecycle: no service -> enable service (creates association) -> use service (NSUBS) -> disable service (downgrade to individual) -> re-enable | + +### Additional test gaps (Phase 3.0b) + +| Gap | Severity | Description | +|-----|----------|-------------| +| **TG-SVC-09** | Medium | No agent-level test for `SSErrorServiceId` recovery — the `unassocQueues` fallback path | +| **TG-SVC-10** | Medium | No agent-level test for concurrent reconnection — service resubscription racing with individual queue resubscription | +| **TG-SVC-11** | Medium | No test for `SERVICE_END` agent event handling — what does the agent do after receiving ENDS? | +| **TG-SVC-12** | Low | No test for SQLite trigger correctness — verifying `service_queue_count`/`service_queue_ids_hash` match expected values after insert/delete/update cycles | From ea2a62ab7e740515ac71546526eb0b246b15ef3f Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 07:32:57 +0000 Subject: [PATCH 24/91] more specs --- spec/compression.md | 81 ++++++++++- spec/encoding.md | 341 +++++++++++++++++++++++++++++++++++++++++++- spec/version.md | 194 ++++++++++++++++++++++++- 3 files changed, 609 insertions(+), 7 deletions(-) diff --git a/spec/compression.md b/spec/compression.md index 7e457438b9..faa8c275fb 100644 --- a/spec/compression.md +++ b/spec/compression.md @@ -1,7 +1,84 @@ # Compression -> Compression support for SimpleX protocols. +> Zstd compression for SimpleX protocol messages. -## Zstd +**Source file**: [`Compression.hs`](../src/Simplex/Messaging/Compression.hs) + +## Overview + +Optional Zstd compression for SMP message bodies. Short messages bypass compression entirely to avoid overhead. 
The `Compressed` type carries a tag byte indicating whether the payload is compressed or passthrough, making it self-describing on the wire. + +## Types + +### `Compressed` + +**Source**: `Compression.hs:17-22` + +```haskell +data Compressed + = Passthrough ByteString -- short messages, left intact + | Compressed Large -- Zstd-compressed, 2-byte length prefix +``` + +Wire encoding (`Compression.hs:30-38`): + +``` +Passthrough → '0' ++ smpEncode ByteString (1-byte tag + 1-byte length + data) +Compressed → '1' ++ smpEncode Large (1-byte tag + 2-byte length + data) +``` + +Tags are `'0'` (0x30) and `'1'` (0x31) — same ASCII convention as `Maybe` encoding. + +`Passthrough` uses standard `ByteString` encoding (max 255 bytes, 1-byte length prefix). `Compressed` uses `Large` encoding (max 65535 bytes, 2-byte Word16 length prefix), since compressed output can exceed 255 bytes for larger inputs. + +## Constants + +| Constant | Value | Purpose | Source | +|----------|-------|---------|--------| +| `maxLengthPassthrough` | 180 | Messages at or below this length are not compressed | `Compression.hs:24-25` | +| `compressionLevel` | 3 | Zstd compression level | `Compression.hs:27-28` | + +The 180-byte threshold was "sampled from real client data" — messages above this length show rapidly increasing compression ratio. At or below 180 bytes, compression overhead (FFI call, dictionary-less Zstd startup) outweighs savings. ## Functions + +### `compress1` + +**Source**: `Compression.hs:40-43` + +```haskell +compress1 :: ByteString -> Compressed +``` + +Compress a message body: +- If `B.length bs <= 180` → `Passthrough bs` +- Otherwise → `Compressed (Large (Z1.compress 3 bs))` + +No context or dictionary — each message is independently compressed ("1" in `compress1` refers to single-shot compression). 
+ +### `decompress1` + +**Source**: `Compression.hs:45-53` + +```haskell +decompress1 :: Int -> Compressed -> Either String ByteString +``` + +Decompress with size limit: +- `Passthrough bs` → `Right bs` (no check needed — already bounded by encoding) +- `Compressed (Large bs)` → check `Z1.decompressedSize bs`: + - If size is known and within `limit` → decompress + - If size unknown or exceeds `limit` → `Left` error + +The size limit check happens **before** decompression, using Zstd's frame header (which includes the decompressed size when the compressor wrote it). This prevents decompression bombs — an attacker cannot cause unbounded memory allocation by sending a small compressed payload that expands to gigabytes. + +The `Z1.decompress` result is pattern-matched for three cases: +- `Z1.Error e` → `Left e` +- `Z1.Skip` → `Right mempty` (zero-length output) +- `Z1.Decompress bs'` → `Right bs'` + +## Security notes + +- **Decompression bomb protection**: `decompress1` requires an explicit size limit and checks `decompressedSize` before allocating. Callers must pass an appropriate limit (typically the SMP block size). +- **No dictionary/context**: Each message is independently compressed. No shared state between messages that could leak information across compression boundaries. +- **Passthrough for short messages**: Messages ≤ 180 bytes are never compressed, avoiding timing side channels from compression ratio differences on short, potentially-predictable messages. diff --git a/spec/encoding.md b/spec/encoding.md index 2b8dded012..3a4fdcd275 100644 --- a/spec/encoding.md +++ b/spec/encoding.md @@ -2,10 +2,345 @@ > Binary and string encoding used across all SimpleX protocols. 
-## Binary Encoding +**Source files**: [`Encoding.hs`](../src/Simplex/Messaging/Encoding.hs), [`Encoding/String.hs`](../src/Simplex/Messaging/Encoding/String.hs), [`Parsers.hs`](../src/Simplex/Messaging/Parsers.hs) -## String Encoding +## Overview + +Two encoding layers serve different purposes: + +- **`Encoding`** — Binary wire format for SMP protocol transmissions. Compact, no delimiters between fields. Used in all on-the-wire protocol messages. +- **`StrEncoding`** — Human-readable string format for configuration, URIs, logs, and JSON serialization. Uses base64url for binary data, decimal for numbers, comma-separated lists, space-separated tuples. + +Both are typeclasses with `MINIMAL` pragmas requiring `encode` + (`decode` | `parser`), with the missing one derived from the other. + +## Binary Encoding (`Encoding` class) + +**Source**: `Encoding.hs:38-52` + +```haskell +class Encoding a where + smpEncode :: a -> ByteString + smpDecode :: ByteString -> Either String a -- default: parseAll smpP + smpP :: Parser a -- default: smpDecode <$?> smpP +``` + +### Length-prefix conventions + +| Type | Prefix | Max size | Source | +|------|--------|----------|--------| +| `ByteString` | 1-byte length (Word8 as Char) | 255 bytes | `Encoding.hs:102-106` | +| `Large` (newtype) | 2-byte length (Word16 big-endian) | 65535 bytes | `Encoding.hs:135-143` | +| `Tail` (newtype) | None — consumes rest of input | Unlimited | `Encoding.hs:126-132` | +| Lists (`smpEncodeList`) | 1-byte count prefix, then concatenated items | 255 items | `Encoding.hs:155-159` | +| `NonEmpty` | Same as list (fails on count=0) | 255 items | `Encoding.hs:173-178` | + +### Scalar types + +| Type | Encoding | Bytes | Source | +|------|----------|-------|--------| +| `Char` | Raw byte | 1 | `Encoding.hs:54-58` | +| `Bool` | `'T'` / `'F'` (0x54 / 0x46) | 1 | `Encoding.hs:60-70` | +| `Word16` | Big-endian | 2 | `Encoding.hs:72-76` | +| `Word32` | Big-endian | 4 | `Encoding.hs:78-82` | +| `Int64` | Two big-endian 
Word32s (high then low) | 8 | `Encoding.hs:84-99` | +| `SystemTime` | `systemSeconds` as Int64 (nanoseconds dropped) | 8 | `Encoding.hs:145-149` | +| `Text` | UTF-8 then ByteString encoding (1-byte length prefix) | 1 + len | `Encoding.hs:161-165` | +| `String` | `B.pack` then ByteString encoding | 1 + len | `Encoding.hs:167-171` | + +### `Maybe a` + +**Source**: `Encoding.hs:116-124` + +``` +Nothing → '0' (0x30) +Just x → '1' (0x31) ++ smpEncode x +``` + +Tags are ASCII characters `'0'`/`'1'`, not binary 0x00/0x01. + +### Tuples + +**Source**: `Encoding.hs:180-220` + +Tuples (2 through 8) encode as simple concatenation — no length prefix, no separator. Fields are parsed sequentially using each component's `smpP`. This works because each component's parser knows how many bytes to consume (via its own length prefix or fixed size). + +### Combinators + +| Function | Signature | Purpose | Source | +|----------|-----------|---------|--------| +| `_smpP` | `Parser a` | Space-prefixed parser (`A.space *> smpP`) | `Encoding.hs:151-152` | +| `smpEncodeList` | `[a] -> ByteString` | 1-byte count + concatenated items | `Encoding.hs:155-156` | +| `smpListP` | `Parser [a]` | Parse count then that many items | `Encoding.hs:158-159` | +| `lenEncode` | `Int -> Char` | Int to single-byte length char | `Encoding.hs:108-110` | + +## String Encoding (`StrEncoding` class) + +**Source**: `Encoding/String.hs:56-67` + +```haskell +class StrEncoding a where + strEncode :: a -> ByteString + strDecode :: ByteString -> Either String a -- default: parseAll strP + strP :: Parser a -- default: strDecode <$?> base64urlP +``` + +Key difference from `Encoding`: the default `strP` parses base64url input first, then applies `strDecode`. This means types that only implement `strDecode` will automatically accept base64url-encoded input. 
+ +### Instance conventions + +| Type | Encoding | Source | +|------|----------|--------| +| `ByteString` | base64url (non-empty required) | `String.hs:70-76` | +| `Word16`, `Word32` | Decimal string | `String.hs:114-124` | +| `Int`, `Int64` | Signed decimal | `String.hs:138-148` | +| `Char`, `Bool` | Delegates to `Encoding` (`smpEncode`/`smpP`) | `String.hs:126-136` | +| `Maybe a` | Empty string = `Nothing`, otherwise `strEncode a` | `String.hs:108-112` | +| `Text` | UTF-8 bytes, parsed until space/newline | `String.hs:97-99` | +| `SystemTime` | `systemSeconds` as Int64 (decimal) | `String.hs:150-152` | +| `UTCTime` | ISO 8601 string | `String.hs:154-156` | +| `CertificateChain` | Comma-separated base64url blobs | `String.hs:158-162` | +| `Fingerprint` | base64url of fingerprint bytes | `String.hs:164-168` | + +### Collection encoding + +| Type | Separator | Source | +|------|-----------|--------| +| Lists (`strEncodeList`) | Comma `,` | `String.hs:171-175` | +| `NonEmpty` | Comma (fails on empty) | `String.hs:178-180` | +| `Set a` | Comma | `String.hs:182-184` | +| `IntSet` | Comma | `String.hs:186-188` | +| Tuples (2-6) | Space (` `) | `String.hs:193-221` | + +### `Str` newtype + +**Source**: `String.hs:84-89` + +Raw string (not base64url-encoded). Parses until space, consumes trailing space. Used for string-valued protocol fields that should not be base64-encoded. + +### `TextEncoding` class + +**Source**: `String.hs:51-53` + +```haskell +class TextEncoding a where + textEncode :: a -> Text + textDecode :: Text -> Maybe a +``` + +Separate from `StrEncoding` — operates on `Text` rather than `ByteString`. Used for types that need Text representation (e.g., enum display names). + +### JSON bridge functions + +| Function | Purpose | Source | +|----------|---------|--------| +| `strToJSON` | `StrEncoding a => a -> J.Value` via `decodeLatin1 . 
strEncode` | `String.hs:229-231` | +| `strToJEncoding` | Same, for Aeson encoding | `String.hs:233-235` | +| `strParseJSON` | `StrEncoding a => String -> J.Value -> JT.Parser a` — parse JSON string via `strP` | `String.hs:237-238` | +| `textToJSON` | `TextEncoding a => a -> J.Value` | `String.hs:240-242` | +| `textToEncoding` | Same, for Aeson encoding | `String.hs:244-246` | +| `textParseJSON` | `TextEncoding a => String -> J.Value -> JT.Parser a` | `String.hs:248-249` | ## Parsers -## Functions +**Source**: [`Parsers.hs`](../src/Simplex/Messaging/Parsers.hs) + +### Core parsing functions + +| Function | Signature | Purpose | Source | +|----------|-----------|---------|--------| +| `parseAll` | `Parser a -> ByteString -> Either String a` | Parse consuming all input (fails if bytes remain) | `Parsers.hs:64-65` | +| `parse` | `Parser a -> e -> ByteString -> Either e a` | `parseAll` with custom error type (discards error string) | `Parsers.hs:61-62` | +| `parseE` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | `parseAll` lifted into `ExceptT` | `Parsers.hs:67-68` | +| `parseE'` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | Like `parseE` but allows trailing input | `Parsers.hs:70-71` | +| `parseRead1` | `Read a => Parser a` | Parse a word then `readMaybe` it | `Parsers.hs:76-77` | +| `parseString` | `(ByteString -> Either String a) -> String -> a` | Parse from `String` (errors with `error`) | `Parsers.hs:89-90` | + +### `base64P` + +**Source**: `Parsers.hs:44-53` + +Standard base64 parser (not base64url — uses `+`/`/` alphabet). Takes alphanumeric + `+`/`/` characters, optional `=` padding, then decodes. Contrast with `base64urlP` in `Encoding/String.hs` which uses `-`/`_` alphabet. + +### JSON options helpers + +Platform-conditional JSON encoding for cross-platform compatibility (Haskell ↔ Swift). 
+ +| Function | Purpose | Source | +|----------|---------|--------| +| `enumJSON` | All-nullary constructors as strings, with tag modifier | `Parsers.hs:101-106` | +| `sumTypeJSON` | Platform-conditional: `taggedObjectJSON` on non-Darwin, `singleFieldJSON` on Darwin | `Parsers.hs:109-114` | +| `taggedObjectJSON` | `{"type": "Tag", "data": {...}}` format | `Parsers.hs:119-128` | +| `singleFieldJSON` | `{"Tag": value}` format | `Parsers.hs:137-149` | +| `defaultJSON` | Default options with `omitNothingFields = True` | `Parsers.hs:151-152` | + +Pattern synonyms for JSON field names: +- `TaggedObjectJSONTag = "type"` (`Parsers.hs:131`) +- `TaggedObjectJSONData = "data"` (`Parsers.hs:134`) +- `SingleFieldJSONTag = "_owsf"` (`Parsers.hs:117`) + +### String helpers + +| Function | Purpose | Source | +|----------|---------|--------| +| `fstToLower` | Lowercase first character | `Parsers.hs:92-94` | +| `dropPrefix` | Remove prefix string, lowercase remainder | `Parsers.hs:96-99` | +| `textP` | Parse rest of input as UTF-8 `String` | `Parsers.hs:154-155` | + +## Auxiliary Types and Utilities + +### TMap + +**Source**: [`TMap.hs`](../src/Simplex/Messaging/TMap.hs) + +```haskell +type TMap k a = TVar (Map k a) +``` + +STM-based concurrent map. Wraps `Data.Map.Strict` in a `TVar`. All mutations use `modifyTVar'` (strict) to prevent thunk accumulation. 
+ +| Function | Notes | Source | +|----------|-------|--------| +| `emptyIO` | IO allocation (`newTVarIO`) | `TMap.hs:32-34` | +| `singleton` | STM allocation | `TMap.hs:36-38` | +| `clear` | Reset to empty | `TMap.hs:40-42` | +| `lookup` / `lookupIO` | STM / non-transactional IO read | `TMap.hs:48-54` | +| `member` / `memberIO` | STM / non-transactional IO membership | `TMap.hs:56-62` | +| `insert` / `insertM` | Insert value / insert from STM action | `TMap.hs:64-70` | +| `delete` | Remove key | `TMap.hs:72-74` | +| `lookupInsert` | Atomic lookup-then-insert (returns old value) | `TMap.hs:76-78` | +| `lookupDelete` | Atomic lookup-then-delete | `TMap.hs:80-82` | +| `adjust` / `update` / `alter` / `alterF` | Standard Map operations lifted to STM | `TMap.hs:84-100` | +| `union` | Merge `Map` into `TMap` | `TMap.hs:102-104` | + +`lookupIO`/`memberIO` use `readTVarIO` — single-read outside STM transaction, useful when you need a snapshot without composing with other STM operations. + +### SessionVar + +**Source**: [`Session.hs`](../src/Simplex/Messaging/Session.hs) + +Race-safe session management using TMVar + monotonic ID. + +```haskell +data SessionVar a = SessionVar + { sessionVar :: TMVar a -- result slot + , sessionVarId :: Int -- monotonic ID from TVar counter + , sessionVarTs :: UTCTime -- creation timestamp + } +``` + +| Function | Purpose | Source | +|----------|---------|--------| +| `getSessVar` | Lookup or create session. Returns `Left new` or `Right existing` | `Session.hs:24-33` | +| `removeSessVar` | Delete session only if ID matches (prevents removing a replacement) | `Session.hs:35-39` | +| `tryReadSessVar` | Non-blocking read of session result | `Session.hs:41-42` | + +The ID-match check in `removeSessVar` (`sessionVarId v == sessionVarId v'`) prevents a race where: +1. Thread A creates session #5, starts work +2. Thread B creates session #6 (replacing #5 in TMap) +3. 
Thread A finishes, tries to remove — ID mismatch, removal blocked + +### ServiceScheme + +**Source**: [`ServiceScheme.hs`](../src/Simplex/Messaging/ServiceScheme.hs) + +```haskell +data ServiceScheme = SSSimplex | SSAppServer SrvLoc +data SrvLoc = SrvLoc HostName ServiceName +``` + +URI scheme for SimpleX service addresses. `SSSimplex` encodes as `"simplex:"`, `SSAppServer` as `"https://host:port"`. + +`simplexChat :: ServiceScheme` is the constant `SSAppServer (SrvLoc "simplex.chat" "")` (`ServiceScheme.hs:38-39`). + +### SystemTime + +**Source**: [`SystemTime.hs`](../src/Simplex/Messaging/SystemTime.hs) + +```haskell +newtype RoundedSystemTime (t :: Nat) = RoundedSystemTime { roundedSeconds :: Int64 } +type SystemDate = RoundedSystemTime 86400 -- day precision +type SystemSeconds = RoundedSystemTime 1 -- second precision +``` + +Phantom-typed time rounding. The `Nat` type parameter specifies rounding granularity in seconds. + +| Function | Purpose | Source | +|----------|---------|--------| +| `getRoundedSystemTime` | Get current time rounded to `t` seconds | `SystemTime.hs:40-43` | +| `getSystemDate` | Alias for day-rounded time | `SystemTime.hs:45-47` | +| `getSystemSeconds` | Second-precision (no rounding needed, just drops nanoseconds) | `SystemTime.hs:49-51` | +| `roundedToUTCTime` | Convert back to `UTCTime` | `SystemTime.hs:53-55` | + +`RoundedSystemTime` derives `FromField`/`ToField` for SQLite storage and `FromJSON`/`ToJSON` for API serialization. 
+ +### Util + +**Source**: [`Util.hs`](../src/Simplex/Messaging/Util.hs) + +Selected utilities used across the codebase: + +**Monadic combinators**: + +| Function | Signature | Purpose | Source | +|----------|-----------|---------|--------| +| `<$?>` | `MonadFail m => (a -> Either String b) -> m a -> m b` | Lift fallible function into parser | `Util.hs:119-121` | +| `$>>=` | `(Monad m, Monad f, Traversable f) => m (f a) -> (a -> m (f b)) -> m (f b)` | Monadic bind through nested monad | `Util.hs:165-167` | +| `ifM` / `whenM` / `unlessM` | — | Monadic conditionals | `Util.hs:147-157` | +| `anyM` | — | Short-circuit `any` for monadic predicates (strict) | `Util.hs:159-161` | + +**Error handling**: + +| Function | Purpose | Source | +|----------|---------|--------| +| `tryAllErrors` | Catch all exceptions (including async) into `ExceptT` | `Util.hs:273-275` | +| `catchAllErrors` | Same with handler | `Util.hs:281-283` | +| `tryAllOwnErrors` | Catch only "own" exceptions (re-throws async cancellation) | `Util.hs:322-324` | +| `catchAllOwnErrors` | Same with handler | `Util.hs:330-332` | +| `isOwnException` | `StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded` | `Util.hs:297-304` | +| `isAsyncCancellation` | Any `SomeAsyncException` except own exceptions | `Util.hs:306-310` | +| `catchThrow` | Catch exceptions, wrap in Left | `Util.hs:289-291` | +| `allFinally` | `tryAllErrors` + `final` + `except` (like `finally` for ExceptT) | `Util.hs:293-295` | + +The own-vs-async distinction is critical: `catchOwn`/`tryAllOwnErrors` never swallow async cancellation (`ThreadKilled`, `UserInterrupt`, etc.), only synchronous exceptions and resource exhaustion (`StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded`). 
+ +**STM**: + +| Function | Purpose | Source | +|----------|---------|--------| +| `tryWriteTBQueue` | Non-blocking bounded queue write, returns success | `Util.hs:256-261` | + +**Database result helpers**: + +| Function | Purpose | Source | +|----------|---------|--------| +| `firstRow` | Extract first row with transform, or Left error | `Util.hs:346-347` | +| `maybeFirstRow` | Extract first row as Maybe | `Util.hs:349-350` | +| `firstRow'` | Like `firstRow` but transform can also fail | `Util.hs:355-356` | + +**Collection utilities**: + +| Function | Purpose | Source | +|----------|---------|--------| +| `groupOn` | `groupBy` using equality on projected key | `Util.hs:358-359` | +| `groupAllOn` | `groupOn` after `sortOn` (groups non-adjacent elements) | `Util.hs:372-373` | +| `toChunks` | Split list into `NonEmpty` chunks of size n | `Util.hs:376-380` | +| `packZipWith` | Optimized ByteString zipWith (direct memory access) | `Util.hs:236-254` | + +**Miscellaneous**: + +| Function | Purpose | Source | +|----------|---------|--------| +| `safeDecodeUtf8` | Decode UTF-8 replacing errors with `'?'` | `Util.hs:382-386` | +| `bshow` / `tshow` | `show` to `ByteString` / `Text` | `Util.hs:123-129` | +| `threadDelay'` | `Int64` delay (handles overflow by looping) | `Util.hs:391-399` | +| `diffToMicroseconds` / `diffToMilliseconds` | `NominalDiffTime` conversion | `Util.hs:401-407` | +| `labelMyThread` | Label current thread for debugging | `Util.hs:409-410` | +| `encodeJSON` / `decodeJSON` | `ToJSON a => a -> Text` / `FromJSON a => Text -> Maybe a` | `Util.hs:415-421` | +| `traverseWithKey_` | `Map` traversal discarding results | `Util.hs:423-425` | + +## Security notes + +- **Length prefix overflow**: `ByteString` encoding uses 1-byte length — silently truncates strings > 255 bytes. Callers must ensure size bounds before encoding. `Large` extends to 65535 bytes via Word16 prefix. +- **`Tail` unbounded**: `Tail` consumes all remaining input with no size check. 
Only safe when total message size is already bounded (e.g., within a padded SMP block). +- **base64 vs base64url**: `Parsers.base64P` uses standard alphabet (`+`/`/`), while `String.base64urlP` uses URL-safe alphabet (`-`/`_`). Mixing them causes silent decode failures. +- **`safeDecodeUtf8`**: Replaces invalid UTF-8 with `'?'` rather than failing. Suitable for logging/display, not for security-critical string comparison. diff --git a/spec/version.md b/spec/version.md index f5b954534e..6d9a23c091 100644 --- a/spec/version.md +++ b/spec/version.md @@ -2,8 +2,198 @@ > Version ranges and compatibility checking for protocol evolution. -## Version Ranges +**Source files**: [`Version.hs`](../src/Simplex/Messaging/Version.hs), [`Version/Internal.hs`](../src/Simplex/Messaging/Version/Internal.hs) -## Compatibility +## Overview + +All SimpleX protocols use version negotiation during handshake. Each party advertises a `VersionRange` (min..max supported), and negotiation produces a `Compatible` proof value if the ranges overlap — choosing the highest mutually-supported version. + +The `Compatible` newtype can only be constructed internally (constructor is not exported), so the type system enforces that compatibility was actually checked. + +## Types + +### `Version v` + +**Source**: `Version/Internal.hs:11-12` + +```haskell +newtype Version v = Version Word16 +``` + +Phantom-typed version number. The phantom `v` distinguishes version spaces (e.g., SMP versions vs Agent versions vs XFTP versions) at the type level, preventing accidental comparison across protocols. + +- `Encoding`: 2 bytes big-endian (via Word16 instance) +- `StrEncoding`: decimal string +- JSON: numeric value +- Derives: `Eq`, `Ord`, `Show` + +The constructor is exported from `Version.Internal` but not from `Version`, so application code cannot fabricate versions — they must come from protocol constants or parsing. 
+ +### `VersionRange v` + +**Source**: `Version.hs:46-50` + +```haskell +data VersionRange v = VRange + { minVersion :: Version v + , maxVersion :: Version v + } +``` + +Invariant: `minVersion <= maxVersion` (enforced by smart constructors). + +The `VRange` constructor is not exported — only the pattern synonym `VersionRange` (read-only, `Version.hs:41-44`) is public. + +- `Encoding`: two Word16s concatenated (4 bytes total, `Version.hs:80-84`) +- `StrEncoding`: `"min-max"` or `"v"` if min == max (`Version.hs:86-93`) +- JSON: `{"minVersion": n, "maxVersion": n}` + +### `VersionScope v` + +**Source**: `Version.hs:64` + +```haskell +class VersionScope v +``` + +Empty typeclass used as a constraint on version operations. Each protocol declares its version scope: + +```haskell +instance VersionScope SMP +instance VersionScope Agent +``` + +This prevents accidentally mixing version ranges from different protocols in negotiation functions. + +### `Compatible a` + +**Source**: `Version.hs:117-122` + +```haskell +newtype Compatible a = Compatible_ a + +pattern Compatible :: a -> Compatible a +pattern Compatible a <- Compatible_ a +``` + +Proof that compatibility was checked. The `Compatible_` constructor is not exported — `Compatible` is a read-only pattern synonym. The only way to obtain a `Compatible` value is through `compatibleVersion`, `compatibleVRange`, `proveCompatible`, or the internal `mkCompatibleIf`. + +### `VersionI` / `VersionRangeI` type classes + +**Source**: `Version.hs:95-115` + +Multi-param typeclasses with functional dependencies for generic version/range operations. 
Allow extension types that wrap `Version` or `VersionRange` to participate in negotiation: + +```haskell +class VersionScope v => VersionI v a | a -> v where + type VersionRangeT v a -- associated type: range form + version :: a -> Version v + toVersionRangeT :: a -> VersionRange v -> VersionRangeT v a + +class VersionScope v => VersionRangeI v a | a -> v where + type VersionT v a -- associated type: version form + versionRange :: a -> VersionRange v + toVersionRange :: a -> VersionRange v -> a + toVersionT :: a -> Version v -> VersionT v a +``` + +Identity instances exist for `Version v` and `VersionRange v` themselves. ## Functions + +### Construction + +| Function | Signature | Purpose | Source | +|----------|-----------|---------|--------| +| `mkVersionRange` | `Version v -> Version v -> VersionRange v` | Construct range, `error` if min > max | `Version.hs:67-70` | +| `safeVersionRange` | `Version v -> Version v -> Maybe (VersionRange v)` | Safe construction, `Nothing` if invalid | `Version.hs:72-75` | +| `versionToRange` | `Version v -> VersionRange v` | Singleton range (min == max) | `Version.hs:77-78` | + +### Compatibility checking + +#### `isCompatible` + +**Source**: `Version.hs:124-125` + +```haskell +isCompatible :: VersionI v a => a -> VersionRange v -> Bool +``` + +Check if a single version falls within a range. + +#### `isCompatibleRange` + +**Source**: `Version.hs:127-130` + +```haskell +isCompatibleRange :: VersionRangeI v a => a -> VersionRange v -> Bool +``` + +Check if two version ranges overlap: `min1 <= max2 && min2 <= max1`. + +#### `proveCompatible` + +**Source**: `Version.hs:132-133` + +```haskell +proveCompatible :: VersionI v a => a -> VersionRange v -> Maybe (Compatible a) +``` + +If version is compatible, wrap in `Compatible` proof. Returns `Nothing` if out of range. 
+ +### Negotiation + +#### `compatibleVersion` + +**Source**: `Version.hs:135-140` + +```haskell +compatibleVersion :: VersionRangeI v a => a -> VersionRange v -> Maybe (Compatible (VersionT v a)) +``` + +Negotiate a single version from two ranges. Returns `min(max1, max2)` — the highest mutually-supported version. Returns `Nothing` if ranges don't overlap. + +#### `compatibleVRange` + +**Source**: `Version.hs:143-148` + +```haskell +compatibleVRange :: VersionRangeI v a => a -> VersionRange v -> Maybe (Compatible a) +``` + +Compute the intersection of two version ranges: `(max(min1,min2), min(max1,max2))`. Returns `Nothing` if the intersection is empty (i.e., ranges don't overlap). + +#### `compatibleVRange'` + +**Source**: `Version.hs:151-156` + +```haskell +compatibleVRange' :: VersionRangeI v a => a -> Version v -> Maybe (Compatible a) +``` + +Cap a version range's maximum at a given version. Returns `Nothing` if the cap is below the range's minimum. + +## Protocol version constants + +Version constants for each protocol are defined in their respective Transport modules. For SMP, key gates include: + +- `currentSMPAgentVersion`, `supportedSMPAgentVRange` — current negotiation range +- `serviceCertsSMPVersion = 16` — service certificate handshake +- `rcvServiceSMPVersion = 19` — service subscription commands + +See [`transport.md`](transport.md) and [`rcv-services.md`](rcv-services.md) for protocol-specific version constants. + +## Negotiation protocol + +During handshake: +1. Client sends its `VersionRange` to server +2. Server computes `compatibleVRange clientRange serverRange` +3. If `Nothing` → reject connection (incompatible) +4. If `Just (Compatible agreedRange)` → use `maxVersion agreedRange` as the effective protocol version + +The `Compatible` proof flows through the connection setup, ensuring all subsequent version-gated code paths have evidence that negotiation occurred. 
+ +## Security notes + +- **No downgrade attack protection in negotiation itself** — an active MITM could modify the version range to force a lower version. Protection comes from the TLS layer (authentication prevents MITM) and from servers setting minimum version floors. +- **`mkVersionRange` uses `error`** — only safe for compile-time constants. Runtime construction must use `safeVersionRange`. From 66d7efa61ea03a771e555dce4216f9d8caf6191d Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 08:53:57 +0000 Subject: [PATCH 25/91] some modules documented --- spec/README.md | 99 +++--- spec/TOPICS.md | 5 + spec/encoding.md | 290 +++++++++--------- spec/modules/README.md | 155 ++++++++++ spec/modules/Simplex/Messaging/Compression.md | 17 + spec/modules/Simplex/Messaging/Encoding.md | 41 +++ .../Simplex/Messaging/Encoding/String.md | 40 +++ spec/modules/Simplex/Messaging/Parsers.md | 21 ++ .../Simplex/Messaging/ServiceScheme.md | 7 + spec/modules/Simplex/Messaging/Session.md | 15 + spec/modules/Simplex/Messaging/SystemTime.md | 13 + spec/modules/Simplex/Messaging/TMap.md | 17 + spec/modules/Simplex/Messaging/Util.md | 52 ++++ spec/modules/Simplex/Messaging/Version.md | 27 ++ .../Simplex/Messaging/Version/Internal.md | 7 + spec/rcv-services.md | 42 +-- spec/version.md | 62 ++-- 17 files changed, 632 insertions(+), 278 deletions(-) create mode 100644 spec/TOPICS.md create mode 100644 spec/modules/README.md create mode 100644 spec/modules/Simplex/Messaging/Compression.md create mode 100644 spec/modules/Simplex/Messaging/Encoding.md create mode 100644 spec/modules/Simplex/Messaging/Encoding/String.md create mode 100644 spec/modules/Simplex/Messaging/Parsers.md create mode 100644 spec/modules/Simplex/Messaging/ServiceScheme.md create mode 100644 spec/modules/Simplex/Messaging/Session.md create mode 100644 spec/modules/Simplex/Messaging/SystemTime.md create mode 100644 
spec/modules/Simplex/Messaging/TMap.md create mode 100644 spec/modules/Simplex/Messaging/Util.md create mode 100644 spec/modules/Simplex/Messaging/Version.md create mode 100644 spec/modules/Simplex/Messaging/Version/Internal.md diff --git a/spec/README.md b/spec/README.md index 7154aa957a..c993f108d9 100644 --- a/spec/README.md +++ b/spec/README.md @@ -2,66 +2,73 @@ > How does the code work? What does each function do? What are the security invariants? -## Conventions +## Structure + +Spec has two levels: + +### `spec/modules/` — Per-module documentation + +Mirrors the `src/Simplex/` directory structure exactly. Each `.hs` file has a corresponding `.md` file at the same relative path. Contains only information that is **not obvious from reading the code** and cannot fit in a one-line source comment: + +- Non-obvious behavior (subtle invariants, ordering dependencies, concurrency assumptions) +- Usage considerations (when to use X vs Y, common mistakes, caller obligations) +- Relationships to other modules not visible from imports +- Security notes specific to this module + +**Not included**: type signatures, code snippets, function-by-function prose that restates the source. If reading the code tells you everything, the module doc says so briefly. -Each spec file documents: -1. **Purpose** — What this component does -2. **Protocol reference** — Link to `protocol/` file (where applicable) -3. **Types** — Key data types with field descriptions -4. **Functions** — Every exported function with call graph -5. **Security notes** — Trust assumptions, validation requirements +Function references use fully qualified names with markdown links: +``` +[Simplex.Messaging.Server.subscribeServiceMessages](./modules/Simplex/Messaging/Server.md#subscribeServiceMessages) +``` -Function documentation format: +Source code links back via comments: +```haskell +-- spec: spec/modules/Simplex/Messaging/Server.md#subscribeServiceMessages +subscribeServiceMessages :: ... 
``` -### Module.functionName + +### `spec/` root — Topic documentation + +Cross-module documentation that follows a feature, mechanism, or concern across the entire stack. Topics answer "how does X work end-to-end?" rather than "what does this file do?" + +Topics reference module docs rather than restating implementation details. They focus on: +- End-to-end data flow across modules +- Cross-cutting security analysis and invariants +- Design rationale, risks, test gaps +- Version gates and compatibility concerns + +Some topics may migrate to `product/` if they are primarily about user-visible behavior and guarantees rather than implementation mechanics. + +### `spec/security-invariants.md` — All security invariants + +Cross-referenced from both module docs and topic docs. + +## Conventions + +Module doc entry format: +``` +## functionName **Purpose**: ... -**Calls**: Module.a, Module.b -**Called by**: Module.c +**Calls**: [Module.a](./modules/path.md#a), [Module.b](./modules/path.md#b) +**Called by**: [Module.c](./modules/path.md#c) **Invariant**: SI-XX **Security**: ... 
``` ## Index -### Protocol Implementation -- [smp-protocol.md](smp-protocol.md) — SMP commands, types, encoding -- [xftp-protocol.md](xftp-protocol.md) — XFTP commands, chunk operations -- [ntf-protocol.md](ntf-protocol.md) — NTF commands, token/subscription lifecycle -- [xrcp-protocol.md](xrcp-protocol.md) — XRCP session handshake, commands -- [agent-protocol.md](agent-protocol.md) — Agent connection procedures, queue rotation - -### Cryptography -- [crypto.md](crypto.md) — All primitives: Ed25519, X25519, NaCl, AES-GCM, SHA, HKDF -- [crypto-ratchet.md](crypto-ratchet.md) — Double ratchet + PQDR -- [crypto-tls.md](crypto-tls.md) — TLS setup, certificate chains, validation - -### Transport -- [transport.md](transport.md) — Transport abstraction, handshake, block padding -- [transport-http2.md](transport-http2.md) — HTTP/2 framing, file streaming -- [transport-websocket.md](transport-websocket.md) — WebSocket adapter - -### Server Implementations -- [smp-server.md](smp-server.md) — SMP server -- [xftp-server.md](xftp-server.md) — XFTP server -- [ntf-server.md](ntf-server.md) — Notification server - -### Client Implementations -- [smp-client.md](smp-client.md) — SMP client, proxy relay -- [xftp-client.md](xftp-client.md) — XFTP client -- [agent.md](agent.md) — SMP agent, duplex connections - -### Storage -- [storage-server.md](storage-server.md) — Server storage backends -- [storage-agent.md](storage-agent.md) — Agent storage backends - -### Auxiliary +### Topics + +- [rcv-services.md](rcv-services.md) — Service certificates for high-volume SMP clients (bulk subscription) - [encoding.md](encoding.md) — Binary and string encoding - [version.md](version.md) — Version ranges and negotiation -- [remote-control.md](remote-control.md) — XRCP implementation - [compression.md](compression.md) — Zstd compression -### Cross-cutting Features -- [rcv-services.md](rcv-services.md) — Service certificates for high-volume SMP clients (bulk subscription) +### Modules + +See 
`spec/modules/` — mirrors `src/Simplex/` structure. ### Security + - [security-invariants.md](security-invariants.md) — All security invariants diff --git a/spec/TOPICS.md b/spec/TOPICS.md new file mode 100644 index 0000000000..a0c1f4eafb --- /dev/null +++ b/spec/TOPICS.md @@ -0,0 +1,5 @@ +# Topic Candidates + +> Cross-cutting patterns noticed during module documentation. Each entry may become a topic doc in `spec/` after all module docs are complete. + +- **Exception handling strategy**: `catchOwn`/`catchAll`/`tryAllErrors` pattern (defined in Util.hs) used across server, client, and agent modules. The three-category classification (synchronous, own-async, cancellation) and when to use which catch variant are not obvious from any single call site. diff --git a/spec/encoding.md b/spec/encoding.md index 3a4fdcd275..f5501cfabb 100644 --- a/spec/encoding.md +++ b/spec/encoding.md @@ -15,8 +15,6 @@ Both are typeclasses with `MINIMAL` pragmas requiring `encode` + (`decode` | `pa ## Binary Encoding (`Encoding` class) -**Source**: `Encoding.hs:38-52` - ```haskell class Encoding a where smpEncode :: a -> ByteString @@ -26,31 +24,29 @@ class Encoding a where ### Length-prefix conventions -| Type | Prefix | Max size | Source | -|------|--------|----------|--------| -| `ByteString` | 1-byte length (Word8 as Char) | 255 bytes | `Encoding.hs:102-106` | -| `Large` (newtype) | 2-byte length (Word16 big-endian) | 65535 bytes | `Encoding.hs:135-143` | -| `Tail` (newtype) | None — consumes rest of input | Unlimited | `Encoding.hs:126-132` | -| Lists (`smpEncodeList`) | 1-byte count prefix, then concatenated items | 255 items | `Encoding.hs:155-159` | -| `NonEmpty` | Same as list (fails on count=0) | 255 items | `Encoding.hs:173-178` | +| Type | Prefix | Max size | +|------|--------|----------| +| `ByteString` | 1-byte length (Word8 as Char) | 255 bytes | +| `Large` (newtype) | 2-byte length (Word16 big-endian) | 65535 bytes | +| `Tail` (newtype) | None — consumes rest of input | 
Unlimited | +| Lists (`smpEncodeList`) | 1-byte count prefix, then concatenated items | 255 items | +| `NonEmpty` | Same as list (fails on count=0) | 255 items | ### Scalar types -| Type | Encoding | Bytes | Source | -|------|----------|-------|--------| -| `Char` | Raw byte | 1 | `Encoding.hs:54-58` | -| `Bool` | `'T'` / `'F'` (0x54 / 0x46) | 1 | `Encoding.hs:60-70` | -| `Word16` | Big-endian | 2 | `Encoding.hs:72-76` | -| `Word32` | Big-endian | 4 | `Encoding.hs:78-82` | -| `Int64` | Two big-endian Word32s (high then low) | 8 | `Encoding.hs:84-99` | -| `SystemTime` | `systemSeconds` as Int64 (nanoseconds dropped) | 8 | `Encoding.hs:145-149` | -| `Text` | UTF-8 then ByteString encoding (1-byte length prefix) | 1 + len | `Encoding.hs:161-165` | -| `String` | `B.pack` then ByteString encoding | 1 + len | `Encoding.hs:167-171` | +| Type | Encoding | Bytes | +|------|----------|-------| +| `Char` | Raw byte | 1 | +| `Bool` | `'T'` / `'F'` (0x54 / 0x46) | 1 | +| `Word16` | Big-endian | 2 | +| `Word32` | Big-endian | 4 | +| `Int64` | Two big-endian Word32s (high then low) | 8 | +| `SystemTime` | `systemSeconds` as Int64 (nanoseconds dropped) | 8 | +| `Text` | UTF-8 then ByteString encoding (1-byte length prefix) | 1 + len | +| `String` | `B.pack` then ByteString encoding | 1 + len | ### `Maybe a` -**Source**: `Encoding.hs:116-124` - ``` Nothing → '0' (0x30) Just x → '1' (0x31) ++ smpEncode x @@ -60,23 +56,19 @@ Tags are ASCII characters `'0'`/`'1'`, not binary 0x00/0x01. ### Tuples -**Source**: `Encoding.hs:180-220` - Tuples (2 through 8) encode as simple concatenation — no length prefix, no separator. Fields are parsed sequentially using each component's `smpP`. This works because each component's parser knows how many bytes to consume (via its own length prefix or fixed size). 
### Combinators -| Function | Signature | Purpose | Source | -|----------|-----------|---------|--------| -| `_smpP` | `Parser a` | Space-prefixed parser (`A.space *> smpP`) | `Encoding.hs:151-152` | -| `smpEncodeList` | `[a] -> ByteString` | 1-byte count + concatenated items | `Encoding.hs:155-156` | -| `smpListP` | `Parser [a]` | Parse count then that many items | `Encoding.hs:158-159` | -| `lenEncode` | `Int -> Char` | Int to single-byte length char | `Encoding.hs:108-110` | +| Function | Signature | Purpose | +|----------|-----------|---------| +| `_smpP` | `Parser a` | Space-prefixed parser (`A.space *> smpP`) | +| `smpEncodeList` | `[a] -> ByteString` | 1-byte count + concatenated items | +| `smpListP` | `Parser [a]` | Parse count then that many items | +| `lenEncode` | `Int -> Char` | Int to single-byte length char | ## String Encoding (`StrEncoding` class) -**Source**: `Encoding/String.hs:56-67` - ```haskell class StrEncoding a where strEncode :: a -> ByteString @@ -88,39 +80,35 @@ Key difference from `Encoding`: the default `strP` parses base64url input first, ### Instance conventions -| Type | Encoding | Source | -|------|----------|--------| -| `ByteString` | base64url (non-empty required) | `String.hs:70-76` | -| `Word16`, `Word32` | Decimal string | `String.hs:114-124` | -| `Int`, `Int64` | Signed decimal | `String.hs:138-148` | -| `Char`, `Bool` | Delegates to `Encoding` (`smpEncode`/`smpP`) | `String.hs:126-136` | -| `Maybe a` | Empty string = `Nothing`, otherwise `strEncode a` | `String.hs:108-112` | -| `Text` | UTF-8 bytes, parsed until space/newline | `String.hs:97-99` | -| `SystemTime` | `systemSeconds` as Int64 (decimal) | `String.hs:150-152` | -| `UTCTime` | ISO 8601 string | `String.hs:154-156` | -| `CertificateChain` | Comma-separated base64url blobs | `String.hs:158-162` | -| `Fingerprint` | base64url of fingerprint bytes | `String.hs:164-168` | +| Type | Encoding | +|------|----------| +| `ByteString` | base64url (non-empty required) | +| 
`Word16`, `Word32` | Decimal string | +| `Int`, `Int64` | Signed decimal | +| `Char`, `Bool` | Delegates to `Encoding` (`smpEncode`/`smpP`) | +| `Maybe a` | Empty string = `Nothing`, otherwise `strEncode a` | +| `Text` | UTF-8 bytes, parsed until space/newline | +| `SystemTime` | `systemSeconds` as Int64 (decimal) | +| `UTCTime` | ISO 8601 string | +| `CertificateChain` | Comma-separated base64url blobs | +| `Fingerprint` | base64url of fingerprint bytes | ### Collection encoding -| Type | Separator | Source | -|------|-----------|--------| -| Lists (`strEncodeList`) | Comma `,` | `String.hs:171-175` | -| `NonEmpty` | Comma (fails on empty) | `String.hs:178-180` | -| `Set a` | Comma | `String.hs:182-184` | -| `IntSet` | Comma | `String.hs:186-188` | -| Tuples (2-6) | Space (` `) | `String.hs:193-221` | +| Type | Separator | +|------|-----------| +| Lists (`strEncodeList`) | Comma `,` | +| `NonEmpty` | Comma (fails on empty) | +| `Set a` | Comma | +| `IntSet` | Comma | +| Tuples (2-6) | Space (` `) | ### `Str` newtype -**Source**: `String.hs:84-89` - Raw string (not base64url-encoded). Parses until space, consumes trailing space. Used for string-valued protocol fields that should not be base64-encoded. ### `TextEncoding` class -**Source**: `String.hs:51-53` - ```haskell class TextEncoding a where textEncode :: a -> Text @@ -131,14 +119,14 @@ Separate from `StrEncoding` — operates on `Text` rather than `ByteString`. Use ### JSON bridge functions -| Function | Purpose | Source | -|----------|---------|--------| -| `strToJSON` | `StrEncoding a => a -> J.Value` via `decodeLatin1 . 
strEncode` | `String.hs:229-231` | -| `strToJEncoding` | Same, for Aeson encoding | `String.hs:233-235` | -| `strParseJSON` | `StrEncoding a => String -> J.Value -> JT.Parser a` — parse JSON string via `strP` | `String.hs:237-238` | -| `textToJSON` | `TextEncoding a => a -> J.Value` | `String.hs:240-242` | -| `textToEncoding` | Same, for Aeson encoding | `String.hs:244-246` | -| `textParseJSON` | `TextEncoding a => String -> J.Value -> JT.Parser a` | `String.hs:248-249` | +| Function | Purpose | +|----------|---------| +| `strToJSON` | `StrEncoding a => a -> J.Value` via `decodeLatin1 . strEncode` | +| `strToJEncoding` | Same, for Aeson encoding | +| `strParseJSON` | `StrEncoding a => String -> J.Value -> JT.Parser a` — parse JSON string via `strP` | +| `textToJSON` | `TextEncoding a => a -> J.Value` | +| `textToEncoding` | Same, for Aeson encoding | +| `textParseJSON` | `TextEncoding a => String -> J.Value -> JT.Parser a` | ## Parsers @@ -146,45 +134,43 @@ Separate from `StrEncoding` — operates on `Text` rather than `ByteString`. 
Use ### Core parsing functions -| Function | Signature | Purpose | Source | -|----------|-----------|---------|--------| -| `parseAll` | `Parser a -> ByteString -> Either String a` | Parse consuming all input (fails if bytes remain) | `Parsers.hs:64-65` | -| `parse` | `Parser a -> e -> ByteString -> Either e a` | `parseAll` with custom error type (discards error string) | `Parsers.hs:61-62` | -| `parseE` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | `parseAll` lifted into `ExceptT` | `Parsers.hs:67-68` | -| `parseE'` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | Like `parseE` but allows trailing input | `Parsers.hs:70-71` | -| `parseRead1` | `Read a => Parser a` | Parse a word then `readMaybe` it | `Parsers.hs:76-77` | -| `parseString` | `(ByteString -> Either String a) -> String -> a` | Parse from `String` (errors with `error`) | `Parsers.hs:89-90` | +| Function | Signature | Purpose | +|----------|-----------|---------| +| `parseAll` | `Parser a -> ByteString -> Either String a` | Parse consuming all input (fails if bytes remain) | +| `parse` | `Parser a -> e -> ByteString -> Either e a` | `parseAll` with custom error type (discards error string) | +| `parseE` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | `parseAll` lifted into `ExceptT` | +| `parseE'` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | Like `parseE` but allows trailing input | +| `parseRead1` | `Read a => Parser a` | Parse a word then `readMaybe` it | +| `parseString` | `(ByteString -> Either String a) -> String -> a` | Parse from `String` (errors with `error`) | ### `base64P` -**Source**: `Parsers.hs:44-53` - Standard base64 parser (not base64url — uses `+`/`/` alphabet). Takes alphanumeric + `+`/`/` characters, optional `=` padding, then decodes. Contrast with `base64urlP` in `Encoding/String.hs` which uses `-`/`_` alphabet. 
### JSON options helpers Platform-conditional JSON encoding for cross-platform compatibility (Haskell ↔ Swift). -| Function | Purpose | Source | -|----------|---------|--------| -| `enumJSON` | All-nullary constructors as strings, with tag modifier | `Parsers.hs:101-106` | -| `sumTypeJSON` | Platform-conditional: `taggedObjectJSON` on non-Darwin, `singleFieldJSON` on Darwin | `Parsers.hs:109-114` | -| `taggedObjectJSON` | `{"type": "Tag", "data": {...}}` format | `Parsers.hs:119-128` | -| `singleFieldJSON` | `{"Tag": value}` format | `Parsers.hs:137-149` | -| `defaultJSON` | Default options with `omitNothingFields = True` | `Parsers.hs:151-152` | +| Function | Purpose | +|----------|---------| +| `enumJSON` | All-nullary constructors as strings, with tag modifier | +| `sumTypeJSON` | Platform-conditional: `taggedObjectJSON` on non-Darwin, `singleFieldJSON` on Darwin | +| `taggedObjectJSON` | `{"type": "Tag", "data": {...}}` format | +| `singleFieldJSON` | `{"Tag": value}` format | +| `defaultJSON` | Default options with `omitNothingFields = True` | Pattern synonyms for JSON field names: -- `TaggedObjectJSONTag = "type"` (`Parsers.hs:131`) -- `TaggedObjectJSONData = "data"` (`Parsers.hs:134`) -- `SingleFieldJSONTag = "_owsf"` (`Parsers.hs:117`) +- `TaggedObjectJSONTag = "type"` +- `TaggedObjectJSONData = "data"` +- `SingleFieldJSONTag = "_owsf"` ### String helpers -| Function | Purpose | Source | -|----------|---------|--------| -| `fstToLower` | Lowercase first character | `Parsers.hs:92-94` | -| `dropPrefix` | Remove prefix string, lowercase remainder | `Parsers.hs:96-99` | -| `textP` | Parse rest of input as UTF-8 `String` | `Parsers.hs:154-155` | +| Function | Purpose | +|----------|---------| +| `fstToLower` | Lowercase first character | +| `dropPrefix` | Remove prefix string, lowercase remainder | +| `textP` | Parse rest of input as UTF-8 `String` | ## Auxiliary Types and Utilities @@ -198,19 +184,19 @@ type TMap k a = TVar (Map k a) STM-based concurrent map. 
Wraps `Data.Map.Strict` in a `TVar`. All mutations use `modifyTVar'` (strict) to prevent thunk accumulation. -| Function | Notes | Source | -|----------|-------|--------| -| `emptyIO` | IO allocation (`newTVarIO`) | `TMap.hs:32-34` | -| `singleton` | STM allocation | `TMap.hs:36-38` | -| `clear` | Reset to empty | `TMap.hs:40-42` | -| `lookup` / `lookupIO` | STM / non-transactional IO read | `TMap.hs:48-54` | -| `member` / `memberIO` | STM / non-transactional IO membership | `TMap.hs:56-62` | -| `insert` / `insertM` | Insert value / insert from STM action | `TMap.hs:64-70` | -| `delete` | Remove key | `TMap.hs:72-74` | -| `lookupInsert` | Atomic lookup-then-insert (returns old value) | `TMap.hs:76-78` | -| `lookupDelete` | Atomic lookup-then-delete | `TMap.hs:80-82` | -| `adjust` / `update` / `alter` / `alterF` | Standard Map operations lifted to STM | `TMap.hs:84-100` | -| `union` | Merge `Map` into `TMap` | `TMap.hs:102-104` | +| Function | Notes | +|----------|-------| +| `emptyIO` | IO allocation (`newTVarIO`) | +| `singleton` | STM allocation | +| `clear` | Reset to empty | +| `lookup` / `lookupIO` | STM / non-transactional IO read | +| `member` / `memberIO` | STM / non-transactional IO membership | +| `insert` / `insertM` | Insert value / insert from STM action | +| `delete` | Remove key | +| `lookupInsert` | Atomic lookup-then-insert (returns old value) | +| `lookupDelete` | Atomic lookup-then-delete | +| `adjust` / `update` / `alter` / `alterF` | Standard Map operations lifted to STM | +| `union` | Merge `Map` into `TMap` | `lookupIO`/`memberIO` use `readTVarIO` — single-read outside STM transaction, useful when you need a snapshot without composing with other STM operations. @@ -228,13 +214,13 @@ data SessionVar a = SessionVar } ``` -| Function | Purpose | Source | -|----------|---------|--------| -| `getSessVar` | Lookup or create session. 
Returns `Left new` or `Right existing` | `Session.hs:24-33` | -| `removeSessVar` | Delete session only if ID matches (prevents removing a replacement) | `Session.hs:35-39` | -| `tryReadSessVar` | Non-blocking read of session result | `Session.hs:41-42` | +| Function | Purpose | +|----------|---------| +| `getSessVar` | Lookup or create session. Returns `Left new` or `Right existing` | +| `removeSessVar` | Delete session only if ID matches (prevents removing a replacement) | +| `tryReadSessVar` | Non-blocking read of session result | -The ID-match check in `removeSessVar` (`sessionVarId v == sessionVarId v'`) prevents a race where: +The ID-match check in `removeSessVar` prevents a race where: 1. Thread A creates session #5, starts work 2. Thread B creates session #6 (replacing #5 in TMap) 3. Thread A finishes, tries to remove — ID mismatch, removal blocked @@ -250,7 +236,7 @@ data SrvLoc = SrvLoc HostName ServiceName URI scheme for SimpleX service addresses. `SSSimplex` encodes as `"simplex:"`, `SSAppServer` as `"https://host:port"`. -`simplexChat :: ServiceScheme` is the constant `SSAppServer (SrvLoc "simplex.chat" "")` (`ServiceScheme.hs:38-39`). +`simplexChat` is the constant `SSAppServer (SrvLoc "simplex.chat" "")`. ### SystemTime @@ -264,12 +250,12 @@ type SystemSeconds = RoundedSystemTime 1 -- second precision Phantom-typed time rounding. The `Nat` type parameter specifies rounding granularity in seconds. 
-| Function | Purpose | Source | -|----------|---------|--------| -| `getRoundedSystemTime` | Get current time rounded to `t` seconds | `SystemTime.hs:40-43` | -| `getSystemDate` | Alias for day-rounded time | `SystemTime.hs:45-47` | -| `getSystemSeconds` | Second-precision (no rounding needed, just drops nanoseconds) | `SystemTime.hs:49-51` | -| `roundedToUTCTime` | Convert back to `UTCTime` | `SystemTime.hs:53-55` | +| Function | Purpose | +|----------|---------| +| `getRoundedSystemTime` | Get current time rounded to `t` seconds | +| `getSystemDate` | Alias for day-rounded time | +| `getSystemSeconds` | Second-precision (no rounding needed, just drops nanoseconds) | +| `roundedToUTCTime` | Convert back to `UTCTime` | `RoundedSystemTime` derives `FromField`/`ToField` for SQLite storage and `FromJSON`/`ToJSON` for API serialization. @@ -281,62 +267,62 @@ Selected utilities used across the codebase: **Monadic combinators**: -| Function | Signature | Purpose | Source | -|----------|-----------|---------|--------| -| `<$?>` | `MonadFail m => (a -> Either String b) -> m a -> m b` | Lift fallible function into parser | `Util.hs:119-121` | -| `$>>=` | `(Monad m, Monad f, Traversable f) => m (f a) -> (a -> m (f b)) -> m (f b)` | Monadic bind through nested monad | `Util.hs:165-167` | -| `ifM` / `whenM` / `unlessM` | Monadic conditionals | `Util.hs:147-157` | -| `anyM` | Short-circuit `any` for monadic predicates (strict) | `Util.hs:159-161` | +| Function | Signature | Purpose | +|----------|-----------|---------| +| `<$?>` | `MonadFail m => (a -> Either String b) -> m a -> m b` | Lift fallible function into parser | +| `$>>=` | `(Monad m, Monad f, Traversable f) => m (f a) -> (a -> m (f b)) -> m (f b)` | Monadic bind through nested monad | +| `ifM` / `whenM` / `unlessM` | | Monadic conditionals | +| `anyM` | | Short-circuit `any` for monadic predicates (strict) | **Error handling**: -| Function | Purpose | Source | -|----------|---------|--------| -| `tryAllErrors` | 
Catch all exceptions (including async) into `ExceptT` | `Util.hs:273-275` | -| `catchAllErrors` | Same with handler | `Util.hs:281-283` | -| `tryAllOwnErrors` | Catch only "own" exceptions (re-throws async cancellation) | `Util.hs:322-324` | -| `catchAllOwnErrors` | Same with handler | `Util.hs:330-332` | -| `isOwnException` | `StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded` | `Util.hs:297-304` | -| `isAsyncCancellation` | Any `SomeAsyncException` except own exceptions | `Util.hs:306-310` | -| `catchThrow` | Catch exceptions, wrap in Left | `Util.hs:289-291` | -| `allFinally` | `tryAllErrors` + `final` + `except` (like `finally` for ExceptT) | `Util.hs:293-295` | +| Function | Purpose | +|----------|---------| +| `tryAllErrors` | Catch all exceptions (including async) into `ExceptT` | +| `catchAllErrors` | Same with handler | +| `tryAllOwnErrors` | Catch only "own" exceptions (re-throws async cancellation) | +| `catchAllOwnErrors` | Same with handler | +| `isOwnException` | `StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded` | +| `isAsyncCancellation` | Any `SomeAsyncException` except own exceptions | +| `catchThrow` | Catch exceptions, wrap in Left | +| `allFinally` | `tryAllErrors` + `final` + `except` (like `finally` for ExceptT) | The own-vs-async distinction is critical: `catchOwn`/`tryAllOwnErrors` never swallow async cancellation (`ThreadKilled`, `UserInterrupt`, etc.), only synchronous exceptions and resource exhaustion (`StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded`). 
**STM**: -| Function | Purpose | Source | -|----------|---------|--------| -| `tryWriteTBQueue` | Non-blocking bounded queue write, returns success | `Util.hs:256-261` | +| Function | Purpose | +|----------|---------| +| `tryWriteTBQueue` | Non-blocking bounded queue write, returns success | **Database result helpers**: -| Function | Purpose | Source | -|----------|---------|--------| -| `firstRow` | Extract first row with transform, or Left error | `Util.hs:346-347` | -| `maybeFirstRow` | Extract first row as Maybe | `Util.hs:349-350` | -| `firstRow'` | Like `firstRow` but transform can also fail | `Util.hs:355-356` | +| Function | Purpose | +|----------|---------| +| `firstRow` | Extract first row with transform, or Left error | +| `maybeFirstRow` | Extract first row as Maybe | +| `firstRow'` | Like `firstRow` but transform can also fail | **Collection utilities**: -| Function | Purpose | Source | -|----------|---------|--------| -| `groupOn` | `groupBy` using equality on projected key | `Util.hs:358-359` | -| `groupAllOn` | `groupOn` after `sortOn` (groups non-adjacent elements) | `Util.hs:372-373` | -| `toChunks` | Split list into `NonEmpty` chunks of size n | `Util.hs:376-380` | -| `packZipWith` | Optimized ByteString zipWith (direct memory access) | `Util.hs:236-254` | +| Function | Purpose | +|----------|---------| +| `groupOn` | `groupBy` using equality on projected key | +| `groupAllOn` | `groupOn` after `sortOn` (groups non-adjacent elements) | +| `toChunks` | Split list into `NonEmpty` chunks of size n | +| `packZipWith` | Optimized ByteString zipWith (direct memory access) | **Miscellaneous**: -| Function | Purpose | Source | -|----------|---------|--------| -| `safeDecodeUtf8` | Decode UTF-8 replacing errors with `'?'` | `Util.hs:382-386` | -| `bshow` / `tshow` | `show` to `ByteString` / `Text` | `Util.hs:123-129` | -| `threadDelay'` | `Int64` delay (handles overflow by looping) | `Util.hs:391-399` | -| `diffToMicroseconds` / `diffToMilliseconds` | 
`NominalDiffTime` conversion | `Util.hs:401-407` | -| `labelMyThread` | Label current thread for debugging | `Util.hs:409-410` | -| `encodeJSON` / `decodeJSON` | `ToJSON a => a -> Text` / `FromJSON a => Text -> Maybe a` | `Util.hs:415-421` | -| `traverseWithKey_` | `Map` traversal discarding results | `Util.hs:423-425` | +| Function | Purpose | +|----------|---------| +| `safeDecodeUtf8` | Decode UTF-8 replacing errors with `'?'` | +| `bshow` / `tshow` | `show` to `ByteString` / `Text` | +| `threadDelay'` | `Int64` delay (handles overflow by looping) | +| `diffToMicroseconds` / `diffToMilliseconds` | `NominalDiffTime` conversion | +| `labelMyThread` | Label current thread for debugging | +| `encodeJSON` / `decodeJSON` | `ToJSON a => a -> Text` / `FromJSON a => Text -> Maybe a` | +| `traverseWithKey_` | `Map` traversal discarding results | ## Security notes diff --git a/spec/modules/README.md b/spec/modules/README.md new file mode 100644 index 0000000000..1d18b32e35 --- /dev/null +++ b/spec/modules/README.md @@ -0,0 +1,155 @@ +# How to Document a Module + +> Read this before writing any module doc. It defines what goes in, what stays out, and why. + +## Purpose + +Module docs exist for one reason: to capture knowledge that **cannot be obtained by reading the source code**. If reading the `.hs` file tells you everything you need to know, the module doc should be brief or empty. + +These docs are an investment — their value compounds over time as multiple people (and LLMs) work on the code. Optimize for long-term value, not for looking thorough today. + +## Process + +**Read every line of the source file.** The non-obvious filter applies to what you *write*, not to what you *read*. Without reading each line, you will produce documentation from inferences rather than facts. Many non-obvious behaviors only become visible when you see a specific line of code and recognize that its implications would surprise a reader who doesn't have the surrounding context. 
+ +## File structure + +Module docs mirror `src/Simplex/` exactly. Same subfolder structure, `.hs` replaced with `.md`: + +``` +src/Simplex/Messaging/Server.hs → spec/modules/Simplex/Messaging/Server.md +src/Simplex/Messaging/Crypto.hs → spec/modules/Simplex/Messaging/Crypto.md +src/Simplex/FileTransfer/Agent.hs → spec/modules/Simplex/FileTransfer/Agent.md +``` + +## What to include + +### 1. Non-obvious behavior +Things that would surprise a competent Haskell developer reading the code for the first time: +- Subtle invariants maintained across function calls +- Ordering dependencies ("must call X before Y because...") +- Concurrency assumptions ("this TVar is only written from thread Z") +- Implicit contracts between caller and callee not captured by types + +### 2. Usage considerations +- When to use function X vs function Y +- Common mistakes callers make +- Caller obligations not enforced by the type system +- Performance characteristics that affect usage decisions + +### 3. Cross-module relationships +- Dependencies on other modules' behavior not visible from import lists +- Assumptions about how other modules use this one +- Coordination patterns (e.g., "Server.hs reads this TVar, Agent.hs writes it") + +### 4. Security notes +- Trust boundaries this module enforces or relies on +- What happens if inputs are malicious +- Which functions are security-critical and why (reference SI-XX invariants) + +### 5. 
Design rationale +- Why the code is structured this way (when not obvious) +- Alternatives considered and rejected +- Known limitations and their justification + +## What NOT to include + +- **Type signatures** — the code has them +- **Code snippets** — if you're pasting code, you're making a stale copy +- **Function-by-function prose that restates the implementation** — "this function takes X and returns Y by doing Z" adds nothing +- **Line numbers** — they're brittle and break on every edit +- **Comments that fit in one line in source** — put those in the source file instead as `-- spec:` comments + +## Format + +Each module doc has a header, then entries for functions/types that need documentation. + +```markdown +# Module.Name + +> One-line description of what this module does. + +**Source**: [`Path/To/Module.hs`](relative link to source) + +## Overview + +[Only if the module's purpose or architecture is non-obvious. +Skip for simple modules.] + +## functionName + +**Purpose**: [What this does that isn't obvious from the name and type] +**Calls**: [Qualified.Name.a](link), [Qualified.Name.b](link) +**Called by**: [Qualified.Name.c](link) +**Invariant**: SI-XX +**Security**: [What this function ensures for the threat model] + +[Free-form notes about non-obvious behavior, gotchas, etc.] + +## anotherFunction + +... +``` + +**For trivial modules** (< 100 LOC, no non-obvious behavior): + +```markdown +# Module.Name + +> One-line description. + +**Source**: [`Path/To/Module.hs`](relative link to source) + +No non-obvious behavior. See source. +``` + +This is valuable — it confirms someone looked and found nothing to document. 
+ +## Linking conventions + +### Module doc → other module docs +Use fully qualified names as link text: +```markdown +[Simplex.Messaging.Server.subscribeServiceMessages](./Simplex/Messaging/Server.md#subscribeServiceMessages) +``` + +### Module doc → topic docs +```markdown +See [rcv-services](../rcv-services.md) for the end-to-end service subscription flow. +``` + +### Source → module doc +Comment above function in source: +```haskell +-- spec: spec/modules/Simplex/Messaging/Server.md#subscribeServiceMessages +-- Delivers buffered messages for all service queues after SUBS (SI-SVC-07) +subscribeServiceMessages :: ... +``` + +Only add `-- spec:` comments where the module doc actually has something to say. Don't add links to "No non-obvious behavior" docs. + +## Topic candidate tracking + +While documenting modules, you will notice cross-cutting patterns — behaviors that span multiple modules and can't be understood from any single one. Note these in `spec/TOPICS.md` for later. Don't write the topic doc during module work; just record: + +```markdown +- **Queue rotation**: Agent.hs initiates, Client.hs sends commands, Server.hs processes, + Protocol.hs defines types. End-to-end flow not obvious from any single module. +``` + +## Quality bar + +Before finishing a module doc, ask: +1. Does every entry document something NOT in the source code? +2. Would removing any entry lose information? If not, remove it. +3. Are cross-module relationships captured that imports alone don't reveal? +4. Are security-critical functions flagged with invariant IDs? +5. Is this doc short enough that someone will actually read it? + +If any answer reveals a problem, fix it and repeat from question 1. Only finish when a full pass produces no changes. + +## Exclusions + +- **Individual migration files** (M20XXXXXX_*.hs): Self-describing SQL. No per-migration docs. +- **Auto-generated files** (GitCommit.hs): Skip. 
+- **Pure boilerplate** (Prometheus.hs metrics, Web/Embedded.hs static files): Document only if non-obvious. diff --git a/spec/modules/Simplex/Messaging/Compression.md b/spec/modules/Simplex/Messaging/Compression.md new file mode 100644 index 0000000000..67c7317da2 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Compression.md @@ -0,0 +1,17 @@ +# Simplex.Messaging.Compression + +> Zstd compression with passthrough for short messages. + +**Source**: [`Compression.hs`](../../../../src/Simplex/Messaging/Compression.hs) + +## compress1 + +Messages <= 180 bytes are wrapped as `Passthrough` (no compression). The threshold is empirically derived from real client data — messages above 180 bytes rapidly gain compression ratio. + +## decompress1 + +**Security**: decompression bomb protection. Requires `decompressedSize` to be present in the zstd frame header AND within the caller-specified `limit`. If the compressed data doesn't declare its decompressed size (non-standard zstd frames), decompression is refused entirely. This prevents memory exhaustion from malicious compressed payloads. + +## Wire format + +Tag byte `'0'` (0x30) = passthrough (1-byte length prefix, raw data). Tag byte `'1'` (0x31) = compressed (2-byte `Large` length prefix, zstd data). The passthrough path uses the standard `ByteString` encoding (255-byte limit); the compressed path uses `Large` (65535-byte limit). diff --git a/spec/modules/Simplex/Messaging/Encoding.md b/spec/modules/Simplex/Messaging/Encoding.md new file mode 100644 index 0000000000..f485aeaa46 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Encoding.md @@ -0,0 +1,41 @@ +# Simplex.Messaging.Encoding + +> Binary wire-format encoding for SMP protocol transmission. + +**Source**: [`Encoding.hs`](../../../../src/Simplex/Messaging/Encoding.hs) + +## Overview + +`Encoding` is the binary wire format — fixed-size or length-prefixed, no delimiters between fields. 
Contrast with [Simplex.Messaging.Encoding.String](./Encoding/String.md), which is the human-readable, space-delimited, base64url format used in URIs and logs. + +The two encoding classes share some instances (`Char`, `Bool`, `SystemTime`) but differ fundamentally: `Encoding` is self-delimiting via length prefixes, `StrEncoding` is delimiter-based (spaces, commas). + +## ByteString instance + +**Length prefix is 1 byte.** Maximum encodable length is 255 bytes. If a ByteString exceeds 255 bytes, the length silently wraps via `w2c . fromIntegral` — a 300-byte string encodes length as 44 (300 mod 256). Callers must ensure ByteStrings fit in 255 bytes, or use `Large` for longer values. + +**Security**: this silent length wraparound means a caller encoding untrusted input without length validation could produce a malformed message where the decoder reads fewer bytes than were intended, then misparses the remainder as the next field. + +## Large + +2-byte length prefix (`Word16`). Use for ByteStrings that may exceed 255 bytes. Maximum 65535 bytes. + +## Maybe instance + +Tags are ASCII characters `'0'` (0x30) and `'1'` (0x31), not bytes 0x00/0x01. `Nothing` encodes as the single byte 0x30; `Just x` encodes as 0x31 followed by `smpEncode x`. + +## Tail + +Consumes all remaining input. Must be the last field in any composite encoding — placing it elsewhere silently eats subsequent fields. + +## Tuple instances + +Sequential concatenation with no separators. Works because each element's encoding is self-delimiting (length-prefixed ByteString, fixed-size Word16/Word32/Int64/Char, etc.). If an element type isn't self-delimiting, the tuple won't round-trip. + +## SystemTime + +Only seconds are encoded (as Int64); nanoseconds are discarded on encode and set to 0 on decode. + +## smpEncodeList / smpListP + +1-byte length prefix for lists, so a list is limited to 255 items — mirroring ByteString's 255-byte limit.
diff --git a/spec/modules/Simplex/Messaging/Encoding/String.md b/spec/modules/Simplex/Messaging/Encoding/String.md new file mode 100644 index 0000000000..60ac9e4966 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Encoding/String.md @@ -0,0 +1,40 @@ +# Simplex.Messaging.Encoding.String + +> Human-readable, URI-friendly string encoding for SMP and agent protocols. + +**Source**: [`Encoding/String.hs`](../../../../../src/Simplex/Messaging/Encoding/String.hs) + +## Overview + +`StrEncoding` is the human-readable counterpart to [Simplex.Messaging.Encoding](../Encoding.md)'s binary `Encoding`. Key differences: + +| Aspect | `Encoding` (binary) | `StrEncoding` (string) | +|--------|---------------------|------------------------| +| ByteString | 1-byte length prefix, raw bytes | base64url encoded | +| Tuple separator | none (self-delimiting) | space-delimited | +| List separator | 1-byte count prefix | comma-separated | +| Default parser fallback | `smpP` via `parseAll` | `strP` via `base64urlP` | + +## ByteString instance + +Encodes as base64url. The parser (`strP`) only accepts non-empty strings — empty base64url input fails. + +## String instance + +Inherits from ByteString via `B.pack` / `B.unpack`. Only Char8 (Latin-1) characters round-trip; `B.pack` truncates unicode codepoints above 255. The source comment warns about this. + +## strToJSON / strParseJSON + +`strToJSON` uses `decodeLatin1`, not `decodeUtf8'`. This preserves arbitrary byte sequences (e.g., base64url-encoded binary data) as JSON strings without UTF-8 validation errors, but means the JSON representation is Latin-1, not UTF-8. + +## Default strP fallback + +If only `strDecode` is defined (no custom `strP`), the default parser runs `base64urlP` first, then passes the decoded bytes to `strDecode`. This means the type's own `strDecode` receives raw bytes, not the base64url text. Easy to confuse when implementing a new instance. + +## listItem + +Items are delimited by `,`, ` `, or `\n`. 
List items cannot contain these characters in their `strEncode` output. No escaping mechanism exists. + +## Str newtype + +Plain text (no base64). Delimited by spaces. `strP` consumes the trailing space — this is unusual and means `Str` parsing has a side effect on the input position that other `StrEncoding` parsers don't have. diff --git a/spec/modules/Simplex/Messaging/Parsers.md b/spec/modules/Simplex/Messaging/Parsers.md new file mode 100644 index 0000000000..d6b0543782 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Parsers.md @@ -0,0 +1,21 @@ +# Simplex.Messaging.Parsers + +> Attoparsec helpers and Aeson JSON encoding options. + +**Source**: [`Parsers.hs`](../../../../src/Simplex/Messaging/Parsers.hs) + +## sumTypeJSON (platform-dependent JSON encoding) + +On Darwin with the `swiftJSON` CPP flag, `sumTypeJSON` uses `ObjectWithSingleField` encoding with tag `"_owsf"`. On all other platforms, it uses `TaggedObject` encoding with `"type"` / `"data"` keys. + +This means the same Haskell type produces **different JSON** on macOS/iOS vs Linux. Cross-platform JSON interchange must use `taggedObjectJSON` or `singleFieldJSON` directly, not `sumTypeJSON`. + +The `_owsf` tag enables Swift clients to convert between the two encodings — it's a marker that the value was encoded as ObjectWithSingleField rather than TaggedObject. + +## parseE vs parseE' + +`parseE` requires full input consumption (`endOfInput`). `parseE'` does not — it succeeds if the parser matches a prefix. Using `parseE'` where `parseE` is needed silently ignores trailing input. + +## base64P + +Parses standard base64 (`+` and `/`), not base64url (`-` and `_`). Contrast with `base64urlP` in [Simplex.Messaging.Encoding.String](./Encoding/String.md), which parses URL-safe base64.
diff --git a/spec/modules/Simplex/Messaging/ServiceScheme.md b/spec/modules/Simplex/Messaging/ServiceScheme.md new file mode 100644 index 0000000000..409e8854d0 --- /dev/null +++ b/spec/modules/Simplex/Messaging/ServiceScheme.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.ServiceScheme + +> URI scheme for SimpleX service addresses. + +**Source**: [`ServiceScheme.hs`](../../../../src/Simplex/Messaging/ServiceScheme.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Session.md b/spec/modules/Simplex/Messaging/Session.md new file mode 100644 index 0000000000..22c5c90ca6 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Session.md @@ -0,0 +1,15 @@ +# Simplex.Messaging.Session + +> Atomic get-or-create session variables with identity-safe removal. + +**Source**: [`Session.hs`](../../../../src/Simplex/Messaging/Session.hs) + +## getSessVar + +Returns `Left newVar` if the key was absent (variable created), `Right existingVar` if already present. The new variable gets an atomically incremented `sessionVarId` from the shared counter, and its `sessionVar` TMVar starts empty. + +The caller uses the `Left`/`Right` distinction to decide whether to populate the TMVar (new session) or wait on the existing one. + +## removeSessVar + +Only removes if the stored variable's `sessionVarId` matches the one being removed. This is a compare-and-swap pattern: between the time a caller obtained a `SessionVar` and the time it tries to remove it, another thread may have replaced it with a new session (via `getSessVar`). Without the ID check, the stale caller would remove the new session. diff --git a/spec/modules/Simplex/Messaging/SystemTime.md b/spec/modules/Simplex/Messaging/SystemTime.md new file mode 100644 index 0000000000..92bf8e5467 --- /dev/null +++ b/spec/modules/Simplex/Messaging/SystemTime.md @@ -0,0 +1,13 @@ +# Simplex.Messaging.SystemTime + +> Type-level precision timestamps for date bucketing and expiration. 
+ +**Source**: [`SystemTime.hs`](../../../../src/Simplex/Messaging/SystemTime.hs) + +## getRoundedSystemTime + +Rounds **down** (truncation): `(seconds / precision) * precision`. A timestamp at 23:59:59 with `SystemDate` (precision 86400) rounds to the start of the current day, not the nearest day. + +## roundedToUTCTime + +Sets nanoseconds to 0. Any `RoundedSystemTime` converted to `UTCTime` and back to `SystemTime` will differ from the original `getSystemTime` value. diff --git a/spec/modules/Simplex/Messaging/TMap.md b/spec/modules/Simplex/Messaging/TMap.md new file mode 100644 index 0000000000..f994adab15 --- /dev/null +++ b/spec/modules/Simplex/Messaging/TMap.md @@ -0,0 +1,17 @@ +# Simplex.Messaging.TMap + +> STM-safe concurrent map (`TVar (Map k a)`). + +**Source**: [`TMap.hs`](../../../../src/Simplex/Messaging/TMap.hs) + +## lookupInsert / lookupDelete + +Atomic swap operations using `stateTVar` + `alterF`. `lookupInsert` returns the previous value (if any) while inserting the new one; `lookupDelete` returns the value while removing it. Both are single STM operations — no window between lookup and modification. + +## union + +Left-biased: the passed-in `Map` wins on key conflicts. `union additions tmap` overwrites existing keys in `tmap` with values from `additions`. + +## alterF + +The STM action `f` runs inside the same STM transaction. If `f` retries, the entire `alterF` retries. If `f` has side effects via other TVars, they compose atomically with the map modification. diff --git a/spec/modules/Simplex/Messaging/Util.md b/spec/modules/Simplex/Messaging/Util.md new file mode 100644 index 0000000000..3b9fd37777 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Util.md @@ -0,0 +1,52 @@ +# Simplex.Messaging.Util + +> Shared utility functions: exception handling, monadic combinators, data helpers. + +**Source**: [`Util.hs`](../../../../src/Simplex/Messaging/Util.hs) + +## Overview + +Most of this module is straightforward. 
The exception handling scheme is the part that warrants documentation — the naming is misleading and the semantics are subtle. + +## Exception handling scheme + +Three categories of exceptions, two catch strategies: + +| Category | Examples | `catchAll` | `catchOwn` | +|----------|----------|------------|------------| +| Synchronous | IOError, protocol errors | caught | caught | +| "Own" async | StackOverflow, HeapOverflow, AllocationLimitExceeded | caught | caught | +| Async cancellation | ThreadKilled, all other SomeAsyncException | caught | **re-thrown** | + +### isOwnException + +Classifies `StackOverflow`, `HeapOverflow`, and `AllocationLimitExceeded` as "own" — exceptions caused by this thread's resource usage, not by external cancellation. Despite being `AsyncException`, these should be caught like synchronous exceptions because they reflect the thread's own failure. + +### isAsyncCancellation + +True for any `SomeAsyncException` that is NOT an own exception. These represent external cancellation (e.g., `cancel`, `killThread`) and must be re-thrown to preserve structured concurrency guarantees. + +### catchOwn / catchOwn' + +Despite the name, these catch **all exceptions except async cancellations** — including synchronous exceptions. The name suggests "catch only own exceptions" but the actual semantics are "catch non-cancellation exceptions." This is the standard pattern for exception-safe cleanup in concurrent Haskell. + +### tryAllErrors vs tryAllOwnErrors + +- `tryAllErrors` / `catchAllErrors`: catch everything including async cancellations. Use when you need to convert any failure into an error value (e.g., returning error responses on a connection). +- `tryAllOwnErrors` / `catchAllOwnErrors`: catch everything except async cancellations. Use in normal business logic where cancellation should propagate. + +### AnyError typeclass + +Bridges `SomeException` into application error types via `fromSomeException`. 
All the `tryAll*` / `catchAll*` functions require this constraint. + +## raceAny_ + +Runs all actions concurrently, waits for any one to complete, then cancels all others. Uses nested `withAsync` — earlier-launched actions are canceled last (LIFO unwinding). + +## threadDelay' + +Handles `Int64` delays exceeding `maxBound :: Int` (~2147 seconds on 32-bit platforms, since delays are in microseconds) by looping in chunks. Necessary because `threadDelay` takes `Int`, not `Int64`. + +## toChunks + +Precondition: `n > 0` (comment-only, not enforced). Passing `n = 0` causes an infinite loop. diff --git a/spec/modules/Simplex/Messaging/Version.md b/spec/modules/Simplex/Messaging/Version.md new file mode 100644 index 0000000000..67bbf1b4f4 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Version.md @@ -0,0 +1,27 @@ +# Simplex.Messaging.Version + +> Version negotiation with proof-carrying compatibility checks. + +**Source**: [`Version.hs`](../../../../src/Simplex/Messaging/Version.hs) + +## Overview + +The module's central design: `Compatible` and `VRange` constructors are not exported. The only way to obtain a `Compatible` value is through the negotiation functions, and the only way to construct a `VersionRange` is through `mkVersionRange` (which validates) or parsing. This makes "compatibility was checked" a compile-time guarantee — code that holds a `Compatible a` has proof that negotiation succeeded. + +See [Simplex.Messaging.Version.Internal](./Version/Internal.md) for why the `Version` constructor is separated. + +## mkVersionRange + +Uses `error` if `min > max`. Safe only for compile-time constants. Runtime construction must use `safeVersionRange`, which returns `Nothing` on invalid input. + +## compatibleVersion vs compatibleVRange + +`compatibleVersion` selects a single version: `min(max1, max2)` — the highest mutually supported version. `compatibleVRange` returns the full intersection range: `(max(min1,min2), min(max1,max2))`.
The intersection is used when both sides need to remember the agreed range for future version-gated behavior, not just the single negotiated version. + +## compatibleVRange' + +Different from `compatibleVRange`: caps the range's *maximum* at a given version, rather than intersecting two ranges. Returns `Nothing` if the cap is below the range's minimum. Used when a peer reports a specific version and you need to constrain your range accordingly. + +## VersionI / VersionRangeI typeclasses + +Allow extension types that wrap `Version` or `VersionRange` (e.g., types carrying additional handshake parameters alongside the version) to participate in negotiation without unwrapping. The associated types (`VersionT`, `VersionRangeT`) map between the version and range forms of the extension type. diff --git a/spec/modules/Simplex/Messaging/Version/Internal.md b/spec/modules/Simplex/Messaging/Version/Internal.md new file mode 100644 index 0000000000..9fe8cffe93 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Version/Internal.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Version.Internal + +> Exports the `Version` constructor for internal use. + +**Source**: [`Version/Internal.hs`](../../../../../src/Simplex/Messaging/Version/Internal.hs) + +This module exists solely to split the `Version` constructor export. `Version.hs` exports `Version` as an opaque type (no constructor); `Version/Internal.hs` exports the `Version` constructor for modules that need to fabricate version values (protocol constants, parsers, tests). Application code should not import this module. 
diff --git a/spec/rcv-services.md b/spec/rcv-services.md index b0d97d9f74..6518059f20 100644 --- a/spec/rcv-services.md +++ b/spec/rcv-services.md @@ -33,10 +33,10 @@ Service client SMP Server ## Version gates -| Constant | Value | Gate | Source | -|----------|-------|------|--------| -| `serviceCertsSMPVersion` | 16 | Service handshake, `SOK`, `useServiceAuth` | Transport.hs:214 | -| `rcvServiceSMPVersion` | 19 | `SUBS`/`NSUBS` parameters, `SOKS`/`ENDS` idsHash, messaging service role in handshake | Transport.hs:223 | +| Constant | Value | Gate | +|----------|-------|------| +| `serviceCertsSMPVersion` | 16 | Service handshake, `SOK`, `useServiceAuth` | +| `rcvServiceSMPVersion` | 19 | `SUBS`/`NSUBS` parameters, `SOKS`/`ENDS` idsHash, messaging service role in handshake | The two-version split means: - v16-18 servers accept service certificates and per-queue `SUB` with service auth, but `SUBS`/`NSUBS` send no count/hash parameters (bare command tag only). @@ -55,14 +55,12 @@ The two-version split means: data SMPServiceRole = SRMessaging | SRNotifier | SRProxy -- Wire: "M" | "N" | "P" ``` -Source: Transport.hs:594 ### Party (service-related constructors) ```haskell data Party = ... | RecipientService | NotifierService | ... ``` -Source: Protocol.hs:335-346 The `ServiceParty` type family constrains to `RecipientService | NotifierService` only: ```haskell @@ -71,7 +69,6 @@ type family ServiceParty (p :: Party) :: Constraint where ServiceParty NotifierService = () ServiceParty p = (Int ~ Bool, TypeError ...) -- compile-time error ``` -Source: Protocol.hs:430-434 ### IdsHash @@ -89,7 +86,6 @@ instance Monoid IdsHash where queueIdHash :: QueueId -> IdsHash queueIdHash = IdsHash . C.md5Hash . 
unEntityId ``` -Source: Protocol.hs:1501-1526 **Key property**: XOR is self-inverse, so `addServiceSubs` and `subtractServiceSubs` both use `<>` (XOR) for the hash component: ```haskell @@ -98,7 +94,6 @@ subtractServiceSubs (n', idsHash') (n, idsHash) | n > n' = (n - n', idsHash <> idsHash') | otherwise = (0, mempty) ``` -Source: Protocol.hs:1528-1534 ### ServiceSub / ServiceSubResult / ServiceSubError @@ -116,7 +111,6 @@ data ServiceSubError | SSErrorQueueCount {expectedQueueCount, subscribedQueueCount :: Int64} | SSErrorQueueIdsHash {expectedQueueIdsHash, subscribedQueueIdsHash :: IdsHash} ``` -Source: Protocol.hs:1476-1499 `serviceSubResult` compares expected vs actual, returning the first mismatch (priority: serviceId > count > idsHash). @@ -128,7 +122,6 @@ data STMService = STMService serviceRcvQueues :: TVar (Set RecipientId, IdsHash), serviceNtfQueues :: TVar (Set NotifierId, IdsHash) } ``` -Source: QueueStore/STM.hs:64-68 Tracks the set of queue IDs and their cumulative XOR hash per service, per role (receive vs notify). @@ -142,8 +135,6 @@ Standard SMP handshake is two messages: server sends `SMPServerHandshake`, clien 2. **Client -> Server**: `SMPClientHandshake` with `clientService :: Maybe SMPClientHandshakeService` 3. **Server -> Client**: `SMPServerHandshakeResponse {serviceId}` or `SMPServerHandshakeError {handshakeError}` -Source: Transport.hs:752-791 (server), Transport.hs:796-848 (client) - ### SMPClientHandshakeService ```haskell @@ -151,7 +142,6 @@ data SMPClientHandshakeService = SMPClientHandshakeService { serviceRole :: SMPServiceRole, serviceCertKey :: CertChainPubKey } ``` -Source: Transport.hs:582-585 The `serviceCertKey` contains the TLS client certificate chain and a proof-of-possession: the service's Ed25519 session key signed by the service's X.509 signing key (`C.signX509 serviceSignKey $ C.publicToX509 k`). @@ -164,14 +154,10 @@ The `serviceCertKey` contains the TLS client certificate chain and a proof-of-po 5. 
Call `getService` callback (QueueStore.getCreateService) to get/create ServiceId 6. Send `SMPServerHandshakeResponse {serviceId}` back to client -Source: Transport.hs:775-791 - ### Client-side reception (`getClientService`) Client receives either `SMPServerHandshakeResponse {serviceId}` (success) or `SMPServerHandshakeError {handshakeError}` (failure). On success, stores `THClientService {serviceId, serviceRole, serviceCertHash, serviceKey}`. -Source: Transport.hs:843-847 - ### Version-gated service role filtering (`mkClientService`) ```haskell @@ -179,7 +165,6 @@ mkClientService v (ServiceCredentials {serviceRole, ...}, (k, _)) | serviceRole == SRMessaging && v < rcvServiceSMPVersion = Nothing | otherwise = Just SMPClientHandshakeService {..} ``` -Source: Transport.hs:838-842 Messaging services are suppressed below v19. Notifier services are sent at v16+. @@ -192,7 +177,6 @@ data ServiceCredentials = ServiceCredentials serviceCertHash :: XV.Fingerprint, serviceSignKey :: C.APrivateSignKey } ``` -Source: Transport.hs:587-592 ## Protocol layer: commands and messages @@ -216,7 +200,6 @@ useServiceAuth = \case Cmd _ NSUB -> True _ -> False ``` -Source: Protocol.hs:1737-1742 For these commands, `tEncodeAuth` appends both the primary queue key signature and an optional service Ed25519 signature. `SUBS`/`NSUBS` use the ServiceId as entity and are signed only by the service session key. 
@@ -237,21 +220,18 @@ For these commands, `tEncodeAuth` appends both the primary queue key signature a v >= 19: tag SP count idsHash v < 19: tag (bare, no parameters) ``` -Source: Protocol.hs:1769-1771, 1787-1789 **SOKS/ENDS encoding:** ``` v >= 19: tag SP count idsHash v < 19: tag SP count (no idsHash) ``` -Source: Protocol.hs:1951-1953 **SOKS/ENDS decoding:** ``` v >= 19: tag -> resp <$> _smpP <*> smpP (count + idsHash) v < 19: tag -> resp <$> _smpP <*> pure mempty (count only, mempty hash) ``` -Source: Protocol.hs:1996-1998 ## Server layer @@ -267,7 +247,6 @@ data Client s = Client ntfServiceSubsCount :: TVar (Int64, IdsHash), -- running (count, hash) for notifier queues ... } ``` -Source: Env/STM.hs:437-456 Server-global state: ```haskell @@ -279,7 +258,6 @@ data ServerSubscribers s = ServerSubscribers subClients :: TVar IntSet, pendingEvents :: TVar (IntMap (NonEmpty (EntityId, BrokerMsg))) } ``` -Source: Env/STM.hs:362-369 ### ClientSub events @@ -289,7 +267,6 @@ data ClientSub | CSDeleted QueueId (Maybe ServiceId) -- prev service ID | CSService ServiceId (Int64, IdsHash) -- service subscription change ``` -Source: Env/STM.hs:426-429 These are enqueued into `subQ` and processed by `serverThread` (the subscription event loop). @@ -526,7 +503,6 @@ subscribeService c party n idsHash = case smpClientService c of SNotifierService -> NSUBS n idsHash Nothing -> throwE PCEServiceUnavailable ``` -Source: Client.hs:921-934 Entity is `serviceId`, auth key is the service session key (Ed25519). The client passes its expected count and hash; the server returns its own. @@ -551,7 +527,6 @@ This prevents MITM service substitution inside TLS: an attacker cannot replace t (fp <> t, Just $ C.sign' serviceKey t) _ -> (t, Nothing) ``` -Source: Client.hs:1398-1401 ### Service runtime accessors @@ -562,7 +537,6 @@ smpClientService = thAuth . 
thParams >=> clientService smpClientServiceId :: SMPClient -> Maybe ServiceId smpClientServiceId = fmap (\THClientService {serviceId} -> serviceId) . smpClientService ``` -Source: Client.hs:936-942 ### Configuration @@ -632,8 +606,6 @@ data SessSubs = SessSubs activeServiceSub :: TVar (Maybe ServiceSub), pendingServiceSub :: TVar (Maybe ServiceSub) } ``` -Source: TSessionSubs.hs:59-65 - Key operations: - `setPendingServiceSub`: stores expected ServiceSub before SUBS is sent - `setActiveServiceSub`: promotes to active after SOKS, validates session ID @@ -657,8 +629,6 @@ CREATE TABLE client_services( service_queue_ids_hash BLOB NOT NULL DEFAULT x'00000000000000000000000000000000' ); ``` -Source: Agent/Store/SQLite/Migrations/M20260115_service_certs.hs:11-23 - ### `rcv_queues.rcv_service_assoc` Boolean column added to `rcv_queues`. When set, the queue is associated with the service for this server. SQLite triggers automatically maintain `service_queue_count` and `service_queue_ids_hash` on insert/delete/update of `rcv_queues` rows. @@ -676,8 +646,6 @@ Triggers: `tr_rcv_queue_insert`, `tr_rcv_queue_delete`, `tr_rcv_queue_update_rem | `removeRcvServiceAssocs` | Remove service association for all queues on a server | | `unassocUserServerRcvQueueSubs` | Remove association and return queues for re-subscription | -Source: AgentStore.hs:419-494, 2378-2414 - ### Service ID nullification on cert change `INSERT ... ON CONFLICT DO UPDATE SET ... service_id = NULL` (AgentStore.hs:429) — when service credentials are updated (new cert), the stored `service_id` is cleared, forcing a new handshake to get a fresh ServiceId. 
@@ -709,8 +677,6 @@ On first use per SMP server, `mkDbService` (Env.hs:126-142) generates a self-sig | `CAServiceSubError` | Log error (non-fatal; fatal errors go to `CAServiceUnavailable`) | | `CAServiceUnavailable` | **Critical recovery path**: calls `removeServiceAndAssociations`, wipes service creds, resubscribes all queues individually | -Source: Server.hs:567-602 - ### `removeServiceAndAssociations` (Store/Postgres.hs:620-652) Nuclear recovery: clears `ntf_service_id`, `ntf_service_cert*`, resets `smp_notifier_count`/`smp_notifier_ids_hash`, and removes all `ntf_service_assoc` flags from subscriptions. Used when the service subscription is irrecoverably broken (e.g., ServiceId mismatch after cert rotation). diff --git a/spec/version.md b/spec/version.md index 6d9a23c091..19ad786fe2 100644 --- a/spec/version.md +++ b/spec/version.md @@ -14,8 +14,6 @@ The `Compatible` newtype can only be constructed internally (constructor is not ### `Version v` -**Source**: `Version/Internal.hs:11-12` - ```haskell newtype Version v = Version Word16 ``` @@ -31,8 +29,6 @@ The constructor is exported from `Version.Internal` but not from `Version`, so a ### `VersionRange v` -**Source**: `Version.hs:46-50` - ```haskell data VersionRange v = VRange { minVersion :: Version v @@ -42,16 +38,14 @@ data VersionRange v = VRange Invariant: `minVersion <= maxVersion` (enforced by smart constructors). -The `VRange` constructor is not exported — only the pattern synonym `VersionRange` (read-only, `Version.hs:41-44`) is public. +The `VRange` constructor is not exported — only the pattern synonym `VersionRange` (read-only) is public. 
-- `Encoding`: two Word16s concatenated (4 bytes total, `Version.hs:80-84`) -- `StrEncoding`: `"min-max"` or `"v"` if min == max (`Version.hs:86-93`) +- `Encoding`: two Word16s concatenated (4 bytes total) +- `StrEncoding`: `"min-max"` or `"v"` if min == max - JSON: `{"minVersion": n, "maxVersion": n}` ### `VersionScope v` -**Source**: `Version.hs:64` - ```haskell class VersionScope v ``` @@ -67,8 +61,6 @@ This prevents accidentally mixing version ranges from different protocols in neg ### `Compatible a` -**Source**: `Version.hs:117-122` - ```haskell newtype Compatible a = Compatible_ a @@ -80,8 +72,6 @@ Proof that compatibility was checked. The `Compatible_` constructor is not expor ### `VersionI` / `VersionRangeI` type classes -**Source**: `Version.hs:95-115` - Multi-param typeclasses with functional dependencies for generic version/range operations. Allow extension types that wrap `Version` or `VersionRange` to participate in negotiation: ```haskell @@ -103,76 +93,64 @@ Identity instances exist for `Version v` and `VersionRange v` themselves. 
### Construction -| Function | Signature | Purpose | Source | -|----------|-----------|---------|--------| -| `mkVersionRange` | `Version v -> Version v -> VersionRange v` | Construct range, `error` if min > max | `Version.hs:67-70` | -| `safeVersionRange` | `Version v -> Version v -> Maybe (VersionRange v)` | Safe construction, `Nothing` if invalid | `Version.hs:72-75` | -| `versionToRange` | `Version v -> VersionRange v` | Singleton range (min == max) | `Version.hs:77-78` | +| Function | Signature | Purpose | +|----------|-----------|---------| +| `mkVersionRange` | `Version v -> Version v -> VersionRange v` | Construct range, `error` if min > max | +| `safeVersionRange` | `Version v -> Version v -> Maybe (VersionRange v)` | Safe construction, `Nothing` if invalid | +| `versionToRange` | `Version v -> VersionRange v` | Singleton range (min == max) | ### Compatibility checking -#### `isCompatible` +### isCompatible -**Source**: `Version.hs:124-125` +**Purpose**: Check if a single version falls within a range. ```haskell isCompatible :: VersionI v a => a -> VersionRange v -> Bool ``` -Check if a single version falls within a range. - -#### `isCompatibleRange` +### isCompatibleRange -**Source**: `Version.hs:127-130` +**Purpose**: Check if two version ranges overlap: `min1 <= max2 && min2 <= max1`. ```haskell isCompatibleRange :: VersionRangeI v a => a -> VersionRange v -> Bool ``` -Check if two version ranges overlap: `min1 <= max2 && min2 <= max1`. +### proveCompatible -#### `proveCompatible` - -**Source**: `Version.hs:132-133` +**Purpose**: If version is compatible, wrap in `Compatible` proof. Returns `Nothing` if out of range. ```haskell proveCompatible :: VersionI v a => a -> VersionRange v -> Maybe (Compatible a) ``` -If version is compatible, wrap in `Compatible` proof. Returns `Nothing` if out of range. - ### Negotiation -#### `compatibleVersion` +### compatibleVersion -**Source**: `Version.hs:135-140` +**Purpose**: Negotiate a single version from two ranges. 
Returns `min(max1, max2)` — the highest mutually-supported version. Returns `Nothing` if ranges don't overlap. ```haskell compatibleVersion :: VersionRangeI v a => a -> VersionRange v -> Maybe (Compatible (VersionT v a)) ``` -Negotiate a single version from two ranges. Returns `min(max1, max2)` — the highest mutually-supported version. Returns `Nothing` if ranges don't overlap. - -#### `compatibleVRange` +### compatibleVRange -**Source**: `Version.hs:143-148` +**Purpose**: Compute the intersection of two version ranges: `(max(min1,min2), min(max1,max2))`. Returns `Nothing` if the intersection is empty. ```haskell compatibleVRange :: VersionRangeI v a => a -> VersionRange v -> Maybe (Compatible a) ``` -Compute the intersection of two version ranges: `(max(min1,min2), min(max1,max2))`. Returns `Nothing` if the intersection is empty (i.e., ranges don't overlap). +### compatibleVRange' -#### `compatibleVRange'` - -**Source**: `Version.hs:151-156` +**Purpose**: Cap a version range's maximum at a given version. Returns `Nothing` if the cap is below the range's minimum. ```haskell compatibleVRange' :: VersionRangeI v a => a -> Version v -> Maybe (Compatible a) ``` -Cap a version range's maximum at a given version. Returns `Nothing` if the cap is below the range's minimum. - ## Protocol version constants Version constants for each protocol are defined in their respective Transport modules. 
For SMP, key gates include: From e5dbe97e1da8ea49a500cd223968e2298c962276 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 09:06:05 +0000 Subject: [PATCH 26/91] spec references in code --- spec/modules/README.md | 19 ++++++++++++++----- src/Simplex/Messaging/Compression.hs | 3 +++ src/Simplex/Messaging/Encoding.hs | 1 + src/Simplex/Messaging/Parsers.hs | 4 +++- src/Simplex/Messaging/Session.hs | 3 +++ src/Simplex/Messaging/Util.hs | 6 ++++++ 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/spec/modules/README.md b/spec/modules/README.md index 1d18b32e35..9f057b9039 100644 --- a/spec/modules/README.md +++ b/spec/modules/README.md @@ -119,14 +119,23 @@ See [rcv-services](../rcv-services.md) for the end-to-end service subscription f ``` ### Source → module doc -Comment above function in source: + +Add `-- spec:` comments as part of the module documentation work — when you document something non-obvious, add the link in source at the same time. Two levels: + +**Module-level** (below the module declaration): when the Overview section has value. +```haskell +module Simplex.Messaging.Util (...) where +-- spec: spec/modules/Simplex/Messaging/Util.md +``` + +**Function-level** (above the function): when that function has a doc entry worth pointing to. ```haskell --- spec: spec/modules/Simplex/Messaging/Server.md#subscribeServiceMessages --- Delivers buffered messages for all service queues after SUBS (SI-SVC-07) -subscribeServiceMessages :: ... +-- spec: spec/modules/Simplex/Messaging/Util.md#catchOwn +-- Catches all exceptions except async cancellations (misleading name) +catchOwn :: ... ``` -Only add `-- spec:` comments where the module doc actually has something to say. Don't add links to "No non-obvious behavior" docs. +Only add `-- spec:` comments where the module doc actually says something the code doesn't. 
Don't add links to "No non-obvious behavior" docs or to entries that merely restate the source. ## Topic candidate tracking diff --git a/src/Simplex/Messaging/Compression.hs b/src/Simplex/Messaging/Compression.hs index 20000ded38..32430bc88f 100644 --- a/src/Simplex/Messaging/Compression.hs +++ b/src/Simplex/Messaging/Compression.hs @@ -1,6 +1,7 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +-- spec: spec/modules/Simplex/Messaging/Compression.md module Simplex.Messaging.Compression ( Compressed, maxLengthPassthrough, @@ -42,6 +43,8 @@ compress1 bs | B.length bs <= maxLengthPassthrough = Passthrough bs | otherwise = Compressed . Large $ Z1.compress compressionLevel bs +-- spec: spec/modules/Simplex/Messaging/Compression.md#decompress1 +-- Decompression bomb protection: refuses data without declared size or exceeding limit decompress1 :: Int -> Compressed -> Either String ByteString decompress1 limit = \case Passthrough bs -> Right bs diff --git a/src/Simplex/Messaging/Encoding.hs b/src/Simplex/Messaging/Encoding.hs index d069e5518a..4381ff8bbd 100644 --- a/src/Simplex/Messaging/Encoding.hs +++ b/src/Simplex/Messaging/Encoding.hs @@ -7,6 +7,7 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} +-- spec: spec/modules/Simplex/Messaging/Encoding.md module Simplex.Messaging.Encoding ( Encoding (..), Tail (..), diff --git a/src/Simplex/Messaging/Parsers.hs b/src/Simplex/Messaging/Parsers.hs index 7acbec7439..3a2fd07fc0 100644 --- a/src/Simplex/Messaging/Parsers.hs +++ b/src/Simplex/Messaging/Parsers.hs @@ -4,6 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} +-- spec: spec/modules/Simplex/Messaging/Parsers.md module Simplex.Messaging.Parsers ( base64P, parse, @@ -105,7 +106,8 @@ enumJSON tagModifier = J.allNullaryToStringTag = True } --- used in platform-specific encoding, includes tag for single-field encoding of sum types to allow conversion to tagged objects +-- spec: 
spec/modules/Simplex/Messaging/Parsers.md#sumTypeJSON +-- Platform-dependent: ObjectWithSingleField on Darwin+swiftJSON, TaggedObject elsewhere sumTypeJSON :: (String -> String) -> J.Options #if defined(darwin_HOST_OS) && defined(swiftJSON) sumTypeJSON = singleFieldJSON_ $ Just SingleFieldJSONTag diff --git a/src/Simplex/Messaging/Session.hs b/src/Simplex/Messaging/Session.hs index ff5d7e0a05..bb082b1bb6 100644 --- a/src/Simplex/Messaging/Session.hs +++ b/src/Simplex/Messaging/Session.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE ScopedTypeVariables #-} +-- spec: spec/modules/Simplex/Messaging/Session.md module Simplex.Messaging.Session ( SessionVar (..), getSessVar, @@ -32,6 +33,8 @@ getSessVar sessSeq sessKey vs sessionVarTs = maybe (Left <$> newSessionVar) (pur TM.insert sessKey v vs pure v +-- spec: spec/modules/Simplex/Messaging/Session.md#removeSessVar +-- Compare-and-swap: only removes if sessionVarId matches, preventing stale removal removeSessVar :: Ord k => SessionVar a -> k -> TMap k (SessionVar a) -> STM () removeSessVar v sessKey vs = TM.lookup sessKey vs >>= \case diff --git a/src/Simplex/Messaging/Util.hs b/src/Simplex/Messaging/Util.hs index 6c1937144f..abbf5a3b33 100644 --- a/src/Simplex/Messaging/Util.hs +++ b/src/Simplex/Messaging/Util.hs @@ -3,6 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +-- spec: spec/modules/Simplex/Messaging/Util.md module Simplex.Messaging.Util ( AnyError (..), (<$?>), @@ -294,6 +295,7 @@ allFinally :: (AnyError e, MonadUnliftIO m) => ExceptT e m a -> ExceptT e m b -> allFinally action final = tryAllErrors action >>= \r -> final >> except r {-# INLINE allFinally #-} +-- spec: spec/modules/Simplex/Messaging/Util.md#isOwnException isOwnException :: E.SomeException -> Bool isOwnException e = case E.fromException e of Just StackOverflow -> True @@ -303,16 +305,20 @@ isOwnException e = case E.fromException e of _ -> False {-# INLINE isOwnException #-} +-- spec: 
spec/modules/Simplex/Messaging/Util.md#isAsyncCancellation isAsyncCancellation :: E.SomeException -> Bool isAsyncCancellation e = case E.fromException e of Just (_ :: SomeAsyncException) -> not $ isOwnException e Nothing -> False {-# INLINE isAsyncCancellation #-} +-- spec: spec/modules/Simplex/Messaging/Util.md#catchOwn +-- Catches all exceptions EXCEPT async cancellations (name is misleading) catchOwn' :: IO a -> (E.SomeException -> IO a) -> IO a catchOwn' action handleInternal = action `E.catch` \e -> if isAsyncCancellation e then E.throwIO e else handleInternal e {-# INLINE catchOwn' #-} +-- spec: spec/modules/Simplex/Messaging/Util.md#catchOwn catchOwn :: MonadUnliftIO m => m a -> (E.SomeException -> m a) -> m a catchOwn action handleInternal = withRunInIO $ \run -> From 7ece87f1b63e28f042c5331f48f174709ccd26a5 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 09:47:18 +0000 Subject: [PATCH 27/91] encoding notes --- spec/modules/Simplex/Messaging/Encoding.md | 4 ++++ spec/modules/Simplex/Messaging/Encoding/String.md | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/spec/modules/Simplex/Messaging/Encoding.md b/spec/modules/Simplex/Messaging/Encoding.md index f485aeaa46..8db63d0cc2 100644 --- a/spec/modules/Simplex/Messaging/Encoding.md +++ b/spec/modules/Simplex/Messaging/Encoding.md @@ -36,6 +36,10 @@ Sequential concatenation with no separators. Works because each element's encodi Only seconds are encoded (as Int64); nanoseconds are discarded on encode and set to 0 on decode. +## String instance + +`smpEncode` goes through `B.pack`, which silently truncates any Unicode character above codepoint 255 to its lowest byte. A String containing non-Latin-1 characters is silently corrupted on encode with no error. Same issue exists in the `StrEncoding String` instance — see [Simplex.Messaging.Encoding.String](./Encoding/String.md#string-instance). 
+ ## smpEncodeList / smpListP 1-byte length prefix for lists — same 255-item limit as ByteString's 255-byte limit. diff --git a/spec/modules/Simplex/Messaging/Encoding/String.md b/spec/modules/Simplex/Messaging/Encoding/String.md index 60ac9e4966..1e60295b81 100644 --- a/spec/modules/Simplex/Messaging/Encoding/String.md +++ b/spec/modules/Simplex/Messaging/Encoding/String.md @@ -27,9 +27,12 @@ Inherits from ByteString via `B.pack` / `B.unpack`. Only Char8 (Latin-1) charact `strToJSON` uses `decodeLatin1`, not `decodeUtf8'`. This preserves arbitrary byte sequences (e.g., base64url-encoded binary data) as JSON strings without UTF-8 validation errors, but means the JSON representation is Latin-1, not UTF-8. -## Default strP fallback +## Class default: strP assumes base64url for all types -If only `strDecode` is defined (no custom `strP`), the default parser runs `base64urlP` first, then passes the decoded bytes to `strDecode`. This means the type's own `strDecode` receives raw bytes, not the base64url text. Easy to confuse when implementing a new instance. +The `MINIMAL` pragma allows defining only `strDecode` without `strP`. But the default `strP = strDecode <$?> base64urlP` then assumes input is base64url-encoded — for *any* type, not just ByteString. Two consequences: + +1. The type's `strDecode` receives raw decoded bytes, not the base64url text. Easy to confuse when implementing a new instance. +2. `base64urlP` requires non-empty input (`takeWhile1`), so the default `strP` cannot parse empty values — even if `strDecode ""` would succeed. Types that can encode to empty output must define `strP` explicitly. 
## listItem From 844b5ad3f11e29d6e2fd7561f22c542545b4afa5 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 11:53:18 +0000 Subject: [PATCH 28/91] cryptography modules specs --- spec/TOPICS.md | 4 + spec/modules/Simplex/Messaging/Crypto.md | 82 ++++++++++++++++ spec/modules/Simplex/Messaging/Crypto/File.md | 25 +++++ spec/modules/Simplex/Messaging/Crypto/Lazy.md | 40 ++++++++ .../Simplex/Messaging/Crypto/Ratchet.md | 98 +++++++++++++++++++ .../Simplex/Messaging/Crypto/SNTRUP761.md | 13 +++ .../Simplex/Messaging/Crypto/ShortLink.md | 36 +++++++ src/Simplex/Messaging/Crypto.hs | 6 +- src/Simplex/Messaging/Crypto/Ratchet.hs | 6 +- 9 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 spec/modules/Simplex/Messaging/Crypto.md create mode 100644 spec/modules/Simplex/Messaging/Crypto/File.md create mode 100644 spec/modules/Simplex/Messaging/Crypto/Lazy.md create mode 100644 spec/modules/Simplex/Messaging/Crypto/Ratchet.md create mode 100644 spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md create mode 100644 spec/modules/Simplex/Messaging/Crypto/ShortLink.md diff --git a/spec/TOPICS.md b/spec/TOPICS.md index a0c1f4eafb..a8eafc1a13 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -3,3 +3,7 @@ > Cross-cutting patterns noticed during module documentation. Each entry may become a topic doc in `spec/` after all module docs are complete. - **Exception handling strategy**: `catchOwn`/`catchAll`/`tryAllErrors` pattern (defined in Util.hs) used across server, client, and agent modules. The three-category classification (synchronous, own-async, cancellation) and when to use which catch variant is not obvious from any single call site. + +- **Padding schemes**: Three different padding formats across the codebase — Crypto.hs uses 2-byte Word16 length prefix (max ~65KB), Crypto/Lazy.hs uses 8-byte Int64 prefix (file-sized), and both use '#' fill character. 
Ratchet header padding uses fixed sizes (88 or 2310 bytes). All use `pad`/`unPad` but with incompatible formats. The relationship between padding, encryption, and message size limits spans Crypto, Lazy, Ratchet, and the protocol layer. + +- **NaCl construction variants**: crypto_box, secret_box, and KEM hybrid secret all use the same XSalsa20+Poly1305 core (Crypto.hs `xSalsa20`), but with different key sources (DH, symmetric, SHA3_256(DH||KEM)). The lazy streaming variant (Lazy.hs) adds prepend-tag vs tail-tag placement. File.hs wraps lazy streaming with handle-based I/O. Full picture requires reading Crypto.hs, Lazy.hs, File.hs, and SNTRUP761.hs together. diff --git a/spec/modules/Simplex/Messaging/Crypto.md b/spec/modules/Simplex/Messaging/Crypto.md new file mode 100644 index 0000000000..f1c6605124 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Crypto.md @@ -0,0 +1,82 @@ +# Simplex.Messaging.Crypto + +> Core cryptographic primitives: key types, NaCl crypto_box/secret_box, AEAD-GCM, signing, padding, X509, HKDF. + +**Source**: [`Crypto.hs`](../../../../src/Simplex/Messaging/Crypto.hs) + +## Overview + +This is the largest crypto module (~1540 lines). It defines the type-level algorithm system (GADTs + type families), all key types, and the fundamental encrypt/decrypt/sign/verify operations used throughout the protocol stack. Higher-level modules ([Ratchet](./Crypto/Ratchet.md), [Lazy](./Crypto/Lazy.md), [File](./Crypto/File.md)) build on these primitives. + +## Algorithm type system + +Four algorithms (`Ed25519`, `Ed448`, `X25519`, `X448`) are encoded as a promoted data kind `Algorithm`. Type families constrain which algorithms support which operations: + +- `SignatureAlgorithm`: only `Ed25519`, `Ed448` +- `DhAlgorithm`: only `X25519`, `X448` +- `AuthAlgorithm`: `Ed25519`, `Ed448`, `X25519` (but NOT `X448`) + +Using the wrong algorithm produces a **compile-time error** via `TypeError`. 
The runtime bridge uses `Dict` from `Data.Constraint` — functions like `signatureAlgorithm :: SAlgorithm a -> Maybe (Dict (SignatureAlgorithm a))` allow dynamic dispatch while preserving type safety. + +## PrivateKeyEd25519 StrEncoding deliberately omitted + +The `StrEncoding` instance for `PrivateKey Ed25519` is commented out with the note "Do not enable, to avoid leaking key data." Only `PrivateKey X25519` has `StrEncoding`, used specifically for the notification store log. This is a deliberate security decision — Ed25519 signing keys should never appear in human-readable formats. + +## Two AEAD initialization paths + +- **`initAEAD`**: Takes 16-byte `IV`, transforms it internally via `cryptonite_aes_gcm_init`. Used by the double ratchet. +- **`initAEADGCM`**: Takes 12-byte `GCMIV`, does NOT transform. Used for WebRTC frame encryption. + +These are **not interchangeable** — using the wrong IV size or init function produces silent corruption. The code comments note that WebCrypto compatibility requires `initAEADGCM`, and the ratchet may need to migrate away from `initAEAD` in the future. + +## cbNonce — silent truncation/padding + +`cbNonce` adjusts any ByteString to exactly 24 bytes: +- If longer: silently truncates to first 24 bytes +- If shorter: silently pads with zero bytes + +No error is raised for incorrect input lengths. This means a programming error passing the wrong-length nonce will produce valid but wrong encryption, not a failure. + +## pad / unPad — 2-byte length prefix + +`pad` prepends a 2-byte big-endian `Word16` length, then the message, then `'#'` padding characters to fill `paddedLen`. Maximum message length is `2^16 - 3 = 65533` bytes. The `'#'` padding character is a convention, not verified on decode — `unPad` only reads the length prefix and extracts that many bytes. + +Contrast with [Simplex.Messaging.Crypto.Lazy.pad](./Crypto/Lazy.md#padding-8-byte-length-prefix) which uses an 8-byte `Int64` prefix for file-sized data. 
+ +## crypto_box / secret_box + +Both use the same underlying `xSalsa20` + `Poly1305.auth` implementation. The difference is only in the key: +- **crypto_box** (`cbEncrypt`/`cbDecrypt`): uses a DH shared secret (`DhSecret X25519`) +- **secret_box** (`sbEncrypt`/`sbDecrypt`): uses a symmetric key (`SbKey`, 32 bytes) + +Both apply `pad`/`unPad` by default. The `NoPad` variants skip padding. + +## xSalsa20 + +The XSalsa20 implementation splits the 24-byte nonce into an 8-byte prefix and a 16-byte remainder. The 8-byte prefix initializes the cipher state (prepended with 16 zero bytes), and the 16-byte remainder derives a subkey. The first 32 bytes of output become the Poly1305 one-time key (`rs`), then the rest encrypts the message. This is the standard NaCl construction. + +## CbAuthenticator + +An authentication scheme that encrypts the SHA-512 hash of the message using crypto_box, rather than the message itself. The result is 80 bytes (64 hash + 16 auth tag). Used for authenticating messages where the content is transmitted separately from the authentication proof. + +## Secret box chains (sbcInit / sbcHkdf) + +HKDF-based key chains for deriving sequential key+nonce pairs: +- `sbcInit`: derives two 32-byte chain keys from a salt and shared secret using `HKDF(salt, secret, "SimpleXSbChainInit", 64)` +- `sbcHkdf`: advances a chain key, producing a new chain key (32 bytes), an SbKey (32 bytes), and a CbNonce (24 bytes) from `HKDF("", chainKey, "SimpleXSbChain", 88)` + +## Key encoding + +All keys are encoded as ASN.1 DER (X.509 SubjectPublicKeyInfo for public, PKCS#8 for private). The algorithm is determined by the encoded key length on decode — `decodePubKey` / `decodePrivKey` parse the ASN.1 structure, then dispatch on the X.509 key type. + +## Signature algorithm detection + +`decodeSignature` determines the algorithm by signature length: Ed25519 signatures are 64 bytes, Ed448 signatures are 114 bytes. Any other size is rejected.
+ +## GCMIV constructor not exported + +`GCMIV` constructor is not exported — only `gcmIV :: ByteString -> Either CryptoError GCMIV` is available, which validates that the input is exactly 12 bytes. This prevents construction of invalid IVs. + +## generateKeyPair is STM + +Key generation uses `TVar ChaChaDRG` and runs in `STM`, not `IO`. This allows key generation inside `atomically` blocks, which is used extensively in handshake and ratchet initialization code. diff --git a/spec/modules/Simplex/Messaging/Crypto/File.md b/spec/modules/Simplex/Messaging/Crypto/File.md new file mode 100644 index 0000000000..8fdb22e180 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Crypto/File.md @@ -0,0 +1,25 @@ +# Simplex.Messaging.Crypto.File + +> Streaming encrypted file I/O using NaCl secret_box with tail auth tag. + +**Source**: [`Crypto/File.hs`](../../../../../src/Simplex/Messaging/Crypto/File.hs) + +## Overview + +`CryptoFileHandle` wraps a file `Handle` with an optional `TVar SbState` for streaming encryption/decryption. When `cryptoArgs` is `Nothing`, the file is plaintext and all operations pass through directly. + +## Auth tag position + +The auth tag is written/read **at the end of the file** (tail tag pattern), not prepended. This is important for streaming: `hPut` encrypts chunks as they arrive, accumulating the Poly1305 state in the TVar, and `hPutTag` finalizes and writes the 16-byte tag only after all data is written. + +## hGetTag + +**Security**: Uses `BA.constEq` for constant-time tag comparison, preventing timing side-channels. Must be called after reading all content bytes — it reads exactly `authTagSize` (16) remaining bytes and compares against the finalized Poly1305 state. Caller must know the file size and read only the content portion before calling this. + +## getFileContentsSize + +Subtracts `authTagSize` from the file size when crypto args are present. 
This gives the content size without the tag, which is needed to know how many bytes to read before calling `hGetTag`. + +## readFile / writeFile + +Whole-file variants that read/write everything at once. `readFile` uses `sbDecryptChunk` (encrypt-then-MAC verification — feeds ciphertext to Poly1305), while `writeFile` uses `sbEncryptChunk`. Both use the tail tag layout via [Simplex.Messaging.Crypto.Lazy](./Lazy.md) functions. diff --git a/spec/modules/Simplex/Messaging/Crypto/Lazy.md b/spec/modules/Simplex/Messaging/Crypto/Lazy.md new file mode 100644 index 0000000000..42ba9e02e5 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Crypto/Lazy.md @@ -0,0 +1,40 @@ +# Simplex.Messaging.Crypto.Lazy + +> Streaming NaCl secret_box (XSalsa20 + Poly1305) for lazy ByteStrings. + +**Source**: [`Crypto/Lazy.hs`](../../../../../src/Simplex/Messaging/Crypto/Lazy.hs) + +## Overview + +Lazy counterpart to the strict NaCl operations in [Simplex.Messaging.Crypto](../Crypto.md). Processes data chunk-by-chunk via `SbState = (XSalsa.State, Poly1305.State)`, enabling streaming encryption of large files without loading everything into memory. + +## Encrypt-then-MAC asymmetry + +`sbEncryptChunk` and `sbDecryptChunk` both use XSalsa20 for the cipher operation, but feed different data to Poly1305: + +- **Encrypt**: feeds the **ciphertext** to Poly1305 (`Poly1305.update authSt c`) +- **Decrypt**: feeds the **original ciphertext** (the input chunk) to Poly1305 (`Poly1305.update authSt chunk`), not the decrypted plaintext + +This is the correct encrypt-then-MAC pattern: the MAC is always computed over ciphertext, so both sides compute the same tag. + +## Padding: 8-byte length prefix + +`pad` uses an 8-byte `Int64` length prefix (via `smpEncode`), unlike [Simplex.Messaging.Crypto.pad](../Crypto.md#pad) which uses a 2-byte `Word16` prefix. This is because lazy operations handle file-sized data that can exceed 65535 bytes. 
+ +`unPad` / `splitLen` does not validate that the remaining data is at least `len` bytes — it uses `LB.take len` which silently returns a shorter result. The comment notes this is intentional to avoid consuming all chunks for validation. + +## Auth tag placement: prepend vs tail + +Two families of functions: +- **`sbEncrypt` / `sbDecrypt`**: tag is **prepended** (first 16 bytes of output). Used for message-sized data. +- **`sbEncryptTailTag` / `sbDecryptTailTag`**: tag is **appended** (last 16 bytes). More efficient for large files because you don't need to buffer the tag before the content. + +The tail-tag variants also support `KEMHybridSecret` via `kcbEncryptTailTag` / `kcbDecryptTailTag`. + +## sbDecryptTailTag validity + +`sbDecryptTailTag` returns `(Bool, LazyByteString)` — the `Bool` indicates whether the auth tag was valid, but the decrypted data is returned regardless. This allows the caller to decide how to handle invalid tags (e.g., [Simplex.Messaging.Crypto.File](./File.md) uses strict `unless` checks). + +## fastReplicate + +Optimizes large padding by building the lazy ByteString from 64KB chunks (minus GHC overhead for `Int` size) rather than one enormous strict ByteString. This avoids allocating a single contiguous buffer for multi-megabyte padding. diff --git a/spec/modules/Simplex/Messaging/Crypto/Ratchet.md b/spec/modules/Simplex/Messaging/Crypto/Ratchet.md new file mode 100644 index 0000000000..ebbc9c5a6d --- /dev/null +++ b/spec/modules/Simplex/Messaging/Crypto/Ratchet.md @@ -0,0 +1,98 @@ +# Simplex.Messaging.Crypto.Ratchet + +> Double ratchet with post-quantum KEM extension (PQ X3DH + header encryption). + +**Source**: [`Crypto/Ratchet.hs`](../../../../../src/Simplex/Messaging/Crypto/Ratchet.hs) + +## Overview + +Implements the Signal double ratchet protocol extended with: +- **Header encryption** (HE variant): message headers are encrypted with separate header keys, hiding the ratchet public key and message counters from observers. 
+- **Post-quantum KEM** (PQ variant): SNTRUP761 key encapsulation is folded into each ratchet step, providing PQ-resistance alongside X448 DH. + +The ratchet uses X448 (not X25519) for DH operations — `type RatchetX448 = Ratchet 'X448`. + +## PQ X3DH key agreement + +`pqX3dhSnd` / `pqX3dhRcv` perform the extended X3DH: +- Standard triple DH: `DH(rk1, spk2)`, `DH(rk2, spk1)`, `DH(rk2, spk2)` +- Optional KEM shared secret from SNTRUP761 encapsulation +- Combined via `HKDF(salt=64_zeroes, DHs || KEMss, "SimpleXX3DH", 96)` → root key, header key, next-header key + +The roles (who is "Alice" vs "Bob") are **reversed from the double ratchet spec**: the party initiating the connection is Bob (`generateRcvE2EParams`, `initRcvRatchet`), and the party accepting is Alice (`generateSndE2EParams`, `initSndRatchet`). Comments in the source explicitly note this. + +## KDF functions + +- **rootKdf**: `HKDF(rootKey, DH(pubKey, privKey) || KEMss, "SimpleXRootRatchet", 96)` → new root key (32), chain key (32), next header key (32) +- **chainKdf**: `HKDF("", chainKey, "SimpleXChainRatchet", 96)` → new chain key (32), message key (32), two IVs (16 + 16) + +All use HKDF-SHA512 via [Simplex.Messaging.Crypto.hkdf](../Crypto.md). + +## Header encryption and padding + +Headers are encrypted with AEAD-GCM using the header key. The padded header length depends on whether PQ is supported: +- **Without PQ**: 88 bytes (fits DH key + counters) +- **With PQ**: 2310 bytes (fits DH key + KEM params + counters, with reserve for future extension) + +The actual header is ~69 bytes without PQ, ~2288 with PQ. The padding ensures all messages have identical header sizes regardless of content. + +## Version negotiation in headers + +Each message header carries `msgMaxVersion` (the sender's max supported ratchet version). On decryption, the receiver upgrades its `current` version to `min(msgMaxVersion, maxSupported)` but never downgrades. 
The current version determines: +- Whether KEM params are included in headers (v3+) +- Whether 2-byte length prefixes are used for headers (v3+) + +## largeP — backward-compatible length prefix parsing + +`largeP` detects the length-prefix format by peeking at the first byte: if < 32, it's a 2-byte `Large` prefix (new format); otherwise it's a 1-byte prefix (old format). This allows upgrading the header encoding format in a single message without a version bump. + +## Skipped message keys + +When messages arrive out of order, the ratchet computes and stores the message keys for skipped messages (up to `maxSkip = 512`). Skipped keys are stored in a `Map HeaderKey (Map Word32 MessageKey)` — keyed first by header key, then by message number. + +The `SkippedMsgDiff` type represents changes to the skipped key store as a diff rather than a full replacement — this is persisted to the database, and the full state is loaded for the next message. `applySMDiff` is only used in tests. + +## rcDecrypt flow + +Decryption tries three strategies in order: +1. **Skipped message keys**: try all stored header keys to decrypt the header, then look up the message number in skipped keys +2. **Current receiving ratchet**: decrypt header with `rcHKr` +3. **Next header key**: decrypt header with `rcNHKr` (triggers a ratchet advance) + +If strategy 1 decrypts the header but the message number isn't in skipped keys, it checks whether this header key corresponds to the current or next ratchet to decide whether to advance. + +## rcEncryptHeader — separated from rcEncryptMsg + +Encryption is split into two steps: `rcEncryptHeader` produces a `MsgEncryptKey` (containing the encrypted header and message key), then `rcEncryptMsg` uses that key to encrypt the message body. 
This separation allows the ratchet state to be updated (persisted) before the message is encrypted, which is important for crash recovery — if the process crashes after encrypting but before sending, the ratchet state must already reflect the advanced counter. + +## PQ ratchet step + +During each ratchet advance (`pqRatchetStep`), the PQ KEM is folded in: +1. Receive: if the header contains a KEM ciphertext and we have the decapsulation key, compute the shared secret +2. Send: generate a new KEM keypair, encapsulate against the received public key, include in the next header +3. The KEM shared secret is concatenated with the DH shared secret before `rootKdf` + +PQ can be enabled/disabled per-message via `pqEnc_` parameter. `rcSupportKEM` can only be enabled (never disabled) — once PQ headers are used, the larger header size is permanent. + +## PQSupport vs PQEncryption + +Two distinct newtypes with identical structure (`Bool` wrapper): +- `PQSupport`: whether PQ **can** be used (determines header padding size, cannot be disabled once enabled) +- `PQEncryption`: whether PQ **is** being used for the current send/receive ratchet + +## Error semantics + +- `CERatchetEarlierMessage n`: message number is `n` positions before the next expected (already processed or skipped-and-consumed) +- `CERatchetDuplicateMessage`: message number is the most recently received (exact repeat) +- `CERatchetTooManySkipped n`: would need to skip `n` messages, exceeding `maxSkip` +- `CERatchetHeader`: header decryption failed with all available keys +- `CERatchetState`: no sending chain (ratchet not initialized for sending) +- `CERatchetKEMState`: KEM state mismatch between parties + +## InitialKeys + +Controls PQ key inclusion in connection establishment: +- `IKUsePQ`: always include PQ keys (used in contact requests and short link data) +- `IKLinkPQ pq`: include PQ keys only in short link data, if `pq` is enabled + +`initialPQEncryption` resolves this based on whether it's a short link 
context. diff --git a/spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md b/spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md new file mode 100644 index 0000000000..d5cd290136 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md @@ -0,0 +1,13 @@ +# Simplex.Messaging.Crypto.SNTRUP761 + +> Hybrid KEM+DH shared secret combining SNTRUP761 and X25519. + +**Source**: [`Crypto/SNTRUP761.hs`](../../../../../src/Simplex/Messaging/Crypto/SNTRUP761.hs) + +## kemHybridSecret + +The hybrid secret is `SHA3_256(DHSecret || KEMSharedKey)` — not a simple concatenation, not HKDF. This follows the approach in draft-josefsson-ntruprime-hybrid. The result is a `ScrubbedBytes` value used as a symmetric key for NaCl crypto_box operations via `sbEncrypt_`/`sbDecrypt_`. + +## kcbEncrypt / kcbDecrypt + +These delegate directly to `sbEncrypt_` / `sbDecrypt_` from [Simplex.Messaging.Crypto](../Crypto.md), using the hybrid secret as the symmetric key. The hybrid secret is 32 bytes (SHA3-256 output), matching the expected key size for XSalsa20. diff --git a/spec/modules/Simplex/Messaging/Crypto/ShortLink.md b/spec/modules/Simplex/Messaging/Crypto/ShortLink.md new file mode 100644 index 0000000000..821a30c321 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Crypto/ShortLink.md @@ -0,0 +1,36 @@ +# Simplex.Messaging.Crypto.ShortLink + +> Short link key derivation, encryption, and signature verification for contact/invitation links. + +**Source**: [`Crypto/ShortLink.hs`](../../../../../src/Simplex/Messaging/Crypto/ShortLink.hs) + +## Overview + +Short links encode connection data in two encrypted blobs: fixed data (padded to 2008 bytes, 2048 bytes with nonce and auth tag) and user data (padded to 13784 bytes, 13824 bytes with nonce and auth tag). Both are encrypted with `sbEncrypt` using a key derived from the link key via HKDF. + +## KDF schemes + +Two distinct HKDF derivations with different info strings: + +- **contactShortLinkKdf**: `HKDF("", linkKey, "SimpleXContactLink", 56)` → splits into 24-byte LinkId + 32-byte SbKey.
The LinkId is used as the server-side identifier. +- **invShortLinkKdf**: `HKDF("", linkKey, "SimpleXInvLink", 32)` → 32-byte SbKey only. No LinkId because invitation links don't use server-side lookup. + +## Fixed padding lengths + +- `fixedDataPaddedLength = 2008` (2048 - 24 nonce - 16 auth tag) +- `userDataPaddedLength = 13784` (13824 - 24 - 16) + +These are chosen so that each encrypted blob — the prepended 24-byte nonce, the 16-byte auth tag and the padded ciphertext together — is exactly 2048 or 13824 bytes. + +## decryptLinkData + +**Security**: Performs three-layer verification in order: +1. Hash check: `SHA3_256(fixedData) == linkKey` — ensures data integrity +2. Root key signature: `verify(rootKey, sig1, fixedData)` — ensures authenticity +3. User data signature: `verify(rootKey, sig2, userData)` for invitations, or verify against any owner key for contact links + +For contact links, also calls `validateLinkOwners` to verify the owner chain of trust (each owner is signed by the root key). + +## encodeSign + +Prepends the Ed25519 signature to the data: `smpEncode(sign(pk, data)) <> data`. This is the format expected by `decryptLinkData`'s parser. diff --git a/src/Simplex/Messaging/Crypto.hs b/src/Simplex/Messaging/Crypto.hs index c7b5396416..4324b63521 100644 --- a/src/Simplex/Messaging/Crypto.hs +++ b/src/Simplex/Messaging/Crypto.hs @@ -35,6 +35,7 @@ -- -- This module provides cryptography implementation for SMP protocols based on -- . +-- spec: spec/modules/Simplex/Messaging/Crypto.md module Simplex.Messaging.Crypto ( -- * Cryptographic keys Algorithm (..), @@ -1133,7 +1134,8 @@ maxLength :: forall i.
KnownNat i => Int maxLength = fromIntegral (natVal $ Proxy @i) {-# INLINE maxLength #-} --- this function requires 16 bytes IV, it transforms IV in cryptonite_aes_gcm_init here: +-- spec: spec/modules/Simplex/Messaging/Crypto.md#two-aead-initialization-paths +-- This function requires 16 bytes IV, it transforms IV in cryptonite_aes_gcm_init here: -- https://github.com/haskell-crypto/cryptonite/blob/master/cbits/cryptonite_aes.c -- This is used for double ratchet encryption, so to make it compatible with WebCrypto we will need to deprecate it and start using initAEADGCM initAEAD :: forall c. AES.BlockCipher c => Key -> IV -> ExceptT CryptoError IO (AES.AEAD c) @@ -1393,6 +1395,8 @@ instance ToJSON CbNonce where instance FromJSON CbNonce where parseJSON = strParseJSON "CbNonce" +-- spec: spec/modules/Simplex/Messaging/Crypto.md#cbNonce--silent-truncationpadding +-- Silently truncates or zero-pads to 24 bytes — no error on wrong length cbNonce :: ByteString -> CbNonce cbNonce s | len == 24 = CryptoBoxNonce s diff --git a/src/Simplex/Messaging/Crypto/Ratchet.hs b/src/Simplex/Messaging/Crypto/Ratchet.hs index 7250a1d607..1ea7760fdd 100644 --- a/src/Simplex/Messaging/Crypto/Ratchet.hs +++ b/src/Simplex/Messaging/Crypto/Ratchet.hs @@ -18,6 +18,7 @@ {-# LANGUAGE TypeFamilies #-} {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md module Simplex.Messaging.Crypto.Ratchet ( Ratchet (..), RatchetX448, @@ -435,7 +436,8 @@ generateE2EParams g v useKEM_ = do pure (RKParamsAccepted ct k, PrivateRKParamsAccepted ct shared ks) _ -> pure Nothing --- used by party initiating connection, Bob in double-ratchet spec +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#pq-x3dh-key-agreement +-- used by party initiating connection, Bob in double-ratchet spec (roles are reversed) generateRcvE2EParams :: (AlgorithmI a, DhAlgorithm a) => TVar ChaChaDRG -> VersionE2E -> PQSupport -> IO (PrivateKey a, PrivateKey a, Maybe 
(PrivRKEMParams 'RKSProposed), E2ERatchetParams 'RKSProposed a) generateRcvE2EParams g v = generateE2EParams g v . proposeKEM_ where @@ -899,6 +901,8 @@ rcCheckCanPad :: Int -> ByteString -> ExceptT CryptoError IO () rcCheckCanPad paddedMsgLen msg = unless (canPad (B.length msg) paddedMsgLen) $ throwE CryptoLargeMsgError +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#rcEncryptHeader--separated-from-rcEncryptMsg +-- Separated from rcEncryptMsg for crash recovery: persist ratchet state between header and message encryption rcEncryptHeader :: AlgorithmI a => Ratchet a -> Maybe PQEncryption -> VersionE2E -> ExceptT CryptoError IO (MsgEncryptKey a, Ratchet a) rcEncryptHeader Ratchet {rcSnd = Nothing} _ _ = throwE CERatchetState rcEncryptHeader rc@Ratchet {rcSnd = Just sr@SndRatchet {rcCKs, rcHKs}, rcDHRs, rcKEM, rcNs, rcPN, rcAD = Str rcAD, rcSupportKEM, rcEnableKEM, rcVersion} pqEnc_ supportedE2EVersion = do From 326d6cc5591a77fc1e0c04a710b4247abaf06f17 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 12:17:46 +0000 Subject: [PATCH 29/91] code comments --- src/Simplex/Messaging/Crypto.hs | 10 ++++++++++ src/Simplex/Messaging/Crypto/File.hs | 4 ++++ src/Simplex/Messaging/Crypto/Lazy.hs | 6 ++++++ src/Simplex/Messaging/Crypto/Ratchet.hs | 9 +++++++++ src/Simplex/Messaging/Crypto/SNTRUP761.hs | 3 +++ src/Simplex/Messaging/Crypto/ShortLink.hs | 4 ++++ 6 files changed, 36 insertions(+) diff --git a/src/Simplex/Messaging/Crypto.hs b/src/Simplex/Messaging/Crypto.hs index 4324b63521..d283ab899e 100644 --- a/src/Simplex/Messaging/Crypto.hs +++ b/src/Simplex/Messaging/Crypto.hs @@ -343,6 +343,7 @@ deriving instance Eq (PrivateKey a) deriving instance Show (PrivateKey a) +-- spec: spec/modules/Simplex/Messaging/Crypto.md#privatekeyed25519-strencoding-deliberately-omitted -- Do not enable, to avoid leaking key data -- instance StrEncoding (PrivateKey Ed25519) where @@ -736,6 +737,7 @@ 
generatePrivateAuthKey a g = APrivateAuthKey a <$> generatePrivateKey g generateDhKeyPair :: (AlgorithmI a, DhAlgorithm a) => SAlgorithm a -> TVar ChaChaDRG -> STM ADhKeyPair generateDhKeyPair a g = bimap (APublicDhKey a) (APrivateDhKey a) <$> generateKeyPair g +-- spec: spec/modules/Simplex/Messaging/Crypto.md#generatekeypair-is-stm generateKeyPair :: forall a. AlgorithmI a => TVar ChaChaDRG -> STM (KeyPair a) generateKeyPair g = stateTVar g (`withDRG` generateKeyPair_) @@ -826,6 +828,7 @@ instance CryptoSignature (Signature s) => Encoding (Signature s) where smpP = decodeSignature <$?> smpP {-# INLINE smpP #-} +-- spec: spec/modules/Simplex/Messaging/Crypto.md#signature-algorithm-detection instance CryptoSignature ASignature where signatureBytes (ASignature _ sig) = signatureBytes sig {-# INLINE signatureBytes #-} @@ -965,6 +968,7 @@ instance ToJSON IV where instance FromJSON IV where parseJSON = fmap IV . strParseJSON "IV" +-- spec: spec/modules/Simplex/Messaging/Crypto.md#gcmiv-constructor-not-exported -- | GCMIV bytes newtype. newtype GCMIV = GCMIV {unGCMIV :: ByteString} @@ -1081,6 +1085,7 @@ canPad msgLen paddedLen = msgLen <= maxMsgLen && padLen >= 0 where padLen = paddedLen - msgLen - 2 +-- spec: spec/modules/Simplex/Messaging/Crypto.md#pad--unpad--2-byte-length-prefix pad :: ByteString -> Int -> Either CryptoError ByteString pad msg paddedLen | len <= maxMsgLen && padLen >= 0 = Right $ encodeWord16 (fromIntegral len) <> msg <> B.replicate padLen '#' @@ -1290,6 +1295,7 @@ dh' (PublicKeyX25519 k) (PrivateKeyX25519 pk) = DhSecretX25519 $ X25519.dh k pk dh' (PublicKeyX448 k) (PrivateKeyX448 pk) = DhSecretX448 $ X448.dh k pk {-# INLINE dh' #-} +-- spec: spec/modules/Simplex/Messaging/Crypto.md#crypto_box--secret_box -- | NaCl @crypto_box@ encrypt with padding with a shared DH secret and 192-bit nonce. 
cbEncrypt :: DhSecret X25519 -> CbNonce -> ByteString -> Int -> Either CryptoError ByteString cbEncrypt (DhSecretX25519 secret) = sbEncrypt_ secret @@ -1359,6 +1365,7 @@ sbDecryptNoPad_ secret (CbNonce nonce) packet (rs, msg) = xSalsa20 secret nonce c tag = Poly1305.auth rs c +-- spec: spec/modules/Simplex/Messaging/Crypto.md#cbauthenticator -- type for authentication scheme using NaCl @crypto_box@ over the sha512 digest of the message. newtype CbAuthenticator = CbAuthenticator ByteString deriving (Eq, Show) @@ -1454,6 +1461,7 @@ randomSbKey gVar = SecretBoxKey <$> randomBytes 32 gVar newtype SbChainKey = SecretBoxChainKey {unSbChainKey :: ByteString} deriving (Eq, Show) +-- spec: spec/modules/Simplex/Messaging/Crypto.md#secret-box-chains-sbcinit--sbchkdf sbcInit :: ByteArrayAccess secret => ByteString -> secret -> (SbChainKey, SbChainKey) sbcInit salt secret = (SecretBoxChainKey ck1, SecretBoxChainKey ck2) where @@ -1474,6 +1482,7 @@ hkdf salt ikm info n = in H.expand prk info n {-# INLINE hkdf #-} +-- spec: spec/modules/Simplex/Messaging/Crypto.md#xsalsa20 xSalsa20 :: ByteArrayAccess key => key -> ByteString -> ByteString -> (ByteString, ByteString) xSalsa20 secret nonce msg = (rs, msg') where @@ -1501,6 +1510,7 @@ privateToX509 = \case encodeASNObj :: ASN1Object a => a -> ByteString encodeASNObj k = toStrict . encodeASN1 DER $ toASN1 k [] +-- spec: spec/modules/Simplex/Messaging/Crypto.md#key-encoding -- Decoding of binary X509 'CryptoPublicKey'. 
decodePubKey :: CryptoPublicKey k => ByteString -> Either String k decodePubKey = decodeASNKey >=> x509ToPublic >=> pubKey diff --git a/src/Simplex/Messaging/Crypto/File.hs b/src/Simplex/Messaging/Crypto/File.hs index 3ab491946d..e07a0db371 100644 --- a/src/Simplex/Messaging/Crypto/File.hs +++ b/src/Simplex/Messaging/Crypto/File.hs @@ -2,6 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/File.md module Simplex.Messaging.Crypto.File ( CryptoFile (..), CryptoFileArgs (..), @@ -51,6 +52,7 @@ data CryptoFileArgs = CFArgs {fileKey :: C.SbKey, fileNonce :: C.CbNonce} data CryptoFileHandle = CFHandle Handle (Maybe (TVar LC.SbState)) +-- spec: spec/modules/Simplex/Messaging/Crypto/File.md#readfile--writefile readFile :: CryptoFile -> ExceptT FTCryptoError IO LazyByteString readFile (CryptoFile path cfArgs) = do s <- liftIO $ LB.readFile path @@ -91,6 +93,7 @@ hGet (CFHandle h sb_) n = B.hGet h n >>= maybe pure decrypt sb_ where decrypt sb s = atomically $ stateTVar sb (`LC.sbDecryptChunk` s) +-- spec: spec/modules/Simplex/Messaging/Crypto/File.md#hgettag -- | Read and validate the auth tag. -- This function should be called after reading the whole file, it assumes you know the file size and read only the needed bytes. 
hGetTag :: CryptoFileHandle -> ExceptT FTCryptoError IO () @@ -113,6 +116,7 @@ plain = (`CryptoFile` Nothing) randomArgs :: TVar ChaChaDRG -> STM CryptoFileArgs randomArgs g = CFArgs <$> C.randomSbKey g <*> C.randomCbNonce g +-- spec: spec/modules/Simplex/Messaging/Crypto/File.md#getfilecontentssize getFileContentsSize :: CryptoFile -> IO Integer getFileContentsSize (CryptoFile path cfArgs) = do size <- getFileSize path diff --git a/src/Simplex/Messaging/Crypto/Lazy.hs b/src/Simplex/Messaging/Crypto/Lazy.hs index 6c0cf96138..192cd85b88 100644 --- a/src/Simplex/Messaging/Crypto/Lazy.hs +++ b/src/Simplex/Messaging/Crypto/Lazy.hs @@ -5,6 +5,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/Lazy.md module Simplex.Messaging.Crypto.Lazy ( sha256Hash, sha512Hash, @@ -65,6 +66,7 @@ sha256Hash = BA.convert . (hashlazy :: LazyByteString -> Digest SHA256) sha512Hash :: LazyByteString -> ByteString sha512Hash = BA.convert . (hashlazy :: LazyByteString -> Digest SHA512) +-- spec: spec/modules/Simplex/Messaging/Crypto/Lazy.md#padding-8-byte-length-prefix -- this function does not validate the length of the message to avoid consuming all chunks, -- but if the passed string is longer it will truncate it to specified length pad :: LazyByteString -> Int64 -> Int64 -> Either CryptoError LazyByteString @@ -75,6 +77,7 @@ pad msg len paddedLen encodedLen = smpEncode len -- 8 bytes Int64 encoded length padLen = paddedLen - len - 8 +-- spec: spec/modules/Simplex/Messaging/Crypto/Lazy.md#fastreplicate fastReplicate :: Int64 -> Char -> LazyByteString fastReplicate n c | n <= 0 = LB.empty @@ -102,6 +105,7 @@ splitLen padded where (lenStr, rest) = LB.splitAt 8 padded +-- spec: spec/modules/Simplex/Messaging/Crypto/Lazy.md#auth-tag-placement-prepend-vs-tail -- | NaCl @secret_box@ lazy encrypt with a symmetric 256-bit key and 192-bit nonce. 
-- The resulting string will be bigger than paddedLen by the size of the auth tag (16 bytes). sbEncrypt :: SbKey -> CbNonce -> LazyByteString -> Int64 -> Int64 -> Either CryptoError LazyByteString @@ -148,6 +152,7 @@ sbEncryptTailTagNoPad :: SbKeyNonce -> LazyByteString -> Either CryptoError Lazy sbEncryptTailTagNoPad (SbKey key, CbNonce nonce) msg = LB.fromChunks <$> secretBoxTailTag sbEncryptChunk key nonce msg +-- spec: spec/modules/Simplex/Messaging/Crypto/Lazy.md#sbdecrypttailtag-validity -- | NaCl @secret_box@ decrypt with a symmetric 256-bit key and 192-bit nonce with appended auth tag (more efficient with large files). -- paddedLen should NOT include the tag length, it should be the same number that is passed to sbEncrypt / sbEncryptTailTag. sbDecryptTailTag :: SbKey -> CbNonce -> Int64 -> LazyByteString -> Either CryptoError (Bool, LazyByteString) @@ -226,6 +231,7 @@ sbProcessChunkLazy_ :: (SbState -> ByteString -> (ByteString, SbState)) -> SbSta sbProcessChunkLazy_ = first (LB.fromChunks . reverse) .:. secretBoxLazy_ {-# INLINE sbProcessChunkLazy_ #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/Lazy.md#encrypt-then-mac-asymmetry sbEncryptChunk :: SbState -> ByteString -> (ByteString, SbState) sbEncryptChunk (st, authSt) chunk = let (!c, !st') = XSalsa.combine st chunk diff --git a/src/Simplex/Messaging/Crypto/Ratchet.hs b/src/Simplex/Messaging/Crypto/Ratchet.hs index 1ea7760fdd..5f91e728bb 100644 --- a/src/Simplex/Messaging/Crypto/Ratchet.hs +++ b/src/Simplex/Messaging/Crypto/Ratchet.hs @@ -465,6 +465,7 @@ data RatchetInitParams = RatchetInitParams } deriving (Show) +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#pq-x3dh-key-agreement -- this is used by the peer joining the connection pqX3dhSnd :: DhAlgorithm a => PrivateKey a -> PrivateKey a -> Maybe APrivRKEMParams -> E2ERatchetParams 'RKSProposed a -> Either CryptoError (RatchetInitParams, Maybe KEMKeyPair) -- 3. replied 2. 
received @@ -588,6 +589,7 @@ data SkippedMsgDiff | SMDRemove HeaderKey Word32 | SMDAdd SkippedMsgKeys +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#skipped-message-keys -- | this function is only used in tests to apply changes in skipped messages, -- in the agent the diff is persisted, and the whole state is loaded for the next message. applySMDiff :: SkippedMsgKeys -> SkippedMsgDiff -> SkippedMsgKeys @@ -712,6 +714,7 @@ data MsgHeader a = MsgHeader } deriving (Show) +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#header-encryption-and-padding -- to allow extension without increasing the size, the actual header length is: -- 69 = 2 (original size) + 2 + 1+56 (Curve448) + 4 + 4 -- The exact size is 2288, added reserve @@ -763,6 +766,7 @@ encodeLarge v s | v >= pqRatchetE2EEncryptVersion = smpEncode $ Large s | otherwise = smpEncode s +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#largep--backward-compatible-length-prefix-parsing -- This parser relies on the fact that header cannot be shorter than 32 bytes (it is ~69 bytes without PQ KEM), -- therefore if the first byte is less or equal to 31 (x1F), then we have 2 byte-length limited to 8191. -- This allows upgrading the current version in one message. 
@@ -788,6 +792,7 @@ encRatchetMessageP = do (emAuthTag, Tail emBody) <- smpP pure EncRatchetMessage {emHeader, emBody, emAuthTag} +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#pqsupport-vs-pqencryption newtype PQEncryption = PQEncryption {enablePQ :: Bool} deriving (Eq, Show) @@ -863,6 +868,7 @@ instance StrEncoding PQSupport where strP = pqEncToSupport <$> strP {-# INLINE strP #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#initialkeys data InitialKeys = IKUsePQ -- use PQ keys in contact request and short link data | IKLinkPQ PQSupport -- use PQ keys in short link data only, if PQSupport enabled @@ -991,6 +997,7 @@ type DecryptResult a = (Either CryptoError ByteString, Ratchet a, SkippedMsgDiff maxSkip :: Word32 maxSkip = 512 +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#rcdecrypt-flow rcDecrypt :: forall a. (AlgorithmI a, DhAlgorithm a) => @@ -1073,6 +1080,7 @@ rcDecrypt g rc@Ratchet {rcRcv, rcAD = Str rcAD, rcVersion} rcMKSkipped msg' = do rcNHKs = rcNHKs', rcNHKr = rcNHKr' } + -- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#pq-ratchet-step pqRatchetStep :: Ratchet a -> Maybe ARKEMParams -> ExceptT CryptoError IO (Maybe KEMSharedKey, Maybe KEMSharedKey, Maybe RatchetKEM) pqRatchetStep Ratchet {rcKEM, rcEnableKEM = PQEncryption pqEnc, rcVersion = rv} = \case -- received message does not have KEM in header, @@ -1160,6 +1168,7 @@ rcDecrypt g rc@Ratchet {rcRcv, rcAD = Str rcAD, rcVersion} rcMKSkipped msg' = do -- DECRYPT(mk, cipher-text, CONCAT(AD, enc_header)) tryE $ decryptAEAD mk iv (rcAD <> emHeader) emBody emAuthTag +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#kdf-functions rootKdf :: (AlgorithmI a, DhAlgorithm a) => RatchetKey -> PublicKey a -> PrivateKey a -> Maybe KEMSharedKey -> (RatchetKey, RatchetKey, Key) rootKdf (RatchetKey rk) k pk kemSecret_ = let dhOut = dhBytes' (dh' k pk) diff --git a/src/Simplex/Messaging/Crypto/SNTRUP761.hs b/src/Simplex/Messaging/Crypto/SNTRUP761.hs index 
839fbc1e79..d5415f829b 100644 --- a/src/Simplex/Messaging/Crypto/SNTRUP761.hs +++ b/src/Simplex/Messaging/Crypto/SNTRUP761.hs @@ -2,6 +2,7 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md module Simplex.Messaging.Crypto.SNTRUP761 ( KEMHybridSecret (..), kcbDecrypt, @@ -22,6 +23,7 @@ import Simplex.Messaging.Crypto.SNTRUP761.Bindings newtype KEMHybridSecret = KEMHybridSecret ScrubbedBytes +-- spec: spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md#kcbencrypt--kcbdecrypt -- | NaCl @crypto_box@ decrypt with a shared hybrid DH + KEM secret and 192-bit nonce. kcbDecrypt :: KEMHybridSecret -> CbNonce -> ByteString -> Either CryptoError ByteString kcbDecrypt (KEMHybridSecret k) = sbDecrypt_ k @@ -30,6 +32,7 @@ kcbDecrypt (KEMHybridSecret k) = sbDecrypt_ k kcbEncrypt :: KEMHybridSecret -> CbNonce -> ByteString -> Int -> Either CryptoError ByteString kcbEncrypt (KEMHybridSecret k) = sbEncrypt_ k +-- spec: spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md#kemhybridsecret kemHybridSecret :: PublicKeyX25519 -> PrivateKeyX25519 -> KEMSharedKey -> KEMHybridSecret kemHybridSecret k pk (KEMSharedKey kem) = let DhSecretX25519 dh = C.dh' k pk diff --git a/src/Simplex/Messaging/Crypto/ShortLink.hs b/src/Simplex/Messaging/Crypto/ShortLink.hs index 013559fd1a..12124630fe 100644 --- a/src/Simplex/Messaging/Crypto/ShortLink.hs +++ b/src/Simplex/Messaging/Crypto/ShortLink.hs @@ -9,6 +9,7 @@ {-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeApplications #-} +-- spec: spec/modules/Simplex/Messaging/Crypto/ShortLink.md module Simplex.Messaging.Crypto.ShortLink ( contactShortLinkKdf, invShortLinkKdf, @@ -44,6 +45,7 @@ fixedDataPaddedLength = 2008 -- 2048 - 24 (nonce) - 16 (auth tag) userDataPaddedLength :: Int userDataPaddedLength = 13784 -- 13824 - 24 - 16 +-- spec: spec/modules/Simplex/Messaging/Crypto/ShortLink.md#kdf-schemes contactShortLinkKdf :: LinkKey -> (LinkId, C.SbKey) contactShortLinkKdf (LinkKey k) = let (lnkId, 
sbKey) = B.splitAt 24 $ C.hkdf "" k "SimpleXContactLink" 56 @@ -72,6 +74,7 @@ connLinkData vr = \case UserInvLinkData d -> InvitationLinkData vr d UserContactLinkData d -> ContactLinkData vr d +-- spec: spec/modules/Simplex/Messaging/Crypto/ShortLink.md#encodesign encodeSign :: C.PrivateKeyEd25519 -> ByteString -> ByteString encodeSign pk s = smpEncode (C.sign' pk s) <> s @@ -97,6 +100,7 @@ encryptData g k len s = do ct <- liftEitherWith cryptoError $ C.sbEncrypt k nonce s len pure $ EncDataBytes $ smpEncode nonce <> ct +-- spec: spec/modules/Simplex/Messaging/Crypto/ShortLink.md#decryptlinkdata decryptLinkData :: forall c. ConnectionModeI c => LinkKey -> C.SbKey -> QueueLinkData -> Either AgentErrorType (FixedLinkData c, ConnLinkData c) decryptLinkData linkKey k (encFD, encMD) = do (sig1, fd) <- decrypt encFD From 9e3b47a36237b059b65e5f23485caa4d1d00c12e Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 12:54:56 +0000 Subject: [PATCH 30/91] code refs, additional specs --- spec/modules/README.md | 23 +++++++++++++++ spec/modules/Simplex/Messaging/Crypto.md | 20 ++++++++++--- .../Simplex/Messaging/Crypto/Ratchet.md | 29 ++++++++++++++++++- src/Simplex/Messaging/Crypto.hs | 3 ++ src/Simplex/Messaging/Crypto/Ratchet.hs | 5 ++++ 5 files changed, 75 insertions(+), 5 deletions(-) diff --git a/spec/modules/README.md b/spec/modules/README.md index 9f057b9039..ef2b458814 100644 --- a/spec/modules/README.md +++ b/spec/modules/README.md @@ -52,6 +52,21 @@ Things that would surprise a competent Haskell developer reading the code for th - Alternatives considered and rejected - Known limitations and their justification +## Non-obvious threshold + +The guiding principle: **non-obvious state machines and flows require documentation; standard things don't.** + +Document: +- Multi-step protocols and negotiation flows (e.g., KEM propose/accept round-trips) +- Monotonic or irreversible state transitions 
(e.g., PQ support can only be enabled, never disabled) +- Silent error behaviors (e.g., `verify` returns `False` on algorithm mismatch instead of an error) +- Design rationale for non-standard choices (e.g., why byte-reverse a nonce, why hash-then-encrypt for authenticators) + +Do NOT document: +- Standard algorithm properties (e.g., Ed25519 public key derivable from private key) +- Well-known protocol mechanics (e.g., HKDF usage per RFC 5869, deterministic nonce derivation in double ratchet) +- Implementation details that follow directly from the type signatures + ## What NOT to include - **Type signatures** — the code has them @@ -107,6 +122,14 @@ This is valuable — it confirms someone looked and found nothing to document. ## Linking conventions +### Module doc → protocol docs +When a module implements or is governed by a protocol specification in `protocol/`, link to it near the top of the module doc (after the overview). Do not duplicate protocol content — just reference it: +```markdown +**Protocol spec**: [`protocol/pqdr.md`](../../../../protocol/pqdr.md) — Post-quantum resistant augmented double ratchet algorithm. +``` + +This is especially important for modules in transport, protocol, client, server, and agent layers where behavior is defined by the protocol spec rather than being self-evident from the code. + ### Module doc → other module docs Use fully qualified names as link text: ```markdown diff --git a/spec/modules/Simplex/Messaging/Crypto.md b/spec/modules/Simplex/Messaging/Crypto.md index f1c6605124..10f074d5b0 100644 --- a/spec/modules/Simplex/Messaging/Crypto.md +++ b/spec/modules/Simplex/Messaging/Crypto.md @@ -55,10 +55,6 @@ Both apply `pad`/`unPad` by default. The `NoPad` variants skip padding. The XSalsa20 implementation splits the 24-byte nonce into two 8-byte halves. The first half initializes the cipher state (prepended with 16 zero bytes), the second derives a subkey. 
The first 32 bytes of output become the Poly1305 one-time key (`rs`), then the rest encrypts the message. This is the standard NaCl construction. -## CbAuthenticator - -An authentication scheme that encrypts the SHA-512 hash of the message using crypto_box, rather than the message itself. The result is 80 bytes (64 hash + 16 auth tag). Used for authenticating messages where the content is transmitted separately from the authentication proof. - ## Secret box chains (sbcInit / sbcHkdf) HKDF-based key chains for deriving sequential key+nonce pairs: @@ -77,6 +73,22 @@ All keys are encoded as ASN.1 DER (X.509 SubjectPublicKeyInfo for public, PKCS#8 `GCMIV` constructor is not exported — only `gcmIV :: ByteString -> Either CryptoError GCMIV` is available, which validates that the input is exactly 12 bytes. This prevents construction of invalid IVs. +## verify silently returns False on algorithm mismatch + +`verify :: APublicVerifyKey -> ASignature -> ByteString -> Bool` uses `testEquality` on the algorithm singletons. If the key is Ed25519 but the signature is Ed448 (or vice versa), `testEquality` fails and `verify` returns `False` — no error, no indication of a type mismatch. A correctly-formed signature can "fail" simply because the wrong algorithm key was passed. + +## dh' returns raw DH output — no key derivation + +`dh'` returns the raw X25519/X448 shared point with no hashing or HKDF. Callers must apply their own KDF: [SNTRUP761](./Crypto/SNTRUP761.md) hashes with SHA3-256, the [ratchet](./Crypto/Ratchet.md#kdf-functions) uses HKDF-SHA512. Not all DH libraries behave this way — some hash the output automatically. + +## reverseNonce + +`reverseNonce` creates a "reply" nonce by byte-reversing the original 24-byte nonce. Used for bidirectional communication where both sides need distinct nonces derived from the same starting value. 
The two nonces are guaranteed distinct unless the original is a byte palindrome, which is astronomically unlikely for random 24-byte values. + +## CbAuthenticator + +An authentication scheme that encrypts the SHA-512 hash of the message using crypto_box, rather than the message itself. The result is 80 bytes (64 hash + 16 auth tag). This is the djb-recommended authenticator scheme: it proves knowledge of the shared secret and the message content, without requiring the message to fit in a single crypto_box, and without revealing message content even to someone who compromises the shared key after verification. + ## generateKeyPair is STM Key generation uses `TVar ChaChaDRG` and runs in `STM`, not `IO`. This allows key generation inside `atomically` blocks, which is used extensively in handshake and ratchet initialization code. diff --git a/spec/modules/Simplex/Messaging/Crypto/Ratchet.md b/spec/modules/Simplex/Messaging/Crypto/Ratchet.md index ebbc9c5a6d..b26f95ce67 100644 --- a/spec/modules/Simplex/Messaging/Crypto/Ratchet.md +++ b/spec/modules/Simplex/Messaging/Crypto/Ratchet.md @@ -12,6 +12,8 @@ Implements the Signal double ratchet protocol extended with: The ratchet uses X448 (not X25519) for DH operations — `type RatchetX448 = Ratchet 'X448`. +**Protocol spec**: [`protocol/pqdr.md`](../../../../protocol/pqdr.md) — Post-quantum resistant augmented double ratchet algorithm. + ## PQ X3DH key agreement `pqX3dhSnd` / `pqX3dhRcv` perform the extended X3DH: @@ -46,9 +48,13 @@ Each message header carries `msgMaxVersion` (the sender's max supported ratchet `largeP` detects the length-prefix format by peeking at the first byte: if < 32, it's a 2-byte `Large` prefix (new format); otherwise it's a 1-byte prefix (old format). This allows upgrading the header encoding format in a single message without a version bump. +## maxSkip = 512 — DoS protection + +`maxSkip` is a hardcoded constant (not configurable). 
Messages claiming to be more than 512 positions ahead of the current counter are rejected with `CERatchetTooManySkipped`. This prevents an attacker from forcing the receiver to compute and store an unbounded number of skipped message keys. + ## Skipped message keys -When messages arrive out of order, the ratchet computes and stores the message keys for skipped messages (up to `maxSkip = 512`). Skipped keys are stored in a `Map HeaderKey (Map Word32 MessageKey)` — keyed first by header key, then by message number. +When messages arrive out of order, the ratchet computes and stores the message keys for skipped messages (up to `maxSkip`). Skipped keys are stored in a `Map HeaderKey (Map Word32 MessageKey)` — keyed first by header key, then by message number. The `SkippedMsgDiff` type represents changes to the skipped key store as a diff rather than a full replacement — this is persisted to the database, and the full state is loaded for the next message. `applySMDiff` is only used in tests. @@ -61,6 +67,14 @@ Decryption tries three strategies in order: If strategy 1 decrypts the header but the message number isn't in skipped keys, it checks whether this header key corresponds to the current or next ratchet to decide whether to advance. +### decryptSkipped — linear scan through all stored header keys + +`decryptSkipped` iterates through ALL `(HeaderKey, SkippedHdrMsgKeys)` pairs, attempting header decryption with each key. When header decryption succeeds but the message number is NOT in the skipped keys for that header, the result is `SMHeader` — which includes whether the key matches the current ratchet (`rcHKr` → `SameRatchet`) or the next ratchet (`rcNHKr` → `AdvanceRatchet`). This falls through to normal decryption processing rather than producing an error. 
+ +### decryptMessage — ratchet advances even on failure + +`decryptMessage` returns `Either CryptoError ByteString` inside the `ExceptT` monad — a message decryption failure does NOT abort the ratchet state update. The ratchet counter advances (`rcNr + 1`) and chain key updates (`rcCKr'`) regardless of whether the message body decrypts successfully. This preserves ratchet state consistency for retransmission and error recovery. + ## rcEncryptHeader — separated from rcEncryptMsg Encryption is split into two steps: `rcEncryptHeader` produces a `MsgEncryptKey` (containing the encrypted header and message key), then `rcEncryptMsg` uses that key to encrypt the message body. This separation allows the ratchet state to be updated (persisted) before the message is encrypted, which is important for crash recovery — if the process crashes after encrypting but before sending, the ratchet state must already reflect the advanced counter. @@ -80,6 +94,19 @@ Two distinct newtypes with identical structure (`Bool` wrapper): - `PQSupport`: whether PQ **can** be used (determines header padding size, cannot be disabled once enabled) - `PQEncryption`: whether PQ **is** being used for the current send/receive ratchet +### pqEnableSupport is monotonic + +`pqEnableSupport v sup enc = PQSupport $ sup || (v >= pqRatchetE2EEncryptVersion && enc)`. The `||` means once PQ support is `True`, it stays `True` regardless of subsequent messages. PQ encryption (usage) can be toggled per-message; PQ support (capability / header size) only ratchets up. This prevents the larger header format from being downgraded once negotiated. + +## replyKEM_ — two-step KEM negotiation + +KEM establishment requires two message round-trips, as described in the [PQDR KEM state machine](../../../../protocol/pqdr.md#kem-state-machine): + +1. 
**Propose**: if the sender has no KEM in their header but the replier supports PQ at sufficient version, the replier includes a KEM proposal (`RKParamsProposed` — their encapsulation public key) +2. **Accept**: if the sender proposed KEM, the replier accepts by encapsulating against the proposed key and including the ciphertext + their own new encapsulation key (`RKParamsAccepted`) + +After acceptance, both sides have a shared KEM secret that is folded into the root KDF. Subsequent ratchet steps continue the KEM exchange with fresh keypairs on each side. + ## Error semantics - `CERatchetEarlierMessage n`: message number is `n` positions before the next expected (already processed or skipped-and-consumed) diff --git a/src/Simplex/Messaging/Crypto.hs b/src/Simplex/Messaging/Crypto.hs index d283ab899e..79a9b593c1 100644 --- a/src/Simplex/Messaging/Crypto.hs +++ b/src/Simplex/Messaging/Crypto.hs @@ -1285,11 +1285,13 @@ verify' (PublicKeyEd25519 k) (SignatureEd25519 sig) msg = Ed25519.verify k msg s verify' (PublicKeyEd448 k) (SignatureEd448 sig) msg = Ed448.verify k msg sig {-# INLINE verify' #-} +-- spec: spec/modules/Simplex/Messaging/Crypto.md#verify-silently-returns-false-on-algorithm-mismatch verify :: APublicVerifyKey -> ASignature -> ByteString -> Bool verify (APublicVerifyKey a k) (ASignature a' sig) msg = case testEquality a a' of Just Refl -> verify' k sig msg _ -> False +-- spec: spec/modules/Simplex/Messaging/Crypto.md#dh-returns-raw-dh-output--no-key-derivation dh' :: DhAlgorithm a => PublicKey a -> PrivateKey a -> DhSecret a dh' (PublicKeyX25519 k) (PrivateKeyX25519 pk) = DhSecretX25519 $ X25519.dh k pk dh' (PublicKeyX448 k) (PrivateKeyX448 pk) = DhSecretX448 $ X448.dh k pk @@ -1418,6 +1420,7 @@ randomCbNonce = fmap CryptoBoxNonce . 
randomBytes 24 randomBytes :: Int -> TVar ChaChaDRG -> STM ByteString randomBytes n gVar = stateTVar gVar $ randomBytesGenerate n +-- spec: spec/modules/Simplex/Messaging/Crypto.md#reversenonce reverseNonce :: CbNonce -> CbNonce reverseNonce (CryptoBoxNonce s) = CryptoBoxNonce (B.reverse s) diff --git a/src/Simplex/Messaging/Crypto/Ratchet.hs b/src/Simplex/Messaging/Crypto/Ratchet.hs index 5f91e728bb..02ddd6a68d 100644 --- a/src/Simplex/Messaging/Crypto/Ratchet.hs +++ b/src/Simplex/Messaging/Crypto/Ratchet.hs @@ -840,9 +840,11 @@ pqEncToSupport (PQEncryption pq) = PQSupport pq pqSupportAnd :: PQSupport -> PQSupport -> PQSupport pqSupportAnd (PQSupport s1) (PQSupport s2) = PQSupport $ s1 && s2 +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#pqenablesupport-is-monotonic pqEnableSupport :: VersionE2E -> PQSupport -> PQEncryption -> PQSupport pqEnableSupport v (PQSupport sup) (PQEncryption enc) = PQSupport $ sup || (v >= pqRatchetE2EEncryptVersion && enc) +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#replykem_--two-step-kem-negotiation replyKEM_ :: VersionE2E -> Maybe (RKEMParams 'RKSProposed) -> PQSupport -> Maybe AUseKEM replyKEM_ v kem_ = \case PQSupportOn | v >= pqRatchetE2EEncryptVersion -> Just $ case kem_ of @@ -994,6 +996,7 @@ data RatchetStep = AdvanceRatchet | SameRatchet type DecryptResult a = (Either CryptoError ByteString, Ratchet a, SkippedMsgDiff) +-- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#maxskip--512--dos-protection maxSkip :: Word32 maxSkip = 512 @@ -1131,6 +1134,7 @@ rcDecrypt g rc@Ratchet {rcRcv, rcAD = Str rcAD, rcVersion} rcMKSkipped msg' = do let (ck', mk, iv, _) = chainKdf ck mks' = M.insert msgNs (MessageKey mk iv) mks in advanceRcvRatchet (n - 1) ck' (msgNs + 1) mks' + -- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#decryptskipped--linear-scan-through-all-stored-header-keys decryptSkipped :: EncMessageHeader -> EncRatchetMessage -> ExceptT CryptoError IO (SkippedMessage a) decryptSkipped encHdr 
encMsg = tryDecryptSkipped SMNone $ M.assocs rcMKSkipped where @@ -1163,6 +1167,7 @@ rcDecrypt g rc@Ratchet {rcRcv, rcAD = Str rcAD, rcVersion} rcMKSkipped msg' = do decryptHeader k EncMessageHeader {ehVersion, ehBody, ehAuthTag, ehIV} = do header <- decryptAEAD k ehIV rcAD ehBody ehAuthTag `catchE` \_ -> throwE CERatchetHeader parseE' CryptoHeaderError (msgHeaderP ehVersion) header + -- spec: spec/modules/Simplex/Messaging/Crypto/Ratchet.md#decryptmessage--ratchet-advances-even-on-failure decryptMessage :: MessageKey -> EncRatchetMessage -> ExceptT CryptoError IO (Either CryptoError ByteString) decryptMessage (MessageKey mk iv) EncRatchetMessage {emHeader, emBody, emAuthTag} = -- DECRYPT(mk, cipher-text, CONCAT(AD, enc_header)) From 35d4065f325373ca883b9388d826b344a14b8cd9 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:32:02 +0000 Subject: [PATCH 31/91] specs for transport --- spec/TOPICS.md | 6 + .../modules/Simplex/FileTransfer/Transport.md | 23 ++++ .../Messaging/Notifications/Transport.md | 36 ++++++ spec/modules/Simplex/Messaging/Transport.md | 114 ++++++++++++++++++ .../Simplex/Messaging/Transport/Buffer.md | 17 +++ .../Simplex/Messaging/Transport/Client.md | 23 ++++ .../Messaging/Transport/Credentials.md | 13 ++ .../Simplex/Messaging/Transport/HTTP2.md | 13 ++ .../Messaging/Transport/HTTP2/Client.md | 19 +++ .../Simplex/Messaging/Transport/HTTP2/File.md | 7 ++ .../Messaging/Transport/HTTP2/Server.md | 15 +++ .../Simplex/Messaging/Transport/KeepAlive.md | 11 ++ .../Simplex/Messaging/Transport/Server.md | 33 +++++ .../Simplex/Messaging/Transport/Shared.md | 28 +++++ .../Simplex/Messaging/Transport/WebSockets.md | 15 +++ src/Simplex/FileTransfer/Transport.hs | 2 + src/Simplex/Messaging/Transport.hs | 6 + .../Messaging/Transport/Credentials.hs | 1 + .../Messaging/Transport/HTTP2/Client.hs | 1 + src/Simplex/Messaging/Transport/Server.hs | 2 + 
src/Simplex/Messaging/Transport/Shared.hs | 1 + src/Simplex/Messaging/Transport/WebSockets.hs | 1 + 22 files changed, 387 insertions(+) create mode 100644 spec/modules/Simplex/FileTransfer/Transport.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Transport.md create mode 100644 spec/modules/Simplex/Messaging/Transport.md create mode 100644 spec/modules/Simplex/Messaging/Transport/Buffer.md create mode 100644 spec/modules/Simplex/Messaging/Transport/Client.md create mode 100644 spec/modules/Simplex/Messaging/Transport/Credentials.md create mode 100644 spec/modules/Simplex/Messaging/Transport/HTTP2.md create mode 100644 spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md create mode 100644 spec/modules/Simplex/Messaging/Transport/HTTP2/File.md create mode 100644 spec/modules/Simplex/Messaging/Transport/HTTP2/Server.md create mode 100644 spec/modules/Simplex/Messaging/Transport/KeepAlive.md create mode 100644 spec/modules/Simplex/Messaging/Transport/Server.md create mode 100644 spec/modules/Simplex/Messaging/Transport/Shared.md create mode 100644 spec/modules/Simplex/Messaging/Transport/WebSockets.md diff --git a/spec/TOPICS.md b/spec/TOPICS.md index a8eafc1a13..d7a6272540 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -7,3 +7,9 @@ - **Padding schemes**: Three different padding formats across the codebase — Crypto.hs uses 2-byte Word16 length prefix (max ~65KB), Crypto/Lazy.hs uses 8-byte Int64 prefix (file-sized), and both use '#' fill character. Ratchet header padding uses fixed sizes (88 or 2310 bytes). All use `pad`/`unPad` but with incompatible formats. The relationship between padding, encryption, and message size limits spans Crypto, Lazy, Ratchet, and the protocol layer. - **NaCl construction variants**: crypto_box, secret_box, and KEM hybrid secret all use the same XSalsa20+Poly1305 core (Crypto.hs `xSalsa20`), but with different key sources (DH, symmetric, SHA3_256(DH||KEM)). 
The lazy streaming variant (Lazy.hs) adds prepend-tag vs tail-tag placement. File.hs wraps lazy streaming with handle-based I/O. Full picture requires reading Crypto.hs, Lazy.hs, File.hs, and SNTRUP761.hs together. + +- **Transport encryption layering**: Three encryption layers overlap — TLS (Transport.hs), optional block encryption via sbcHkdf chains (Transport.hs tPutBlock/tGetBlock), and SMP protocol-level encryption. Block encryption is disabled for proxy connections (already encrypted), and absent for NTF protocol. The interaction of these layers with proxy version downgrade logic spans Transport.hs, Client.hs, and the SMP proxy module. + +- **Certificate chain trust model**: ChainCertificates (Shared.hs) defines 0–4 cert chain semantics, used by both Client.hs (validateCertificateChain) and Server.hs (validateClientCertificate, SNI credential switching). The 4-length case skipping index 2 (operator cert) and the FQHN-disabled x509validate are decisions that span the entire transport security model. + +- **Handshake protocol family**: SMP (Transport.hs), NTF (Notifications/Transport.hs), and XFTP (FileTransfer/Transport.hs) all have handshake protocols with the same structure (version negotiation + session binding + key exchange) but different feature sets. NTF is a strict subset. XFTP doesn't use the TLS handshake at all (HTTP2 layer). The shared types (THandle, THandleParams, THandleAuth) mean changes to the handshake infrastructure affect all three protocols. diff --git a/spec/modules/Simplex/FileTransfer/Transport.md b/spec/modules/Simplex/FileTransfer/Transport.md new file mode 100644 index 0000000000..6bad5455c7 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Transport.md @@ -0,0 +1,23 @@ +# Simplex.FileTransfer.Transport + +> XFTP protocol types, version negotiation, and encrypted file streaming with integrity verification. 
+ +**Source**: [`FileTransfer/Transport.hs`](../../../../src/Simplex/FileTransfer/Transport.hs) + +## xftpClientHandshakeStub — XFTP doesn't use TLS handshake + +`xftpClientHandshakeStub` always fails with `throwE TEVersion`. The source comment states: "XFTP protocol does not use this handshake method." The XFTP handshake is performed at the HTTP/2 layer — `XFTPServerHandshake` and `XFTPClientHandshake` are sent as HTTP/2 request/response bodies (see `FileTransfer/Client.hs` and `FileTransfer/Server.hs`). + +## receiveSbFile — constant-time auth tag verification + +`receiveSbFile` validates the authentication tag using `BA.constEq` (constant-time byte comparison). The auth tag is collected from the stream after all file data — if the file data ends mid-chunk, the remaining bytes of that chunk are used first, and a follow-up read provides the rest of the tag if needed. + +## receiveFile_ — two-phase integrity verification + +File reception has two verification phases: +1. **During receive**: either size checking (plaintext via `hReceiveFile`) or auth tag validation (encrypted via `receiveSbFile`) +2. **After receive**: `LC.sha256Hash` of the entire received file is compared to `chunkDigest` + +## sendEncFile — auth tag appended after all chunks + +`sendEncFile` streams encrypted chunks via `LC.sbEncryptChunk`, then sends `LC.sbAuth sbState` (the authentication tag) as a final frame when the remaining size reaches zero. diff --git a/spec/modules/Simplex/Messaging/Notifications/Transport.md b/spec/modules/Simplex/Messaging/Notifications/Transport.md new file mode 100644 index 0000000000..dd4564738b --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Transport.md @@ -0,0 +1,36 @@ +# Simplex.Messaging.Notifications.Transport + +> Notification Router Protocol transport: manages push notification subscriptions between client and NTF Router. 
+ +**Source**: [`Notifications/Transport.hs`](../../../../../src/Simplex/Messaging/Notifications/Transport.hs) + +**Protocol spec**: [`protocol/push-notifications.md`](../../../../../protocol/push-notifications.md) — SimpleX Notification Router protocol. + +## Overview + +This module implements the transport layer for the **Notification Router Protocol**. Per the protocol spec: "To manage notification subscriptions to SMP routers, SimpleX Notification Router provides an RPC protocol with a similar design to SimpleX Messaging Protocol router." + +The protocol spec diagram shows three separate protocols in the notification flow: +1. **Notification Router Protocol** (this module): client ↔ SimpleX Notification Router — subscription management +2. **SMP protocol**: SMP Router → SimpleX Notifications Subscriber — notification signals +3. **Push provider** (e.g., APN): SimpleX Push Router → device — per the spec: "the notifications are e2e encrypted between SimpleX Notification Router and the user's device" + +## Differences from SMP transport + +The NTF protocol reuses SMP's transport infrastructure but with reduced parameters: + +| Property | SMP | NTF | +|----------|-----|-----| +| Block size | 16384 | 512 | +| Block encryption | Yes (v11+) | No (`encryptBlock = Nothing`) | +| Service certificates | Yes (v16+) | No (`serviceAuth = False`) | +| Version range | 6–19 | 1–3 | +| Handshake messages | 2–3 | 2 | + +## Same ALPN/legacy fallback pattern as SMP + +`ntfServerHandshake` uses the same pattern as `smpServerHandshake`: if ALPN is not negotiated (`getSessionALPN` returns `Nothing`), the server offers only `legacyServerNTFVRange` (v1 only). + +## NTF handshake uses SMP shared types + +The handshake reuses SMP's `THandle`, `THandleParams`, `THandleAuth` types. The `encodeAuthEncryptCmds` and `authEncryptCmdsP` helper functions are defined locally in this module (with NTF-specific version thresholds). 
NTF never sets `sessSecret` / `sessSecret'`, `peerClientService`, or `clientService` — these are always `Nothing`. diff --git a/spec/modules/Simplex/Messaging/Transport.md b/spec/modules/Simplex/Messaging/Transport.md new file mode 100644 index 0000000000..f88188792f --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport.md @@ -0,0 +1,114 @@ +# Simplex.Messaging.Transport + +> SMP transport layer: TLS connection management, SMP handshake protocol, block encryption, version negotiation. + +**Source**: [`Transport.hs`](../../../../src/Simplex/Messaging/Transport.hs) + +**Protocol spec**: [`protocol/simplex-messaging.md` — Transport connection](../../../../protocol/simplex-messaging.md#transport-connection-with-the-smp-router) — SMP encrypted transport, handshake syntax, certificate chain requirements. + +## Overview + +This is the core transport module. It defines: +- The `Transport` typeclass abstracting over TLS and WebSocket connections +- The SMP handshake protocol (server and client sides) +- Optional block encryption using HKDF-derived symmetric key chains (v11+) +- Version negotiation with backward-compatible extensions + +Per the protocol spec: "Each transport block has a fixed size of 16384 bytes for traffic uniformity." The `sessionIdentifier` field uses tls-unique channel binding (RFC 5929) — "it should be included in authorized part of all SMP transmissions sent in this transport connection." + +## SMP version 13 is missing + +The version history jumps from 12 (`blockedEntitySMPVersion`) to 14 (`proxyServerHandshakeSMPVersion`). Version 13 was skipped. + +## proxiedSMPRelayVersion — anti-fingerprinting cap + +`proxiedSMPRelayVersion = 18`, one below `currentClientSMPRelayVersion = 19`. The code comment states: "SMP proxy sets it to lower than its current version to prevent client version fingerprinting by the destination relays when clients upgrade at different times." 
+ +In practice (Server.hs), the SMP proxy uses `proxiedSMPRelayVRange` to cap the destination relay's version range in the `PKEY` response sent to the client, so the client sees a capped version range rather than the relay's actual range. + +## withTlsUnique — different API calls yield same value + +`withTlsUnique` extracts the tls-unique channel binding (RFC 5929) using a type-level dispatch: +- **Server** (`STServer`): `T.getPeerFinished` — the peer's (client's) Finished message +- **Client** (`STClient`): `T.getFinished` — own (client's) Finished message + +Both calls yield the client's Finished message. If the result is `Nothing`, the connection is closed immediately (`closeTLS cxt >> ioe_EOF`). + +## defaultSupportedParams vs defaultSupportedParamsHTTPS + +Two TLS parameter sets: + +- **`defaultSupportedParams`**: ChaCha20-Poly1305 ciphers only, Ed448/Ed25519 signatures only, X448/X25519 groups. Per the protocol spec: "TLS_CHACHA20_POLY1305_SHA256 cipher suite, ed25519 EdDSA algorithms for signatures, x25519 ECDHE groups for key exchange." +- **`defaultSupportedParamsHTTPS`**: extends `defaultSupportedParams` with `ciphersuite_strong`, additional groups, and additional hash/signature combinations. The source comment says: "A selection of extra parameters to accomodate browser chains." + +In the SMP server (Server.hs), when HTTP credentials are configured, `defaultSupportedParamsHTTPS` is used for all connections on that port (not selected per-connection). When no HTTP credentials are configured, `defaultSupportedParams` is used. + +## SMP handshake flow + +Per the [protocol spec](../../../../protocol/simplex-messaging.md#transport-handshake), the handshake is a two-message exchange (three if service certs are used): + +1. **Server → Client**: `paddedRouterHello` containing `smpVersionRange`, `sessionIdentifier` (tls-unique), and `routerCertKey` (certificate chain + X25519 key signed by the server's certificate) +2. 
**Client → Server**: `paddedClientHello` containing agreed `smpVersion`, `keyHash` (router identity — CA certificate fingerprint), optional `clientKey`, `proxyRouter` flag, and optional `clientService` +3. **Server → Client** (service only): `paddedRouterHandshakeResponse` containing assigned `serviceId` or `handshakeError` + +The client verifies `sessionIdentifier` matches its own tls-unique (`when (sessionId /= sessId) $ throwE TEBadSession`). The server verifies `keyHash` matches its CA fingerprint (`when (keyHash /= kh) $ throwE $ TEHandshake IDENTITY`). + +Per the protocol spec: "For TLS transport client should assert that sessionIdentifier is equal to tls-unique channel binding defined in RFC 5929." + +### legacyServerSMPRelayVRange when no ALPN + +If ALPN is not negotiated (`getSessionALPN c` returns `Nothing`), the server offers `legacyServerSMPRelayVRange` (v6 only) instead of the full version range. Per the protocol spec: "If the client does not confirm this protocol name, the router would fall back to v6 of SMP protocol." The spec notes: "This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax." + +### Service certificate handshake extension + +When `clientService` is present in the client handshake, the server performs additional verification: +- The TLS client certificate chain must exactly match the certificate chain in the handshake message (`getPeerCertChain c == cc`) +- The signed X25519 public key is verified against the leaf certificate's key (`getCertVerifyKey leafCert` then `C.verifyX509`) +- On success, the server sends `SMPServerHandshakeResponse` with a `serviceId` +- On failure, the server sends `SMPServerHandshakeError` before raising the error + +Per the protocol spec (v16+): "`clientService` provides long-term service client certificate for high-volume services using SMP router (chat relays, notification routers, high traffic bots). 
The router responds with a third handshake message containing the assigned service ID." + +The client only includes service credentials when `v >= serviceCertsSMPVersion && certificateSent c` (the TLS client certificate was actually sent). + +## tPutBlock / tGetBlock — optional block encryption + +When `encryptBlock` is set, transport blocks are encrypted before being sent over TLS: + +- **Send**: `atomically $ stateTVar sndKey C.sbcHkdf` advances the chain key and returns `(SbKey, CbNonce)`; the block is encrypted with `C.sbEncrypt` +- **Receive**: same pattern with `rcvKey` and `C.sbDecrypt` + +The chain keys are initialized from `C.sbcInit sessionId secret` where `sessionId` is the tls-unique value and `secret` is the session DH shared secret. + +The code comment on `proxyServer` flag states: "This property, if True, disables additional transport encrytion inside TLS. (Proxy server connection already has additional encryption, so this layer is not needed there)." The protocol spec confirms: "`proxyRouter` flag (v14+) disables additional transport encryption inside TLS for proxy connections, since proxy router connection already has additional encryption." + +The protocol spec version history (v11) describes this as "additional encryption of transport blocks with forward secrecy." + +## smpTHandleClient — chain key swap + +`smpTHandleClient` applies `swap` to the chain key pair before creating `TSbChainKeys`. The code comment states: "swap is needed to use client's sndKey as server's rcvKey and vice versa." 
+ +## Proxy version downgrade logic + +When the proxy connects to a destination relay older than v14 (`proxyServerHandshakeSMPVersion`), the client-side handshake caps the version range: + +``` +if proxyServer && maxVersion smpVersionRange < proxyServerHandshakeSMPVersion + then vRange {maxVersion = max (minVersion vRange) deletedEventSMPVersion} +``` + +The code comment explains: "Transport encryption between proxy and destination breaks clients with version 10 or earlier, because of a larger message size (see maxMessageLength)." The cap at `deletedEventSMPVersion` (v10) ensures transport encryption (v11+) is not negotiated with older relays. + +The comment also notes: "Prior to version v6.3 the version between proxy and destination was capped at 8, by mistake, which also disables transport encryption and the latest features." + +## forceCertChain + +`forceCertChain` forces evaluation of the certificate chain and signed key via `length (show cc) `seq` show signedKey `seq` cert`. Introduced in commit 9e7e0d10 ("smp-server: conserve resources"), sub-bullet "transport: force auth params, remove async wrapper" — part of a commit that adds strictness annotations throughout (`bang more thunks`, `strict`). + +## smpTHandle — version 0 bootstrap + +`smpTHandle` creates a `THandle` with version 0, no auth, and no block encryption. This handle is used for the handshake exchange itself (`sendHandshake`/`getHandshake`). After the handshake completes, `smpTHandle_` creates the real handle with the negotiated version, auth, and encryption parameters. + +## getHandshake — forward-compatible parsing + +The code comment states: "ignores tail bytes to allow future extensions." The protocol spec confirms: "`ignoredPart` in handshake allows to add additional parameters in handshake without changing protocol version — the client and routers must ignore any extra bytes within the original block length." 
diff --git a/spec/modules/Simplex/Messaging/Transport/Buffer.md b/spec/modules/Simplex/Messaging/Transport/Buffer.md new file mode 100644 index 0000000000..6b1edf9fd9 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/Buffer.md @@ -0,0 +1,17 @@ +# Simplex.Messaging.Transport.Buffer + +> Buffered TLS reading with TMVar-based concurrency lock. + +**Source**: [`Transport/Buffer.hs`](../../../../../src/Simplex/Messaging/Transport/Buffer.hs) + +## TBuffer — concurrent read safety via getLock + +`TBuffer` uses a `TMVar ()` as a mutex (`getLock`). `getBuffered` acquires the lock via `withBufferLock`, then loops and accumulates bytes until the requested count is reached. + +## getBuffered — first chunk has no timeout + +`getBuffered` reads the first chunk via `getChunk` (no timeout), but applies `withTimedErr t_` (the transport timeout) to subsequent chunks. + +## getLnBuffered — test only + +The source comment states: "This function is only used in test and needs to be improved before it can be used in production, it will never complete if TLS connection is closed before there is newline." diff --git a/spec/modules/Simplex/Messaging/Transport/Client.md b/spec/modules/Simplex/Messaging/Transport/Client.md new file mode 100644 index 0000000000..bdc1cdb82a --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/Client.md @@ -0,0 +1,23 @@ +# Simplex.Messaging.Transport.Client + +> TLS client connection setup: TCP/SOCKS5 connection, TLS handshake, certificate validation, host types. + +**Source**: [`Transport/Client.hs`](../../../../../src/Simplex/Messaging/Transport/Client.hs) + +## ConnectionHandle — three-stage cleanup + +`ConnectionHandle` has three constructors: `CHSocket` (raw socket), `CHContext` (TLS context), `CHTransport` (transport connection). An `IORef` holds the current handle, updated by `set` on each successful transition. The `E.bracket` cleanup function tears down the connection at whatever stage it reached. 
+ +## SocksIsolateByAuth + +`SocksIsolateByAuth` is the default SOCKS authentication mode. When active, [Simplex.Messaging.Client](../Client.md) generates SOCKS credentials (`SocksCredentials sessionUsername ""`) where `sessionUsername` is `B64.encode $ C.sha256Hash $ bshow userId <> ...` with additional components based on `sessionMode` (`TSMUser`, `TSMSession`, `TSMServer`, `TSMEntity`). + +The three modes defined here: `SocksAuthUsername` (explicit credentials), `SocksAuthNull` (no auth, `@` prefix), `SocksIsolateByAuth` (empty string — credentials generated by the caller). + +## validateCertificateChain + +Validation checks the SHA-256 fingerprint of the identity certificate (extracted via `chainIdCaCerts` — see [Shared.md](./Shared.md#chainidcacerts--certificate-chain-semantics)) against the key hash. If the fingerprint doesn't match, the chain is rejected with `UnknownCA`. If the fingerprint matches, standard X.509 validation is performed using the CA certificate as trust anchor. + +## No TLS timeout for client connections + +The code comment states: "No TLS timeout to avoid failing connections via SOCKS." `transportTimeout` is set to `Nothing` for all client connections via `clientTransportConfig`. diff --git a/spec/modules/Simplex/Messaging/Transport/Credentials.md b/spec/modules/Simplex/Messaging/Transport/Credentials.md new file mode 100644 index 0000000000..9aefa53aed --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/Credentials.md @@ -0,0 +1,13 @@ +# Simplex.Messaging.Transport.Credentials + +> Certificate generation for transport layer: Ed25519 key pairs, X.509 signing, TLS credential extraction. + +**Source**: [`Transport/Credentials.hs`](../../../../../src/Simplex/Messaging/Transport/Credentials.hs) + +## genCredentials — nanosecond stripping + +`genCredentials` zeroes out nanoseconds from the current time before creating the certificate validity period: `todNSec = 0`. 
The source comment explains: "remove nanoseconds from time - certificate encoding/decoding removes them." + +## tlsCredentials — root fingerprint from last credential + +`tlsCredentials` extracts the SHA-256 fingerprint from `L.last credentials` (the root/CA certificate), and the private key from `L.head credentials` (the leaf). The returned `KeyHash` wraps this root fingerprint. diff --git a/spec/modules/Simplex/Messaging/Transport/HTTP2.md b/spec/modules/Simplex/Messaging/Transport/HTTP2.md new file mode 100644 index 0000000000..ad4b5de572 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/HTTP2.md @@ -0,0 +1,13 @@ +# Simplex.Messaging.Transport.HTTP2 + +> Bridges TLS transport to HTTP/2 configuration, buffer management, and body reading. + +**Source**: [`Transport/HTTP2.hs`](../../../../../src/Simplex/Messaging/Transport/HTTP2.hs) + +## allocHTTP2Config — manual buffer allocation + +`allocHTTP2Config` uses `mallocBytes` to allocate a write buffer (`Ptr Word8`) for the `http2` package's `Config`. The config bridges TLS to HTTP/2 by passing `cPut c` and `cGet c` from the `Transport` typeclass into the HTTP/2 config's `confSendAll` and `confReadN`. + +## http2TLSParams + +`http2TLSParams` uses `ciphersuite_strong_det` (from `Network.TLS.Extra`), distinct from the `ciphersuite_strong` used in `defaultSupportedParamsHTTPS`. This is the default `suportedTLSParams` in the HTTP/2 client configuration. diff --git a/spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md b/spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md new file mode 100644 index 0000000000..0ff0e0af8c --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md @@ -0,0 +1,19 @@ +# Simplex.Messaging.Transport.HTTP2.Client + +> Thread-safe HTTP/2 client with request queuing, connection lifecycle, and timeout management. 
+ +**Source**: [`Transport/HTTP2/Client.hs`](../../../../../../src/Simplex/Messaging/Transport/HTTP2/Client.hs) + +## sendRequest vs sendRequestDirect — thread safety + +`sendRequest` is thread-safe: it puts the request on a `TBQueue` and waits for the response via a `TMVar`. A single background thread (`process`) dequeues and sends requests sequentially through the HTTP/2 session. + +`sendRequestDirect` bypasses the queue and calls `sendReq` directly. The source comment warns: "this function should not be used until HTTP2 is thread safe, use sendRequest." + +## attachHTTP2Client — runs on both client and server TLS + +The source comment states: "HTTP2 client can be run on both client and server TLS connections." `attachHTTP2Client` takes a `TLS p` where `p` can be `TClient` or `TServer`, allowing an HTTP/2 client session to run on an existing server-side TLS connection. + +## Connection timeout and async lifecycle + +`getVerifiedHTTP2ClientWith` starts the HTTP/2 session in an `async` and waits up to `connTimeout` for the session to establish (signal via `TMVar`). If the timeout fires, the async is cancelled. If the session establishes successfully, the `action` field holds the async handle — `closeHTTP2Client` cancels it with `uninterruptibleCancel`. diff --git a/spec/modules/Simplex/Messaging/Transport/HTTP2/File.md b/spec/modules/Simplex/Messaging/Transport/HTTP2/File.md new file mode 100644 index 0000000000..8d03d82390 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/HTTP2/File.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Transport.HTTP2.File + +> File transfer over HTTP/2: chunked send/receive with size tracking. + +**Source**: [`Transport/HTTP2/File.hs`](../../../../../../src/Simplex/Messaging/Transport/HTTP2/File.hs) + +No non-obvious behavior. See source. 
diff --git a/spec/modules/Simplex/Messaging/Transport/HTTP2/Server.md b/spec/modules/Simplex/Messaging/Transport/HTTP2/Server.md new file mode 100644 index 0000000000..931f89bca1 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/HTTP2/Server.md @@ -0,0 +1,15 @@ +# Simplex.Messaging.Transport.HTTP2.Server + +> HTTP/2 server with inactive client expiration. The single-queue server is for testing only. + +**Source**: [`Transport/HTTP2/Server.hs`](../../../../../../src/Simplex/Messaging/Transport/HTTP2/Server.hs) + +## Inactive client expiration + +`runHTTP2ServerWith_` tracks last activity per client via a `TVar SystemTime`. A background thread (`expireInactiveClient`) periodically checks whether the client has been inactive beyond the `ExpirationConfig` threshold. If so, it calls `closeConnection tls`. + +The activity timestamp is updated on every HTTP/2 request (before dispatching to the handler). + +## getHTTP2Server — testing only + +The source comment states: "This server is for testing only, it processes all requests in a single queue." `getHTTP2Server` puts all requests on a single `TBQueue`. `runHTTP2Server` dispatches requests directly via `H.run` without queueing. diff --git a/spec/modules/Simplex/Messaging/Transport/KeepAlive.md b/spec/modules/Simplex/Messaging/Transport/KeepAlive.md new file mode 100644 index 0000000000..1f943bafa8 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/KeepAlive.md @@ -0,0 +1,11 @@ +# Simplex.Messaging.Transport.KeepAlive + +> Platform-specific TCP keepalive configuration via CApiFFI. + +**Source**: [`Transport/KeepAlive.hs`](../../../../../src/Simplex/Messaging/Transport/KeepAlive.hs) + +## Platform-specific TCP_KEEPIDLE + +macOS uses `TCP_KEEPALIVE` instead of `TCP_KEEPIDLE`. The CPP conditional imports the correct constant at compile time via `foreign import capi`. Windows uses hardcoded numeric values — the source comment states: "The values are copied from windows::Win32::Networking::WinSock." 
+ +Defaults: idle=30s, interval=15s, count=4. diff --git a/spec/modules/Simplex/Messaging/Transport/Server.md b/spec/modules/Simplex/Messaging/Transport/Server.md new file mode 100644 index 0000000000..181951dcda --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/Server.md @@ -0,0 +1,33 @@ +# Simplex.Messaging.Transport.Server + +> TLS server: socket lifecycle, client acceptance, SNI credential switching, socket leak detection. + +**Source**: [`Transport/Server.hs`](../../../../../src/Simplex/Messaging/Transport/Server.hs) + +## safeAccept — errno-based retry + +`safeAccept` retries `accept()` on specific errno values. The code comment references the POSIX man page: "man accept says: For reliable operation the application should detect the network errors defined for the protocol after accept() and treat them like EAGAIN by retrying." The retry set: `eCONNABORTED, eAGAIN, eNETDOWN, ePROTO, eNOPROTOOPT, eHOSTDOWN, eNONET, eHOSTUNREACH, eOPNOTSUPP, eNETUNREACH`. Any other error is logged and re-thrown. + +## SocketState — leak detection + +`SocketState = (TVar Int, TVar Int, TVar (IntMap (Weak ThreadId)))` tracks: accepted count, gracefully-closed count, and active client threads. `getSocketStats` computes `socketsLeaked = socketsAccepted - socketsClosed - socketsActive`. + +## closeServer — weak thread references + +`closeServer` kills active client threads via `Weak ThreadId`. The code: `readTVarIO clients >>= mapM_ (deRefWeak >=> mapM_ killThread)`. `deRefWeak` returns `Nothing` if the thread has already been garbage collected, so the shutdown does not fail on already-dead threads. + +## SNI credential switching + +`supportedTLSServerParams` selects TLS credentials based on SNI: +- **No SNI**: uses `credential` (the primary server credential) +- **SNI present**: uses `sniCredential` (when configured) + +The `sniCredUsed` TVar records whether SNI triggered credential switching. 
In the SMP server (Server.hs), when `sniUsed` is `True`, the connection is dispatched to the HTTP handler instead of the SMP handler. + +## startTCPServer — address resolution + +`startTCPServer` resolves the listen address and selects `AF_INET6` first, falling back to `AF_INET`: `select as = fromJust $ family AF_INET6 <|> family AF_INET`. + +## Client certificate validation for services + +`paramsAskClientCert` enables TLS client certificate requests. In `validateClientCertificate`, an empty chain (`CCEmpty`) returns no error — client certificates are optional, as noted by the code comment: "client certificates are only used for services." diff --git a/spec/modules/Simplex/Messaging/Transport/Shared.md b/spec/modules/Simplex/Messaging/Transport/Shared.md new file mode 100644 index 0000000000..8248c068be --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/Shared.md @@ -0,0 +1,28 @@ +# Simplex.Messaging.Transport.Shared + +> Certificate chain parsing and X.509 validation utilities shared between client and server. + +**Source**: [`Transport/Shared.hs`](../../../../../src/Simplex/Messaging/Transport/Shared.hs) + +**Protocol spec**: [`protocol/simplex-messaging.md` — Router certificate](../../../../protocol/simplex-messaging.md#router-certificate) — certificate chain lengths and semantics. 
+ +## chainIdCaCerts — certificate chain semantics + +`chainIdCaCerts` classifies TLS certificate chains (which are ordered leaf-first) by length: + +| Length | Constructor | Code comment | +|--------|------------|--------------| +| 0 | `CCEmpty` | (no chain) | +| 1 | `CCSelf cert` | (self-signed) | +| 2 | `CCValid {leafCert, idCert=cert, caCert=cert}` | "current long-term online/offline certificates chain" | +| 3 | `CCValid {leafCert, idCert, caCert}` | "with additional operator certificate (preset in the client)" | +| 4 | `CCValid {leafCert, idCert, _, caCert}` | "with network certificate" | +| 5+ | `CCLong` | (rejected) | + +The protocol spec defines supported chain lengths of 2, 3, and 4 certificates (see [Router certificate](../../../../protocol/simplex-messaging.md#router-certificate)). In all `CCValid` cases, `idCert` is the certificate whose fingerprint is compared against the server address key hash, and `caCert` is used as the X.509 trust anchor. + +In the 4-cert case, index 2 is skipped (`_`) — it is present in the chain but not used as either the identity or the trust anchor. + +## x509validate — FQHN check disabled + +`x509validate` sets `checkFQHN = False`. The protocol spec identifies servers by certificate fingerprint (key hash in the server address), not by domain name. The validation uses a fresh `ValidationCache` (`ValidationCacheUnknown` for all lookups, no-op store) — each connection validates independently. diff --git a/spec/modules/Simplex/Messaging/Transport/WebSockets.md b/spec/modules/Simplex/Messaging/Transport/WebSockets.md new file mode 100644 index 0000000000..7f1b3e6736 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Transport/WebSockets.md @@ -0,0 +1,15 @@ +# Simplex.Messaging.Transport.WebSockets + +> WebSocket transport implementation over TLS, with strict message framing. 
+ +**Source**: [`Transport/WebSockets.hs`](../../../../../src/Simplex/Messaging/Transport/WebSockets.hs) + +## cGet — strict size check (unlike TLS) + +`cGet` throws `TEBadBlock` if the received WebSocket message length doesn't equal `n`. This differs from the TLS `cGet` which uses `getBuffered` to accumulate partial reads. + +## WebSocket options + +- `connectionCompressionOptions = NoCompression` +- `connectionFramePayloadSizeLimit = SizeLimit $ fromIntegral smpBlockSize` (16384) +- `connectionMessageDataSizeLimit = SizeLimit 65536` diff --git a/src/Simplex/FileTransfer/Transport.hs b/src/Simplex/FileTransfer/Transport.hs index d55b251483..11b504a4c5 100644 --- a/src/Simplex/FileTransfer/Transport.hs +++ b/src/Simplex/FileTransfer/Transport.hs @@ -103,6 +103,7 @@ currentXFTPVersion = VersionXFTP 3 supportedFileServerVRange :: VersionRangeXFTP supportedFileServerVRange = mkVersionRange initialXFTPVersion currentXFTPVersion +-- spec: spec/modules/Simplex/FileTransfer/Transport.md#xftpclienthandshakestub--xftp-doesnt-use-tls-handshake -- XFTP protocol does not use this handshake method xftpClientHandshakeStub :: c 'TClient -> Maybe C.KeyPairX25519 -> C.KeyHash -> VersionRangeXFTP -> Bool -> Maybe (ServiceCredentials, C.KeyPairEd25519) -> ExceptT TransportError IO (THandle XFTPVersion c 'TClient) xftpClientHandshakeStub _c _ks _keyHash _xftpVRange _proxyServer _serviceKeys = throwE TEVersion @@ -190,6 +191,7 @@ receiveEncFile getBody = receiveFile_ . 
receive data ReceiveFileError = RFESize | RFECrypto +-- spec: spec/modules/Simplex/FileTransfer/Transport.md#receivesbfile--constant-time-auth-tag-verification receiveSbFile :: (Int -> IO ByteString) -> Handle -> LC.SbState -> Word32 -> IO (Either ReceiveFileError ()) receiveSbFile getBody h = receive where diff --git a/src/Simplex/Messaging/Transport.hs b/src/Simplex/Messaging/Transport.hs index f1eb1a8bd0..fde483177b 100644 --- a/src/Simplex/Messaging/Transport.hs +++ b/src/Simplex/Messaging/Transport.hs @@ -240,6 +240,7 @@ currentServerSMPRelayVersion = VersionSMP 19 -- Max SMP protocol version to be used in e2e encrypted -- connection between client and server, as defined by SMP proxy. +-- spec: spec/modules/Simplex/Messaging/Transport.md#proxiedsmprelayversion--anti-fingerprinting-cap -- SMP proxy sets it to lower than its current version -- to prevent client version fingerprinting by the -- destination relays when clients upgrade at different times. @@ -376,6 +377,7 @@ getTLS cfg tlsCertSent tlsPeerCert cxt = withTlsUnique @TLS @p cxt newTLS tlsALPN <- T.getNegotiatedProtocol cxt pure TLS {tlsContext = cxt, tlsALPN, tlsTransportConfig = cfg, tlsCertSent, tlsPeerCert, tlsUniq, tlsBuffer} +-- spec: spec/modules/Simplex/Messaging/Transport.md#withtlsunique--different-api-calls-yield-same-value withTlsUnique :: forall c p. TransportPeerI p => T.Context -> (ByteString -> IO (c p)) -> IO (c p) withTlsUnique cxt f = cxtFinished cxt @@ -722,6 +724,7 @@ instance Encoding TransportError where TENoServerAuth -> "NO_AUTH" TEHandshake e -> "HANDSHAKE " <> bshow e +-- spec: spec/modules/Simplex/Messaging/Transport.md#tputblock--tgetblock--optional-block-encryption -- | Pad and send block to SMP transport. tPutBlock :: Transport c => THandle v c p -> ByteString -> IO (Either TransportError ()) tPutBlock THandle {connection = c, params = THandleParams {blockSize, encryptBlock}} block = do @@ -797,6 +800,7 @@ smpClientHandshake :: forall c. 
Transport c => c 'TClient -> Maybe C.KeyPairX255 smpClientHandshake c ks_ keyHash@(C.KeyHash kh) vRange proxyServer serviceKeys_ = do SMPServerHandshake {sessionId = sessId, smpVersionRange, authPubKey} <- getHandshake th when (sessionId /= sessId) $ throwE TEBadSession + -- spec: spec/modules/Simplex/Messaging/Transport.md#proxy-version-downgrade-logic -- Below logic downgrades version range in case the "client" is SMP proxy server and it is -- connected to the destination server of the version 11 or older. -- It disables transport encryption between SMP proxy and destination relay. @@ -857,6 +861,7 @@ smpTHandleClient :: forall c. THandleSMP c 'TClient -> VersionSMP -> VersionRang smpTHandleClient th v vr pk_ ck_ proxyServer clientService = do let thAuth = clientTHParams <$!> ck_ be <- blockEncryption th v proxyServer thAuth + -- spec: spec/modules/Simplex/Messaging/Transport.md#smpthandleclient--chain-key-swap -- swap is needed to use client's sndKey as server's rcvKey and vice versa pure $ smpTHandle_ th v vr thAuth $ uncurry TSbChainKeys . 
swap <$> be where @@ -893,6 +898,7 @@ smpTHandle_ th@THandle {params} v vr thAuth encryptBlock = } in (th :: THandleSMP c p) {params = params'} +-- spec: spec/modules/Simplex/Messaging/Transport.md#forcecertchain--space-leak-prevention forceCertChain :: CertChainPubKey -> CertChainPubKey forceCertChain cert@(CertChainPubKey (X.CertificateChain cc) signedKey) = length (show cc) `seq` show signedKey `seq` cert {-# INLINE forceCertChain #-} diff --git a/src/Simplex/Messaging/Transport/Credentials.hs b/src/Simplex/Messaging/Transport/Credentials.hs index 8e3efe7959..26bcadf7af 100644 --- a/src/Simplex/Messaging/Transport/Credentials.hs +++ b/src/Simplex/Messaging/Transport/Credentials.hs @@ -40,6 +40,7 @@ privateToTls (C.APrivateSignKey _ k) = case k of type Credentials = (C.ASignatureKeyPair, X509.SignedCertificate) +-- spec: spec/modules/Simplex/Messaging/Transport/Credentials.md#gencredentials--nanosecond-stripping genCredentials :: TVar ChaChaDRG -> Maybe Credentials -> (Hours, Hours) -> Text -> IO Credentials genCredentials g parent (before, after) subjectName = do subjectKeys <- atomically $ C.generateSignatureKeyPair C.SEd25519 g diff --git a/src/Simplex/Messaging/Transport/HTTP2/Client.hs b/src/Simplex/Messaging/Transport/HTTP2/Client.hs index ca07142250..d3402130be 100644 --- a/src/Simplex/Messaging/Transport/HTTP2/Client.hs +++ b/src/Simplex/Messaging/Transport/HTTP2/Client.hs @@ -193,6 +193,7 @@ sendRequest HTTP2Client {client_ = HClient {config, reqQ}} req reqTimeout_ = do let reqTimeout = http2RequestTimeout config reqTimeout_ maybe (Left HCResponseTimeout) Right <$> (reqTimeout `timeout` atomically (takeTMVar resp)) +-- spec: spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md#sendrequest-vs-sendrequestdirect--thread-safety -- | this function should not be used until HTTP2 is thread safe, use sendRequest sendRequestDirect :: HTTP2Client -> Request -> Maybe Int -> IO (Either HTTP2ClientError HTTP2Response) sendRequestDirect HTTP2Client {client_ = 
HClient {config, disconnected}, sendReq} req reqTimeout_ = do diff --git a/src/Simplex/Messaging/Transport/Server.hs b/src/Simplex/Messaging/Transport/Server.hs index cdfc300b71..63e471855d 100644 --- a/src/Simplex/Messaging/Transport/Server.hs +++ b/src/Simplex/Messaging/Transport/Server.hs @@ -183,6 +183,7 @@ runTCPServerSocket (accepted, gracefullyClosed, clients) started getSocket serve tId <- mkWeakThreadId =<< server conn `forkFinally` closeConn atomically $ unlessM (readTVar closed) $ modifyTVar' clients $ IM.insert cId tId +-- spec: spec/modules/Simplex/Messaging/Transport/Server.md#safeaccept--errno-based-retry -- | Recover from errors in `accept` whenever it is safe. -- Some errors are safe to ignore, while blindly restaring `accept` may trigger a busy loop. -- @@ -224,6 +225,7 @@ getSocketStats (accepted, closed, active) = do let socketsLeaked = socketsAccepted - socketsClosed - socketsActive pure SocketStats {socketsAccepted, socketsClosed, socketsActive, socketsLeaked} +-- spec: spec/modules/Simplex/Messaging/Transport/Server.md#closeserver--weak-thread-references closeServer :: TMVar Bool -> TVar (IntMap (Weak ThreadId)) -> Socket -> IO () closeServer started clients sock = do close sock diff --git a/src/Simplex/Messaging/Transport/Shared.hs b/src/Simplex/Messaging/Transport/Shared.hs index 204ef3f5ef..e7f450f499 100644 --- a/src/Simplex/Messaging/Transport/Shared.hs +++ b/src/Simplex/Messaging/Transport/Shared.hs @@ -24,6 +24,7 @@ data ChainCertificates | CCValid {leafCert :: X.SignedCertificate, idCert :: X.SignedCertificate, caCert :: X.SignedCertificate} | CCLong +-- spec: spec/modules/Simplex/Messaging/Transport/Shared.md#chainidcacerts--certificate-chain-semantics chainIdCaCerts :: X.CertificateChain -> ChainCertificates chainIdCaCerts (X.CertificateChain chain) = case chain of [] -> CCEmpty diff --git a/src/Simplex/Messaging/Transport/WebSockets.hs b/src/Simplex/Messaging/Transport/WebSockets.hs index 3ab213dcda..38ac6627dc 100644 --- 
a/src/Simplex/Messaging/Transport/WebSockets.hs +++ b/src/Simplex/Messaging/Transport/WebSockets.hs @@ -69,6 +69,7 @@ instance Transport WS where closeConnection = S.close . wsStream {-# INLINE closeConnection #-} + -- spec: spec/modules/Simplex/Messaging/Transport/WebSockets.md#cget--strict-size-check-unlike-tls cGet :: WS p -> Int -> IO ByteString cGet c n = do s <- receiveData (wsConnection c) From 09d55de115ac2a39903f02b13f66476fccf031fd Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Wed, 11 Mar 2026 20:17:00 +0000 Subject: [PATCH 32/91] protocol and client specs --- spec/TOPICS.md | 6 ++ spec/modules/README.md | 2 + spec/modules/Simplex/Messaging/Client.md | 85 +++++++++++++++++ .../modules/Simplex/Messaging/Client/Agent.md | 92 +++++++++++++++++++ spec/modules/Simplex/Messaging/Protocol.md | 68 ++++++++++++++ .../Simplex/Messaging/Protocol/Types.md | 7 ++ src/Simplex/Messaging/Client.hs | 5 + src/Simplex/Messaging/Client/Agent.hs | 2 + src/Simplex/Messaging/Protocol.hs | 3 + 9 files changed, 270 insertions(+) create mode 100644 spec/modules/Simplex/Messaging/Client.md create mode 100644 spec/modules/Simplex/Messaging/Client/Agent.md create mode 100644 spec/modules/Simplex/Messaging/Protocol.md create mode 100644 spec/modules/Simplex/Messaging/Protocol/Types.md diff --git a/spec/TOPICS.md b/spec/TOPICS.md index d7a6272540..9771076529 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -12,4 +12,10 @@ - **Certificate chain trust model**: ChainCertificates (Shared.hs) defines 0–4 cert chain semantics, used by both Client.hs (validateCertificateChain) and Server.hs (validateClientCertificate, SNI credential switching). The 4-length case skipping index 2 (operator cert) and the FQHN-disabled x509validate are decisions that span the entire transport security model. 
+- **SMP proxy protocol flow**: The PRXY/PFWD/RFWD proxy protocol involves Client.hs (proxySMPCommand with 10 error scenarios, forwardSMPTransmission with sessionSecret encryption), Protocol.hs (command types, version-dependent encoding), Transport.hs (proxiedSMPRelayVersion cap, proxyServer flag disabling block encryption). The double encryption (client-relay via PFWD + proxy-relay via RFWD), combined timeout (tcpConnect + tcpTimeout), nonce/reverseNonce pairing, and version downgrade logic are not visible from any single module. + +- **Service certificate subscription model**: Service subscriptions (SUBS/NSUBS) and per-queue subscriptions (SUB/NSUB) coexist with complex state transitions. Client/Agent.hs manages dual active/pending subscription maps with session-aware cleanup. Protocol.hs defines useServiceAuth (only NEW/SUB/NSUB). Client.hs implements authTransmission with dual signing (entity key over cert hash + transmission, service key over transmission only). Transport.hs handles the service certificate handshake extension (v16+). The full subscription lifecycle — from DBService credentials through handshake to service subscription to disconnect/reconnect — spans all four modules. + +- **Two agent layers**: Client/Agent.hs ("small agent") is used only in servers — SMP proxy and notification server — to manage client connections to other SMP servers. Agent.hs + Agent/Client.hs ("big agent") is used in client applications. Both manage SMP client connections with subscription tracking and reconnection, but the big agent adds the full messaging agent layer (connections, double ratchet, file transfer). When documenting Agent/Client.hs, Client/Agent.hs should be reviewed for shared patterns and differences. 
+ - **Handshake protocol family**: SMP (Transport.hs), NTF (Notifications/Transport.hs), and XFTP (FileTransfer/Transport.hs) all have handshake protocols with the same structure (version negotiation + session binding + key exchange) but different feature sets. NTF is a strict subset. XFTP doesn't use the TLS handshake at all (HTTP2 layer). The shared types (THandle, THandleParams, THandleAuth) mean changes to the handshake infrastructure affect all three protocols. diff --git a/spec/modules/README.md b/spec/modules/README.md index ef2b458814..7b7666aadb 100644 --- a/spec/modules/README.md +++ b/spec/modules/README.md @@ -74,6 +74,8 @@ Do NOT document: - **Function-by-function prose that restates the implementation** — "this function takes X and returns Y by doing Z" adds nothing - **Line numbers** — they're brittle and break on every edit - **Comments that fit in one line in source** — put those in the source file instead as `-- spec:` comments +- **Verbatim quotes of source comments** — reference them instead: "See comment on `functionName`." Then add only what the comment doesn't cover (cross-module implications, what breaks if violated). If the source comment says everything, the function doesn't need a doc entry. +- **Tables that reproduce code structure** — if the information is self-evident from reading the code's pattern matching or type definitions, it doesn't belong in the doc (e.g., per-command credential requirements, version-conditional encoding branches) ## Format diff --git a/spec/modules/Simplex/Messaging/Client.md b/spec/modules/Simplex/Messaging/Client.md new file mode 100644 index 0000000000..a4f7be3525 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Client.md @@ -0,0 +1,85 @@ +# Simplex.Messaging.Client + +> Generic protocol client: connection management, command sending/receiving, batching, proxy protocol, reconnection. 
+ +**Source**: [`Client.hs`](../../../../src/Simplex/Messaging/Client.hs) + +**Protocol spec**: [`protocol/simplex-messaging.md`](../../../../protocol/simplex-messaging.md) — SimpleX Messaging Protocol. + +## Overview + +This module implements the client side of the `Protocol` typeclass — connecting to servers, sending commands, receiving responses, and managing connection lifecycle. It is generic over `Protocol v err msg`, instantiated for SMP as `SMPClient` (= `ProtocolClient SMPVersion ErrorType BrokerMsg`). The SMP proxy protocol (PRXY/PFWD/RFWD) is also implemented here. + +## Four concurrent threads — teardown semantics + +`getProtocolClient` launches four threads via `raceAny_`: +- `send`: reads from `sndQ` (TBQueue) and writes to TLS +- `receive`: reads from TLS and writes to `rcvQ` (TBQueue), updates `lastReceived` +- `process`: reads from `rcvQ` and dispatches to response vars or `msgQ` +- `monitor`: periodic ping loop (only when `smpPingInterval > 0`) + +When ANY thread exits (normally or exceptionally), `raceAny_` cancels all others. `E.finally` ensures the `disconnected` callback always fires. Implication: a single stuck thread (e.g., TLS read blocked on a half-open connection) keeps the entire client alive until `monitor` drops it. There is no per-thread health check — liveness depends entirely on the monitor's timeout logic. + +## Request lifecycle and leak risk + +`mkRequest` inserts a `Request` into `sentCommands` TMap BEFORE the transmission is written to TLS. If the TLS write fails silently or the connection drops before the response, the entry remains in `sentCommands` until the monitor's timeout counter exceeds `maxCnt` and drops the entire client. There is no per-request cleanup on send failure — individual request entries are only removed by `processMsg` (on response) or by `getResponse` timeout (which sets `pending = False` but doesn't remove the entry). 
+ +## getResponse — pending flag race contract + +This is the core concurrency contract between timeout and response processing: + +1. `getResponse` waits with `timeout` for `takeTMVar responseVar` +2. Regardless of result, atomically sets `pending = False` and tries `tryTakeTMVar` again (see comment on `getResponse`) +3. In `processMsg`, when a response arrives for a request where `pending` is already `False` (timeout won), `wasPending` is `False` and the response is forwarded to `msgQ` as `STResponse` rather than discarded + +The double-check pattern (`swapTVar pending False` + `tryTakeTMVar`) handles the race window where a response arrives between timeout firing and `pending` being set to `False`. Without this, responses arriving in that gap would be silently lost. + +`timeoutErrorCount` is reset to 0 in `getResponse` when a response arrives and in `receive` on every TLS read; the monitor uses this count to decide when to drop the connection. + +## processMsg — server events vs expired responses + +When `corrId` is empty, the message is an `STEvent` (server-initiated). When non-empty and the request was already expired (`wasPending` is `False`), the response becomes `STResponse` — not discarded, but forwarded to `msgQ` with the original command context. Entity ID mismatch is `STUnexpectedError`. + +## nonBlockingWriteTBQueue — fork on full + +If `tryWriteTBQueue` returns `False`, a new thread is forked for the blocking write. No backpressure mechanism — under sustained overload, thread count grows without bound. This is a deliberate tradeoff: the caller never blocks (preventing deadlock between send and process threads), at the cost of potential unbounded thread creation. + +## Batch commands do not expire + +See comment on `sendBatch`. Batched commands are written with `Nothing` as the request parameter — the send thread skips the `pending` flag check. Individual commands use `Just r` and the send thread checks `pending` after dequeue.
The coupling: if the server stops responding, batched commands can block the send queue indefinitely since they have no timeout-based expiry. + +## monitor — quasi-periodic adaptive ping + +The ping loop sleeps for `smpPingInterval`, then checks elapsed time since `lastReceived`. If significant time remains in the interval (> 1 second), it re-sleeps for just the remaining time rather than sending a ping. This means ping frequency adapts to actual receive activity — frequent receives suppress pings. + +Pings are only sent when `sendPings` is `True`, set by `enablePings` (called from `subscribeSMPQueue`, `subscribeSMPQueues`, `subscribeSMPQueueNotifications`, `subscribeSMPQueuesNtfs`, `subscribeService`). The client drops the connection when `maxCnt` commands have timed out in sequence AND at least `recoverWindow` (15 minutes) has passed since the last received response. + +## clientCorrId — dual-purpose random values + +`clientCorrId` is a `TVar ChaChaDRG` generating random `CbNonce` values that serve as both correlation IDs and nonces for proxy encryption. When a nonce is explicitly passed (e.g., by `createSMPQueue`), it is used instead of generating a random one. + +## Proxy command re-parameterization + +`proxySMPCommand` constructs modified `thParams` per-request — setting `sessionId`, `peerServerPubKey`, and `thVersion` to the proxy-relay connection's parameters rather than the client-proxy connection's. A single `SMPClient` connection to the proxy carries commands with different auth parameters per destination relay. The encoding, signing, and encryption all use these per-request params, not the connection's original params. + +## proxySMPCommand — error classification + +See comment above `proxySMPCommand` for the 10 error scenarios (0-9) mapping each combination of success/error at client-proxy and proxy-relay boundaries. Errors from the destination relay wrapped in `PRES` are thrown as `ExceptT` errors (transparent proxy).
Errors from the proxy itself are returned as `Left ProxyClientError`. + +## forwardSMPTransmission — proxy-side forwarding + +Used by the proxy server to forward `RFWD` to the destination relay. Uses `cbEncryptNoPad`/`cbDecryptNoPad` (no padding) with the session secret from the proxy-relay connection. Response nonce is `reverseNonce` of the request nonce. + +## authTransmission — dual auth with service signature + +When `useServiceAuth` is `True` and a service certificate is present, the entity key signs over `serviceCertHash <> transmission` (not just the transmission) — see comment on `authTransmission`. The service key only signs the transmission itself. For X25519 keys, `cbAuthenticate` produces a `TAAuthenticator`; for Ed25519/Ed448, `C.sign'` produces a `TASignature`. + +The service signature is only added when the entity authenticator is non-empty. If authenticator generation fails silently (returns empty bytes), service signing is silently skipped. This mirrors the [state-dependent parser contract](./Protocol.md#service-signature--state-dependent-parser-contract) in Protocol.hs. + +## action — weak thread reference + +`action` stores a `Weak ThreadId` (via `mkWeakThreadId`) to the main client thread. `closeProtocolClient` dereferences and kills it. The weak reference allows the thread to be garbage collected if all other references are dropped. + +## writeSMPMessage — server-side event injection + +`writeSMPMessage` writes directly to `msgQ` as `STEvent`, bypassing the entire command/response pipeline. This is used by the server to inject MSG events into the subscription response path. 
diff --git a/spec/modules/Simplex/Messaging/Client/Agent.md b/spec/modules/Simplex/Messaging/Client/Agent.md new file mode 100644 index 0000000000..96e6ff84b9 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Client/Agent.md @@ -0,0 +1,92 @@ +# Simplex.Messaging.Client.Agent + +> SMP client connections with subscription management, reconnection, and service certificate support. + +**Source**: [`Client/Agent.hs`](../../../../../src/Simplex/Messaging/Client/Agent.hs) + +## Overview + +This is the "small agent" — used only in servers (SMP proxy, notification server) to manage client connections to other SMP servers. The "big agent" in `Simplex.Messaging.Agent` + `Simplex.Messaging.Agent.Client` serves client applications and adds the full messaging agent layer. See [Two agent layers](../../../../TOPICS.md) topic. + +`SMPClientAgent` manages `SMPClient` connections via `smpClients :: TMap SMPServer SMPClientVar` (one per SMP server), tracks active and pending subscriptions, and handles automatic reconnection. It is parameterized by `Party` (`p`) and uses the `ServiceParty` constraint to support both `RecipientService` and `NotifierService` modes. + +## Dual subscription model + +Four TMap fields track subscriptions in two dimensions: + +| | Active | Pending | +|---|---|---| +| **Service** | `activeServiceSubs` (TMap SMPServer (TVar (Maybe (ServiceSub, SessionId)))) | `pendingServiceSubs` (TMap SMPServer (TVar (Maybe ServiceSub))) | +| **Queue** | `activeQueueSubs` (TMap SMPServer (TMap QueueId (SessionId, C.APrivateAuthKey))) | `pendingQueueSubs` (TMap SMPServer (TMap QueueId C.APrivateAuthKey)) | + +See comments on `activeServiceSubs` and `pendingServiceSubs` for the coexistence rules. Key constraint: only one service subscription per server. Active subs store the `SessionId` that established them. 
+ +## SessionVar compare-and-swap — core concurrency safety + +`removeSessVar` (in Session.hs) uses `sessionVarId` (monotonically increasing counter from `sessSeq`) to prevent stale removal. When a disconnected client's cleanup runs after a new client already replaced the map entry, the ID mismatch causes removal to silently no-op. See comment on `removeSessVar`. This is used throughout: `removeClientAndSubs` for client map, `cleanup` for worker map. + +## removeClientAndSubs — outside-STM lookup optimization + +See comment on `removeClientAndSubs`. Subscription TVar references are obtained outside STM (via `TM.lookupIO`), then modified inside `atomically`. This is safe because the invariant is that subscription TVar entries for a server are never deleted from the outer TMap, only their contents change. Moving lookups inside the STM transaction would cause excessive re-evaluation under contention. + +## Disconnect preserves others' subscriptions + +`updateServiceSub` only moves active→pending when `sessId` matches the disconnected client (see its comment). If a new client already established different subscriptions on the same server, those are preserved. Queue subs use `M.partition` to split by SessionId — only matching subs move to pending, non-matching remain active. + +## Pending never reset to Nothing on disconnect + +See comment on `updateServiceSub`. After clearing an active service sub, the code sets pending to the cleared value but does NOT reset pending to `Nothing`. This avoids the race where a concurrent new client session has already set a different pending subscription. Implication: pending subs can only grow (be set) during disconnect, never shrink (be cleared). + +## persistErrorInterval — delayed error cleanup + +When `connectClient` calls `newSMPClient` and it fails, the error is stored with an expiry timestamp. `waitForSMPClient` checks expiry before retrying. 
When `persistErrorInterval` is 0, the error is stored without timestamp and the SessionVar is immediately removed from the map. + +## Session validation after subscription RPC + +Both `smpSubscribeQueues` and `smpSubscribeService` validate `activeClientSession` AFTER the subscription RPC completes, before committing results to state. If the session changed during the RPC (client reconnected), results are discarded and reconnection is triggered. This is optimistic execution with post-hoc validation — the RPC may succeed but its results are thrown away if the session is stale. + +## groupSub — subscription response classification + +Each queue response is classified by a `foldr` over the (subs, responses) zip: + +- **Success with matching serviceId**: counted as service-subscribed (`sQs` list) +- **Success without matching serviceId**: counted as queue-only (`qOks` list with SessionId and key) +- **Not in pending map**: silently skipped (handles concurrent activation by another path) +- **Temporary error** (network, timeout): sets the `tempErrs` flag but does NOT remove from pending — queue stays pending for retry on reconnect +- **Permanent error**: removed from pending and added to `finalErrs` — terminal, no automatic retry + +Even if multiple temporary errors occur in a batch, only one `reconnectClient` call is made (via the boolean accumulator flag). + +## updateActiveServiceSub — accumulative merge + +When serviceId and sessionId match the existing active subscription, queue count is added (`n + n'`) and IdsHash is XOR-merged (`idsHash <> idsHash'`). This accumulates across multiple subscription batches for the same service. When they don't match, the subscription is replaced entirely (silently drops old data). + +## CAServiceUnavailable — cascade to queue resubscription + +When `smpSubscribeService` detects service ID or role mismatch with the connection, it fires `CAServiceUnavailable`. 
See comment on `CAServiceUnavailable` for the full implication: the app must resubscribe all queues individually, creating new associations. This can happen if the SMP server reassigns service IDs (e.g., after downgrade and upgrade). + +## getPending — polymorphic over STM/IO + +`getPending` uses rank-2 polymorphism to work in both STM (for the "should we spawn a worker?" check, providing a consistent snapshot) and IO (for the actual reconnection data read, providing fresh data). Between these two calls, new pending subs could be added — the worker loop handles this by re-checking on each iteration. + +## Reconnect worker lifecycle + +### Spawn decision +`reconnectClient` checks `active` outside STM, then atomically checks for pending subs and gets/creates a worker SessionVar. If no pending subs exist, no worker is spawned — this prevents race with cleanup and adding pending queues in another call. + +### Worker cleanup blocks on TMVar fill +See comment on `cleanup`. The STM `retry` loop waits until the async handle is inserted into the TMVar before removing the worker from the map. Without this, cleanup could race ahead of the `putTMVar` in `newSubWorker`, leaving a terminated worker in the map. + +### Double timeout on reconnection +`runSubWorker` wraps the entire reconnection in `System.Timeout.timeout` using `tcpConnectTimeout` in addition to the network-layer timeout. Two layers — network for the connection attempt, outer for the entire operation including subscription. + +### Reconnect filters already-active queues +During reconnection, `reconnectSMPClient` reads current active queue subs (outside STM, same "vars never removed" invariant) and filters them out before resubscribing. Subscription is chunked by `agentSubsBatchSize` — partial success is possible across chunks. + +## Agent shutdown ordering + +`closeSMPClientAgent` executes in order: set `active = False`, close all client connections, then swap workers map to empty and fork cancellation threads. 
The cancel threads use `uninterruptibleCancel` but are fire-and-forget — `closeSMPClientAgent` may return before all workers are actually cancelled. + +## addSubs_ — left-biased union + +`addSubs_` uses `TM.union` which delegates to `M.union` (left-biased). If a queue subscription already exists, the new auth key from the incoming map wins. Service subs use `writeTVar` (overwrite) since only one service sub exists per server. diff --git a/spec/modules/Simplex/Messaging/Protocol.md b/spec/modules/Simplex/Messaging/Protocol.md new file mode 100644 index 0000000000..dc1328cdfe --- /dev/null +++ b/spec/modules/Simplex/Messaging/Protocol.md @@ -0,0 +1,68 @@ +# Simplex.Messaging.Protocol + +> SMP protocol types, commands, responses, encoding/decoding, and transport functions. + +**Source**: [`Protocol.hs`](../../../../src/Simplex/Messaging/Protocol.hs) + +**Protocol spec**: [`protocol/simplex-messaging.md`](../../../../protocol/simplex-messaging.md) — SimpleX Messaging Protocol. + +## Overview + +This module defines the SMP protocol's type-level structure, wire encoding, and transport batching. It does not implement the server or client — those are in [Server.hs](./Server.md) and [Client.hs](./Client.md). The protocol spec governs the command semantics; this doc focuses on non-obvious implementation choices. + +## Two separate version scopes + +SMP client protocol version (`SMPClientVersion`, 4 versions) is separate from SMP relay protocol version (`SMPVersion`, up to version 19, defined in [Transport.hs](./Transport.md)). The client version governs client-to-client concerns (binary encoding, multi-host addresses, SKEY command, short links). The relay version governs client-to-server wire format, transport encryption, and command availability. See comment above `SMPClientVersion` data declaration for version history. 
+ +## maxMessageLength — version-dependent + +`maxMessageLength` returns three different sizes depending on the relay version: +- v11+ (`encryptedBlockSMPVersion`): 16048 +- v9+ (`sendingProxySMPVersion`): 16064 +- older: 16088 + +The source has `TODO v6.0 remove dependency on version`. The type-level `MaxMessageLen` is fixed at 16088 with `TODO v7.0 change to 16048`. + +## Type-level party system + +10 `Party` constructors with `SParty` singletons, `PartyI` typeclass, and three constraint type families (`QueueParty`, `BatchParty`, `ServiceParty`). Invalid party usage produces compile-time errors via the `(Int ~ Bool, TypeError ...)` trick — the unsatisfiable `Int ~ Bool` constraint forces GHC to emit the `TypeError` message. + +## IdsHash — reversible XOR for state drift monitoring + +`IdsHash` uses `BS.zipWith xor` as its `Semigroup`. `queueIdHash` computes MD5 of the queue ID (16 bytes). `mempty` is 16 zero bytes. See comment on `subtractServiceSubs` for the reversibility property. `mconcat` is optimized to avoid repeated pack/unpack per step. + +## TransmissionAuth — size-based type discrimination + +`decodeTAuthBytes` distinguishes authenticator from signature by checking `B.length s == C.cbAuthenticatorSize`. This is a trap: if `cbAuthenticatorSize` ever coincides with a valid signature encoding size, the discrimination breaks. See comment on `tEncodeAuth` for the backward compatibility note (the encoding is backwards compatible with v6 that used `Maybe C.ASignature`). + +## Service signature — state-dependent parser contract + +In `transmissionP`, the service signature is only parsed when `serviceAuth` is true AND the authenticator is non-empty (`not (B.null authenticator)`). This means the parser's behavior depends on earlier parsed state — the service signature field is conditionally present on the wire. If a future change makes the authenticator always non-empty (or always empty), it silently changes whether service signatures are parsed. 
+ +## transmissionP / implySessId + +When `implySessId` is `True`, the session ID is not transmitted on the wire — `transmissionP` sets `sessId` to `""` and prepends the local `sessionId` to the `authorized` bytes for verification. In `tDecodeServer`/`tDecodeClient`, session ID check is bypassed when `implySessId` is `True`. + +## batchTransmissions_ — constraints and ordering + +See comment for the 19-byte overhead calculation (pad size + transmission count + auth tag). Maximum 255 transmissions per batch (single-byte count). Uses `foldr` with `(:)` accumulation, which preserves original transmission order within each batch. + +## ClientMsgEnvelope — two-layer message format + +`ClientMsgEnvelope` has a `PubHeader` (client protocol version + optional X25519 DH key) and an encrypted body. The decrypted body is a `ClientMessage` containing a `PrivHeader` with prefix-based type discrimination: `"K"` for `PHConfirmation` (includes public auth key), `"_"` for `PHEmpty`. + +## MsgFlags — forward-compatible parsing + +The `MsgFlags` parser consumes the `notification` Bool then calls `A.takeTill (== ' ')` to swallow any remaining flag data. See comment on `MsgFlags` encoding for the 7-byte size constraint. Future flags added after `notification` are silently consumed and discarded by old clients. + +## BrokerErrorType NETWORK — detail loss + +The `NETWORK` variant of `BrokerErrorType` encodes as just `"NETWORK"` (detail dropped), with `TODO once all upgrade` comment. The parser falls back to `NEFailedError` when the `NetworkError` detail can't be parsed (`_smpP <|> pure NEFailedError`). This means a newer server's detailed network error is seen as `NEFailedError` by older clients. + +## Version-dependent encoding — scope + +`encodeProtocol` for both `Command` and `BrokerMsg` uses extensive version-conditional encoding. `NEW` has four encoding paths, `IDS` has five. 
All encoding paths for `IDS` must maintain the same field ordering — this is an implicit contract between encoder and decoder with no compile-time enforcement. + +## SUBS/NSUBS — asymmetric defaulting + +When the server parses `SUBS`/`NSUBS` from a client using a version older than `rcvServiceSMPVersion`, both count and hash default (`-1` and `mempty`). For the response side (`SOKS`/`ENDS` via `serviceRespP`), count is still parsed from the wire — only hash defaults to `mempty`. This asymmetry means command-side and response-side parsing have different fallback behavior for the same version boundary. diff --git a/spec/modules/Simplex/Messaging/Protocol/Types.md b/spec/modules/Simplex/Messaging/Protocol/Types.md new file mode 100644 index 0000000000..0797bc185f --- /dev/null +++ b/spec/modules/Simplex/Messaging/Protocol/Types.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Protocol.Types + +> Client notice type with optional TTL, used in BLOCKED error responses. + +**Source**: [`Protocol/Types.hs`](../../../../../src/Simplex/Messaging/Protocol/Types.hs) + +No non-obvious behavior. See source. diff --git a/src/Simplex/Messaging/Client.hs b/src/Simplex/Messaging/Client.hs index 67b31de186..0f3b16813f 100644 --- a/src/Simplex/Messaging/Client.hs +++ b/src/Simplex/Messaging/Client.hs @@ -641,6 +641,7 @@ getProtocolClient g nm transportSession@(_, srv, _) cfg@ProtocolClientConfig {qS atomically $ do writeTVar (connected c) True putTMVar cVar $ Right c' + -- spec: spec/modules/Simplex/Messaging/Client.md#four-concurrent-threads--teardown-semantics raceAny_ ([send c' th, process c', receive c' th] <> [monitor c' | smpPingInterval > 0]) `E.finally` disconnected c' @@ -689,6 +690,7 @@ getProtocolClient g nm transportSession@(_, srv, _) cfg@ProtocolClientConfig {qS forM_ msgQ $ \q -> mapM_ (atomically . writeTBQueue q . 
serverTransmission c) (L.nonEmpty ts') + -- spec: spec/modules/Simplex/Messaging/Client.md#processmsg--server-events-vs-expired-responses processMsg :: ProtocolClient v err msg -> Transmission (Either err msg) -> IO (Maybe (EntityId, ServerTransmission err msg)) processMsg ProtocolClient {client_ = PClient {sentCommands}} (corrId, entId, respOrErr) | B.null $ bs corrId = sendMsg $ STEvent clientResp @@ -1338,11 +1340,13 @@ sendProtocolCommand_ c@ProtocolClient {client_ = PClient {sndQ}, thParams = THan | batch = tEncodeBatch1 serviceAuth t | otherwise = tEncode serviceAuth t +-- spec: spec/modules/Simplex/Messaging/Client.md#nonblockingwritetbqueue--fork-on-full nonBlockingWriteTBQueue :: TBQueue a -> a -> IO () nonBlockingWriteTBQueue q x = do sent <- atomically $ tryWriteTBQueue q x unless sent $ void $ forkIO $ atomically $ writeTBQueue q x +-- spec: spec/modules/Simplex/Messaging/Client.md#getresponse--pending-flag-race-contract getResponse :: ProtocolClient v err msg -> NetworkRequestMode -> Maybe Int -> Request err msg -> IO (Response err msg) getResponse ProtocolClient {client_ = PClient {tcpTimeout, timeoutErrorCount}} nm tOut Request {entityId, pending, responseVar} = do r <- fromMaybe (netTimeoutInt tcpTimeout nm) tOut `timeout` atomically (takeTMVar responseVar) @@ -1382,6 +1386,7 @@ mkTransmission_ ProtocolClient {thParams, client_ = PClient {clientCorrId, sentC atomically $ TM.insert corrId r sentCommands pure r +-- spec: spec/modules/Simplex/Messaging/Client.md#authtransmission--dual-auth-with-service-signature authTransmission :: Maybe (THandleAuth 'TClient) -> Bool -> Maybe C.APrivateAuthKey -> C.CbNonce -> ByteString -> Either TransportError (Maybe TAuthorizations) authTransmission thAuth serviceAuth pKey_ nonce t = traverse authenticate pKey_ where diff --git a/src/Simplex/Messaging/Client/Agent.hs b/src/Simplex/Messaging/Client/Agent.hs index d302ba2371..8dfd0e5631 100644 --- a/src/Simplex/Messaging/Client/Agent.hs +++ 
b/src/Simplex/Messaging/Client/Agent.hs @@ -275,6 +275,7 @@ connectClient ca@SMPClientAgent {agentCfg, dbService, smpClients, smpSessions, m removeClientAndSubs :: SMPClient -> IO (Maybe ServiceSub, Maybe (Map QueueId C.APrivateAuthKey)) removeClientAndSubs smp = do + -- spec: spec/modules/Simplex/Messaging/Client/Agent.md#removeclientandsubs--outside-stm-lookup-optimization -- Looking up subscription vars outside of STM transaction to reduce re-evaluation. -- It is possible because these vars are never removed, they are only added. sVar_ <- TM.lookupIO srv $ activeServiceSubs ca @@ -452,6 +453,7 @@ smpSubscribeQueues ca smp srv subs = do pure acc sessId = sessionId $ thParams smp smpServiceId = smpClientServiceId smp + -- spec: spec/modules/Simplex/Messaging/Client/Agent.md#groupsub--subscription-response-classification groupSub :: Map QueueId C.APrivateAuthKey -> ((QueueId, C.APrivateAuthKey), Either SMPClientError (Maybe ServiceId)) -> diff --git a/src/Simplex/Messaging/Protocol.hs b/src/Simplex/Messaging/Protocol.hs index fa58d88439..3f9773991b 100644 --- a/src/Simplex/Messaging/Protocol.hs +++ b/src/Simplex/Messaging/Protocol.hs @@ -527,6 +527,7 @@ tEncodeAuth serviceAuth = \case TASignature s -> C.signatureBytes s TAAuthenticator (C.CbAuthenticator s) -> s +-- spec: spec/modules/Simplex/Messaging/Protocol.md#transmissionauth--size-based-type-discrimination decodeTAuthBytes :: ByteString -> Maybe (C.Signature 'C.Ed25519) -> Either String (Maybe TAuthorizations) decodeTAuthBytes s serviceSig | B.null s = Right Nothing @@ -1703,6 +1704,7 @@ instance ToJSON BlockingReason where instance FromJSON BlockingReason where parseJSON = strParseJSON "BlockingReason" +-- spec: spec/modules/Simplex/Messaging/Protocol.md#transmissionp--implysessid -- | SMP transmission parser. 
transmissionP :: THandleParams v p -> Parser RawTransmission transmissionP THandleParams {sessionId, implySessId, serviceAuth} = do @@ -2244,6 +2246,7 @@ batchTransmissions' THandleParams {batch, blockSize = bSize, serviceAuth} ts s = tEncode serviceAuth t -- | Pack encoded transmissions into batches +-- spec: spec/modules/Simplex/Messaging/Protocol.md#batchtransmissions_--constraints-and-ordering batchTransmissions_ :: Int -> NonEmpty (Either TransportError ByteString, r) -> [TransportBatch r] batchTransmissions_ bSize = addBatch . foldr addTransmission ([], 0, 0, [], []) where From 260ffb1a9dee84ead72a547606e495f2002a74c1 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:29:18 +0000 Subject: [PATCH 33/91] SMP router specs --- spec/TOPICS.md | 4 + spec/modules/Simplex/Messaging/Server.md | 106 ++++++++++++++++++ spec/modules/Simplex/Messaging/Server/CLI.md | 31 +++++ .../Simplex/Messaging/Server/Control.md | 7 ++ .../Simplex/Messaging/Server/Env/STM.md | 47 ++++++++ .../Simplex/Messaging/Server/Expiration.md | 7 ++ .../Simplex/Messaging/Server/Information.md | 7 ++ spec/modules/Simplex/Messaging/Server/Main.md | 37 ++++++ .../Simplex/Messaging/Server/Main/Init.md | 17 +++ .../Simplex/Messaging/Server/MsgStore.md | 7 ++ .../Messaging/Server/MsgStore/Journal.md | 7 ++ .../Messaging/Server/MsgStore/Postgres.md | 57 ++++++++++ .../Simplex/Messaging/Server/MsgStore/STM.md | 29 +++++ .../Messaging/Server/MsgStore/Types.md | 29 +++++ .../Simplex/Messaging/Server/NtfStore.md | 15 +++ .../Simplex/Messaging/Server/Prometheus.md | 21 ++++ .../Simplex/Messaging/Server/QueueStore.md | 7 ++ .../Messaging/Server/QueueStore/Postgres.md | 97 ++++++++++++++++ .../Messaging/Server/QueueStore/QueueInfo.md | 7 ++ .../Messaging/Server/QueueStore/STM.md | 37 ++++++ .../Messaging/Server/QueueStore/Types.md | 7 ++ .../modules/Simplex/Messaging/Server/Stats.md | 39 +++++++ 
.../Simplex/Messaging/Server/StoreLog.md | 36 ++++++ .../Messaging/Server/StoreLog/ReadWrite.md | 17 +++ .../Messaging/Server/StoreLog/Types.md | 7 ++ spec/modules/Simplex/Messaging/Server/Web.md | 21 ++++ src/Simplex/Messaging/Server.hs | 4 + src/Simplex/Messaging/Server/Env/STM.hs | 1 + .../Messaging/Server/MsgStore/Postgres.hs | 1 + .../Messaging/Server/MsgStore/Types.hs | 1 + .../Messaging/Server/QueueStore/Postgres.hs | 1 + .../Messaging/Server/QueueStore/STM.hs | 1 + src/Simplex/Messaging/Server/StoreLog.hs | 3 + 33 files changed, 715 insertions(+) create mode 100644 spec/modules/Simplex/Messaging/Server.md create mode 100644 spec/modules/Simplex/Messaging/Server/CLI.md create mode 100644 spec/modules/Simplex/Messaging/Server/Control.md create mode 100644 spec/modules/Simplex/Messaging/Server/Env/STM.md create mode 100644 spec/modules/Simplex/Messaging/Server/Expiration.md create mode 100644 spec/modules/Simplex/Messaging/Server/Information.md create mode 100644 spec/modules/Simplex/Messaging/Server/Main.md create mode 100644 spec/modules/Simplex/Messaging/Server/Main/Init.md create mode 100644 spec/modules/Simplex/Messaging/Server/MsgStore.md create mode 100644 spec/modules/Simplex/Messaging/Server/MsgStore/Journal.md create mode 100644 spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md create mode 100644 spec/modules/Simplex/Messaging/Server/MsgStore/STM.md create mode 100644 spec/modules/Simplex/Messaging/Server/MsgStore/Types.md create mode 100644 spec/modules/Simplex/Messaging/Server/NtfStore.md create mode 100644 spec/modules/Simplex/Messaging/Server/Prometheus.md create mode 100644 spec/modules/Simplex/Messaging/Server/QueueStore.md create mode 100644 spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md create mode 100644 spec/modules/Simplex/Messaging/Server/QueueStore/QueueInfo.md create mode 100644 spec/modules/Simplex/Messaging/Server/QueueStore/STM.md create mode 100644 spec/modules/Simplex/Messaging/Server/QueueStore/Types.md 
create mode 100644 spec/modules/Simplex/Messaging/Server/Stats.md create mode 100644 spec/modules/Simplex/Messaging/Server/StoreLog.md create mode 100644 spec/modules/Simplex/Messaging/Server/StoreLog/ReadWrite.md create mode 100644 spec/modules/Simplex/Messaging/Server/StoreLog/Types.md create mode 100644 spec/modules/Simplex/Messaging/Server/Web.md diff --git a/spec/TOPICS.md b/spec/TOPICS.md index 9771076529..6ef029e964 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -19,3 +19,7 @@ - **Two agent layers**: Client/Agent.hs ("small agent") is used only in servers — SMP proxy and notification server — to manage client connections to other SMP servers. Agent.hs + Agent/Client.hs ("big agent") is used in client applications. Both manage SMP client connections with subscription tracking and reconnection, but the big agent adds the full messaging agent layer (connections, double ratchet, file transfer). When documenting Agent/Client.hs, Client/Agent.hs should be reviewed for shared patterns and differences. - **Handshake protocol family**: SMP (Transport.hs), NTF (Notifications/Transport.hs), and XFTP (FileTransfer/Transport.hs) all have handshake protocols with the same structure (version negotiation + session binding + key exchange) but different feature sets. NTF is a strict subset. XFTP doesn't use the TLS handshake at all (HTTP2 layer). The shared types (THandle, THandleParams, THandleAuth) mean changes to the handshake infrastructure affect all three protocols. + +- **Server subscription architecture**: The SMP server's subscription model spans Server.hs (serverThread split-STM lifecycle, tryDeliverMessage sync/async, ProhibitSub/ServerSub state machine), Env/STM.hs (SubscribedClients TVar-of-Maybe continuity, Client three-queue architecture), and Client/Agent.hs (small agent dual subscription model). 
The interaction between service subscriptions, direct queue subscriptions, notification subscriptions, and the serverThread subQ processing is not visible from any single module. + +- **Outside-STM lookup pattern**: Multiple modules use the pattern of looking up TVar references outside STM (via readTVarIO/TM.lookupIO), then reading/modifying the TVar contents inside STM. This avoids transaction re-evaluation from unrelated map changes. Used in: Server.hs (serverThread client lookup, tryDeliverMessage subscriber lookup), Env/STM.hs (deleteSubcribedClient), Client/Agent.hs (removeClientAndSubs, reconnectSMPClient). The safety invariant is that the outer map entries (TVars) are never removed — only their contents change. diff --git a/spec/modules/Simplex/Messaging/Server.md b/spec/modules/Simplex/Messaging/Server.md new file mode 100644 index 0000000000..0ed6e43e19 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server.md @@ -0,0 +1,106 @@ +# Simplex.Messaging.Server + +> SMP server: client handling, subscription lifecycle, message delivery, proxy forwarding, control port. + +**Source**: [`Server.hs`](../../../../src/Simplex/Messaging/Server.hs) + +**Protocol spec**: [`protocol/simplex-messaging.md`](../../../../protocol/simplex-messaging.md) — SimpleX Messaging Protocol. + +## Overview + +The server runs as `raceAny_` over many threads — any thread exit stops the entire server. The thread set includes: one `serverThread` per subscription type (SMP, NTF), a notification delivery thread, a pending events thread, a proxy agent receiver, a SIGINT handler, plus per-transport listener threads and optional expiration/stats/prometheus/control-port threads. `E.finally` ensures `stopServer` runs on any exit. + +## serverThread — subscription lifecycle with split STM + +See comment on `serverThread`. 
It reads the subscription request from `subQ`, then looks up the client **outside** STM (via `getServerClient`), then enters an STM transaction (`updateSubscribers`) to compute which old subscriptions to end, then runs `endPreviousSubscriptions` in IO. If the client disconnects between lookup and transaction, `updateSubscribers` handles `Nothing` by still sending END/DELD to other subscribed clients. + +`checkAnotherClient` ensures END messages are only sent to clients **other than** the subscribing client — if `clntId == clientId`, the action is skipped. + +`removeWhenNoSubs` removes a client from `subClients` only when **both** queue and service subscriptions are empty — not after each individual unsubscription. + +## SubscribedClients — TVar-of-Maybe pattern + +See comment on `SubscribedClients` in Env/STM.hs. Subscription entries store `TVar (Maybe (Client s))` — the TVar's contents change between `Just client` and `Nothing` on disconnect/reconnect, allowing STM transactions reading the TVar to automatically re-evaluate when the subscriber changes. Entries **are** removed via `lookupDeleteSubscribedClient` (when subscriptions end) and `deleteSubcribedClient` (on client disconnect), though the source comment describes the original intent of never cleaning them up. + +`upsertSubscribedClient` returns the previously subscribed client only if it's a **different** client (checked via `sameClientId`). Same client → returns `Nothing` (no END needed). + +## ProhibitSub / ServerSub state machine + +`Sub.subThread` is either `ProhibitSub` or `ServerSub (TVar SubscriptionThread)`. GET creates `ProhibitSub`, preventing subsequent SUB on the same queue (`CMD PROHIBITED`). SUB creates `ServerSub NoSub`, preventing subsequent GET (`CMD PROHIBITED`). This is enforced per-connection — the state tracks which access pattern the client chose. 
+ +`SubscriptionThread` transitions: `NoSub` → `SubPending` (sndQ full during delivery) → `SubThread (Weak ThreadId)` (delivery thread spawned). `SubPending` is set **before** the thread is spawned; the thread atomically upgrades to `SubThread` after forking. If the thread exits before upgrading, the `modifyTVar'` is a no-op (checks for `SubPending` specifically). + +## tryDeliverMessage — sync/async split delivery + +See comment on `tryDeliverMessage`. When a SEND arrives and the queue was empty: + +1. Look up subscribed client **outside STM** (avoids transaction cost when no subscriber exists) +2. In STM: check `delivered` is Nothing, check sndQ not full → deliver synchronously, return Nothing +3. If sndQ is full: set `SubPending`, return the client/sub/stateVar triple +4. Fork a delivery thread that waits for sndQ space, verifies `sameClient` (prevents delivery to reconnected client), then delivers and sets state to `NoSub` + +The `sameClient` check inside the delivery thread prevents a race: if the client reconnected between fork and delivery, the new client will receive the message via its own SUB. + +`newServiceDeliverySub` creates a transient subscription **only** for service-associated queues during message delivery — this is separate from the SUB-created subscriptions. + +## Constant-time authorization — dummy keys + +See comment on `verifyCmdAuthorization`. Always runs verification regardless of whether the queue exists. When the queue is missing (AUTH error), `dummyVerifyCmd` runs verification with hardcoded dummy keys (Ed25519, Ed448, X25519) and the result is discarded via `seq`. The time depends only on the authorization type provided, not on queue existence. This mitigates timing side-channel attacks that could reveal whether a queue ID exists. + +When the signature algorithm doesn't match the queue key, verification runs with a dummy key of the **provided** algorithm and the result is forced then discarded (`seq False`). 
+ +## Service subscription — hash-based drift detection + +See comment on `sharedSubscribeService`. The client sends expected `(count, idsHash)`. The server reads the actual values from storage, then computes `subsChange = subtractServiceSubs currSubs subs'` — the **difference** between what the client's session currently tracks and the new values. This difference (not the absolute values) is passed to `serverThread` via `CSService` to adjust `totalServiceSubs`. Using differences prevents double-counting when a service resubscribes. + +Stats classification: exactly one of `srvSubOk`/`srvSubMore`/`srvSubFewer`/`srvSubDiff` is incremented per subscription. `count == -1` is a special case for old NTF servers. + +## Proxy forwarding — single transmission, no service identity + +See comment on `processForwardedCommand`. Only single forwarded transmissions are allowed — batches are rejected with `BLOCK`. The synthetic `THandleAuth` has `peerClientService = Nothing`, preventing forwarded clients from claiming service identity. Only SEND, SKEY, LKEY, and LGET are allowed through `rejectOrVerify`. + +Double encryption: response is encrypted first to the client (with `C.cbEncrypt` using `reverseNonce clientNonce`), then wrapped and encrypted to the proxy (with `C.cbEncryptNoPad` using `reverseNonce proxyNonce`). Using reversed nonces ensures request and response directions use distinct nonces. + +## Proxy concurrency limiter + +See `wait`/`signal` around `forkProxiedCmd`. `procThreads` TVar implements a counting semaphore via STM `retry`. When `used >= serverClientConcurrency`, the transaction retries until another thread finishes. No bound on wait time — under sustained proxy load, commands queue indefinitely. + +## sendPendingEvtsThread — atomic swap + +`swapTVar pendingEvents IM.empty` atomically takes all pending events and clears the map. Events accumulated during processing are captured in the next interval. 
`tryWriteTBQueue` is tried first (non-blocking); if the sndQ is full, a forked thread does the blocking write. This prevents the pending events thread from stalling on one slow client. + +## deliverNtfsThread — throwSTM for control flow + +See `withSubscribed`. When a service client unsubscribes between the TVar read and the flush, `throwSTM (userError "service unsubscribed")` aborts the STM transaction. This is caught by `tryAny` and logged as "cancelled" — it's a successful path, not an error. The `flushSubscribedNtfs` function also cancels via `throwSTM` if the client is no longer current or sndQ is full. + +## Batch subscription responses — SOK grouped with MSG + +See comment on `processSubBatch`. When batched SUB commands produce SOK responses plus messages, the first message is appended to the SOK batch (up to 4 SOKs per block) in a single transmission. Remaining messages go to `msgQ` for separate delivery. This ensures the client receives at least one message quickly with its subscription acknowledgments. + +## send thread — MVar fair lock + +The TLS handle is wrapped in an `MVar` (`newMVar h`). Both `send` (command responses from `sndQ`) and `sendMsg` (messages from `msgQ`) acquire this lock via `withMVar`. This ensures fair interleaving between response batches and individual messages, preventing either from starving the other. + +## Queue creation — ID oracle prevention + +See comment on queue creation with client-supplied IDs. When `clntIds = True`, the ID must equal `B.take 24 (C.sha3_384 (bs corrId))`. This prevents ID oracle attacks where an attacker could probe for queue existence by attempting to create a queue with a specific ID and observing DUPLICATE vs AUTH errors. + +## disconnectTransport — subscription-aware idle timeout + +See `noSubscriptions`. The idle client disconnect thread only checks expiration when the client has **no** subscriptions (not in `subClients` for either SMP or NTF subscribers). 
Subscribed clients are kept alive indefinitely regardless of inactivity — they're waiting for messages, not idle. + +## clientDisconnected — ordered cleanup + +On disconnect: (1) set `connected = False`, (2) atomically swap out all subscriptions, (3) cancel subscription threads, (4) if server is still active: delete client from server map, update queue and service subscribers. Service subscription cleanup (`updateServiceSubs`) subtracts the client's accumulated `(count, idsHash)` from `totalServiceSubs`. End threads are swapped out and killed. + +## Control port — single auth, no downgrade + +See `controlPortAuth`. Role is set on first `CPAuth` only (`CPRNone` case). Subsequent AUTH commands print the current role but do not change it — the message says "start new session to change." This prevents role downgrade attacks within a session. + +## withQueue_ — updatedAt time check + +Every queue command calls `withQueue_` which checks if `updatedAt` matches today's date. If not, `updateQueueTime` is called to update it. This means `updatedAt` is a daily resolution activity timestamp, not a per-command timestamp. The SEND path passes `queueNotBlocked = False` to still update the time even for blocked queues (though SEND fails on blocked queues separately). + +## foldrM in client command processing + +`foldrM process ([], [])` processes a batch of verified commands right-to-left, accumulating responses and messages. The responses list is built with `(:)`, so the final order matches the original command order. Messages from SUB are collected separately and passed as the second element of the `sndQ` tuple. 
diff --git a/spec/modules/Simplex/Messaging/Server/CLI.md b/spec/modules/Simplex/Messaging/Server/CLI.md new file mode 100644 index 0000000000..a369b69811 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/CLI.md @@ -0,0 +1,31 @@ +# Simplex.Messaging.Server.CLI + +> CLI argument parsing, INI configuration reading, X.509 certificate generation, and utility functions. + +**Source**: [`CLI.hs`](../../../../../src/Simplex/Messaging/Server/CLI.hs) + +## strictIni / iniOnOff — error semantics + +`strictIni` calls `error` on missing INI keys — no structured error, no recovery. `readStrictIni` chains this with `read`, so both "key missing" and "key present but unparseable" produce exceptions indistinguishable by callers. + +`iniOnOff` returns `Maybe Bool`: "on" → `Just True`, "off" → `Just False`, missing key → `Nothing`, any other value → `error` (not a parse failure). This tri-valued logic drives the implicit-default pattern in [Main.md](./Main.md#restore_messages--implicit-default-propagation). + +## iniTransports — port reuse prevention + +SMP ports are parsed first. When explicit WebSocket ports are provided, they are filtered to exclude already-used SMP ports (`ports ws \\ smpPorts`). However, when "websockets" is "on" with no explicit port, it defaults to `["80"]` without filtering against SMP ports. This means if SMP is also on port 80, the default WebSocket configuration would conflict. + +## iniDBOptions — schema creation disabled at CLI + +When reading database options from INI, `createSchema` is always set to `False` regardless of INI content. This enforces a security invariant: database schemas must be created manually or by migration, never automatically by the server. + +## createServerX509_ — external tool dependency + +Certificate generation shells out to `openssl` commands via `readCreateProcess`, which throws `IOError` on non-zero exit codes. 
Failures are thus detected but propagate as uncaught exceptions — no structured error handling wraps the certificate generation sequence. + +## checkSavedFingerprint — startup invariant + +Fingerprint is extracted from the CA certificate and saved during init. On every server start, the saved fingerprint is compared against the current certificate. Mismatch → startup failure. See [Main.md#initializeserver--fingerprint-invariant](./Main.md#initializeserver--fingerprint-invariant). + +## genOnline — existing certificate dependency + +When `signAlgorithm_` or `commonName_` are not provided, `genOnline` reads them from the existing certificate. This creates a hidden dependency on current certificate state that's not visible from the function signature. Expects exactly one certificate in the PEM file. diff --git a/spec/modules/Simplex/Messaging/Server/Control.md b/spec/modules/Simplex/Messaging/Server/Control.md new file mode 100644 index 0000000000..644fb786ac --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Control.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.Control + +> Control port protocol types and encoding for server administration. + +**Source**: [`Control.hs`](../../../../../src/Simplex/Messaging/Server/Control.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/Env/STM.md b/spec/modules/Simplex/Messaging/Server/Env/STM.md new file mode 100644 index 0000000000..d0e9481200 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Env/STM.md @@ -0,0 +1,47 @@ +# Simplex.Messaging.Server.Env.STM + +> Server environment, configuration, client state, subscription types, and storage initialization. + +**Source**: [`Env/STM.hs`](../../../../../../src/Simplex/Messaging/Server/Env/STM.hs) + +## Overview + +This module defines the server's shared state (`Env`, `Server`, `Client`) and the subscription model types. 
Most non-obvious patterns are about concurrency safety — preventing STM contention while maintaining consistency. Key patterns are documented in [Server.md](../Server.md) where they're used; this doc covers patterns specific to the type definitions and initialization. + +## SubscribedClients — TVar-of-Maybe pattern + +See comment on `SubscribedClients`. Entries store `TVar (Maybe (Client s))` rather than the client directly. Three implications: + +1. STM transactions reading the TVar automatically re-evaluate when the subscriber changes (disconnect/reconnect) +2. IO lookups via `TM.lookupIO` can be done outside STM safely (the TVar reference itself is stable while it exists) +3. Reconnecting clients can reuse existing subscription slots without map-level contention + +Note: despite the source comment saying subscriptions "are not removed," the code does remove entries via `lookupDeleteSubscribedClient` (when subscriptions end) and `deleteSubcribedClient` (on client disconnect). The comment reflects the original design intent for mobile client continuity, but the current implementation does clean up. + +See also [Server.md#subscribedclients--tvar-of-maybe-pattern](../Server.md#subscribedclients--tvar-of-maybe-pattern). + +## deleteSubcribedClient — split transaction for contention avoidance + +See comment on `deleteSubcribedClient`. The TVar lookup is in a separate IO read from the client comparison and deletion. This is safe because the client is read in the same STM transaction as the deletion — if another client was inserted between lookup and delete, `sameClient` returns False and the delete is skipped. After setting the TVar to `Nothing`, the entry is also removed from the TMap. + +## insertServerClient — connected check + +`insertServerClient` checks `connected` inside the STM transaction before inserting. If the client was already marked disconnected (race with cleanup), the insert is skipped and returns `False`. 
This prevents resurrecting a disconnected client in the server map. + +## SupportedStore — compile-time storage validation + +Type family with `(Int ~ Bool, TypeError ...)` for invalid combinations. The unsatisfiable `Int ~ Bool` constraint forces GHC to emit the `TypeError` message. Valid: Memory+Memory, Memory+Journal, Postgres+Journal, Postgres+Postgres (with flag). Invalid: Memory+Postgres, Postgres+Memory. The `dbServerPostgres` CPP flag controls whether Postgres+Postgres is available. + +## newEnv — initialization order + +Store initialization order matters: (1) create message store (loads store log for STM backends), (2) create notification store (empty TMap), (3) generate TLS credentials, (4) compute server identity from fingerprint, (5) create stats, (6) create proxy agent. The store log load (`loadStoreLog`) calls `readWriteQueueStore` which reads the existing log, replays it to build state, then opens a new log for writing. `setStoreLog` attaches the write log to the store. + +HTTPS credentials are validated: must be at least 4096-bit RSA (`public_size >= 512` bytes). The check explicitly notes that Let's Encrypt ECDSA uses "insecure curve p256." + +## ServerSubscribers — dual subscriber tracking + +`ServerSubscribers` has two `SubscribedClients` maps: `queueSubscribers` (one entry per queue, for direct subscriptions) and `serviceSubscribers` (one entry per service, for service-certificate subscriptions). `totalServiceSubs` tracks the aggregate `(count, IdsHash)` across all services. `subClients` is an `IntSet` of all client IDs with any subscription (union of queue and service subscribers) — used for idle disconnect decisions. + +## endThreads — weak references with sequence counter + +See comment on `endThreads`. Forked client threads (delivery, proxy commands) are tracked in `IntMap (Weak ThreadId)` with a monotonically increasing `endThreadSeq`. On client disconnect, all threads are swapped out and killed. 
Weak references allow GC to collect threads that finished normally without explicit cleanup. diff --git a/spec/modules/Simplex/Messaging/Server/Expiration.md b/spec/modules/Simplex/Messaging/Server/Expiration.md new file mode 100644 index 0000000000..a51e24c209 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Expiration.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.Expiration + +> Expiration configuration and epoch calculation. + +**Source**: [`Expiration.hs`](../../../../../src/Simplex/Messaging/Server/Expiration.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/Information.md b/spec/modules/Simplex/Messaging/Server/Information.md new file mode 100644 index 0000000000..16f153154c --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Information.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.Information + +> Server public information types (config, operator, hosting) for the server info page. + +**Source**: [`Information.hs`](../../../../../src/Simplex/Messaging/Server/Information.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/Main.md b/spec/modules/Simplex/Messaging/Server/Main.md new file mode 100644 index 0000000000..aed5385737 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Main.md @@ -0,0 +1,37 @@ +# Simplex.Messaging.Server.Main + +> Server CLI entry point: dispatches Init, Start, Delete, Journal, and Database commands. + +**Source**: [`Main.hs`](../../../../../src/Simplex/Messaging/Server/Main.hs) + +## Overview + +This is the CLI dispatcher for the SMP server. It parses INI configuration, validates storage mode combinations, and dispatches to the appropriate command handler. The most complex logic is storage configuration validation and migration between storage modes. + +## Storage mode compatibility — state machine + +`checkMsgStoreMode` and `iniStoreCfg` implement a state machine of valid storage mode combinations. 
Valid: Memory+Memory, Memory+Journal (deprecated), Postgres+Journal, Postgres+Postgres (with flag). Invalid: Memory+Postgres (queue store doesn't support it), Postgres+Memory (messages can't be in-memory with DB queues). Error messages guide the user toward migration commands. The validity is also enforced at the type level via `SupportedStore` in [Env/STM.md](./Env/STM.md#supportedstore--compile-time-storage-validation). + +## INI parsing — error context loss + +`readIniFile` errors are coerced to `String` without structured error information. When INI keys are missing or unparseable, `strictIni` calls `error` (see [CLI.md](./CLI.md#strictini--inionoff--error-semantics)). No line numbers or parse context are preserved. + +## restore_messages — implicit default propagation + +The `restore_messages` INI setting has three-valued logic: explicit "on" → restore, explicit "off" → skip, missing → inherits from `enable_store_log`. This implicit default is not captured in the type system — callers see `Maybe Bool` that silently resolves against another setting. + +## serverPublicInfo — validation with field dependencies + +`sourceCode` is required if ANY `ServerPublicInfo` field is present (in line with AGPLv3 license). `operator_country` requires `operator` to be set. `hosting_country` requires `hosting`. These constraints are enforced at parse time, not by the type system — they can be violated by programmatic construction. + +## initializeServer — fingerprint invariant + +During init, the CA certificate fingerprint is saved to a file. On every subsequent start, `checkSavedFingerprint` (in CLI.hs) validates that the current CA certificate matches the saved fingerprint. If the certificate is replaced without updating the fingerprint file, startup fails. This prevents silent key rotation. + +## Database import — non-atomic migration + +`importStoreLogToDatabase` reads the store log into memory, writes to database, then renames the original file with `.bak` suffix. 
If the function fails after partial database writes, the original file is still present but the database has partial data. No transactional guarantee across the file→DB boundary. + +## Journal store deprecation warning + +`SSCMemoryJournal` initialization prints a deprecation warning (see `newEnv` in Env/STM.hs). Journal message stores will be removed — migration path is: journal export → database import. diff --git a/spec/modules/Simplex/Messaging/Server/Main/Init.md b/spec/modules/Simplex/Messaging/Server/Main/Init.md new file mode 100644 index 0000000000..665938ae8f --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Main/Init.md @@ -0,0 +1,17 @@ +# Simplex.Messaging.Server.Main.Init + +> Server initialization: INI file content generation, default settings, and CLI option structures. + +**Source**: [`Main/Init.hs`](../../../../../../src/Simplex/Messaging/Server/Main/Init.hs) + +## iniFileContent — selective commenting + +`iniFileContent` uses `optDisabled`/`optDisabled'` to conditionally comment out INI settings. A setting appears commented when it was not explicitly provided or matches the default value. Consequence: regenerating the INI file after user changes will re-comment modified-to-default values, making it appear the user's change was reverted. + +## iniDbOpts — default fallback + +Database connection settings are uncommented only if they differ from `defaultDBOpts`. A custom connection string that matches the default will appear commented. + +## Control port passwords — independent generation + +Admin and user control port passwords are generated as two independent `randomBase64 18` calls during initialization. Despite `let pwd = ... in (,) <$> pwd <*> pwd` appearing to share a value, `pwd` is an IO action — applicative `<*>` executes it twice, producing two different random passwords. The INI template thus has distinct admin and user passwords. 
diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore.md b/spec/modules/Simplex/Messaging/Server/MsgStore.md new file mode 100644 index 0000000000..625649170e --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/MsgStore.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.MsgStore + +> Message log record type for store log serialization. + +**Source**: [`MsgStore.hs`](../../../../../src/Simplex/Messaging/Server/MsgStore.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/Journal.md b/spec/modules/Simplex/Messaging/Server/MsgStore/Journal.md new file mode 100644 index 0000000000..3c0ab6afcf --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/Journal.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.MsgStore.Journal + +> **Deprecated** — will be removed. Migration path: `journal export` → in-memory, then `database import` → PostgreSQL. See deprecation warning in [Env/STM.hs](../../../../../../src/Simplex/Messaging/Server/Env/STM.hs) `SSCMemoryJournal` initialization. + +**Source**: [`Journal.hs`](../../../../../../src/Simplex/Messaging/Server/MsgStore/Journal.hs) + +No further documentation — this module is deprecated. diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md b/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md new file mode 100644 index 0000000000..7262bde0a6 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md @@ -0,0 +1,57 @@ +# Simplex.Messaging.Server.MsgStore.Postgres + +> PostgreSQL message store: server-side stored procedures for message operations, COPY protocol for bulk import. + +**Source**: [`Postgres.hs`](../../../../../../src/Simplex/Messaging/Server/MsgStore/Postgres.hs) + +## MsgQueue is unit type + +`type MsgQueue PostgresMsgStore = ()`. There is no message queue object for Postgres — all message operations go directly to the database via stored procedures. Functions like `getMsgQueue` return `pure ()`. 
+ +## Partial interface — error stubs + +Multiple `MsgStoreClass` methods are `error "X not used"`: `withActiveMsgQueues`, `unsafeWithAllMsgQueues`, `logQueueStates`, `withIdleMsgQueue`, `getQueueMessages_`, `tryDeleteMsg_`, `setOverQuota_`, `getQueueSize_`, `unsafeRunStore`. These are required by the type class but not applicable to Postgres. Calling any at runtime crashes. Postgres overrides the default implementations of `tryPeekMsg`, `tryDelMsg`, `tryDelPeekMsg`, `deleteExpiredMsgs`, and `getQueueSize` with direct database calls. + +## writeMsg — quota logic in stored procedure + +`write_message(?,?,?,?,?,?,?)` is a PostgreSQL stored procedure that returns `(quota_written, was_empty)`. Quota enforcement happens in SQL, not in Haskell. This means quota logic is duplicated: STM store checks `canWrite` flag in Haskell, Postgres store checks in the database function. The two implementations must agree on quota semantics. + +## tryDelPeekMsg — variable row count + +The stored procedure `try_del_peek_msg` returns 0, 1, or 2 rows. For the 1-row case, the code checks whether the returned message's `messageId` matches the requested `msgId` to distinguish "deleted, no next message" from "delete failed, current message returned." This disambiguation is possible because the stored procedure always returns available messages even when deletion doesn't match. + +## uninterruptibleMask_ on all database operations + +All write operations (`writeMsg`, `tryDelMsg`, `tryDelPeekMsg`, `deleteExpiredMsgs`) and `isolateQueue` are wrapped in `E.uninterruptibleMask_`. This prevents async exceptions (e.g., client disconnect) from interrupting mid-transaction, which could leave database connections in an inconsistent state. + +## batchInsertMessages — COPY protocol + +Uses PostgreSQL's COPY FROM STDIN protocol (`DB.copy_` + `DB.putCopyData` + `DB.putCopyEnd`) for bulk message import, which is much faster than individual INSERTs. Messages are encoded to CSV format. 
Parse errors on individual records are logged and skipped — the import is error-tolerant. The entire operation runs in a single transaction (`withTransaction`). + +## exportDbMessages — batched I/O + +Accumulates rows in an `IORef` list (prepended for O(1) insert), flushing every 1000 records with `reverse` to restore order. Uses `DB.foldWithOptions_` with `fetchQuantity = Fixed 1000` to avoid loading all messages into memory. + +## updateQueueCounts — two-step reset + +Creates a temp table with aggregated message stats, then updates `msg_queues` in two steps: first zeros all queue counts, then applies actual stats from the temp table. The two-step approach handles queues with zero messages: they're reset by the first UPDATE but not touched by the second (no matching row in temp table). + +## toMessage — nanosecond precision lost + +`MkSystemTime ts 0` constructs timestamps with zero nanoseconds. Only whole seconds are stored in the database. Messages read from Postgres have coarser timestamps than messages in STM/Journal stores. + +## isolateQueue IS the transaction boundary + +`isolateQueue` for Postgres does `uninterruptibleMask_ $ withDB' op ... $ runReaderT a . DBTransaction`. Each `isolateQueue` call creates a fresh `DBTransaction` carrying the DB connection. This is how `tryPeekMsg_` (which uses `asks dbConn`) gets its connection. The `withQueueLock` is identity for Postgres, so `isolateQueue` provides no mutual exclusion — only the DB transaction provides isolation. + +## newMsgStore hardcodes useCache = False + +`newQueueStore @PostgresQueue (queueStoreCfg config, False)` — the Postgres message store always disables queue caching. All lookups go directly to the database. Contrast with the Journal+Postgres combination where caching is enabled. + +## deleteQueueSize — size before delete + +`deleteQueueSize` calls `getQueueSize` BEFORE `deleteStoreQueue`. 
The returned size is the count at query time — a concurrent `writeMsg` between the size query and the delete means the reported size is stale. This is acceptable because the size is used for statistics, not for correctness. + +## unsafeMaxLenBS + +`toMessage` uses `C.unsafeMaxLenBS` to bypass the `MaxLen` length check on message bodies read from the database. A TODO comment questions this choice. If the database contains oversized data, the length invariant is silently violated. diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/STM.md b/spec/modules/Simplex/Messaging/Server/MsgStore/STM.md new file mode 100644 index 0000000000..95423cdf11 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/STM.md @@ -0,0 +1,29 @@ +# Simplex.Messaging.Server.MsgStore.STM + +> In-memory STM message store: TQueue-based message queues with quota enforcement. + +**Source**: [`STM.hs`](../../../../../../src/Simplex/Messaging/Server/MsgStore/STM.hs) + +## withQueueLock is identity + +`withQueueLock _ _ = id` — STM queues need no locking since STM provides atomicity. Journal.hs overrides this with a `TMVar`-based in-memory lock (via `withLockWaitShared`). Any code calling `withQueueLock` transparently gets the right concurrency control for the backend. + +## writeMsg — quota with empty-queue override + +When `canWrite` is `False` (over quota) but the queue is empty, writing is still allowed. This handles the case where all messages were deleted or expired but the `canWrite` flag was not reset. When the quota is exceeded, the actual message content is replaced with a `MessageQuota` (preserving only `msgId` and `msgTs`) — the client receives a quota notification instead of the message. + +## getMsgQueue — lazy initialization + +The message queue TVar (`msgQueue'`) starts as `Nothing`. The queue is created on first `getMsgQueue` call (lazy initialization). This means queues that are created but never receive messages don't allocate a TQueue. 
`getPeekMsgQueue` returns `Nothing` if no message queue exists — callers handle this as "queue is empty." + +## deleteQueue_ — atomic swap prevents post-delete operations + +`swapTVar (msgQueue' q) Nothing` atomically retrieves the old message queue and sets the TVar to `Nothing`. Any subsequent `getMsgQueue` call would create a fresh empty queue, but the deleted queue's `queueRec` TVar is also set to `Nothing` by `deleteStoreQueue`, so all operations would fail with `AUTH` first. + +## tryDeleteMsg_ — blind dequeue, no msgId check + +`tryDeleteMsg_` does `tryReadTQueue` — removes whatever is at the head without verifying the message ID. The msgId check lives in the default `tryDelMsg` / `tryDelPeekMsg` implementations in `Types.hs`, which always call `tryPeekMsg_` first to verify. Calling `tryDeleteMsg_` directly would silently delete the wrong message if the head changed between peek and delete. Safe only because `isolateQueue` serializes all operations on the same queue. + +## getQueueMessages_ snapshot — invisible gap + +`getQueueMessages_ False` implements non-destructive read by flushing TQueue then writing back. This runs inside `atomically` (via `isolateQueue`), so the temporarily-empty state is never visible to other transactions. diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md b/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md new file mode 100644 index 0000000000..2fd4c79bf7 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md @@ -0,0 +1,29 @@ +# Simplex.Messaging.Server.MsgStore.Types + +> Type class for message stores with injective type families and polymorphic isolation. + +**Source**: [`Types.hs`](../../../../../../src/Simplex/Messaging/Server/MsgStore/Types.hs) + +## Injective type families + +All associated types (`StoreMonad`, `MsgQueue`, `StoreQueue`, `QueueStore`, `MsgStoreConfig`) use injective type families (`| m -> s`).
This means each associated type uniquely determines the store type, avoiding ambiguity at call sites. Without injectivity, most call sites would need explicit type applications. + +## isolateQueue — polymorphic isolation + +`isolateQueue` abstracts the concurrency model: STM store implements it as `liftIO . atomically` (single STM transaction), while Journal store acquires a TMVar-based in-memory lock (not a filesystem lock). All message operations go through `isolateQueue` or `withPeekMsgQueue` (which calls `isolateQueue`). This means the atomicity guarantee varies by backend — STM gives true atomicity, Journal gives mutual exclusion via lock. + +## tryDelPeekMsg — atomic delete-and-peek + +Deletes the current message AND peeks the next one in a single `isolateQueue` call. This atomicity is critical for the ACK flow: the server needs to know if there's a next message to deliver immediately after acknowledging the current one, without a window where a concurrent SEND could interleave. + +## withIdleMsgQueue — journal-specific lifecycle + +For Journal store, the message queue file handle is closed after the action if it was initially closed or idle longer than the configured interval. For STM store, this is effectively a no-op (always open, never "idle"). The return tuple `(Maybe a, Int)` provides both the action result and the queue size — the `Maybe` is `Nothing` when no message queue exists (no messages ever written). + +## unsafeWithAllMsgQueues — CLI-only + +Explicitly unsafe: iterates all queues including those not in active memory. Only safe before server start or in CLI commands. During normal operation, Journal store may have queues on disk but not loaded — this function would load them, interfering with the lazy-loading lifecycle. + +## snapshotTQueue visibility gap + +`getQueueMessages_ False` (non-destructive read) flushes the TQueue then writes all messages back. Between flush and rewrite, concurrent STM transactions would see an empty queue. 
However, since the flush and rewrite run inside a single `atomically` block for the STM store, the intermediate empty state is never committed and the gap is invisible to other transactions. For Journal store (where `StoreMonad` is IO-based), this is not used. diff --git a/spec/modules/Simplex/Messaging/Server/NtfStore.md b/spec/modules/Simplex/Messaging/Server/NtfStore.md new file mode 100644 index 0000000000..b58a44fad5 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/NtfStore.md @@ -0,0 +1,15 @@ +# Simplex.Messaging.Server.NtfStore + +> In-memory notification store: per-notifier message notification lists with expiration. + +**Source**: [`NtfStore.hs`](../../../../../src/Simplex/Messaging/Server/NtfStore.hs) + +## storeNtf — outside-STM lookup with STM fallback + +`storeNtf` uses `TM.lookupIO` outside STM, then falls back to `TM.lookup` inside STM if the notifier entry doesn't exist. This is the same outside-STM lookup pattern used in Server.hs and Client/Agent.hs — avoids transaction re-evaluation from unrelated map changes. The double-check inside STM prevents races when two messages arrive concurrently for a new notifier. + +## deleteExpiredNtfs — last-is-earliest optimization + +Notifications are prepended (cons), so the last element in the list is the earliest. `deleteExpiredNtfs` checks `last ntfs` first — if the earliest notification is not expired, none are, and the entire list is skipped without filtering. This avoids traversing notification lists that have no expired entries. + +The outer `readTVarIO` check for empty list avoids entering an STM transaction at all for notifiers with no notifications. diff --git a/spec/modules/Simplex/Messaging/Server/Prometheus.md b/spec/modules/Simplex/Messaging/Server/Prometheus.md new file mode 100644 index 0000000000..11610ee239 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Prometheus.md @@ -0,0 +1,21 @@ +# Simplex.Messaging.Server.Prometheus + +> Prometheus text exposition format for server metrics, with histogram gap-filling and derived aggregations.
+ +**Source**: [`Prometheus.hs`](../../../../../src/Simplex/Messaging/Server/Prometheus.hs) + +## Histogram gap-filling + +`showTimeBuckets` uses `mapAccumL` over sorted bucket keys. When the gap between consecutive buckets exceeds 60 seconds, it inserts a synthetic bucket at `sec - 60` with the cumulative total up to that point. This fills sparse `TimeBuckets` maps into continuous Prometheus histograms. The 60-second gap threshold is hardcoded. + +## Bucket sum aggregation — filters by value, not key + +`showBucketSums` intends to aggregate buckets into fixed time periods: 0-60s, 60-300s, 300-1200s, 1200-3600s, 3600+s. However, `IM.filter` (from `Data.IntMap.Strict`) filters by **value** (count), not by key (time). The predicate `\sec -> minTime <= sec && sec < maxTime` is applied to count values, not to the IntMap keys that represent seconds. This means buckets are selected based on whether their count falls in the range, not based on their time boundary. The aggregation boundaries are also independent of the bucketing thresholds in `updateTimeBuckets` (Stats.hs), which uses 5s/10s/30s/60s quantization. + +## Non-standard Prometheus timestamp output + +The `mstr` function appends `tsEpoch ts` (millisecond-precision Unix timestamp) directly after metric values. Explicit timestamps are uncommon in exporter output, but they are valid in the Prometheus text exposition format. + +## Delivery histogram count/sum source + +`simplex_smp_delivery_ack_confirmed_time_count` is `_msgRecv + _msgRecvGet`. `simplex_smp_delivery_ack_confirmed_time_sum` is `sumTime` from `_msgRecvAckTimes`. The count is accumulated separately from the histogram — if there's a code path that increments `msgRecv` without calling `updateTimeBuckets`, count and sum diverge.
diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore.md b/spec/modules/Simplex/Messaging/Server/QueueStore.md new file mode 100644 index 0000000000..c906c9ecc3 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/QueueStore.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.QueueStore + +> Core record types for queue storage: QueueRec, NtfCreds, ServiceRec, ServerEntityStatus. + +**Source**: [`QueueStore.hs`](../../../../../src/Simplex/Messaging/Server/QueueStore.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md b/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md new file mode 100644 index 0000000000..f97acaa2d3 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md @@ -0,0 +1,97 @@ +# Simplex.Messaging.Server.QueueStore.Postgres + +> PostgreSQL queue store: cache-coherent TMap layer over database, double-checked locking, soft-delete lifecycle, COPY-based bulk import. + +**Source**: [`Postgres.hs`](../../../../../../src/Simplex/Messaging/Server/QueueStore/Postgres.hs) + +## addQueue_ — no in-memory duplicate check, relies on DB constraint + +See comment on `addQueue_`: "Not doing duplicate checks in maps as the probability of duplicates is very low." The STM implementation checks all four ID maps before insertion and returns `DUPLICATE_`. The Postgres implementation skips this and relies on `UniqueViolation` from the DB, which `handleDuplicate` maps to `AUTH`, not `DUPLICATE_`. The same logical error produces different error codes depending on the store backend. + +## addQueue_ — non-atomic cache updates + +After the successful SQL INSERT, each cache map (`queues`, `senders`, `notifiers`, `links`) is updated in its own `atomically` block. Between these updates, the cache is partially consistent — a concurrent `getQueue_` by sender ID could miss the queue during the window between the `queues` insert and the `senders` insert. 
The STM implementation updates all maps in a single `atomically` block. `E.uninterruptibleMask_` prevents async exceptions but not concurrent reads. + +## getQueue_ / SNotifier — one-shot cache eviction on read + +See comment on `getQueue_` for the SNotifier case. After a successful notifier lookup, the notifier ID is deleted from the `notifiers` TMap. This makes the notifier cache a one-shot cache: the first lookup uses the cache, subsequent lookups hit the database. Unique to SNotifier — SSender entries persist indefinitely. The batch path (`getQueues_` SNotifier) does NOT do this eviction, so single and batch paths have different cache side effects. + +## getQueue_ / loadNtfQueue — notifier lookups never cache the queue + +See comment on `loadNtfQueue`: "checking recipient map first, not creating lock in map, not caching queue." Notifier-initiated DB loads produce ephemeral queue objects created with `mkQ False` (no persistent lock). Two concurrent notifier lookups for the same queue create independent queue objects with separate `TVar`s. Contrast with `loadSndQueue_` which caches via `cacheQueue`. + +## cacheQueue — double-checked locking + +Classic pattern: (1) TMap lookup outside lock, (2) if miss, DB load + create queue + acquire `withQueueLock`, (3) second TMap check inside lock + `atomically`, (4) if another thread won the race, discard the freshly created queue. See comment on `cacheQueue` for the rationale about preventing duplicate file opens. For Journal storage, the losing thread's lock remains in `queueLocks` as a harmless orphan. For Postgres-only storage (`mkQueue` creates a TVar), no resource leak. + +## getQueues_ — snapshot-based cache with stale-read risk + +Both SRecipient and SNotifier paths start with `readTVarIO` snapshots of the relevant TMap(s), then partition requested IDs into "found" and "need DB load." Between snapshot and DB query, the cache can change. The `cacheRcvQueue` path handles this with a second check inside the lock. 
The SNotifier path does NOT cache — it uses the stale snapshot to decide `maybe (mkQ False rId qRec) pure (M.lookup rId qs)`, so concurrent loads can create duplicate ephemeral objects. + +## getQueues_ — error code asymmetry: INTERNAL vs AUTH + +When all IDs are found in cache but some map to `Left` (theoretically impossible), the error is `INTERNAL`. When some IDs needed DB loading and were missing, the error is `AUTH`. Same "not found" condition, different error codes depending on whether the DB was consulted. The `INTERNAL` branch is a defensive assertion against inconsistent TMap snapshots. + +## withDB — every operation runs in its own transaction + +`withDB` wraps each action in `withTransaction` (PostgreSQL `READ COMMITTED`). No multi-statement transactions in queue store operations (unlike `getEntityCounts` and `batchInsertQueues` which use `withTransaction` directly). SQL exceptions are caught, logged, and mapped to `STORE` with the exception text — which propagates to the SMP client over the wire. + +## withQueueRec — lock-mask-read pattern + +All mutating operations share: (1) `withQueueLock` (per-queue lock), (2) `E.uninterruptibleMask_` (no async exceptions mid-operation), (3) `readQueueRecIO` (check queue not deleted). If the TVar reads `Nothing`, the operation short-circuits with `AUTH` without touching the database. The TVar is the authoritative "is deleted" check; `assertUpdated` (zero rows → `AUTH`) catches cache-DB divergence as a secondary check. + +## deleteStoreQueue — two-phase soft delete + +Queue deletion is soft: `UPDATE ... SET deleted_at = ?`. The row remains in the database. `compactQueues` later does the hard delete: `DELETE ... WHERE deleted_at < ?` using the configurable `deletedTTL`. All queries include `AND deleted_at IS NULL` to exclude soft-deleted rows. The STM implementation has no equivalent — `compactQueues` returns `pure 0`. 
+ +## deleteStoreQueue — non-atomic cache cleanup, links never cleaned + +The TVar is set to `Nothing` first, then secondary maps (`senders`, `notifiers`, `notifierLocks`) are cleaned in separate `atomically` blocks. Between these, secondary maps point to a dead queue (functionally correct — returns AUTH either way). The `links` map is never cleaned up here — link entries for deleted queues remain in memory indefinitely. + +## secureQueue — idempotency difference from STM + +Re-securing with the same key falls through the verify function to `pure ()`, then **still executes the SQL UPDATE and TVar write**. The STM implementation returns `Right ()` without TVar mutation when the same key is provided. Both implementations write a store log entry either way. The Postgres version performs an unnecessary DB round-trip, connection pool checkout, and TVar write that the STM version avoids. + +## addQueueNotifier — three-layer duplicate detection + +(1) **Cache check**: `checkCachedNotifier` acquires a per-notifier-ID lock via `notifierLocks`, then checks `TM.memberIO`. Returns `DUPLICATE_`. (2) **Queue lock**: Via `withQueueRec`, prevents concurrent modifications to the same queue. (3) **Database constraint**: `handleDuplicate` catches `UniqueViolation`, returns `AUTH`. Same duplicate, different error codes depending on whether cache was warm. The `notifierLocks` map grows unboundedly — locks are never removed except when the queue is deleted. + +## addQueueNotifier — always clears notification service + +The SQL UPDATE always sets `ntf_service_id = NULL` when adding/replacing a notifier. The previous notifier's service association is silently lost. The STM implementation additionally calls `removeServiceQueue` to update service-level tracking; the Postgres version does not. + +## rowToQueueRec — link data replaced with empty stubs + +The standard `queueRecQuery` does NOT select `fixed_data` and `user_data` columns. 
When converting to `QueueRec`, link data is stubbed: `(,(EncDataBytes "", EncDataBytes "")) <$> linkId_`. Actual link data is loaded on demand via `getQueueLinkData`. Any code reading `queueData` from a cached `QueueRec` without going through `getQueueLinkData` sees empty bytes. The separate `rowToQueueRecWithData` (used by `foldQueueRecs` with `withData = True`) includes real data. + +## getCreateService — serialization via serviceLocks + +Entire operation wrapped in `withLockMap (serviceLocks st) fp`, serializing all creation/lookup for the same certificate fingerprint. Inside the lock: SELECT by `service_cert_hash`, if not found attempt INSERT catching `UniqueViolation`. The `serviceLocks` map grows unboundedly — no cleanup mechanism. + +## batchInsertQueues — COPY protocol with manual CSV serialization + +Uses PostgreSQL's `COPY FROM STDIN WITH (FORMAT CSV)` for bulk import. Queue records manually serialized via `queueRecToText`/`renderField`. This must stay in sync with `insertQueueQuery` column order — a mismatch causes silent data corruption. The `renderField` function does not escape CSV metacharacters, which is safe only because field values (entity IDs, keys, DH secrets) are binary data without commas/quotes/newlines. Runs in a single transaction; row count queried in a separate transaction afterward. + +## withLog_ — fire-and-forget store log writes + +`withLog_` catches all exceptions via `catchAny` and logs a warning, but does not fail the operation. Store log writes are best-effort. Contrast with the STM `withLog'` where log failures can propagate as `STORE` errors. In the Postgres implementation, the store log can fall behind the database state since the DB is the authoritative persistence layer. + +## useCache flag — behavioral bifurcation + +`useCache :: Bool` creates two distinct code paths. 
When `False`: `addQueue_` skips all TMap updates, `getQueue_` always loads from DB, `addQueueNotifier` skips cache duplicate check, `deleteStoreQueue` skips cache cleanup. Notably, `loadQueueNoCache` still creates queues with `mkQ True` (persistent lock) even though caching is disabled — the lock is needed for `withQueueRec`'s `withQueueLock`. + +## getServiceQueueCountHash — behavioral divergence from STM + +Postgres returns `Right (0, mempty)` when the service is not found (via `maybeFirstRow'` default). STM returns `Left AUTH`. Same logical condition, different error handling. Callers that expect AUTH on missing service will silently get a zero count from Postgres. + +## deleteStoreQueue — cross-module lock contract + +See comment on `deleteStoreQueue`: "this method is called from JournalMsgStore deleteQueue that already locks the queue." Unlike other mutations that go through `withQueueRec` (which acquires the lock), `deleteStoreQueue` uses `E.uninterruptibleMask_ $ runExceptT` directly — no `withQueueLock`. The caller must hold the lock. + +## addQueueLinkData — immutable data protection + +When link data already exists with the same `lnkId`, the SQL UPDATE adds `AND (fixed_data IS NULL OR fixed_data = ?)` to prevent overwriting immutable (fixed) data. If the immutable portion doesn't match, `assertUpdated` triggers AUTH. This enforces the invariant that `fixed_data` can only be set once. + +## assertUpdated — AUTH is overloaded + +`assertUpdated` checks that non-zero rows were affected. Zero rows → `AUTH`. This is the same error code returned for "not found" (via `readQueueRecIO`) and "duplicate" (via `handleDuplicate`). The actual cause — stale cache, deleted queue, or constraint violation — is indistinguishable in logs. 
diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore/QueueInfo.md b/spec/modules/Simplex/Messaging/Server/QueueStore/QueueInfo.md new file mode 100644 index 0000000000..b0ca648777 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/QueueStore/QueueInfo.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.QueueStore.QueueInfo + +> Data types for queue info display (control port), with JSON encoding. + +**Source**: [`QueueInfo.hs`](../../../../../../src/Simplex/Messaging/Server/QueueStore/QueueInfo.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md b/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md new file mode 100644 index 0000000000..6ff8da3b5c --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md @@ -0,0 +1,37 @@ +# Simplex.Messaging.Server.QueueStore.STM + +> In-memory STM queue store: queue CRUD with store log journaling and service tracking. + +**Source**: [`STM.hs`](../../../../../../src/Simplex/Messaging/Server/QueueStore/STM.hs) + +## addQueue_ — atomic multi-ID DUPLICATE check + +`addQueue_` checks ALL entity IDs (recipient, sender, notifier, link) for existence in a single STM transaction. If ANY already exist, returns `DUPLICATE_` without inserting anything. This prevents partial state where some IDs were inserted before the duplicate was detected on another. The `mkQ` callback runs outside STM before the check — the queue object is created optimistically and discarded if the check fails. + +## getCreateService — outside-STM with role validation + +`getCreateService` uses the outside-STM lookup pattern (`TM.lookupIO` then STM fallback). When a service cert already exists, `checkService` validates the role matches — a cert attempting to register with a different `SMPServiceRole` gets `SERVICE` error. A new service is only created if the ID is not already in `services` (prevents DUPLICATE). 
The `(serviceId, True/False)` return indicates whether the log should be written (only for new services). + +## IdsHash XOR in setServiceQueues_ + +Both `addServiceQueue` and `removeServiceQueue` use `setServiceQueues_`, which unconditionally XORs `queueIdHash qId` into `idsHash`. Since XOR is self-inverse, removal cancels addition. However, the XOR is applied blindly — there is no `S.member` guard. If `addServiceQueue` were called twice for the same `qId`, the XOR would self-cancel while the `Set` (via `S.insert` idempotency) retains the element, making hash and Set inconsistent. Similarly, `removeServiceQueue` on a non-member XORs a phantom ID into the hash. Correctness relies on callers maintaining the invariant: each `qId` is added exactly once and removed at most once per service. + +## withLog — uninterruptibleMask_ for log integrity + +Store log writes are wrapped in `E.uninterruptibleMask_` — cannot be interrupted by async exceptions during the write. This prevents partial log records that would corrupt the store log file during replay. Synchronous exceptions are caught by `E.try` and converted to `STORE` error (logged, not crashed). + +## secureQueue — idempotent replay + +If `senderKey` already matches the provided key, returns `Right ()`. A different key returns `Left AUTH`. This idempotency is essential for store log replay where the same `SecureQueue` record may be applied multiple times. + +## getQueues_ — map snapshot for batch consistency + +Batch queue lookups (`getQueues_`) read the entire TVar map once with `readTVarIO`, then look up each queue ID in the pure `Map`. This provides a consistent snapshot (all lookups see the same map state) and is more efficient than per-queue IO lookups for large batches. + +## closeQueueStore — non-atomic shutdown + +`closeQueueStore` clears TMaps in separate `atomically` calls, not one transaction. Concurrent operations during shutdown could see partially cleared state. 
This is acceptable because the store log is closed first, and the server should not be processing new requests during shutdown. + +## addQueueLinkData — conditional idempotency + +Re-adding link data with the same `lnkId` and matching first component of `QueueLinkData` succeeds (idempotent replay). Different `lnkId` or mismatched data returns `AUTH`. This handles store log replay where the same `CreateLink` may be applied multiple times. diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore/Types.md b/spec/modules/Simplex/Messaging/Server/QueueStore/Types.md new file mode 100644 index 0000000000..173cbe967d --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/QueueStore/Types.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.QueueStore.Types + +> Type classes for queue store and stored queue operations. + +**Source**: [`Types.hs`](../../../../../../src/Simplex/Messaging/Server/QueueStore/Types.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Server/Stats.md b/spec/modules/Simplex/Messaging/Server/Stats.md new file mode 100644 index 0000000000..056dc4a880 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Stats.md @@ -0,0 +1,39 @@ +# Simplex.Messaging.Server.Stats + +> Server statistics: counters, rolling period tracking, delivery time histograms, proxy stats, service stats. + +**Source**: [`Stats.hs`](../../../../../src/Simplex/Messaging/Server/Stats.hs) + +## Overview + +All stats are `IORef`-based, not STM — individual increments are atomic (`atomicModifyIORef'_`) but multi-field reads are not transactional. `getServerStatsData` reads 30+ IORefs sequentially — the resulting snapshot is temporally smeared, not a point-in-time atomic view. + +## PeriodStats — rolling window with boundary-only reset + +`PeriodStats` maintains three `IORef IntSet` (day, week, month). `updatePeriodStats` hashes the entity ID and inserts into all three periods. 
`periodStatCounts` resets a period's IntSet **only** when the period boundary is reached (day 1 of that period). At non-boundary times, it returns `""` (empty string) — the data keeps accumulating but is not reported. + +See comment on `periodCount`. At day boundary (`periodCount 1 ref`), the day set is atomically swapped to empty and its size returned. Week resets on Monday (day 1 of week), month on the 1st. Periods are independent — day reset does NOT affect week/month accumulation. Each period counts unique queue hashes that were active during that period. + +## Disabled metrics — performance trade-offs + +See comments on `qSubNoMsg` and `subscribedQueues` in the source. `qSubNoMsg` is disabled because counting "subscription with no message" creates too many STM transactions. `subscribedQueues` is disabled because maintaining PeriodStats-style IntSets for all subscribed queues uses too much memory. Both fields are omitted from the stats output entirely. The parser handles old log files that contain these fields: `qSubNoMsg` is silently skipped via `skipInt`, and `subscribedQueues` is parsed but replaced with empty data. + +## TimeBuckets — ceil-aligned bucketing with precision loss + +`updateTimeBuckets` quantizes delivery-to-acknowledgment times into sparse buckets. Exact for 0-5s, then ceil-aligned: 6-30s → 5s buckets, 31-60s → 10s, 61-180s → 30s, 180+s → 60s. The `toBucket` formula uses ``- ((- n) `div` m) * m`` for ceiling division. `sumTime` and `maxTime` preserve exact values; only the histogram is lossy. + +## Serialization backward compatibility — silent data coercion + +The `strP` parser for `ServerStatsData` handles multiple format generations. Old format `qDeleted=` is read as `(value, 0, 0)` — `qDeletedNew` and `qDeletedSecured` default to 0. `qSubNoMsg` is parsed and silently discarded (`skipInt`). `subscribedQueues` is parsed but replaced with empty data. Data loaded from old formats is coerced, not reconstructed — precision is permanently lost.
+ +## Serialization typo — internally consistent + +The field `_srvAssocUpdated` is serialized as `"assocUpdatedt="` (extra 't') in `ServiceStatsData` encoding. The parser expects the same misspelling. Both sides are consistent, so it works — but external systems expecting `assocUpdated=` will fail to parse. + +## atomicSwapIORef for stats logging + +In `logServerStats` (Server.hs), each counter is read and reset via `atomicSwapIORef ref 0`. This is lock-free but means counters are zeroed after each logging interval — values represent delta since last log, not cumulative totals. `qCount` and `msgCount` are exceptions: they're read-only (via `readIORef`) because they track absolute current values, not deltas. + +## setPeriodStats — not thread safe + +See comment on `setPeriodStats`. Uses `writeIORef` (not atomic). Only safe during server startup when no other threads are running. If called concurrently, period data could be corrupted. diff --git a/spec/modules/Simplex/Messaging/Server/StoreLog.md b/spec/modules/Simplex/Messaging/Server/StoreLog.md new file mode 100644 index 0000000000..cef1bdfb2a --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/StoreLog.md @@ -0,0 +1,36 @@ +# Simplex.Messaging.Server.StoreLog + +> Append-only log for queue state changes: write, read/replay, compaction, crash recovery, backup retention. + +**Source**: [`StoreLog.hs`](../../../../../src/Simplex/Messaging/Server/StoreLog.hs) + +## writeStoreLogRecord — atomicity via manual write + +See comment in `writeStoreLogRecord`. `hPutStrLn` breaks writes larger than 1024 bytes into multiple system calls on `LineBuffered` handles, which could interleave with concurrent writes. The solution is manual `B.hPut` (single call for the complete record + newline) plus `hFlush`. `E.uninterruptibleMask_` prevents async exceptions between write and flush — ensures a complete record is always written. 
+ +## readWriteStoreLog — crash recovery state machine + +The `.start` temp backup file provides crash recovery during compaction. The sequence: + +1. Read existing log, replay into memory +2. Rename log to `.start` (atomic rename = backup point) +3. Write compacted state to new file +4. Rename `.start` to timestamped backup, remove old backups + +If the server crashes during step 3, the next startup detects `.start` and restores from it instead of the incomplete new file. Any partially-written current file is preserved as `.bak`. The comment says "do not terminate" during compaction — there is no safe interrupt point between steps 2 and 4. + +## removeStoreLogBackups — layered retention policy + +Backup retention is layered: (1) keep all backups newer than 24 hours, (2) of the rest, keep at least 3, (3) of those eligible for deletion, only delete backups older than 21 days. This means a server with infrequent restarts accumulates many backups (only cleaned on startup), while a frequently-restarting server keeps a rolling window. Backup timestamps come from ISO 8601 suffixes parsed from filenames. + +## QueueRec StrEncoding — backward-compatible parsing + +The `strP` parser handles two field name generations: old format `sndSecure=` (boolean, mapping `True` → `QMMessaging`, `False` → `QMContact`) and new format `queue_mode=`. Missing queue mode defaults to `Nothing` with the comment "unknown queue mode, we cannot imply that it is contact address." `EntityActive` status is implicit — not written to the log, and parsed as default when `status=` is absent. + +## openReadStoreLog — creates file if missing + +`openReadStoreLog` creates an empty file if it doesn't exist. Callers never need to handle "file not found." + +## foldLogLines — EOF flag for batching + +The `action` callback receives a `Bool` indicating whether the current line is the last one. This allows consumers (like `readQueueStore`) to batch operations and flush only on the final line. 
diff --git a/spec/modules/Simplex/Messaging/Server/StoreLog/ReadWrite.md b/spec/modules/Simplex/Messaging/Server/StoreLog/ReadWrite.md new file mode 100644 index 0000000000..c6fd7e7456 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/StoreLog/ReadWrite.md @@ -0,0 +1,17 @@ +# Simplex.Messaging.Server.StoreLog.ReadWrite + +> Store log replay (read) and snapshot (write) for STM queue store. + +**Source**: [`ReadWrite.hs`](../../../../../../src/Simplex/Messaging/Server/StoreLog/ReadWrite.hs) + +## readQueueStore — error-tolerant replay + +Log replay (`readQueueStore`) processes each line independently. Parse errors are printed to stdout and skipped. Operation errors (e.g., queue not found during `SecureQueue` replay) are logged and skipped. A deleted queue encountered during replay (`queueRec` is `Nothing`) logs a warning but does not fail. This means a corrupted log line only loses that single operation, not the entire store. + +## NewService ID validation + +During replay, `getCreateService` may return a different `serviceId` than the one stored in the log (if the service cert already exists with a different ID). This is logged as an error but does not abort replay — the store continues with the ID it assigned. This handles the case where a store log was manually edited or partially corrupted. + +## writeQueueStore — services before queues + +`writeQueueStore` writes services first, then queues. Order matters: when the log is replayed, service IDs must already exist before queues reference them via `rcvServiceId`/`ntfServiceId`. diff --git a/spec/modules/Simplex/Messaging/Server/StoreLog/Types.md b/spec/modules/Simplex/Messaging/Server/StoreLog/Types.md new file mode 100644 index 0000000000..4918152823 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/StoreLog/Types.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Server.StoreLog.Types + +> GADT wrapper for file handles with type-level IOMode enforcement. 
+ +**Source**: [`Types.hs`](../../../../../../src/Simplex/Messaging/Server/StoreLog/Types.hs) + +No non-obvious behavior. See source. Constructors are intentionally not exported — callers must use `openWriteStoreLog`/`openReadStoreLog`. diff --git a/spec/modules/Simplex/Messaging/Server/Web.md b/spec/modules/Simplex/Messaging/Server/Web.md new file mode 100644 index 0000000000..716845aa5c --- /dev/null +++ b/spec/modules/Simplex/Messaging/Server/Web.md @@ -0,0 +1,21 @@ +# Simplex.Messaging.Server.Web + +> Static site generation, serving (HTTP, HTTPS, HTTP/2), and template rendering for the server info page. + +**Source**: [`Web.hs`](../../../../../src/Simplex/Messaging/Server/Web.hs) + +## attachStaticFiles — reusing Warp internals for TLS connections + +`attachStaticFiles` receives already-established TLS connections (which passed TLS handshake and ALPN check in the SMP transport layer) and runs Warp's HTTP handler on them. It manually calls `WI.withII`, `WT.attachConn`, `WI.registerKillThread`, and `WI.serveConnection` — internal Warp APIs. This couples the server to Warp internals and could break on Warp library updates. + +## serveStaticPageH2 — path traversal protection + +The H2 static file server uses `canonicalizePath` to resolve symlinks and `..` components, then checks the resolved path is a prefix of `canonicalRoot`. The caller must pre-compute `canonicalRoot` via `canonicalizePath` for the check to work. Without pre-canonicalization, a symlink in the root itself could defeat the protection. + +## .well-known path rewriting + +Both WAI (`changeWellKnownPath`) and H2 (`rewriteWellKnownH2`) rewrite `/.well-known/` to `/well-known/` because `staticApp` does not serve hidden directories (dot-prefixed). The generated site uses `well-known/` as the physical directory. If one rewrite path is updated without the other, the served files diverge between HTTP/1.1 and HTTP/2. 
+ +## section_ / item_ — template rendering + +`render` applies substitutions to HTML templates using `<x-label>...</x-label>` section markers and `${label}` item markers. When a substitution value is `Nothing`, the entire section (including content between markers) is removed. `section_` recurses to handle multiple occurrences of the same section. `item_` is a simple find-and-replace. The section end marker is mandatory — a missing end marker calls `error` (crashes). diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 3d977dc8c4..bce73d3387 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -247,6 +247,7 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt closeServer :: M s () closeServer = asks (smpAgent . proxyAgent) >>= liftIO . closeSMPClientAgent + -- spec: spec/modules/Simplex/Messaging/Server.md#serverthread--subscription-lifecycle-with-split-stm serverThread :: forall sub. String -> Server s -> @@ -1223,6 +1224,7 @@ disconnectTransport THandle {connection, params = THandleParams {sessionId}} rcv data VerificationResult s = VRVerified (Maybe (StoreQueue s, QueueRec)) | VRFailed ErrorType +-- spec: spec/modules/Simplex/Messaging/Server.md#constant-time-authorization--dummy-keys -- This function verifies queue command authorization, with the objective to have constant time between the three AUTH error scenarios: -- - the queue and party key exist, and the provided authorization has type matching queue key, but it is made with the different key. -- - the queue and party key exist, but the provided authorization has incorrect type. @@ -1982,6 +1984,7 @@ client -- If the queue is not full, then the thread is created where these checks are made: -- - it is the same subscribed client (in case it was reconnected it would receive message via SUB command) -- - nothing was delivered to this subscription (to avoid race conditions with the recipient).
+ -- spec: spec/modules/Simplex/Messaging/Server.md#trydelivermessage--syncasync-split-delivery tryDeliverMessage :: Message -> IO () tryDeliverMessage msg = -- the subscribed client var is read outside of STM to avoid transaction cost @@ -2063,6 +2066,7 @@ client encNMsgMeta = C.cbEncrypt rcvNtfDhSecret ntfNonce (smpEncode msgMeta) 128 pure $ MsgNtf {ntfMsgId = msgId, ntfTs = msgTs, ntfNonce, ntfEncMeta = fromRight "" encNMsgMeta} + -- spec: spec/modules/Simplex/Messaging/Server.md#proxy-forwarding--single-transmission-no-service-identity processForwardedCommand :: EncFwdTransmission -> M s BrokerMsg processForwardedCommand (EncFwdTransmission s) = fmap (either ERR RRES) . runExceptT $ do THAuthServer {serverPrivKey, sessSecret'} <- maybe (throwE $ transportErr TENoServerAuth) pure (thAuth thParams') diff --git a/src/Simplex/Messaging/Server/Env/STM.hs b/src/Simplex/Messaging/Server/Env/STM.hs index 574111c15e..b4a275922e 100644 --- a/src/Simplex/Messaging/Server/Env/STM.hs +++ b/src/Simplex/Messaging/Server/Env/STM.hs @@ -368,6 +368,7 @@ data ServerSubscribers s = ServerSubscribers pendingEvents :: TVar (IntMap (NonEmpty (EntityId, BrokerMsg))) } +-- spec: spec/modules/Simplex/Messaging/Server/Env/STM.md#subscribedclients--tvar-of-maybe-pattern -- not exported, to prevent accidental concurrent Map lookups inside STM transactions. -- Map stores TVars with pointers to the clients rather than client ID to allow reading the same TVar -- inside transactions to ensure that transaction is re-evaluated in case subscriber changes. 
diff --git a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs index edf7f481cd..1617c1c915 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs @@ -76,6 +76,7 @@ data PostgresQueue = PostgresQueue queueRec' :: TVar (Maybe QueueRec) } +-- spec: spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md#msgqueue-is-unit-type instance StoreQueueClass PostgresQueue where recipientId = recipientId' {-# INLINE recipientId #-} diff --git a/src/Simplex/Messaging/Server/MsgStore/Types.hs b/src/Simplex/Messaging/Server/MsgStore/Types.hs index acb661a408..12566ec2fa 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Types.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Types.hs @@ -49,6 +49,7 @@ import Simplex.Messaging.Server.QueueStore import Simplex.Messaging.Server.QueueStore.Types import Simplex.Messaging.Util ((<$$>), ($>>=)) +-- spec: spec/modules/Simplex/Messaging/Server/MsgStore/Types.md#injective-type-families--unambiguous-type-resolution class (Monad (StoreMonad s), QueueStoreClass (StoreQueue s) (QueueStore s)) => MsgStoreClass s where type StoreMonad s = (m :: Type -> Type) | m -> s type MsgStoreConfig s = c | c -> s diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs index a8c8c040aa..bba58e35bc 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs @@ -169,6 +169,7 @@ instance StoreQueueClass q => QueueStoreClass q (PostgresQueueStore q) where (SRMessaging, SRNotifier) pure EntityCounts {queueCount, notifierCount, rcvServiceCount, ntfServiceCount, rcvServiceQueuesCount, ntfServiceQueuesCount} + -- spec: spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md#addqueue_--no-in-memory-duplicate-check-relies-on-db-constraint -- this implementation assumes that the lock is already taken by addQueue -- and relies 
on unique constraints in the database to prevent duplicate IDs. addQueue_ :: PostgresQueueStore q -> (RecipientId -> QueueRec -> IO q) -> RecipientId -> QueueRec -> IO (Either ErrorType q) diff --git a/src/Simplex/Messaging/Server/QueueStore/STM.hs b/src/Simplex/Messaging/Server/QueueStore/STM.hs index 3a236076c4..110a9cd33d 100644 --- a/src/Simplex/Messaging/Server/QueueStore/STM.hs +++ b/src/Simplex/Messaging/Server/QueueStore/STM.hs @@ -116,6 +116,7 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where serviceCount role = M.foldl' (\ !n s -> if serviceRole (serviceRec s) == role then n + 1 else n) 0 serviceQueuesCount serviceSel = foldM (\n s -> (n +) . S.size . fst <$> readTVarIO (serviceSel s)) 0 + -- spec: spec/modules/Simplex/Messaging/Server/QueueStore/STM.md#addqueue_--atomic-multi-id-duplicate-check addQueue_ :: STMQueueStore q -> (RecipientId -> QueueRec -> IO q) -> RecipientId -> QueueRec -> IO (Either ErrorType q) addQueue_ st mkQ rId qr@QueueRec {senderId = sId, notifier, queueData, rcvServiceId} = do sq <- mkQ rId qr diff --git a/src/Simplex/Messaging/Server/StoreLog.hs b/src/Simplex/Messaging/Server/StoreLog.hs index 4ceb3cddde..8c69b40637 100644 --- a/src/Simplex/Messaging/Server/StoreLog.hs +++ b/src/Simplex/Messaging/Server/StoreLog.hs @@ -96,6 +96,7 @@ data SLRTag | NewService_ | QueueService_ +-- spec: spec/modules/Simplex/Messaging/Server/StoreLog.md#queuerec-strencoding--backward-compatible-parsing instance StrEncoding QueueRec where strEncode QueueRec {recipientKeys, rcvDhSecret, rcvServiceId, senderId, senderKey, queueMode, queueData, notifier, status, updatedAt} = B.concat @@ -242,6 +243,7 @@ closeStoreLog = \case where close_ h = hClose h `catchAny` \e -> logError ("STORE: closeStoreLog, error closing, " <> tshow e) +-- spec: spec/modules/Simplex/Messaging/Server/StoreLog.md#writestorelogrecord--atomicity-via-manual-write writeStoreLogRecord :: StrEncoding r => StoreLog 'WriteMode -> r -> IO () writeStoreLogRecord 
(WriteStoreLog _ h) r = E.uninterruptibleMask_ $ do B.hPut h $ strEncode r `B.snoc` '\n' -- hPutStrLn makes write non-atomic for length > 1024 @@ -289,6 +291,7 @@ logNewService s = writeStoreLogRecord s . NewService logQueueService :: (PartyI p, ServiceParty p) => StoreLog 'WriteMode -> RecipientId -> SParty p -> Maybe ServiceId -> IO () logQueueService s rId party = writeStoreLogRecord s . QueueService rId (ASP party) +-- spec: spec/modules/Simplex/Messaging/Server/StoreLog.md#readwritestorelog--crash-recovery-state-machine readWriteStoreLog :: (FilePath -> s -> IO ()) -> (StoreLog 'WriteMode -> s -> IO ()) -> FilePath -> s -> IO (StoreLog 'WriteMode) readWriteStoreLog readStore writeStore f st = ifM From f7be44981a3685b6ebc9c73cc1de2d9fd588a4b4 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:29:18 +0000 Subject: [PATCH 34/91] SMP router specs --- spec/modules/README.md | 8 ++++++ spec/modules/Simplex/Messaging/Client.md | 14 +++++----- .../modules/Simplex/Messaging/Client/Agent.md | 14 +++++----- .../Simplex/Messaging/Crypto/ShortLink.md | 4 +-- .../Messaging/Notifications/Transport.md | 2 +- spec/modules/Simplex/Messaging/Protocol.md | 8 +++--- spec/modules/Simplex/Messaging/Server.md | 8 +++--- spec/modules/Simplex/Messaging/Server/CLI.md | 4 +-- .../Simplex/Messaging/Server/Control.md | 2 +- .../Simplex/Messaging/Server/Env/STM.md | 8 +++--- .../Simplex/Messaging/Server/Information.md | 2 +- spec/modules/Simplex/Messaging/Server/Main.md | 4 +-- .../Simplex/Messaging/Server/Main/Init.md | 2 +- .../Messaging/Server/MsgStore/Postgres.md | 2 +- .../Messaging/Server/MsgStore/Types.md | 4 +-- .../Simplex/Messaging/Server/NtfStore.md | 2 +- .../Simplex/Messaging/Server/Prometheus.md | 2 +- .../Messaging/Server/QueueStore/STM.md | 2 +- .../modules/Simplex/Messaging/Server/Stats.md | 4 +-- .../Simplex/Messaging/Server/StoreLog.md | 4 +-- spec/modules/Simplex/Messaging/Server/Web.md | 4 
+-- spec/modules/Simplex/Messaging/Transport.md | 26 +++++++++---------- .../Simplex/Messaging/Transport/Server.md | 6 ++--- .../Simplex/Messaging/Transport/Shared.md | 6 ++--- 24 files changed, 75 insertions(+), 67 deletions(-) diff --git a/spec/modules/README.md b/spec/modules/README.md index 7b7666aadb..0a961abdf1 100644 --- a/spec/modules/README.md +++ b/spec/modules/README.md @@ -182,6 +182,14 @@ Before finishing a module doc, ask: If any answer reveals a problem, fix it and repeat from question 1. Only finish when a full pass produces no changes. +## Terminology — the spec as translation boundary + +The protocol documents (`protocol/overview-tjr.md`, `protocol/simplex-messaging.md`, `protocol/agent-protocol.md`) define the canonical terminology. Code uses different names for some of the same concepts. The spec is where the translation happens. + +The most important distinction: SimpleX protocol routers are referred to as "servers" in code. The term "server" was adopted historically because SimpleX routers were implemented as Linux-based software that is deployed in the same way as servers. But the similarity is entirely formal. Functionally, servers serve responses to the requests of their users — that is why the term "server" was adopted for computers and software that provide Internet services. SimpleX protocol routers don't serve responses — they route packets between endpoints, and they have no concept of a user. Functionally they are similar to Internet Protocol routers, but with a resource-based addressing scheme. Further, SimpleX protocol routers are hardware and software agnostic. SimpleX protocols are open and documented, so they can be implemented in any language and run on any architecture. For example, [SimpleGo](https://simplego.dev) is a prototype implementation of the SimpleX protocol stack in C for a microcontroller architecture. + +**The rule**: use protocol terms for concepts, code terms for identifiers.
Write "router" when describing the network node's role, `SMPServer` or `Server.hs` when referencing code. Similarly, "router identity" for the concept (called "server key hash" or "fingerprint" in code). When the distinction matters, bridge explicitly: "the SMP router (implemented by the `Server` module)" or "the `SMPServer` type (representing a router address)." + ## Exclusions - **Individual migration files** (M20XXXXXX_*.hs): Self-describing SQL. No per-migration docs. diff --git a/spec/modules/Simplex/Messaging/Client.md b/spec/modules/Simplex/Messaging/Client.md index a4f7be3525..35fee92262 100644 --- a/spec/modules/Simplex/Messaging/Client.md +++ b/spec/modules/Simplex/Messaging/Client.md @@ -8,7 +8,7 @@ ## Overview -This module implements the client side of the `Protocol` typeclass — connecting to servers, sending commands, receiving responses, and managing connection lifecycle. It is generic over `Protocol v err msg`, instantiated for SMP as `SMPClient` (= `ProtocolClient SMPVersion ErrorType BrokerMsg`). The SMP proxy protocol (PRXY/PFWD/RFWD) is also implemented here. +This module implements the client side of the `Protocol` typeclass — connecting to SMP routers, sending commands, receiving responses, and managing connection lifecycle. It is generic over `Protocol v err msg`, instantiated for SMP as `SMPClient` (= `ProtocolClient SMPVersion ErrorType BrokerMsg`). The SMP proxy protocol (PRXY/PFWD/RFWD) is also implemented here. ## Four concurrent threads — teardown semantics @@ -36,9 +36,9 @@ The double-check pattern (`swapTVar pending False` + `tryTakeTMVar`) handles the `timeoutErrorCount` is reset to 0 in three places: in `getResponse` when a response arrives, in `receive` on every TLS read, and the monitor uses this count to decide when to drop the connection. -## processMsg — server events vs expired responses +## processMsg — router events vs expired responses -When `corrId` is empty, the message is an `STEvent` (server-initiated). 
When non-empty and the request was already expired (`wasPending` is `False`), the response becomes `STResponse` — not discarded, but forwarded to `msgQ` with the original command context. Entity ID mismatch is `STUnexpectedError`. +When `corrId` is empty, the message is an `STEvent` (router-initiated). When non-empty and the request was already expired (`wasPending` is `False`), the response becomes `STResponse` — not discarded, but forwarded to `msgQ` with the original command context. Entity ID mismatch is `STUnexpectedError`. ## nonBlockingWriteTBQueue — fork on full @@ -46,7 +46,7 @@ If `tryWriteTBQueue` returns `False`, a new thread is forked for the blocking wr ## Batch commands do not expire -See comment on `sendBatch`. Batched commands are written with `Nothing` as the request parameter — the send thread skips the `pending` flag check. Individual commands use `Just r` and the send thread checks `pending` after dequeue. The coupling: if the server stops responding, batched commands can block the send queue indefinitely since they have no timeout-based expiry. +See comment on `sendBatch`. Batched commands are written with `Nothing` as the request parameter — the send thread skips the `pending` flag check. Individual commands use `Just r` and the send thread checks `pending` after dequeue. The coupling: if the router stops responding, batched commands can block the send queue indefinitely since they have no timeout-based expiry. ## monitor — quasi-periodic adaptive ping @@ -68,7 +68,7 @@ See comment above `proxySMPCommand` for the 9 error scenarios (0-9) mapping each ## forwardSMPTransmission — proxy-side forwarding -Used by the proxy server to forward `RFWD` to the destination relay. Uses `cbEncryptNoPad`/`cbDecryptNoPad` (no padding) with the session secret from the proxy-relay connection. Response nonce is `reverseNonce` of the request nonce. +Used by the proxy router to forward `RFWD` to the destination relay. 
Uses `cbEncryptNoPad`/`cbDecryptNoPad` (no padding) with the session secret from the proxy-relay connection. Response nonce is `reverseNonce` of the request nonce. ## authTransmission — dual auth with service signature @@ -80,6 +80,6 @@ The service signature is only added when the entity authenticator is non-empty. `action` stores a `Weak ThreadId` (via `mkWeakThreadId`) to the main client thread. `closeProtocolClient` dereferences and kills it. The weak reference allows the thread to be garbage collected if all other references are dropped. -## writeSMPMessage — server-side event injection +## writeSMPMessage — router-side event injection -`writeSMPMessage` writes directly to `msgQ` as `STEvent`, bypassing the entire command/response pipeline. This is used by the server to inject MSG events into the subscription response path. +`writeSMPMessage` writes directly to `msgQ` as `STEvent`, bypassing the entire command/response pipeline. This is used by the router to inject MSG events into the subscription response path. diff --git a/spec/modules/Simplex/Messaging/Client/Agent.md b/spec/modules/Simplex/Messaging/Client/Agent.md index 96e6ff84b9..30fbe2ac23 100644 --- a/spec/modules/Simplex/Messaging/Client/Agent.md +++ b/spec/modules/Simplex/Messaging/Client/Agent.md @@ -6,9 +6,9 @@ ## Overview -This is the "small agent" — used only in servers (SMP proxy, notification server) to manage client connections to other SMP servers. The "big agent" in `Simplex.Messaging.Agent` + `Simplex.Messaging.Agent.Client` serves client applications and adds the full messaging agent layer. See [Two agent layers](../../../../TOPICS.md) topic. +This is the "small agent" — used only in routers (SMP proxy, notification router) to manage client connections to other SMP routers. The "big agent" in `Simplex.Messaging.Agent` + `Simplex.Messaging.Agent.Client` serves client applications and adds the full messaging agent layer. See [Two agent layers](../../../../TOPICS.md) topic. 
-`SMPClientAgent` manages `SMPClient` connections via `smpClients :: TMap SMPServer SMPClientVar` (one per SMP server), tracks active and pending subscriptions, and handles automatic reconnection. It is parameterized by `Party` (`p`) and uses the `ServiceParty` constraint to support both `RecipientService` and `NotifierService` modes. +`SMPClientAgent` manages `SMPClient` connections via `smpClients :: TMap SMPServer SMPClientVar` (one per router), tracks active and pending subscriptions, and handles automatic reconnection. It is parameterized by `Party` (`p`) and uses the `ServiceParty` constraint to support both `RecipientService` and `NotifierService` modes. ## Dual subscription model @@ -19,7 +19,7 @@ Four TMap fields track subscriptions in two dimensions: | **Service** | `activeServiceSubs` (TMap SMPServer (TVar (Maybe (ServiceSub, SessionId)))) | `pendingServiceSubs` (TMap SMPServer (TVar (Maybe ServiceSub))) | | **Queue** | `activeQueueSubs` (TMap SMPServer (TMap QueueId (SessionId, C.APrivateAuthKey))) | `pendingQueueSubs` (TMap SMPServer (TMap QueueId C.APrivateAuthKey)) | -See comments on `activeServiceSubs` and `pendingServiceSubs` for the coexistence rules. Key constraint: only one service subscription per server. Active subs store the `SessionId` that established them. +See comments on `activeServiceSubs` and `pendingServiceSubs` for the coexistence rules. Key constraint: only one service subscription per router. Active subs store the `SessionId` that established them. ## SessionVar compare-and-swap — core concurrency safety @@ -27,11 +27,11 @@ See comments on `activeServiceSubs` and `pendingServiceSubs` for the coexistence ## removeClientAndSubs — outside-STM lookup optimization -See comment on `removeClientAndSubs`. Subscription TVar references are obtained outside STM (via `TM.lookupIO`), then modified inside `atomically`. 
This is safe because the invariant is that subscription TVar entries for a server are never deleted from the outer TMap, only their contents change. Moving lookups inside the STM transaction would cause excessive re-evaluation under contention. +See comment on `removeClientAndSubs`. Subscription TVar references are obtained outside STM (via `TM.lookupIO`), then modified inside `atomically`. This is safe because the invariant is that subscription TVar entries for a router are never deleted from the outer TMap, only their contents change. Moving lookups inside the STM transaction would cause excessive re-evaluation under contention. ## Disconnect preserves others' subscriptions -`updateServiceSub` only moves active→pending when `sessId` matches the disconnected client (see its comment). If a new client already established different subscriptions on the same server, those are preserved. Queue subs use `M.partition` to split by SessionId — only matching subs move to pending, non-matching remain active. +`updateServiceSub` only moves active→pending when `sessId` matches the disconnected client (see its comment). If a new client already established different subscriptions on the same router, those are preserved. Queue subs use `M.partition` to split by SessionId — only matching subs move to pending, non-matching remain active. ## Pending never reset to Nothing on disconnect @@ -63,7 +63,7 @@ When serviceId and sessionId match the existing active subscription, queue count ## CAServiceUnavailable — cascade to queue resubscription -When `smpSubscribeService` detects service ID or role mismatch with the connection, it fires `CAServiceUnavailable`. See comment on `CAServiceUnavailable` for the full implication: the app must resubscribe all queues individually, creating new associations. This can happen if the SMP server reassigns service IDs (e.g., after downgrade and upgrade). 
+When `smpSubscribeService` detects service ID or role mismatch with the connection, it fires `CAServiceUnavailable`. See comment on `CAServiceUnavailable` for the full implication: the app must resubscribe all queues individually, creating new associations. This can happen if the SMP router reassigns service IDs (e.g., after downgrade and upgrade). ## getPending — polymorphic over STM/IO @@ -89,4 +89,4 @@ During reconnection, `reconnectSMPClient` reads current active queue subs (outsi ## addSubs_ — left-biased union -`addSubs_` uses `TM.union` which delegates to `M.union` (left-biased). If a queue subscription already exists, the new auth key from the incoming map wins. Service subs use `writeTVar` (overwrite) since only one service sub exists per server. +`addSubs_` uses `TM.union` which delegates to `M.union` (left-biased). If a queue subscription already exists, the new auth key from the incoming map wins. Service subs use `writeTVar` (overwrite) since only one service sub exists per router. diff --git a/spec/modules/Simplex/Messaging/Crypto/ShortLink.md b/spec/modules/Simplex/Messaging/Crypto/ShortLink.md index 821a30c321..5b83de31a8 100644 --- a/spec/modules/Simplex/Messaging/Crypto/ShortLink.md +++ b/spec/modules/Simplex/Messaging/Crypto/ShortLink.md @@ -12,8 +12,8 @@ Short links encode connection data in two encrypted blobs: fixed data (2048 byte Two distinct HKDF derivations with different info strings: -- **contactShortLinkKdf**: `HKDF("", linkKey, "SimpleXContactLink", 56)` → splits into 24-byte LinkId + 32-byte SbKey. The LinkId is used as the server-side identifier. -- **invShortLinkKdf**: `HKDF("", linkKey, "SimpleXInvLink", 32)` → 32-byte SbKey only. No LinkId because invitation links don't use server-side lookup. +- **contactShortLinkKdf**: `HKDF("", linkKey, "SimpleXContactLink", 56)` → splits into 24-byte LinkId + 32-byte SbKey. The LinkId is used as the router-side identifier. 
+- **invShortLinkKdf**: `HKDF("", linkKey, "SimpleXInvLink", 32)` → 32-byte SbKey only. No LinkId because invitation links don't use router-side lookup. ## Fixed padding lengths diff --git a/spec/modules/Simplex/Messaging/Notifications/Transport.md b/spec/modules/Simplex/Messaging/Notifications/Transport.md index dd4564738b..7c7955154e 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Transport.md +++ b/spec/modules/Simplex/Messaging/Notifications/Transport.md @@ -29,7 +29,7 @@ The NTF protocol reuses SMP's transport infrastructure but with reduced paramete ## Same ALPN/legacy fallback pattern as SMP -`ntfServerHandshake` uses the same pattern as `smpServerHandshake`: if ALPN is not negotiated (`getSessionALPN` returns `Nothing`), the server offers only `legacyServerNTFVRange` (v1 only). +`ntfServerHandshake` uses the same pattern as `smpServerHandshake`: if ALPN is not negotiated (`getSessionALPN` returns `Nothing`), the notification router offers only `legacyServerNTFVRange` (v1 only). ## NTF handshake uses SMP shared types diff --git a/spec/modules/Simplex/Messaging/Protocol.md b/spec/modules/Simplex/Messaging/Protocol.md index dc1328cdfe..2ed7113c8b 100644 --- a/spec/modules/Simplex/Messaging/Protocol.md +++ b/spec/modules/Simplex/Messaging/Protocol.md @@ -8,11 +8,11 @@ ## Overview -This module defines the SMP protocol's type-level structure, wire encoding, and transport batching. It does not implement the server or client — those are in [Server.hs](./Server.md) and [Client.hs](./Client.md). The protocol spec governs the command semantics; this doc focuses on non-obvious implementation choices. +This module defines the SMP protocol's type-level structure, wire encoding, and transport batching. It does not implement the router or client — those are in [Server.hs](./Server.md) and [Client.hs](./Client.md). The protocol spec governs the command semantics; this doc focuses on non-obvious implementation choices. 
## Two separate version scopes -SMP client protocol version (`SMPClientVersion`, 4 versions) is separate from SMP relay protocol version (`SMPVersion`, up to version 19, defined in [Transport.hs](./Transport.md)). The client version governs client-to-client concerns (binary encoding, multi-host addresses, SKEY command, short links). The relay version governs client-to-server wire format, transport encryption, and command availability. See comment above `SMPClientVersion` data declaration for version history. +SMP client protocol version (`SMPClientVersion`, 4 versions) is separate from SMP relay protocol version (`SMPVersion`, up to version 19, defined in [Transport.hs](./Transport.md)). The client version governs client-to-client concerns (binary encoding, multi-host addresses, SKEY command, short links). The relay version governs client-to-router wire format, transport encryption, and command availability. See comment above `SMPClientVersion` data declaration for version history. ## maxMessageLength — version-dependent @@ -57,7 +57,7 @@ The `MsgFlags` parser consumes the `notification` Bool then calls `A.takeTill (= ## BrokerErrorType NETWORK — detail loss -The `NETWORK` variant of `BrokerErrorType` encodes as just `"NETWORK"` (detail dropped), with `TODO once all upgrade` comment. The parser falls back to `NEFailedError` when the `NetworkError` detail can't be parsed (`_smpP <|> pure NEFailedError`). This means a newer server's detailed network error is seen as `NEFailedError` by older clients. +The `NETWORK` variant of `BrokerErrorType` encodes as just `"NETWORK"` (detail dropped), with `TODO once all upgrade` comment. The parser falls back to `NEFailedError` when the `NetworkError` detail can't be parsed (`_smpP <|> pure NEFailedError`). This means a newer router's detailed network error is seen as `NEFailedError` by older clients. 
## Version-dependent encoding — scope @@ -65,4 +65,4 @@ The `NETWORK` variant of `BrokerErrorType` encodes as just `"NETWORK"` (detail d ## SUBS/NSUBS — asymmetric defaulting -When the server parses `SUBS`/`NSUBS` from a client using a version older than `rcvServiceSMPVersion`, both count and hash default (`-1` and `mempty`). For the response side (`SOKS`/`ENDS` via `serviceRespP`), count is still parsed from the wire — only hash defaults to `mempty`. This asymmetry means command-side and response-side parsing have different fallback behavior for the same version boundary. +When the router parses `SUBS`/`NSUBS` from a client using a version older than `rcvServiceSMPVersion`, both count and hash default (`-1` and `mempty`). For the response side (`SOKS`/`ENDS` via `serviceRespP`), count is still parsed from the wire — only hash defaults to `mempty`. This asymmetry means command-side and response-side parsing have different fallback behavior for the same version boundary. diff --git a/spec/modules/Simplex/Messaging/Server.md b/spec/modules/Simplex/Messaging/Server.md index 0ed6e43e19..8d23404c99 100644 --- a/spec/modules/Simplex/Messaging/Server.md +++ b/spec/modules/Simplex/Messaging/Server.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server -> SMP server: client handling, subscription lifecycle, message delivery, proxy forwarding, control port. +> SMP router (`Server` module): client handling, subscription lifecycle, message delivery, proxy forwarding, control port. **Source**: [`Server.hs`](../../../../src/Simplex/Messaging/Server.hs) @@ -8,7 +8,7 @@ ## Overview -The server runs as `raceAny_` over many threads — any thread exit stops the entire server. The thread set includes: one `serverThread` per subscription type (SMP, NTF), a notification delivery thread, a pending events thread, a proxy agent receiver, a SIGINT handler, plus per-transport listener threads and optional expiration/stats/prometheus/control-port threads. 
`E.finally` ensures `stopServer` runs on any exit. +The router runs as `raceAny_` over many threads — any thread exit stops the entire router process. The thread set includes: one `serverThread` per subscription type (SMP, NTF), a notification delivery thread, a pending events thread, a proxy agent receiver, a SIGINT handler, plus per-transport listener threads and optional expiration/stats/prometheus/control-port threads. `E.finally` ensures `stopServer` runs on any exit. ## serverThread — subscription lifecycle with split STM @@ -51,7 +51,7 @@ When the signature algorithm doesn't match the queue key, verification runs with ## Service subscription — hash-based drift detection -See comment on `sharedSubscribeService`. The client sends expected `(count, idsHash)`. The server reads the actual values from storage, then computes `subsChange = subtractServiceSubs currSubs subs'` — the **difference** between what the client's session currently tracks and the new values. This difference (not the absolute values) is passed to `serverThread` via `CSService` to adjust `totalServiceSubs`. Using differences prevents double-counting when a service resubscribes. +See comment on `sharedSubscribeService`. The client sends expected `(count, idsHash)`. The router reads the actual values from storage, then computes `subsChange = subtractServiceSubs currSubs subs'` — the **difference** between what the client's session currently tracks and the new values. This difference (not the absolute values) is passed to `serverThread` via `CSService` to adjust `totalServiceSubs`. Using differences prevents double-counting when a service resubscribes. Stats classification: exactly one of `srvSubOk`/`srvSubMore`/`srvSubFewer`/`srvSubDiff` is incremented per subscription. `count == -1` is a special case for old NTF servers. @@ -91,7 +91,7 @@ See `noSubscriptions`. 
The idle client disconnect thread only checks expiration ## clientDisconnected — ordered cleanup -On disconnect: (1) set `connected = False`, (2) atomically swap out all subscriptions, (3) cancel subscription threads, (4) if server is still active: delete client from server map, update queue and service subscribers. Service subscription cleanup (`updateServiceSubs`) subtracts the client's accumulated `(count, idsHash)` from `totalServiceSubs`. End threads are swapped out and killed. +On disconnect: (1) set `connected = False`, (2) atomically swap out all subscriptions, (3) cancel subscription threads, (4) if the router is still active: delete client from `serverClients` map, update queue and service subscribers. Service subscription cleanup (`updateServiceSubs`) subtracts the client's accumulated `(count, idsHash)` from `totalServiceSubs`. End threads are swapped out and killed. ## Control port — single auth, no downgrade diff --git a/spec/modules/Simplex/Messaging/Server/CLI.md b/spec/modules/Simplex/Messaging/Server/CLI.md index a369b69811..5747eba8fb 100644 --- a/spec/modules/Simplex/Messaging/Server/CLI.md +++ b/spec/modules/Simplex/Messaging/Server/CLI.md @@ -16,7 +16,7 @@ SMP ports are parsed first. When explicit WebSocket ports are provided, they are ## iniDBOptions — schema creation disabled at CLI -When reading database options from INI, `createSchema` is always set to `False` regardless of INI content. This enforces a security invariant: database schemas must be created manually or by migration, never automatically by the server.
## createServerX509_ — external tool dependency @@ -24,7 +24,7 @@ Certificate generation shells out to `openssl` commands via `readCreateProcess`, ## checkSavedFingerprint — startup invariant -Fingerprint is extracted from the CA certificate and saved during init. On every server start, the saved fingerprint is compared against the current certificate. Mismatch → startup failure. See [Main.md#initializeserver--fingerprint-invariant](./Main.md#initializeserver--fingerprint-invariant). +Fingerprint is extracted from the CA certificate and saved during init. On every router start, the saved fingerprint is compared against the current certificate. Mismatch → startup failure. See [Main.md#initializeserver--fingerprint-invariant](./Main.md#initializeserver--fingerprint-invariant). ## genOnline — existing certificate dependency diff --git a/spec/modules/Simplex/Messaging/Server/Control.md b/spec/modules/Simplex/Messaging/Server/Control.md index 644fb786ac..ddeedff3a5 100644 --- a/spec/modules/Simplex/Messaging/Server/Control.md +++ b/spec/modules/Simplex/Messaging/Server/Control.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server.Control -> Control port protocol types and encoding for server administration. +> Control port protocol types and encoding for router administration. **Source**: [`Control.hs`](../../../../../src/Simplex/Messaging/Server/Control.hs) diff --git a/spec/modules/Simplex/Messaging/Server/Env/STM.md b/spec/modules/Simplex/Messaging/Server/Env/STM.md index d0e9481200..3d990a47d7 100644 --- a/spec/modules/Simplex/Messaging/Server/Env/STM.md +++ b/spec/modules/Simplex/Messaging/Server/Env/STM.md @@ -1,12 +1,12 @@ # Simplex.Messaging.Server.Env.STM -> Server environment, configuration, client state, subscription types, and storage initialization. +> Router environment, configuration, client state, subscription types, and storage initialization. 
**Source**: [`Env/STM.hs`](../../../../../../src/Simplex/Messaging/Server/Env/STM.hs) ## Overview -This module defines the server's shared state (`Env`, `Server`, `Client`) and the subscription model types. Most non-obvious patterns are about concurrency safety — preventing STM contention while maintaining consistency. Key patterns are documented in [Server.md](../Server.md) where they're used; this doc covers patterns specific to the type definitions and initialization. +This module defines the router's shared state (`Env`, `Server`, `Client`) and the subscription model types. Most non-obvious patterns are about concurrency safety — preventing STM contention while maintaining consistency. Key patterns are documented in [Server.md](../Server.md) where they're used; this doc covers patterns specific to the type definitions and initialization. ## SubscribedClients — TVar-of-Maybe pattern @@ -26,7 +26,7 @@ See comment on `deleteSubcribedClient`. The TVar lookup is in a separate IO read ## insertServerClient — connected check -`insertServerClient` checks `connected` inside the STM transaction before inserting. If the client was already marked disconnected (race with cleanup), the insert is skipped and returns `False`. This prevents resurrecting a disconnected client in the server map. +`insertServerClient` checks `connected` inside the STM transaction before inserting. If the client was already marked disconnected (race with cleanup), the insert is skipped and returns `False`. This prevents resurrecting a disconnected client in the `serverClients` map. ## SupportedStore — compile-time storage validation @@ -34,7 +34,7 @@ Type family with `(Int ~ Bool, TypeError ...)` for invalid combinations. 
The uns ## newEnv — initialization order -Store initialization order matters: (1) create message store (loads store log for STM backends), (2) create notification store (empty TMap), (3) generate TLS credentials, (4) compute server identity from fingerprint, (5) create stats, (6) create proxy agent. The store log load (`loadStoreLog`) calls `readWriteQueueStore` which reads the existing log, replays it to build state, then opens a new log for writing. `setStoreLog` attaches the write log to the store. +Store initialization order matters: (1) create message store (loads store log for STM backends), (2) create notification store (empty TMap), (3) generate TLS credentials, (4) compute router identity from fingerprint, (5) create stats, (6) create proxy agent. The store log load (`loadStoreLog`) calls `readWriteQueueStore` which reads the existing log, replays it to build state, then opens a new log for writing. `setStoreLog` attaches the write log to the store. HTTPS credentials are validated: must be at least 4096-bit RSA (`public_size >= 512` bytes). The check explicitly notes that Let's Encrypt ECDSA uses "insecure curve p256." diff --git a/spec/modules/Simplex/Messaging/Server/Information.md b/spec/modules/Simplex/Messaging/Server/Information.md index 16f153154c..a2efa040c8 100644 --- a/spec/modules/Simplex/Messaging/Server/Information.md +++ b/spec/modules/Simplex/Messaging/Server/Information.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server.Information -> Server public information types (config, operator, hosting) for the server info page. +> Router public information types (config, operator, hosting) for the router info page. 
**Source**: [`Information.hs`](../../../../../src/Simplex/Messaging/Server/Information.hs) diff --git a/spec/modules/Simplex/Messaging/Server/Main.md b/spec/modules/Simplex/Messaging/Server/Main.md index aed5385737..00483d35b7 100644 --- a/spec/modules/Simplex/Messaging/Server/Main.md +++ b/spec/modules/Simplex/Messaging/Server/Main.md @@ -1,12 +1,12 @@ # Simplex.Messaging.Server.Main -> Server CLI entry point: dispatches Init, Start, Delete, Journal, and Database commands. +> Router CLI entry point: dispatches Init, Start, Delete, Journal, and Database commands. **Source**: [`Main.hs`](../../../../../src/Simplex/Messaging/Server/Main.hs) ## Overview -This is the CLI dispatcher for the SMP server. It parses INI configuration, validates storage mode combinations, and dispatches to the appropriate command handler. The most complex logic is storage configuration validation and migration between storage modes. +This is the CLI dispatcher for the SMP router. It parses INI configuration, validates storage mode combinations, and dispatches to the appropriate command handler. The most complex logic is storage configuration validation and migration between storage modes. ## Storage mode compatibility — state machine diff --git a/spec/modules/Simplex/Messaging/Server/Main/Init.md b/spec/modules/Simplex/Messaging/Server/Main/Init.md index 665938ae8f..2472164d06 100644 --- a/spec/modules/Simplex/Messaging/Server/Main/Init.md +++ b/spec/modules/Simplex/Messaging/Server/Main/Init.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server.Main.Init -> Server initialization: INI file content generation, default settings, and CLI option structures. +> Router initialization: INI file content generation, default settings, and CLI option structures. 
**Source**: [`Main/Init.hs`](../../../../../../src/Simplex/Messaging/Server/Main/Init.hs) diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md b/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md index 7262bde0a6..eaeca3b903 100644 --- a/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server.MsgStore.Postgres -> PostgreSQL message store: server-side stored procedures for message operations, COPY protocol for bulk import. +> PostgreSQL message store: router-side stored procedures for message operations, COPY protocol for bulk import. **Source**: [`Postgres.hs`](../../../../../../src/Simplex/Messaging/Server/MsgStore/Postgres.hs) diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md b/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md index 2fd4c79bf7..c57aedb0b4 100644 --- a/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/Types.md @@ -14,7 +14,7 @@ All associated types (`StoreMonad`, `MsgQueue`, `StoreQueue`, `QueueStore`, `Msg ## tryDelPeekMsg — atomic delete-and-peek -Deletes the current message AND peeks the next one in a single `isolateQueue` call. This atomicity is critical for the ACK flow: the server needs to know if there's a next message to deliver immediately after acknowledging the current one, without a window where a concurrent SEND could interleave. +Deletes the current message AND peeks the next one in a single `isolateQueue` call. This atomicity is critical for the ACK flow: the router needs to know if there's a next message to deliver immediately after acknowledging the current one, without a window where a concurrent SEND could interleave. 
## withIdleMsgQueue — journal-specific lifecycle @@ -22,7 +22,7 @@ For Journal store, the message queue file handle is closed after the action if i ## unsafeWithAllMsgQueues — CLI-only -Explicitly unsafe: iterates all queues including those not in active memory. Only safe before server start or in CLI commands. During normal operation, Journal store may have queues on disk but not loaded — this function would load them, interfering with the lazy-loading lifecycle. +Explicitly unsafe: iterates all queues including those not in active memory. Only safe before router start or in CLI commands. During normal operation, Journal store may have queues on disk but not loaded — this function would load them, interfering with the lazy-loading lifecycle. ## snapshotTQueue visibility gap diff --git a/spec/modules/Simplex/Messaging/Server/NtfStore.md b/spec/modules/Simplex/Messaging/Server/NtfStore.md index b58a44fad5..fca054f8bd 100644 --- a/spec/modules/Simplex/Messaging/Server/NtfStore.md +++ b/spec/modules/Simplex/Messaging/Server/NtfStore.md @@ -6,7 +6,7 @@ ## storeNtf — outside-STM lookup with STM fallback -`storeNtf` uses `TM.lookupIO` outside STM, then falls back to `TM.lookup` inside STM if the notifier entry doesn't exist. This is the same outside-STM lookup pattern used in Server.hs and Client/Agent.hs — avoids transaction re-evaluation from unrelated map changes. The double-check inside STM prevents races when two messages arrive concurrently for a new notifier. +`storeNtf` uses `TM.lookupIO` outside STM, then falls back to `TM.lookup` inside STM if the notifier entry doesn't exist. This is the same outside-STM lookup pattern used in the router (`Server.hs`) and `Client/Agent.hs` — avoids transaction re-evaluation from unrelated map changes. The double-check inside STM prevents races when two messages arrive concurrently for a new notifier. 
## deleteExpiredNtfs — last-is-earliest optimization diff --git a/spec/modules/Simplex/Messaging/Server/Prometheus.md b/spec/modules/Simplex/Messaging/Server/Prometheus.md index 11610ee239..626459dd1b 100644 --- a/spec/modules/Simplex/Messaging/Server/Prometheus.md +++ b/spec/modules/Simplex/Messaging/Server/Prometheus.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server.Prometheus -> Prometheus text exposition format for server metrics, with histogram gap-filling and derived aggregations. +> Prometheus text exposition format for router metrics, with histogram gap-filling and derived aggregations. **Source**: [`Prometheus.hs`](../../../../../src/Simplex/Messaging/Server/Prometheus.hs) diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md b/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md index 6ff8da3b5c..b7c5d05930 100644 --- a/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md +++ b/spec/modules/Simplex/Messaging/Server/QueueStore/STM.md @@ -30,7 +30,7 @@ Batch queue lookups (`getQueues_`) read the entire TVar map once with `readTVarI ## closeQueueStore — non-atomic shutdown -`closeQueueStore` clears TMaps in separate `atomically` calls, not one transaction. Concurrent operations during shutdown could see partially cleared state. This is acceptable because the store log is closed first, and the server should not be processing new requests during shutdown. +`closeQueueStore` clears TMaps in separate `atomically` calls, not one transaction. Concurrent operations during shutdown could see partially cleared state. This is acceptable because the store log is closed first, and the router should not be processing new requests during shutdown. 
## addQueueLinkData — conditional idempotency diff --git a/spec/modules/Simplex/Messaging/Server/Stats.md b/spec/modules/Simplex/Messaging/Server/Stats.md index 056dc4a880..e17620d2d9 100644 --- a/spec/modules/Simplex/Messaging/Server/Stats.md +++ b/spec/modules/Simplex/Messaging/Server/Stats.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Server.Stats -> Server statistics: counters, rolling period tracking, delivery time histograms, proxy stats, service stats. +> Router statistics: counters, rolling period tracking, delivery time histograms, proxy stats, service stats. **Source**: [`Stats.hs`](../../../../../src/Simplex/Messaging/Server/Stats.hs) @@ -36,4 +36,4 @@ In `logServerStats` (Server.hs), each counter is read and reset via `atomicSwapI ## setPeriodStats — not thread safe -See comment on `setPeriodStats`. Uses `writeIORef` (not atomic). Only safe during server startup when no other threads are running. If called concurrently, period data could be corrupted. +See comment on `setPeriodStats`. Uses `writeIORef` (not atomic). Only safe during router startup when no other threads are running. If called concurrently, period data could be corrupted. diff --git a/spec/modules/Simplex/Messaging/Server/StoreLog.md b/spec/modules/Simplex/Messaging/Server/StoreLog.md index cef1bdfb2a..9c7e49127e 100644 --- a/spec/modules/Simplex/Messaging/Server/StoreLog.md +++ b/spec/modules/Simplex/Messaging/Server/StoreLog.md @@ -17,11 +17,11 @@ The `.start` temp backup file provides crash recovery during compaction. The seq 3. Write compacted state to new file 4. Rename `.start` to timestamped backup, remove old backups -If the server crashes during step 3, the next startup detects `.start` and restores from it instead of the incomplete new file. Any partially-written current file is preserved as `.bak`. The comment says "do not terminate" during compaction — there is no safe interrupt point between steps 2 and 4. 
+If the router crashes during step 3, the next startup detects `.start` and restores from it instead of the incomplete new file. Any partially-written current file is preserved as `.bak`. The comment says "do not terminate" during compaction — there is no safe interrupt point between steps 2 and 4. ## removeStoreLogBackups — layered retention policy -Backup retention is layered: (1) keep all backups newer than 24 hours, (2) of the rest, keep at least 3, (3) of those eligible for deletion, only delete backups older than 21 days. This means a server with infrequent restarts accumulates many backups (only cleaned on startup), while a frequently-restarting server keeps a rolling window. Backup timestamps come from ISO 8601 suffixes parsed from filenames. +Backup retention is layered: (1) keep all backups newer than 24 hours, (2) of the rest, keep at least 3, (3) of those eligible for deletion, only delete backups older than 21 days. This means a router with infrequent restarts accumulates many backups (only cleaned on startup), while a frequently-restarting router keeps a rolling window. Backup timestamps come from ISO 8601 suffixes parsed from filenames. ## QueueRec StrEncoding — backward-compatible parsing diff --git a/spec/modules/Simplex/Messaging/Server/Web.md b/spec/modules/Simplex/Messaging/Server/Web.md index 716845aa5c..eb8449ef87 100644 --- a/spec/modules/Simplex/Messaging/Server/Web.md +++ b/spec/modules/Simplex/Messaging/Server/Web.md @@ -1,12 +1,12 @@ # Simplex.Messaging.Server.Web -> Static site generation, serving (HTTP, HTTPS, HTTP/2), and template rendering for the server info page. +> Static site generation, serving (HTTP, HTTPS, HTTP/2), and template rendering for the router info page. 
**Source**: [`Web.hs`](../../../../../src/Simplex/Messaging/Server/Web.hs) ## attachStaticFiles — reusing Warp internals for TLS connections -`attachStaticFiles` receives already-established TLS connections (which passed TLS handshake and ALPN check in the SMP transport layer) and runs Warp's HTTP handler on them. It manually calls `WI.withII`, `WT.attachConn`, `WI.registerKillThread`, and `WI.serveConnection` — internal Warp APIs. This couples the server to Warp internals and could break on Warp library updates. +`attachStaticFiles` receives already-established TLS connections (which passed TLS handshake and ALPN check in the SMP transport layer) and runs Warp's HTTP handler on them. It manually calls `WI.withII`, `WT.attachConn`, `WI.registerKillThread`, and `WI.serveConnection` — internal Warp APIs. This couples the router to Warp internals and could break on Warp library updates. ## serveStaticPageH2 — path traversal protection diff --git a/spec/modules/Simplex/Messaging/Transport.md b/spec/modules/Simplex/Messaging/Transport.md index f88188792f..1b4656071b 100644 --- a/spec/modules/Simplex/Messaging/Transport.md +++ b/spec/modules/Simplex/Messaging/Transport.md @@ -10,7 +10,7 @@ This is the core transport module. 
It defines: - The `Transport` typeclass abstracting over TLS and WebSocket connections -- The SMP handshake protocol (server and client sides) +- The SMP handshake protocol (router and client sides) - Optional block encryption using HKDF-derived symmetric key chains (v11+) - Version negotiation with backward-compatible extensions @@ -29,8 +29,8 @@ In practice (Server.hs), the SMP proxy uses `proxiedSMPRelayVRange` to cap the d ## withTlsUnique — different API calls yield same value `withTlsUnique` extracts the tls-unique channel binding (RFC 5929) using a type-level dispatch: -- **Server** (`STServer`): `T.getPeerFinished` — the peer's (client's) Finished message -- **Client** (`STClient`): `T.getFinished` — own (client's) Finished message +- **Router side** (`STServer`): `T.getPeerFinished` — the peer's (client's) Finished message +- **Client side** (`STClient`): `T.getFinished` — own (client's) Finished message Both calls yield the client's Finished message. If the result is `Nothing`, the connection is closed immediately (`closeTLS cxt >> ioe_EOF`). @@ -41,31 +41,31 @@ Two TLS parameter sets: - **`defaultSupportedParams`**: ChaCha20-Poly1305 ciphers only, Ed448/Ed25519 signatures only, X448/X25519 groups. Per the protocol spec: "TLS_CHACHA20_POLY1305_SHA256 cipher suite, ed25519 EdDSA algorithms for signatures, x25519 ECDHE groups for key exchange." - **`defaultSupportedParamsHTTPS`**: extends `defaultSupportedParams` with `ciphersuite_strong`, additional groups, and additional hash/signature combinations. The source comment says: "A selection of extra parameters to accomodate browser chains." -In the SMP server (Server.hs), when HTTP credentials are configured, `defaultSupportedParamsHTTPS` is used for all connections on that port (not selected per-connection). When no HTTP credentials are configured, `defaultSupportedParams` is used. 
+In the SMP router (`Server.hs`), when HTTP credentials are configured, `defaultSupportedParamsHTTPS` is used for all connections on that port (not selected per-connection). When no HTTP credentials are configured, `defaultSupportedParams` is used. ## SMP handshake flow Per the [protocol spec](../../../../protocol/simplex-messaging.md#transport-handshake), the handshake is a two-message exchange (three if service certs are used): -1. **Server → Client**: `paddedRouterHello` containing `smpVersionRange`, `sessionIdentifier` (tls-unique), and `routerCertKey` (certificate chain + X25519 key signed by the server's certificate) -2. **Client → Server**: `paddedClientHello` containing agreed `smpVersion`, `keyHash` (router identity — CA certificate fingerprint), optional `clientKey`, `proxyRouter` flag, and optional `clientService` -3. **Server → Client** (service only): `paddedRouterHandshakeResponse` containing assigned `serviceId` or `handshakeError` +1. **Router → Client**: `paddedRouterHello` containing `smpVersionRange`, `sessionIdentifier` (tls-unique), and `routerCertKey` (certificate chain + X25519 key signed by the router's certificate) +2. **Client → Router**: `paddedClientHello` containing agreed `smpVersion`, `keyHash` (router identity — CA certificate fingerprint), optional `clientKey`, `proxyRouter` flag, and optional `clientService` +3. **Router → Client** (service only): `paddedRouterHandshakeResponse` containing assigned `serviceId` or `handshakeError` -The client verifies `sessionIdentifier` matches its own tls-unique (`when (sessionId /= sessId) $ throwE TEBadSession`). The server verifies `keyHash` matches its CA fingerprint (`when (keyHash /= kh) $ throwE $ TEHandshake IDENTITY`). +The client verifies `sessionIdentifier` matches its own tls-unique (`when (sessionId /= sessId) $ throwE TEBadSession`). The router verifies `keyHash` matches its CA fingerprint (`when (keyHash /= kh) $ throwE $ TEHandshake IDENTITY`). 
Per the protocol spec: "For TLS transport client should assert that sessionIdentifier is equal to tls-unique channel binding defined in RFC 5929." ### legacyServerSMPRelayVRange when no ALPN -If ALPN is not negotiated (`getSessionALPN c` returns `Nothing`), the server offers `legacyServerSMPRelayVRange` (v6 only) instead of the full version range. Per the protocol spec: "If the client does not confirm this protocol name, the router would fall back to v6 of SMP protocol." The spec notes: "This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax." +If ALPN is not negotiated (`getSessionALPN c` returns `Nothing`), the router offers `legacyServerSMPRelayVRange` (v6 only) instead of the full version range. Per the protocol spec: "If the client does not confirm this protocol name, the router would fall back to v6 of SMP protocol." The spec notes: "This is added to allow support of older clients without breaking backward compatibility and to extend or modify handshake syntax." 
### Service certificate handshake extension -When `clientService` is present in the client handshake, the server performs additional verification: +When `clientService` is present in the client handshake, the router performs additional verification: - The TLS client certificate chain must exactly match the certificate chain in the handshake message (`getPeerCertChain c == cc`) - The signed X25519 public key is verified against the leaf certificate's key (`getCertVerifyKey leafCert` then `C.verifyX509`) -- On success, the server sends `SMPServerHandshakeResponse` with a `serviceId` -- On failure, the server sends `SMPServerHandshakeError` before raising the error +- On success, the router sends `SMPServerHandshakeResponse` with a `serviceId` +- On failure, the router sends `SMPServerHandshakeError` before raising the error Per the protocol spec (v16+): "`clientService` provides long-term service client certificate for high-volume services using SMP router (chat relays, notification routers, high traffic bots). The router responds with a third handshake message containing the assigned service ID." @@ -86,7 +86,7 @@ The protocol spec version history (v11) describes this as "additional encryption ## smpTHandleClient — chain key swap -`smpTHandleClient` applies `swap` to the chain key pair before creating `TSbChainKeys`. The code comment states: "swap is needed to use client's sndKey as server's rcvKey and vice versa." +`smpTHandleClient` applies `swap` to the chain key pair before creating `TSbChainKeys`. The code comment states: "swap is needed to use client's sndKey as server's rcvKey and vice versa." (Here "server" is the code's term for the router side of the transport.) 
## Proxy version downgrade logic diff --git a/spec/modules/Simplex/Messaging/Transport/Server.md b/spec/modules/Simplex/Messaging/Transport/Server.md index 181951dcda..8027ef2d72 100644 --- a/spec/modules/Simplex/Messaging/Transport/Server.md +++ b/spec/modules/Simplex/Messaging/Transport/Server.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Transport.Server -> TLS server: socket lifecycle, client acceptance, SNI credential switching, socket leak detection. +> TLS listener: socket lifecycle, client acceptance, SNI credential switching, socket leak detection. **Source**: [`Transport/Server.hs`](../../../../../src/Simplex/Messaging/Transport/Server.hs) @@ -19,10 +19,10 @@ ## SNI credential switching `supportedTLSServerParams` selects TLS credentials based on SNI: -- **No SNI**: uses `credential` (the primary server credential) +- **No SNI**: uses `credential` (the primary router credential) - **SNI present**: uses `sniCredential` (when configured) -The `sniCredUsed` TVar records whether SNI triggered credential switching. In the SMP server (Server.hs), when `sniUsed` is `True`, the connection is dispatched to the HTTP handler instead of the SMP handler. +The `sniCredUsed` TVar records whether SNI triggered credential switching. In the SMP router (`Server.hs`), when `sniUsed` is `True`, the connection is dispatched to the HTTP handler instead of the SMP handler. ## startTCPServer — address resolution diff --git a/spec/modules/Simplex/Messaging/Transport/Shared.md b/spec/modules/Simplex/Messaging/Transport/Shared.md index 8248c068be..1817fd13e2 100644 --- a/spec/modules/Simplex/Messaging/Transport/Shared.md +++ b/spec/modules/Simplex/Messaging/Transport/Shared.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Transport.Shared -> Certificate chain parsing and X.509 validation utilities shared between client and server. +> Certificate chain parsing and X.509 validation utilities shared between client and router. 
**Source**: [`Transport/Shared.hs`](../../../../../src/Simplex/Messaging/Transport/Shared.hs) @@ -19,10 +19,10 @@ | 4 | `CCValid {leafCert, idCert, _, caCert}` | "with network certificate" | | 5+ | `CCLong` | (rejected) | -The protocol spec defines supported chain lengths of 2, 3, and 4 certificates (see [Router certificate](../../../../protocol/simplex-messaging.md#router-certificate)). In all `CCValid` cases, `idCert` is the certificate whose fingerprint is compared against the server address key hash, and `caCert` is used as the X.509 trust anchor. +The protocol spec defines supported chain lengths of 2, 3, and 4 certificates (see [Router certificate](../../../../protocol/simplex-messaging.md#router-certificate)). In all `CCValid` cases, `idCert` is the certificate whose fingerprint is compared against the router identity (key hash in the queue URI), and `caCert` is used as the X.509 trust anchor. In the 4-cert case, index 2 is skipped (`_`) — it is present in the chain but not used as either the identity or the trust anchor. ## x509validate — FQHN check disabled -`x509validate` sets `checkFQHN = False`. The protocol spec identifies servers by certificate fingerprint (key hash in the server address), not by domain name. The validation uses a fresh `ValidationCache` (`ValidationCacheUnknown` for all lookups, no-op store) — each connection validates independently. +`x509validate` sets `checkFQHN = False`. The protocol spec identifies routers by certificate fingerprint (key hash in the queue URI), not by domain name. The validation uses a fresh `ValidationCache` (`ValidationCacheUnknown` for all lookups, no-op store) — each connection validates independently. 
From c8f2edc242ab8ca14f5341496a5bb1359c9daee7 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 07:35:15 +0000 Subject: [PATCH 35/91] spec for agent protocol --- spec/TOPICS.md | 6 + .../Simplex/Messaging/Agent/Protocol.md | 118 ++++++++++++++++++ src/Simplex/Messaging/Agent.hs | 3 +- src/Simplex/Messaging/Agent/Protocol.hs | 3 +- 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 spec/modules/Simplex/Messaging/Agent/Protocol.md diff --git a/spec/TOPICS.md b/spec/TOPICS.md index 6ef029e964..0971071430 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -22,4 +22,10 @@ - **Server subscription architecture**: The SMP server's subscription model spans Server.hs (serverThread split-STM lifecycle, tryDeliverMessage sync/async, ProhibitSub/ServerSub state machine), Env/STM.hs (SubscribedClients TVar-of-Maybe continuity, Client three-queue architecture), and Client/Agent.hs (small agent dual subscription model). The interaction between service subscriptions, direct queue subscriptions, notification subscriptions, and the serverThread subQ processing is not visible from any single module. +- **Duplex connection handshake**: The SMP duplex connection procedure (standard 10-step and fast 7-step) spans Agent.hs (orchestration, state machine), Agent/Protocol.hs (message types: AgentConfirmation/AgentConnInfoReply/AgentInvitation/HELLO, queue status types), Client.hs (SMP command dispatch), Protocol.hs (SMP-level KEY/SKEY commands). The handshake involves two-layer encryption (per-queue E2E + double ratchet), version-dependent paths (v2+ duplex, v6+ sender auth key, v7+ ratchet on confirmation, v9+ fast handshake with SKEY), and the asymmetry between initiating and accepting parties (different message types, different confirmation processing). 
The protocol spec (`agent-protocol.md`) defines the procedure but the implementation details — error handling, state persistence across restarts, race conditions between confirmation and message delivery — are only visible by reading the code across these modules. + +- **Connection links**: Full connection links (URI format with `#/?` query parameters) and binary-encoded links (`Encoding` instances) serve different contexts — URIs for out-of-band sharing, binary for agent-to-agent messages. Each has independent version-conditional encoding with different backward-compat rules (URI parser adjusts agent version ranges for old contact links, binary parser patches `queueMode` for forward compat). The `VersionI`/`VersionRangeI` typeclasses convert between `SMPQueueInfo` (versioned, in confirmations) and `SMPQueueUri` (version-ranged, in links). Full picture requires Agent/Protocol.hs, Protocol.hs, and agent-protocol.md. + +- **Short links**: Short links are a compact representation for sharing via URLs, not a replacement for full connection links — both are used. Short links store encrypted link data on the router and encode only a server hostname, link type character, and key hash in the URL. The link data lifecycle (creation, encryption with key derivation, owner chain-of-trust validation, mutable user data updates) spans Agent/Protocol.hs (types, serialization, owner validation, server shortening/restoration), Agent.hs (link creation and resolution API), and the router-side link storage. The `FixedLinkData`/`ConnLinkData` split (immutable vs mutable), `OwnerAuth` chain validation, and `PreparedLinkParams` pre-computation are not visible from any single module. + - **Outside-STM lookup pattern**: Multiple modules use the pattern of looking up TVar references outside STM (via readTVarIO/TM.lookupIO), then reading/modifying the TVar contents inside STM. This avoids transaction re-evaluation from unrelated map changes. 
Used in: Server.hs (serverThread client lookup, tryDeliverMessage subscriber lookup), Env/STM.hs (deleteSubcribedClient), Client/Agent.hs (removeClientAndSubs, reconnectSMPClient). The safety invariant is that the outer map entries (TVars) are never removed — only their contents change. diff --git a/spec/modules/Simplex/Messaging/Agent/Protocol.md b/spec/modules/Simplex/Messaging/Agent/Protocol.md new file mode 100644 index 0000000000..ad95df8094 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Protocol.md @@ -0,0 +1,118 @@ +# Simplex.Messaging.Agent.Protocol + +> Agent protocol types, wire formats, connection link serialization, and error taxonomy. + +**Source**: [`Agent/Protocol.hs`](../../../../../../src/Simplex/Messaging/Agent/Protocol.hs) + +**Protocol spec**: [`protocol/agent-protocol.md`](../../../../../protocol/agent-protocol.md) — duplex connection procedure, agent message syntax, connection link formats. + +## Overview + +This module defines the agent-level protocol: the types exchanged between agents (via SMP routers) and between agent and client application. It contains no IO — purely types, serialization, and validation logic. + +The module carries two independent version scopes: `SMPAgentVersion` (agent-to-agent protocol, currently v2–v7) and `SMPClientVersion` (agent-to-router protocol, imported from `Protocol.hs`). These version scopes interact but are negotiated independently — see [Protocol.md](../Protocol.md#two-separate-version-scopes). + +## Two-layer message format + +Agent messages use a two-layer envelope structure: + +1. **Outer envelope** (`AgentMsgEnvelope`): version + single-char type tag (`C`/`M`/`I`/`R`) + type-specific payload. The `C` (confirmation) and `M` (message) variants carry double-ratchet encrypted content. The `I` (invitation) variant is encrypted only with per-queue E2E. The `R` (ratchet key) variant carries ratchet renegotiation parameters. + +2. 
**Inner message** (`AgentMessage`): after double-ratchet decryption, discriminated by tags `I`/`D`/`R`/`M`. The `M` variant contains `APrivHeader` (sequential message ID + previous message hash for integrity) followed by `AMessage` (the actual command: `HELLO`, `A_MSG`, queue rotation, etc.). + +The tag characters overlap between layers (`I` means confirmation-conninfo in inner, invitation in outer; `M` means message-envelope in outer, agent-message in inner). These are disambiguated by context — outer parsing happens first, then decryption, then inner parsing. + +## e2eEncConnInfoLength / e2eEncAgentMsgLength — PQ-dependent size budgets + +Connection info and agent message size limits depend on both agent version and PQ support. When PQ is enabled (v5+), the limits are *smaller* — not larger — because the ratchet header and reply link grow with PQ keys (SNTRUP761), consuming space from the fixed SMP message block. The specific reductions (3726 for conninfo, 2222 for messages) are documented in source comments. + +## AgentMsgEnvelope — connInfo encryption asymmetry + +`AgentInvitation` encrypts `connInfo` only with per-queue E2E (no double ratchet) — see source comment. This is because invitations are sent to contact address queues where no ratchet has been established. `AgentConfirmation` encrypts with double ratchet. `AgentRatchetKey` uses per-queue E2E for the ratchet parameters themselves (bootstrapping problem: can't use ratchet to renegotiate the ratchet). + +## AgentMessageType — dual encoding paths + +`AgentMessageType` and `AMsgType` encode overlapping sets of message types but serve different purposes: `AgentMessageType` includes the envelope types (`AM_CONN_INFO`, `AM_CONN_INFO_REPLY`, `AM_RATCHET_INFO`) for database storage, while `AMsgType` only covers the inner `AMessage` types. Both share the same wire tags for the overlapping types (`H`, `M`, `V`, `QC`, `QA`, `QK`, `QU`, `QT`, `E`). 
The `Q`-prefixed types use two-character tags (prefix dispatch); all others use single characters. + +## HELLO — sent once after securing + +`HELLO` is sent exactly once, when the queue is known to be secured (duplex handshake). Not used at all in fast duplex connection (v9+ SMP). The v1 slow handshake (which sent HELLO multiple times until securing succeeded) is no longer supported — `minSupportedSMPAgentVersion = duplexHandshakeSMPAgentVersion` (v2). + +## AEvent entity type system + +`AEvent` is a GADT indexed by `AEntity` (phantom type: `AEConn`, `AERcvFile`, `AESndFile`, `AENone`). This lets the type system reject file events on connection entities and vice versa. The existential wrapper `AEvt` erases the entity type for storage in heterogeneous collections — equality comparison (`Eq AEvt`) uses `testEquality` on the singleton witness to recover type information. + +`AENone` is used for events that aren't associated with any specific entity (e.g., `DOWN`, `UP`, `SUSPENDED`, `DEL_USER`). These are router-level or agent-level notifications, not connection-level. + +## ConnectionMode singleton pattern + +`ConnectionMode` / `SConnectionMode` / `ConnectionModeI` implement the singleton pattern: `SConnectionMode` is the type-level witness, `ConnectionModeI` is the typeclass that lets you recover the singleton from a type parameter. Many types are parameterized by `ConnectionMode` (`ConnectionRequestUri m`, `ConnShortLink m`, `ConnectionLink m`, etc.) to prevent mixing invitation and contact types at compile time. + +`checkConnMode` is the runtime escape hatch — it uses `testEquality` to cast between mode-parameterized types, returning `Left "bad connection mode"` on mismatch. This is used extensively in parsers where the mode is determined at parse time. + +## ConnReqUriData smpP — queueMode patch + +The binary parser for `ConnReqUriData` applies `patchQueueMode` to all queues, setting `queueMode = Just QMContact` when it's `Nothing`. 
See source comment: this compensates for `QMContact` not being included in queue encoding until min SMP client version >= 3. This patch is safe because the binary encoding path was not used before SMP client version 4. + +## Connection link URI parsing — version range adjustment + +`connReqUriP` adjusts the agent version range for contact links: `adjustAgentVRange` clamps the minimum to `minSupportedSMPAgentVersion`. This preserves compatibility with old contact links published online — they may advertise version ranges starting below the current minimum, and clamping prevents negotiation from failing on an unsupported version. + +The semicolon separator for SMP queues in the URI query string is deliberate — commas are used within server addresses to separate hostnames, so semicolons separate queues to avoid ambiguity. + +## Short link encoding — contactConnType as URL path character + +Short links encode `ContactConnType` as a single lowercase letter in the URL path: `a` (contact), `c` (channel), `g` (group), `r` (relay). Invitation links use `i`. The parser uses `toUpper` before dispatching to `ctTypeP` (which expects uppercase), while the encoder uses `toLower` on `ctTypeChar` output. This case dance happens because the wire format wants lowercase URLs but the internal representation uses uppercase. + +## Short link server shortening + +`shortenShortLink` strips port and key hash from preset servers, leaving only the hostname (`SMPServerOnlyHost` pattern). This makes short links shorter for well-known servers. `restoreShortLink` reverses this by looking up the full server definition from the preset list. Both functions match on primary hostname only (first in the `NonEmpty` list). + +`isPresetServer` has a non-obvious port matching rule: empty port in the preset matches `"443"` or `"5223"` in the link. This handles servers that use default ports without explicitly listing them. 
+ +## OwnerAuth — chain-of-trust validation + +`OwnerAuth` is double-encoded: the inner fields are `smpEncode`d, then the result is encoded as a `ByteString` (with length prefix). See source comment: "additionally encoded as ByteString to have known length and allow OwnerAuth extension." The parser uses `parseOnly` on the inner bytes, which silently ignores trailing data — providing forward compatibility for future field additions. + +`validateLinkOwners` enforces a chain-of-trust: each owner must be signed by either the root key or any *preceding* owner in the list. Order matters — an owner signed by a later owner in the list will fail validation. Duplicate keys or IDs are rejected. An owner key matching the root key is rejected (prevents trivial self-authorization). + +## UserLinkData — length-prefix switchover + +`UserLinkData` uses a 1-byte length prefix for data ≤ 254 bytes, switching to a `\255` sentinel byte followed by a 2-byte (`Large`) length prefix for longer data. This is a backward-compatible extension of the standard `smpEncode` string format (which uses 1-byte length, capping at 255 bytes). + +## FixedLinkData / ConnLinkData — forward-compatible parsing + +Both `FixedLinkData` and `ConnLinkData` (invitation variant) consume trailing bytes with `A.takeByteString` after parsing known fields. See source comment: "ignoring tail for forward compatibility with the future link data encoding." This allows newer agents to add fields without breaking older parsers. + +## AgentErrorType — BlockedIndefinitely promotion + +`fromSomeException` in the `AnyError` instance promotes `BlockedIndefinitelyOnSTM` and `BlockedIndefinitelyOnMVar` to `CRITICAL` errors (with `offerRestart = True`) rather than generic `INTERNAL`. These are thread deadlock signals from the GHC runtime — they indicate a program bug, not a transient error. The `CRITICAL` classification with restart offer means the client application should prompt the user. 
+ +## cryptoErrToSyncState — error severity classification + +Maps crypto errors to ratchet sync states: `DECRYPT_AES`, `DECRYPT_CB`, and `RATCHET_EARLIER` map to `RSAllowed` (sync is optional, may self-recover). `RATCHET_HEADER`, `RATCHET_SKIPPED`, and `RATCHET_SYNC` map to `RSRequired` (sync must happen before communication can continue). This classification determines whether the agent automatically initiates ratchet resynchronization. + +## extraSMPServerHosts — hardcoded onion mappings + +Maps clearnet hostnames of preset SMP routers to their `.onion` addresses. `updateSMPServerHosts` adds the onion host as a second hostname when parsing legacy queue URIs that only have one host. This is used for backward compatibility with queue URIs created before multi-host support — modern URIs include all hosts directly. + +## Queue rotation state machines + +`RcvSwitchStatus` and `SndSwitchStatus` encode the two sides of the queue rotation protocol: + +- **Receiver side**: `RSSwitchStarted` → `RSSendingQADD` → `RSSendingQUSE` → `RSReceivedMessage` +- **Sender side**: `SSSendingQKEY` → `SSSendingQTEST` + +The asymmetry reflects the protocol: the receiver initiates rotation and sends more messages (QADD, QUSE), while the sender responds (QKEY, QTEST). These states are persisted to the database — the `StrEncoding` instances use snake_case strings as the canonical serialization format. See [agent-protocol.md — Rotating messaging queue](../../../../../protocol/agent-protocol.md#rotating-messaging-queue). + +## SMPQueueInfo / SMPQueueUri — version duality + +`SMPQueueInfo` (single version) and `SMPQueueUri` (version range) represent the same queue address but in different contexts. `VersionI` / `VersionRangeI` typeclasses convert between them — `toVersionT` pins a version range to a specific version, `toVersionRangeT` wraps a versioned type in a range. 
See source comment on `VersionI SMPClientVersion SMPQueueInfo`: the current conversion is trivial (just swapping the version/range field) but the typeclass exists so that future field additions can have version-dependent conversion logic. + +`SMPQueueInfo` encoding has four version-conditional paths: v1 (legacy server encoding), v2+ (standard encoding), v3+ with secure sender (appends `sndSecure` bool), v4+ (appends `queueMode`). The parser uses `clientVersion` to select between `legacyServerP` and standard `smpP` for the server field, and `updateSMPServerHosts` backfills onion addresses for legacy URIs. + +## ACommand — binary body parsing + +`commandP` takes a custom body parser. `dbCommandP` uses `A.take =<< A.decimal <* "\n"` — length-prefixed binary read. This is for commands stored in the database where the body must be fully parsed (not left as unparsed trailing bytes). The standard command parser uses `A.takeByteString` for bodies, consuming remaining input. + +`pqIKP` defaults to `IKLinkPQ PQSupportOff` when PQ support is not specified, and `pqSupP` defaults to `PQSupportOff`. These defaults maintain backward compatibility with commands serialized before PQ support was added. diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 9e637ca960..25ad87b213 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -3352,6 +3352,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), THandlePar agentEnvelope <- parseMessage clientBody -- Version check is removed here, because when connecting via v1 contact address the agent still sends v2 message, -- to allow duplexHandshake mode, in case the receiving agent was updated to v2 after the address was created. + -- v1 slow handshake is no longer supported (minSupportedSMPAgentVersion = duplexHandshakeSMPAgentVersion). -- aVRange <- asks $ smpAgentVRange . 
config -- if agentVersion agentEnvelope `isCompatible` aVRange -- then pure (privHeader, agentEnvelope) @@ -3392,7 +3393,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), THandlePar AgentConnInfoReply smpQueues connInfo -> do processConf connInfo SMPConfirmation {senderKey, e2ePubKey, connInfo, smpReplyQueues = L.toList smpQueues, smpClientVersion = phVer} withStore' c $ \db -> updateRcvMsgHash db connId 1 (InternalRcvId 0) (C.sha256Hash agentMsgBody) - _ -> prohibited "conf: not AgentConnInfoReply" -- including AgentConnInfo, that is prohibited here in v2 + _ -> prohibited "conf: not AgentConnInfoReply" -- including AgentConnInfo, that is prohibited here in v2 (v1 slow handshake is no longer supported) where processConf connInfo senderConf = do let newConfirmation = NewConfirmation {connId, senderConf, ratchetState = rc'} diff --git a/src/Simplex/Messaging/Agent/Protocol.hs b/src/Simplex/Messaging/Agent/Protocol.hs index 557a92a73e..6c5833e66a 100644 --- a/src/Simplex/Messaging/Agent/Protocol.hs +++ b/src/Simplex/Messaging/Agent/Protocol.hs @@ -388,7 +388,7 @@ data AEvent (e :: AEntity) where INV :: AConnectionRequestUri -> AEvent AEConn LINK :: ConnShortLink 'CMContact -> UserConnLinkData 'CMContact -> AEvent AEConn LDATA :: FixedLinkData 'CMContact -> ConnLinkData 'CMContact -> AEvent AEConn - CONF :: ConfirmationId -> PQSupport -> [SMPServer] -> ConnInfo -> AEvent AEConn -- ConnInfo is from sender, [SMPServer] will be empty only in v1 handshake + CONF :: ConfirmationId -> PQSupport -> [SMPServer] -> ConnInfo -> AEvent AEConn -- ConnInfo is from sender, [SMPServer] will be empty only in v1 handshake (no longer supported) REQ :: InvitationId -> PQSupport -> NonEmpty SMPServer -> ConnInfo -> AEvent AEConn -- ConnInfo is from sender INFO :: PQSupport -> ConnInfo -> AEvent AEConn CON :: PQEncryption -> AEvent AEConn -- notification that connection is established @@ -1024,6 +1024,7 @@ data AMessage aMessageType :: AMessage -> AgentMessageType 
aMessageType = \case + -- v1 slow handshake is no longer supported (minSupportedSMPAgentVersion = duplexHandshakeSMPAgentVersion). -- HELLO is used both in v1 and in v2, but differently. -- - in v1 (and, possibly, in v2 for simplex connections) can be sent multiple times, -- until the queue is secured - the OK response from the server instead of initial AUTH errors confirms it. From 3a756f9842f6e549cea6f83914080fb4e831833c Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 08:49:50 +0000 Subject: [PATCH 36/91] agent client spec --- spec/TOPICS.md | 6 + .../modules/Simplex/Messaging/Agent/Client.md | 221 ++++++++++++++++++ .../Simplex/Messaging/Agent/understanding.md | 59 +++++ 3 files changed, 286 insertions(+) create mode 100644 spec/modules/Simplex/Messaging/Agent/Client.md create mode 100644 spec/modules/Simplex/Messaging/Agent/understanding.md diff --git a/spec/TOPICS.md b/spec/TOPICS.md index 0971071430..c29e617052 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -28,4 +28,10 @@ - **Short links**: Short links are a compact representation for sharing via URLs, not a replacement for full connection links — both are used. Short links store encrypted link data on the router and encode only a server hostname, link type character, and key hash in the URL. The link data lifecycle (creation, encryption with key derivation, owner chain-of-trust validation, mutable user data updates) spans Agent/Protocol.hs (types, serialization, owner validation, server shortening/restoration), Agent.hs (link creation and resolution API), and the router-side link storage. The `FixedLinkData`/`ConnLinkData` split (immutable vs mutable), `OwnerAuth` chain validation, and `PreparedLinkParams` pre-computation are not visible from any single module. 
+- **Agent worker framework**: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval with doWork flag atomics) defined in Agent/Client.hs, consumed by Agent.hs (async commands, message delivery), NtfSubSupervisor.hs (notification workers), FileTransfer/Agent.hs (XFTP workers), and simplex-chat. The framework separates two concerns: worker lifecycle (create-or-reuse, fork async, rate-limit restarts, escalate to CRITICAL) and task pattern (get next task, do task, as separate parameters). The doWork TMVar flag choreography (clear before query to prevent race) and the work-item-error vs store-error distinction are not obvious from any single consumer. + +- **Agent operation suspension**: Five `AgentOpState` TVars (RcvNetwork, MsgDelivery, SndNetwork, Database, NtfNetwork) with a cascade ordering: ending RcvNetwork suspends MsgDelivery, ending MsgDelivery suspends SndNetwork + Database, ending SndNetwork suspends Database. `beginAgentOperation` retries if suspended, `endAgentOperation` decrements and cascades. All DB access goes through `withStore` which brackets with AODatabase. This ensures graceful shutdown propagates through dependent operations. Defined in Agent/Client.hs, used by Agent.hs subscriber and worker loops. + +- **Queue rotation protocol**: Four agent messages (QADD → QKEY → QUSE → QTEST) on top of SMP commands, with asymmetric state machines on receiver side (`RcvSwitchStatus`: 4 states) and sender side (`SndSwitchStatus`: 2 states). Receiver initiates, creates new queue, sends QADD. Sender responds with QKEY. Receiver sends QUSE. Sender sends QTEST to complete. State types in Agent/Protocol.hs, orchestration in Agent.hs, queue creation/deletion in Agent/Client.hs. Protocol spec in agent-protocol.md. The fast variant (v9+ SMP with SKEY) skips the KEY command step. 
+ - **Outside-STM lookup pattern**: Multiple modules use the pattern of looking up TVar references outside STM (via readTVarIO/TM.lookupIO), then reading/modifying the TVar contents inside STM. This avoids transaction re-evaluation from unrelated map changes. Used in: Server.hs (serverThread client lookup, tryDeliverMessage subscriber lookup), Env/STM.hs (deleteSubcribedClient), Client/Agent.hs (removeClientAndSubs, reconnectSMPClient). The safety invariant is that the outer map entries (TVars) are never removed — only their contents change. diff --git a/spec/modules/Simplex/Messaging/Agent/Client.md b/spec/modules/Simplex/Messaging/Agent/Client.md new file mode 100644 index 0000000000..eb4ff47e70 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Client.md @@ -0,0 +1,221 @@ +# Simplex.Messaging.Agent.Client + +> Agent infrastructure layer: protocol client lifecycle, worker framework, subscription management, operation suspension, and concurrency primitives. + +**Source**: [`Agent/Client.hs`](../../../../../../src/Simplex/Messaging/Agent/Client.hs) + +**See also**: [Agent.hs](./Agent.md) — the orchestration layer that consumes these primitives. + +## Overview + +This module defines `AgentClient`, the central state container for the messaging agent, and all reusable infrastructure that Agent.hs and other consumers (NtfSubSupervisor.hs, FileTransfer/Agent.hs, simplex-chat) build upon. 
It contains ~2868 lines covering: + +- **Protocol client lifecycle**: lazy singleton connections to SMP/NTF/XFTP routers via `SessionVar` pattern, with disconnect callbacks and reconnection workers +- **Worker framework**: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval with doWork flag atomics) +- **Subscription state**: active/pending/removed queues, session-aware cleanup on disconnect, batch subscription RPCs with post-hoc session validation +- **Operation suspension**: five `AgentOpState` TVars with cascade ordering for graceful shutdown +- **Concurrency primitives**: per-connection locks, transport session batching, proxy routing + +The module is consumed by Agent.hs (which passes specific worker bodies, task queries, and handler logic into these frameworks) and by external consumers that reuse the worker and protocol client infrastructure. + +## AgentClient — central state container + +`AgentClient` has ~50 fields, almost all TVars or TMaps. Key architectural groupings: + +- **Event queues**: `subQ` (events to client application), `msgQ` (messages from SMP routers) +- **Protocol client pools**: `smpClients`, `ntfClients`, `xftpClients` — all are TMaps of `TransportSession` → `SessionVar`, implementing lazy singletons via `getSessVar` +- **Subscription tracking**: `currentSubs` (TSessionSubs, active+pending per transport session), `removedSubs` (failed subscriptions with errors), `subscrConns` (set of connection IDs currently subscribed) +- **Worker pools**: `smpDeliveryWorkers`, `asyncCmdWorkers`, `smpSubWorkers` — TMaps keyed by work address/connection +- **Operation states**: `ntfNetworkOp`, `rcvNetworkOp`, `msgDeliveryOp`, `sndNetworkOp`, `databaseOp` +- **Locking**: `connLocks`, `invLocks`, `deleteLock`, `getMsgLocks` + +All TVars are initialized in `newAgentClient`. 
The `active` TVar is the global kill switch — `closeAgentClient` sets it to `False`, and all protocol client getters check it first. + +## Protocol client lifecycle — SessionVar singleton pattern + +Protocol client connections (SMP, NTF, XFTP) use a lazy singleton pattern implemented by [Session.hs](../../../Session.md): + +1. **`getSessVar`** atomically checks the TMap. Returns `Left newVar` if absent (caller must connect), `Right existingVar` if present (caller waits for the TMVar). +2. **`newProtocolClient`** wraps the connection attempt. On success, fills the `sessionVar` TMVar with `Right client`. On failure, fills with `Left (error, maybeRetryTime)` and re-throws. +3. **`waitForProtocolClient`** reads the TMVar with a timeout. If the stored error has an expiry time that has passed, it removes the SessionVar and retries from scratch — this is the `persistErrorInterval` retry mechanism. + +### SessionVar compare-and-swap + +`removeSessVar` (Session.hs) only removes a SessionVar from the map if its `sessionVarId` matches the current entry. The `sessionVarId` is a monotonically increasing counter from `workerSeq`. This prevents a stale disconnection callback from removing a *new* client that was created after the old one disconnected. Without this, the sequence "client A disconnects → client B connects → client A's callback runs" would incorrectly remove client B. + +### SMP disconnect callback + +`smpClientDisconnected` is the most complex disconnect handler (NTF/XFTP have simpler versions that just remove the SessionVar): + +1. `removeSessVar` atomically removes the client if still current +2. If `active`, moves active subscriptions to pending (only those matching the disconnecting client's `sessionId` — see next section) +3. Removes proxied relay sessions that this client created +4. Fires `DOWN` events for affected connections +5. 
Triggers `resubscribeSMPSession` to spawn a reconnection worker + +### Session-aware subscription cleanup + +`removeClientAndSubs` (inside `smpClientDisconnected`) uses `SS.setSubsPending` with the disconnecting client's `sessionId`. Only subscriptions whose session ID matches the disconnecting client are moved to pending. If a new client already connected and made its own subscriptions active, those are *not* disturbed. This prevents the race: "old client disconnects → new client subscribes → old client's cleanup incorrectly demotes new client's subscriptions." + +## ProtocolServerClient typeclass + +Unifies SMP/NTF/XFTP client management with associated types: +- `Client msg` — the connected client type (SMP wraps in `SMPConnectedClient` with proxied relay map; NTF and XFTP use the raw protocol client) +- `ProtoClient msg` — the underlying protocol client for logging/closing + +SMP is special: `SMPConnectedClient` bundles the protocol client with `proxiedRelays :: TMap SMPServer ProxiedRelayVar`, a per-connection map of relay sessions for proxy routing. + +## Worker framework + +Defined here, consumed by Agent.hs, NtfSubSupervisor.hs, FileTransfer/Agent.hs, and simplex-chat. Two separable parts: + +### getAgentWorker — lifecycle management + +Creates or reuses a worker for a given key. Workers are stored in a TMap keyed by their work address. + +- **Create-or-reuse**: atomically checks the map. If absent, creates a new `Worker` (with `doWork` TMVar pre-filled with `()`). If present and `hasWork=True`, signals the existing worker. +- **Fork**: `runWorkerAsync` takes the `action` TMVar. If `Nothing` (worker idle), it starts work. If `Just weakThreadId` (worker running), it puts the value back and returns. This bracket ensures at-most-one concurrent execution. +- **Restart rate limiting**: on worker exit (success or error), checks `restartCount` against `maxWorkerRestartsPerMin`. If under the limit, restarts with `hasWorkToDo` signal. 
If over the limit, deletes the worker from the map and sends a `CRITICAL True` error. +- **Worker identity**: `workerId` (from `workerSeq`) prevents a stale restart from interfering with a new worker that replaced it in the map. + +`getAgentWorker'` is the generic version with custom worker wrapper — used by `smpDeliveryWorkers` which pairs each Worker with a `TMVar ()` retry lock. + +### withWork / withWork_ / withWorkItems — task retrieval + +Takes `getWork` (fetch next task) and `action` (process it) as separate parameters. The consumer's worker body loops: `waitForWork doWork` → `withWork doWork getTask handleTask`. + +**Critical: doWork flag race prevention.** `noWorkToDo` (clearing the flag) happens BEFORE `getWork` (querying for tasks), not after. This prevents the race where: (1) worker queries, finds nothing, (2) another thread adds work and sets the flag, (3) worker clears the flag — losing the signal. By clearing first, any concurrent signal after the query will be preserved. + +**Error classification**: `withWork_` distinguishes work-item errors from store errors: +- **Work item error** (`isWorkItemError`): the worker stops and sends `CRITICAL False`. The next iteration would likely produce the same error, so stopping prevents infinite loops. +- **Store error**: the flag is re-set and an `INTERNAL` error is reported. The assumption is that store errors are transient (e.g., DB busy) and retrying may succeed. + +`withWorkItems` handles batched work — a list of items where some may have individual errors. If all items are work-item errors, the worker stops. If only some are, the worker continues with the successful items and reports errors. + +### runWorkerAsync — at-most-one execution + +Uses a bracket on the `action` TMVar: +- `takeTMVar action` — blocks if another thread is starting the worker (TMVar empty during start) +- If the taken value is `Nothing` — worker is idle, start it. Store `Just weakThreadId` in the TMVar. 
+- If `Just _` — worker is already running, put it back and return. + +The `Weak ThreadId` in `action` is a weak reference — it doesn't prevent the worker thread from being garbage collected. This is the cleanup mechanism: if the thread dies without explicitly clearing `action`, the weak reference becomes stale and the next `runWorkerAsync` call will detect it as idle. + +## Operation suspension cascade + +Five `AgentOpState` TVars track whether each operation category is suspended and how many operations are in-flight: + +``` +AONtfNetwork (independent) +AORcvNetwork → AOMsgDelivery → AOSndNetwork → AODatabase +``` + +The cascade means: +- `endAgentOperation AORcvNetwork` suspends `AOMsgDelivery`, which cascades to `AOSndNetwork` → `AODatabase` +- `endAgentOperation AOMsgDelivery` suspends `AOSndNetwork` → `AODatabase` +- `endAgentOperation AOSndNetwork` suspends `AODatabase` +- Each leaf in the cascade calls `notifySuspended` (writes `SUSPENDED` to `subQ`, sets `agentState` to `ASSuspended`) + +**`beginAgentOperation`** retries (blocks in STM) if the operation is suspended. This provides backpressure: new operations wait until the operation is resumed. + +**`agentOperationBracket`** wraps an operation with begin/end. All database access goes through `withStore` which brackets with `AODatabase`. This ensures graceful shutdown propagates: suspending `AORcvNetwork` eventually suspends all downstream operations, and `notifySuspended` only fires when all in-flight operations have completed. + +**`waitWhileSuspended`** vs **`waitUntilForeground`**: `waitWhileSuspended` proceeds during `ASSuspending` (allowing in-flight operations to complete), while `waitUntilForeground` blocks during both `ASSuspending` and `ASSuspended`. + +## Subscription management + +### subscribeQueues — batch-by-transport-session + +`subscribeQueues` is the main entry point for subscribing to receive queues: + +1. 
`checkQueues` filters out queues with active GET locks (prevents concurrent GET + SUB on the same queue) +2. `batchQueues` groups queues by transport session +3. `addPendingSubs` marks all queues as pending before the RPC +4. `mapConcurrently` subscribes each session batch in parallel + +### subscribeSessQueues_ — post-hoc session validation + +After the subscription RPC completes, `subscribeSessQueues_` validates `activeClientSession` — checking that the SessionVar still holds the same client that was used for the RPC. If the client was replaced during the RPC (reconnection happened), the results are discarded and resubscription is triggered. This is optimistic execution with post-hoc validation: do the work, then check if it's still valid. + +### processSubResults — partitioning + +Subscription results are partitioned into four categories: +1. **Failed with client notice** — queue has a server-side notice (e.g., queue status change) +2. **Failed permanently** — non-temporary error, queue is removed from pending and added to `removedSubs` +3. **Failed temporarily** — error is transient, queue stays in pending for retry on reconnect +4. **Subscribed** — moved from pending to active. Further split into: queues whose service ID matches the session service (added as service-associated) and others. +5. **Ignored** — queue was not in the pending map (already activated by a concurrent path), counted for statistics only + +### Resubscription worker + +`resubscribeSMPSession` spawns a worker per transport session that retries pending subscriptions with exponential backoff (`withRetryForeground`). The worker: + +1. Reads pending subs and pending service sub +2. Waits for foreground and network +3. Resubscribes service and queues +4. Loops until no pending subs remain + +**Cleanup blocks on TMVar fill** — the `cleanup` STM action retries (`whenM (isEmptyTMVar $ sessionVar v) retry`) until the async handle is inserted. 
This prevents the race where cleanup runs before the worker async is stored, which would leave a terminated worker in the map. + +## Proxy routing — sendOrProxySMPCommand + +Implements SMP proxy/direct routing with fallback: + +1. `shouldUseProxy` checks `smpProxyMode` (Always/Unknown/Unprotected/Never) and whether the destination server is "known" (in the user's server list) +2. If proxying: `getSMPProxyClient` creates or reuses a proxy connection, then `connectSMPProxiedRelay` establishes the relay session. On `NO_SESSION` error, re-creates the relay session through the same proxy. +3. If proxying fails with a host error and `smpProxyFallback` allows it: falls back to direct connection +4. `deleteRelaySession` carefully validates that the current relay session matches the one that failed before removing it (prevents removing a concurrently-created replacement session) + +## withStore — database access bracket + +`withStore` wraps database access with `agentOperationBracket c AODatabase`, ensuring the operation suspension cascade is respected. SQLite errors are classified: +- `ErrorBusy`/`ErrorLocked` → `SEDatabaseBusy` → `CRITICAL True` (prompts user restart) +- Other SQL errors → `SEInternal` + +`SEAgentError` is a special wrapper that allows agent-level errors to be threaded through store operations — used when "transaction-like" access is needed but the operation involves agent logic, not just DB queries. See source comment: "network IO should NOT be used inside AgentStoreMonad." + +## Server selection — getNextServer / withNextSrv + +Server selection has two-level diversity: +1. **Operator diversity**: prefer servers from operators not already used (tracked by `usedOperators` set) +2. **Host diversity**: prefer servers with hosts not already used (tracked by `usedHosts` set) + +`filterOrAll` ensures that if all servers are "used," the full list is returned rather than an empty one. 
+ +`withNextSrv` is designed for retry loops — it re-reads user servers on each call (allowing configuration changes during retries) and tracks `triedHosts` across attempts. When all hosts are tried, the tried set is reset (`S.empty`), creating a round-robin effect. + +## Network configuration — slow/fast selection + +`getNetworkConfig` selects between slow and fast network configs based on `userNetworkInfo`: +- `UNCellular` or `UNNone` → slow config (1.5× timeouts via `slowNetworkConfig`) +- `UNWifi`, `UNEthernet`, `UNOther` → fast config + +Both configs are stored together in `useNetworkConfig :: TVar (NetworkConfig, NetworkConfig)`. The slow config is derived from the fast config in `newAgentClient`. + +## closeAgentClient — shutdown sequence + +1. Sets `active = False` — all protocol client getters will throw `INACTIVE` +2. Closes all protocol server clients (SMP, NTF, XFTP) by swapping maps to empty and forking close threads +3. Clears proxied relays +4. Cancels resubscription workers — forks cancellation threads (fire-and-forget, `closeAgentClient` may return before all workers are cancelled) +5. Clears delivery and async command workers +6. Clears subscription state + +The cancellation of resubscription workers reads the TMVar first (to get the Async handle), then calls `uninterruptibleCancel`. This is wrapped in a forked thread to avoid blocking the shutdown sequence. + +## Transport session modes + +`TransportSessionMode` (`TSMEntity` vs other) determines whether the transport session key includes the entity ID (connection/queue ID). When `TSMEntity`, each queue gets its own TLS connection to the router. When not, queues to the same router share a connection. This is controlled by `sessionMode` in the network config. + +`mkSMPTSession` and related functions compute the transport session key based on the current mode. 
This affects connection multiplexing — entity-mode sessions provide better privacy (router can't correlate queues) at the cost of more connections. + +## getMsgLocks — GET exclusion + +`getQueueMessage` creates a TMVar lock keyed by `(server, rcvId)` and takes it before sending GET. This prevents concurrent GET and SUB on the same queue (SUB is checked via `hasGetLock` in `checkQueues`). The lock is released by `releaseGetLock` after ACK or on error. + +## Error classification — temporaryAgentError + +Classifies errors as temporary (retryable) or permanent. Notable non-obvious classifications: +- `TEHandshake BAD_SERVICE` is temporary — it indicates a DB error on the router, not a permanent rejection +- `CRITICAL True` is temporary — `True` means the error shows a restart button, implying the user should retry +- `INACTIVE` is temporary — the agent may be reactivated diff --git a/spec/modules/Simplex/Messaging/Agent/understanding.md b/spec/modules/Simplex/Messaging/Agent/understanding.md new file mode 100644 index 0000000000..4ca3ed2d9a --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/understanding.md @@ -0,0 +1,59 @@ +# Agent Module Documentation Notes + +> Working notes for documenting Agent/Client.hs and Agent.hs. Not a spec doc — will be deleted after both docs are written. + +## Documentation approach + +**Bottom-up**: Client.hs first, then Agent.hs. + +**Client.hs** documents reusable infrastructure with contracts (what callers must provide, what guarantees they get), listing known consumers. Stands alone as "here's the framework." + +**Agent.hs** references Client.hs for infrastructure, focuses on what it passes into those frameworks — specific worker bodies, task queries, handler logic, and the orchestration policies (handshake, rotation, ratchet sync). Stands alone as "here's how the agent uses that framework." + +Coupling captured by cross-references, not duplication. 
+ +## Module roles + +**Client.hs — infrastructure layer (~2868 lines):** +- `AgentClient`: central state container (TVars, TMaps, worker pools, locks, operation states) +- Protocol client lifecycle: lazy singleton for SMP/NTF/XFTP connections, disconnect callbacks, reconnection via sub workers +- Subscription state machine: active/pending/removed, session-aware cleanup on disconnect +- Worker framework: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval pattern with doWork flag atomics) +- Operation suspension cascade: RcvNetwork → MsgDelivery → SndNetwork → Database +- Queue creation (`newRcvQueue`) and protocol-level operations +- Concurrency primitives: per-connection locks, session vars with monotonic IDs, batching by transport session +- Encryption helpers, server selection, statistics + +**Agent.hs — orchestration/policy layer (~3868 lines):** +- Public API: createConnection, joinConnection, allowConnection, sendMessage, ackMessage, switchConnection, etc. +- Subscriber loop: reads `msgQ`, dispatches to per-connection handlers via `processSMP` +- Duplex handshake: confirmation processing, HELLO exchange, CON notification +- Queue rotation protocol: QADD → QKEY → QUSE → QTEST +- Ratchet synchronization: AgentRatchetKey exchange, hash-ordering to break symmetry +- Async command processing: `runCommandProcessing` worker body using `withWork` + `getPendingServerCommand` +- Message delivery: `runSmpQueueMsgDelivery` worker body per SndQueue +- Message integrity: sequential ID + hash chain validation + +## Worker framework details + +Defined in Client.hs, consumed by Agent.hs, NtfSubSupervisor.hs, FileTransfer/Agent.hs, and simplex-chat. + +Two separable parts: +1. **`getAgentWorker`**: lifecycle — create-or-reuse worker for a key, fork async, handle restart rate limiting (max per minute, delete after max). 
`getAgentWorker'` is generic version with custom worker wrapper (e.g., adding a retryLock TMVar for delivery workers). +2. **`withWork` / `withWork_` / `withWorkItems`**: task retrieval pattern — takes `getWork` (fetch next task) and `action` (process it) as separate parameters. Clears doWork flag BEFORE querying (prevents race where another thread sets flag after query returns empty). Re-sets flag if work was found. On work item error vs store error: work item errors stop the worker (CRITICAL), store errors re-set flag and log. + +Worker body (in consumer module) loops: `waitForWork doWork` → `withWork doWork getTask handleTask`. + +## Key non-obvious patterns to document + +### Client.hs — DONE (see Agent/Client.md) + +### Agent.hs +- Subscriber loop is the main event processor +- Duplex handshake role asymmetry: initiator expects AgentConnInfoReply, acceptor expects AgentConnInfo +- Queue rotation is 4 agent messages on top of SMP commands +- Ratchet sync hash-ordering: lower hash initializes receive ratchet +- Message integrity validation: external sender ID sequential + hash chain +- Split-phase connection creation (prepareConnectionLink + createConnectionForLink) prevents race +- ACK is NOT automatic for A_MSG (user must call ackMessage), IS automatic for control messages +- Connection upgrade: RcvConnection → DuplexConnection when reply queue created From 541b3f924b412e1714b810e6ed1b07a36b456c1a Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 09:43:58 +0000 Subject: [PATCH 37/91] agent spec --- spec/modules/Simplex/Messaging/Agent.md | 238 ++++++++++++++++++ .../modules/Simplex/Messaging/Agent/Client.md | 2 +- .../Simplex/Messaging/Agent/understanding.md | 59 ----- 3 files changed, 239 insertions(+), 60 deletions(-) create mode 100644 spec/modules/Simplex/Messaging/Agent.md delete mode 100644 spec/modules/Simplex/Messaging/Agent/understanding.md diff --git 
a/spec/modules/Simplex/Messaging/Agent.md b/spec/modules/Simplex/Messaging/Agent.md new file mode 100644 index 0000000000..0b1e9cec14 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent.md @@ -0,0 +1,238 @@ +# Simplex.Messaging.Agent + +> Orchestration layer: duplex connection lifecycle, message processing dispatch, queue rotation, ratchet synchronization, and async command framework. + +**Source**: [`Agent.hs`](../../../../../src/Simplex/Messaging/Agent.hs) + +**See also**: [Agent/Client.md](./Agent/Client.md) — the infrastructure layer (AgentClient, worker framework, protocol client lifecycle, subscription state, operation suspension). + +**Protocol spec**: [`agent-protocol.md`](../../../../protocol/agent-protocol.md) — duplex connection procedure, agent message syntax. + +## Overview + +This module is the top-level messaging agent, consumed by simplex-chat and other client applications. It passes specific worker bodies, task queries, and handler logic into the frameworks defined in [Agent/Client.hs](./Agent/Client.md), and implements the orchestration policies: duplex handshake, queue rotation, ratchet synchronization, message integrity validation. + +The agent starts four threads (in `getSMPAgentClient_`): `subscriber` (main event loop), `runNtfSupervisor` (notification token management), `cleanupManager` (periodic garbage collection), and `logServersStats` (statistics reporting). These threads are raced via `raceAny_` — if any exits, all are cancelled. + +## Split-phase connection creation + +`prepareConnectionLink` and `createConnectionForLink` separate link preparation (key generation, link formatting — no network) from queue creation (single network call). This prevents the race where a link is published before the queue exists on the router. The link can be shared out-of-band after `prepareConnectionLink`, and `createConnectionForLink` is called only when the user is ready to accept connections. 
+ +## Subscriber loop — processSMPTransmissions + +The subscriber thread reads batches from `msgQ` (filled by SMP protocol clients) and dispatches to `processSMPTransmissions`. Key non-obvious behaviors: + +**Batch UP notification accumulation.** Successful subscription confirmations (`processSubOk`) append to a shared `upConnIds` TVar across the batch. A single `UP` event is emitted after all transmissions in the batch are processed, not per-transmission. Similarly, `serviceRQs` accumulates service-associated receive queues for batch processing via `processRcvServiceAssocs`. + +**Double validation for subscription results.** `isPendingSub` checks two conditions atomically: the queue must be in the pending map AND the client session must still be active. If either fails, the subscription result is counted as ignored (statistics only). This handles the race where a subscription response arrives after the client disconnected and a new client connected. + +**subQ overflow to pendingMsgs.** `processSMP` writes events to `subQ` (bounded TBQueue) but when it's full, events go into a `pendingMsgs` TVar instead. After processing completes, pending messages are drained in reverse order. This prevents the message processing thread from blocking on a full queue, which would stall the entire SMP client. + +**END/ENDS session validation.** Both `END` (single queue) and `ENDS` (service) check `activeClientSession` before removing subscriptions. If the session doesn't match (stale disconnect), the event is logged but ignored. This prevents a delayed END from a disconnected client from removing subscriptions that a new client established. + +## Message processing — processSMP + +`processSMP` dispatches on the SMP message type within a per-connection lock (`withConnLock`). 
+ +### Four e2e key states + +The MSG handler discriminates on `(e2eDhSecret, e2ePubKey_)` — the per-queue shared secret and the incoming public key: + +- `(Nothing, Just key)` — **Handshake phase**: no shared secret yet, public key present. Computes DH, decrypts with per-queue E2E. Dispatches to `smpConfirmation` (if AgentConfirmation) or `smpInvitation` (if AgentInvitation). +- `(Just dh, Nothing)` — **Established phase**: shared secret exists, no new key. This is normal message flow. Dispatches to `AgentRatchetKey` (ratchet renegotiation) or `AgentMsgEnvelope` (double-ratchet encrypted message). +- `(Just dh, Just _)` — **Repeated confirmation**: both present. Only AgentConfirmation is accepted (this is a retry because ACK failed), everything else is rejected. +- `(Nothing, Nothing)` — **Error**: no keys at all. + +### ACK semantics + +ACK is NOT automatic for `A_MSG` — the function returns `ACKPending` and the user must call `ackMessage`. ACK IS automatic for all control messages (HELLO, QADD, QKEY, QUSE, QTEST, EREADY, A_RCVD). This is because `A_MSG` delivery to the user application must be confirmed before the message is removed from the router. + +`handleNotifyAck` wraps each MSG processing branch: if any error occurs, it sends `ERR` to the client but still ACKs the SMP message. This prevents a processing error from causing infinite re-delivery of the same message. + +### agentClientMsg — transactional message processing + +The inner function `agentClientMsg` performs ratchet decryption, message parsing, and integrity checking inside a single `withStore` transaction with `lockConnForUpdate`. This serializes all message processing for a given connection, preventing concurrent ratchet state modifications. The function returns the pre-decryption ratchet state (`rcPrev`) alongside the message — this is needed by `ereadyMsg` to decide whether to send EREADY. + +### Duplicate message handling + +Three paths for `A_DUPLICATE` errors: + +1. 
**Stored and user-acked**: `getLastMsg` finds it with `userAck = True` → `ackDel` (delete from router). +2. **Stored, A_MSG, not user-acked**: re-notify the user with `MSG` event and return `ACKPending`. The user may not have seen the original notification. +3. **Not stored or non-A_MSG**: verify via `checkDuplicateHash` that the encrypted hash exists in the DB. If it doesn't, the error is re-thrown (it's a real decryption failure, not a duplicate). + +For crypto errors (`A_CRYPTO`): the encrypted message hash is checked for existence. If the hash already exists, the error is silently suppressed (it's a duplicate that failed decryption differently). If not, `notifySync` classifies the error via `cryptoErrToSyncState` and may trigger ratchet resynchronization. + +### resetRatchetSync on successful decryption + +When a double-ratchet message is successfully decrypted and the connection's ratchet sync state is not `RSOk` or `RSStarted`, the state is reset to `RSOk` and `RSYNC RSOk` is notified. This means successful message delivery is the recovery signal for ratchet desynchronization. + +### updateConnVersion on every message + +Every received `AgentMsgEnvelope` triggers `updateConnVersion`, which upgrades the connection's agreed agent version if the message's version is higher and compatible. This is a monotonic upgrade — versions only increase. The `safeVersionRange` construction handles the case where the sender's version is higher than the receiver's maximum — it creates a range from `minVersion` to the sender's version. + +## Duplex handshake + +See [agent-protocol.md](../../../../protocol/agent-protocol.md) for the protocol description. Implementation-specific details: + +### Initiating party (RcvConnection) + +Receives AgentConfirmation with `e2eEncryption = Just sndParams`. Initializes the receive ratchet from the sender's E2E parameters. 
**v7+ (ratchetOnConfSMPAgentVersion)**: creates the ratchet immediately on confirmation processing, not later on `allowConnection`. See source comment on `processConf` — this supports decrypting messages that may arrive before `allowConnection` is called. The ratchet creation, E2E secret setup, and confirmation storage all happen in one `withStore` transaction. + +### Accepting party (DuplexConnection) + +Receives AgentConfirmation with `e2eEncryption = Nothing` and `AgentConnInfo` (not `AgentConnInfoReply`). The ratchet was already initialized during `joinConnection`. If `senderKey` is present, enqueues `ICDuplexSecure` (the queue needs to be secured with SKEY). If absent (sender already secured via LKEY), sends `CON` immediately. + +### HELLO exchange + +HELLO is processed in `helloMsg`. The key dispatch is on `sndStatus`: +- `sndStatus == Active`: this side already sent HELLO, so receiving HELLO means both sides are connected → emit `CON`. +- Otherwise: this side hasn't sent HELLO yet → enqueue HELLO reply via `enqueueDuplexHello`. + +HELLO is not used at all in fast duplex connection (v9+ SMP with SKEY — the sender secures the queue directly, skipping the HELLO exchange). + +## Queue rotation + +Four agent messages implement queue rotation. See [agent-protocol.md](../../../../protocol/agent-protocol.md#rotating-messaging-queue) for the protocol. Implementation-specific details: + +**QADD** (processed by sender in `qAddMsg`): Creates a new `SndQueue` with DH key exchange. Before creating the new queue, deletes any previous pending replacement (`delSqs` partitioned by `dbReplaceQId`). Responds with `QKEY`. The replacement chain means multiple consecutive rotation requests are handled correctly — only the latest replacement survives. + +**QKEY** (processed by recipient in `qKeyMsg`): Validates that the queue is `New` or `Confirmed` and the switch status is `RSSendingQADD`. 
Enqueues `ICQSecure` to secure the queue asynchronously — the actual KEY command is sent by `runCommandProcessing`. + +**QUSE** (processed by sender in `qUseMsg`): Marks the new queue as `Secured`. Sends `QTEST` **only to the new queue**, not the old one. The old queue is deleted after QTEST is successfully delivered (handled in `runSmpQueueMsgDelivery`). + +**QTEST** (no handler): the source comment explains that any message received on the new queue triggers deletion of the old queue via the `dbReplaceQueueId` logic in `processSMP`'s AgentMsgEnvelope branch. QTEST exists only to ensure at least one message traverses the new queue. + +**Ratchet sync guard**: The three handlers (`qAddMsg`, `qKeyMsg`, `qUseMsg`) check `ratchetSyncSendProhibited` before proceeding; QTEST has no handler. Queue rotation is blocked during ratchet desynchronization. + +## Ratchet synchronization — newRatchetKey + +When an `AgentRatchetKey` message is received, `newRatchetKey` handles ratchet re-establishment. + +### Hash-ordering for initialization role + +Both parties generate key pairs and exchange them. The party whose `rkHash(k1, k2)` is **lower** (lexicographic comparison) initializes as the **receiving** ratchet; the other initializes as **sending** and sends EREADY. This deterministic ordering breaks the symmetry when both parties simultaneously request ratchet sync. + +### State machine + +The current `ratchetSyncState` determines behavior: +- `RSOk`, `RSAllowed`, `RSRequired` → **receiving client**: generate new keys, send `AgentRatchetKey` reply, then proceed with hash-ordering. +- `RSStarted` → **initiating client**: use the keys already stored (from `synchronizeRatchet'`), proceed with hash-ordering. +- `RSAgreed` → **error**: ratchet was already re-established but another key arrived. Sets state to `RSRequired` and throws `RATCHET_SYNC`. This handles the edge case where both parties initiate simultaneously and one has already completed. + +### Deduplication + +`checkRatchetKeyHashExists` prevents processing the same ratchet key message twice. 
The hash is stored before processing, so a duplicate delivery is detected and short-circuited via `ratchetExists`. + +### EREADY + +Sent by the party that initialized the **sending** ratchet (see hash-ordering above). Carries `lastExternalSndId` so the other party knows which messages were sent with the old ratchet. Processed by `ereadyMsg`, which checks `rcPrev` (the ratchet state before decrypting the current message): if the pre-decryption ratchet had no send chain (`rcSnd` is `Nothing`, i.e. it was initialized as receiving), it sends its own EREADY in reply. + +## Message integrity — checkMsgIntegrity + +Sequential external sender ID + previous message hash chain. Five outcomes: +- **MsgOk**: `extSndId == prevExtSndId + 1` AND hashes match. +- **MsgBadId**: `extSndId < prevExtSndId` — message from the past. +- **MsgDuplicate**: `extSndId == prevExtSndId` — same ID as last message. +- **MsgSkipped**: `extSndId > prevExtSndId + 1` — gap in sequence, reports range of skipped IDs. +- **MsgBadHash**: IDs are sequential but hashes don't match — message was modified or a different message was inserted. + +The integrity result is stored in `MsgMeta` and delivered to the client application. The agent does not reject messages with integrity failures — it reports them and continues processing. This is intentional: the client application decides the policy. + +## Async command processing — runCommandProcessing + +Uses the worker framework from [Agent/Client.hs](./Agent/Client.md#worker-framework). The worker body calls `withWork` with `getPendingServerCommand` as the task source. + +### Internal commands + +The command processor dispatches internal commands that are enqueued by message handlers and other agent operations: + +- **ICAllowSecure / ICDuplexSecure**: Complete the duplex handshake by securing the queue and sending confirmation. `ICAllowSecure` is the user-initiated path (from `allowConnection`), `ICDuplexSecure` is the automatic path (from receiving AgentConnInfo with senderKey). 
+- **ICQSecure / ICQDelete**: Queue rotation — secure the new queue (KEY command) and delete the old queue. +- **ICAck / ICAckDel**: Send ACK to the SMP router, optionally deleting the internal message record. +- **ICDeleteConn / ICDeleteRcvQueue**: Connection and queue cleanup. + +### Retry semantics + +`runCommandProcessing` has two retry intervals: zero (immediate retry via `0`) for commands that fail with temporary errors, and `asyncCmdRetryInterval` for stuck commands. `tryMoveableCommand` attempts to skip a stuck command by marking it with a future `connId` so `getPendingServerCommand` returns the next one instead. + +### withConnLockNotify + +Wraps command execution with `withConnLock` plus automatic error notification to `subQ`. This ensures that even if a command fails, the client application is notified. + +## Message delivery — runSmpQueueMsgDelivery + +Per-queue delivery loop using the worker framework. Each `SndQueue` has its own delivery worker (keyed by queue address in `smpDeliveryWorkers`). + +### Per-message-type error handling + +Error handling differs by message type and SMP error: + +**QUOTA**: The queue has exceeded its message quota. Sets `quotaExceededTs` and starts an expiry timer if `messageExpireInterval` is configured. Does NOT retry — the sender must wait for the recipient to drain messages (signaled by `A_QCONT`). + +**AUTH**: Different response per message type: +- `A_MSG_` (user message): sends `SENT` with `SndMsgRcvQueued` status to the client. The message was accepted by the router but auth failed on the receive side — likely the queue was replaced during rotation. +- Other types: sends `MERR` error to the client. +- In both cases, if `messageExpireInterval` is configured, expired messages are deleted. + +**Timeout/network errors**: retried with the worker framework's built-in retry. 
The `retryLock` TMVar (paired with each delivery worker — see `getAgentWorker'` in [Agent/Client.md](./Agent/Client.md#getagentworker--lifecycle-management)) provides external retry signaling from `A_QCONT`. + +## Batch message sending — sendMessagesB_ + +`sendMessagesB_` sends messages to multiple connections. When multiple messages have the same body (common for group messages), the body is encrypted once and referenced via `VRRef` for subsequent connections. `vrCopyMap` tracks `ByteString → (VRValue encrypted)` mappings. This is a performance optimization — ratchet encryption is expensive, and group messages go to many connections with identical plaintext. + +The function partitions connections by send queue and builds per-queue delivery batches. Each connection's message is encrypted with its own ratchet but the plaintext body lookup avoids redundant work. + +## Subscription management + +### subscribeAllConnections' + +Batch subscription with throttling: `maxPending` limits how many pending subscriptions exist simultaneously. When the pending count exceeds the limit, the function waits before enqueuing more. This prevents memory exhaustion on reconnection when thousands of connections need resubscription. + +Service subscriptions are attempted first (`subscribeClientServices'`). If a service subscription succeeds, its associated queues don't need individual SUB commands — they're covered by the service subscription. Queues not associated with any service are subscribed individually. + +### resubscribeConnection' + +Individual connection resubscription. Checks connection status and queue status before subscribing — deleted or suspended connections are skipped. Used for targeted resubscription after specific operations (e.g., after `allowConnection`). + +## Notification token lifecycle + +`registerNtfToken'` → `verifyNtfToken'` → `checkNtfToken'` → `deleteNtfToken'` manage push notification token registration with the NTF server. 
Token verification uses a challenge-response flow where the NTF server sends a verification code through the push notification channel, and the client confirms receipt. + +## Cleanup manager + +Runs periodically (configurable interval, typically 1 minute). Operations: +- **Delete marked connections**: connections in "deleted" or "deleted-waiting-delivery" states +- **Delete expired/deleted files**: both receive and send files, with configurable TTLs +- **Clean temp paths**: remove temporary file paths from completed transfers +- **Delete orphaned users**: users with no remaining connections get `DEL_USER` notification + +Each cleanup operation catches errors individually (`catchAllErrors`) — a failure in one doesn't prevent others from running. The manager uses `waitActive` to pause during agent suspension, with `tryAny` to handle the case where the agent is being shut down. + +## Agent suspension + +`suspendAgent` triggers the operation suspension cascade defined in [Agent/Client.md](./Agent/Client.md#operation-suspension-cascade). `foregroundAgent` resumes operations. The cascade ordering (RcvNetwork → MsgDelivery → SndNetwork → Database) ensures that receiving stops first, then in-flight message delivery completes, then sending stops, and finally database operations complete. + +## connectReplyQueues — background duplex upgrade + +Used during async command processing to complete the duplex handshake. Handles two cases: +- **Fresh connection** (`sq_ = Nothing`): upgrades `RcvConnection` to `DuplexConnection` by creating a new send queue. +- **SKEY retry** (`sq_ = Just sq`): connection is already duplex from a previous attempt. Reuses the existing send queue. + +Both paths then secure the queue and enqueue the confirmation. + +## secureConfirmQueue vs secureConfirmQueueAsync + +Two paths for sending the confirmation message during duplex handshake: +- **secureConfirmQueue** (synchronous): secures the queue and sends confirmation directly via network. 
Used in `joinConnection` (foreground user-initiated path). +- **secureConfirmQueueAsync** (asynchronous): secures the queue, stores the confirmation in the database, and submits to the delivery worker. Used in `allowConnection` (background path via `ICAllowSecure`). + +Both call `agentSecureSndQueue` first, which returns whether the initiator's ratchet should be created on confirmation (v7+ behavior). + +## smpConfirmation — version compatibility + +The confirmation handler accepts messages where the agent version or client version is either within the configured range OR at-or-below the already-agreed version. See source comment: "checking agreed versions to continue connection in case of client/agent version downgrades." This means a downgraded client can still complete in-progress handshakes. + +## smpInvitation — contact address handling + +Invitation messages received on a contact address connection are passed through even if version-incompatible. See source comment: "show connection request even if invitation via contact address is not compatible." The client application sees the `REQ` event with `PQSupportOff` when incompatible, allowing it to display the request to the user (who may choose to respond from a compatible client). diff --git a/spec/modules/Simplex/Messaging/Agent/Client.md b/spec/modules/Simplex/Messaging/Agent/Client.md index eb4ff47e70..e5d83675b7 100644 --- a/spec/modules/Simplex/Messaging/Agent/Client.md +++ b/spec/modules/Simplex/Messaging/Agent/Client.md @@ -8,7 +8,7 @@ ## Overview -This module defines `AgentClient`, the central state container for the messaging agent, and all reusable infrastructure that Agent.hs and other consumers (NtfSubSupervisor.hs, FileTransfer/Agent.hs, simplex-chat) build upon. 
It contains ~2868 lines covering: +This module defines `AgentClient`, the central state container for the messaging agent, and all reusable infrastructure that Agent.hs and other consumers (NtfSubSupervisor.hs, FileTransfer/Agent.hs, simplex-chat) build upon. It covers: - **Protocol client lifecycle**: lazy singleton connections to SMP/NTF/XFTP routers via `SessionVar` pattern, with disconnect callbacks and reconnection workers - **Worker framework**: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval with doWork flag atomics) diff --git a/spec/modules/Simplex/Messaging/Agent/understanding.md b/spec/modules/Simplex/Messaging/Agent/understanding.md deleted file mode 100644 index 4ca3ed2d9a..0000000000 --- a/spec/modules/Simplex/Messaging/Agent/understanding.md +++ /dev/null @@ -1,59 +0,0 @@ -# Agent Module Documentation Notes - -> Working notes for documenting Agent/Client.hs and Agent.hs. Not a spec doc — will be deleted after both docs are written. - -## Documentation approach - -**Bottom-up**: Client.hs first, then Agent.hs. - -**Client.hs** documents reusable infrastructure with contracts (what callers must provide, what guarantees they get), listing known consumers. Stands alone as "here's the framework." - -**Agent.hs** references Client.hs for infrastructure, focuses on what it passes into those frameworks — specific worker bodies, task queries, handler logic, and the orchestration policies (handshake, rotation, ratchet sync). Stands alone as "here's how the agent uses that framework." - -Coupling captured by cross-references, not duplication. 
- -## Module roles - -**Client.hs — infrastructure layer (~2868 lines):** -- `AgentClient`: central state container (TVars, TMaps, worker pools, locks, operation states) -- Protocol client lifecycle: lazy singleton for SMP/NTF/XFTP connections, disconnect callbacks, reconnection via sub workers -- Subscription state machine: active/pending/removed, session-aware cleanup on disconnect -- Worker framework: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval pattern with doWork flag atomics) -- Operation suspension cascade: RcvNetwork → MsgDelivery → SndNetwork → Database -- Queue creation (`newRcvQueue`) and protocol-level operations -- Concurrency primitives: per-connection locks, session vars with monotonic IDs, batching by transport session -- Encryption helpers, server selection, statistics - -**Agent.hs — orchestration/policy layer (~3868 lines):** -- Public API: createConnection, joinConnection, allowConnection, sendMessage, ackMessage, switchConnection, etc. -- Subscriber loop: reads `msgQ`, dispatches to per-connection handlers via `processSMP` -- Duplex handshake: confirmation processing, HELLO exchange, CON notification -- Queue rotation protocol: QADD → QKEY → QUSE → QTEST -- Ratchet synchronization: AgentRatchetKey exchange, hash-ordering to break symmetry -- Async command processing: `runCommandProcessing` worker body using `withWork` + `getPendingServerCommand` -- Message delivery: `runSmpQueueMsgDelivery` worker body per SndQueue -- Message integrity: sequential ID + hash chain validation - -## Worker framework details - -Defined in Client.hs, consumed by Agent.hs, NtfSubSupervisor.hs, FileTransfer/Agent.hs, and simplex-chat. - -Two separable parts: -1. **`getAgentWorker`**: lifecycle — create-or-reuse worker for a key, fork async, handle restart rate limiting (max per minute, delete after max). 
`getAgentWorker'` is generic version with custom worker wrapper (e.g., adding a retryLock TMVar for delivery workers). -2. **`withWork` / `withWork_` / `withWorkItems`**: task retrieval pattern — takes `getWork` (fetch next task) and `action` (process it) as separate parameters. Clears doWork flag BEFORE querying (prevents race where another thread sets flag after query returns empty). Re-sets flag if work was found. On work item error vs store error: work item errors stop the worker (CRITICAL), store errors re-set flag and log. - -Worker body (in consumer module) loops: `waitForWork doWork` → `withWork doWork getTask handleTask`. - -## Key non-obvious patterns to document - -### Client.hs — DONE (see Agent/Client.md) - -### Agent.hs -- Subscriber loop is the main event processor -- Duplex handshake role asymmetry: initiator expects AgentConnInfoReply, acceptor expects AgentConnInfo -- Queue rotation is 4 agent messages on top of SMP commands -- Ratchet sync hash-ordering: lower hash initializes receive ratchet -- Message integrity validation: external sender ID sequential + hash chain -- Split-phase connection creation (prepareConnectionLink + createConnectionForLink) prevents race -- ACK is NOT automatic for A_MSG (user must call ackMessage), IS automatic for control messages -- Connection upgrade: RcvConnection → DuplexConnection when reply queue created From c940f16f37b1852ea8bd81b45f8056e511a0c3a7 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 10:14:47 +0000 Subject: [PATCH 38/91] update agent specs --- spec/modules/Simplex/Messaging/Agent.md | 266 ++++++++++++------ .../modules/Simplex/Messaging/Agent/Client.md | 125 ++++++-- 2 files changed, 283 insertions(+), 108 deletions(-) diff --git a/spec/modules/Simplex/Messaging/Agent.md b/spec/modules/Simplex/Messaging/Agent.md index 0b1e9cec14..a52be21561 100644 --- a/spec/modules/Simplex/Messaging/Agent.md +++ 
b/spec/modules/Simplex/Messaging/Agent.md @@ -10,25 +10,41 @@ ## Overview -This module is the top-level messaging agent, consumed by simplex-chat and other client applications. It passes specific worker bodies, task queries, and handler logic into the frameworks defined in [Agent/Client.hs](./Agent/Client.md), and implements the orchestration policies: duplex handshake, queue rotation, ratchet synchronization, message integrity validation. +This module is the top-level SimpleX agent, consumed by simplex-chat and other client applications. It passes specific worker bodies, task queries, and handler logic into the frameworks defined in [Agent/Client.hs](./Agent/Client.md), and implements the orchestration policies: duplex handshake, queue rotation, ratchet synchronization, message integrity validation. -The agent starts four threads (in `getSMPAgentClient_`): `subscriber` (main event loop), `runNtfSupervisor` (notification token management), `cleanupManager` (periodic garbage collection), and `logServersStats` (statistics reporting). These threads are raced via `raceAny_` — if any exits, all are cancelled. +### Agent startup — backgroundMode + +`getSMPAgentClient_` accepts a `backgroundMode` flag that fundamentally changes agent capabilities: +- **Normal mode** (`backgroundMode = False`): starts four threads raced via `raceAny_` — `subscriber` (main event loop), `runNtfSupervisor` (notification management), `cleanupManager` (garbage collection), `logServersStats` (statistics). Also restores persisted server statistics. If any thread crashes, all are cancelled; statistics are saved in a `finally` block. +- **Background mode** (`backgroundMode = True`): starts only the `subscriber` thread. No cleanup, no notifications, no stats persistence. Used when the agent needs minimal receive-only operation. + +Thread crashes are caught by the `run` wrapper: if the agent is still active (`acThread` is set), the exception is reported as `CRITICAL True` to `subQ`. 
If the agent is being disposed, crashes are silently ignored. + +### Service + entity session mode prohibition + +Service certificates and entity transport session mode (`TSMEntity`) are mutually exclusive. This is checked in four places: `getSMPAgentClient_`, `setNetworkConfig`, `createUser'`, `setUserService'`. If violated, throws `CMD PROHIBITED`. The constraint exists because service certificates associate multiple queues under one identity, which contradicts entity session mode's goal of preventing queue correlation. ## Split-phase connection creation -`prepareConnectionLink` and `createConnectionForLink` separate link preparation (key generation, link formatting — no network) from queue creation (single network call). This prevents the race where a link is published before the queue exists on the router. The link can be shared out-of-band after `prepareConnectionLink`, and `createConnectionForLink` is called only when the user is ready to accept connections. +`prepareConnectionLink` and `createConnectionForLink` separate link preparation (key generation, link formatting — no network) from queue creation (single network call). This prevents the race where a link is published before the queue exists on the router. + +**Sender ID derivation.** The sender ID is deterministic: `SMP.EntityId $ B.take 24 $ C.sha3_384 corrId` where `corrId` is a random nonce. `createConnectionForLink` validates `actualSndId == sndId` — if the router returns a different sender ID, the connection is rejected. See source comment: "the remaining 24 bytes are reserved, possibly for notifier ID in the new notifications protocol." + +**PQ restriction.** `IKUsePQ` is prohibited for prepared links — throws `CMD PROHIBITED`. PQ keys are too large for the short link format. ## Subscriber loop — processSMPTransmissions -The subscriber thread reads batches from `msgQ` (filled by SMP protocol clients) and dispatches to `processSMPTransmissions`. 
Key non-obvious behaviors: +The subscriber thread reads batches from `msgQ` (filled by SMP protocol clients) and dispatches to `processSMPTransmissions`. Each batch is processed within `agentOperationBracket c AORcvNetwork waitUntilActive`, tying into the operation suspension cascade. -**Batch UP notification accumulation.** Successful subscription confirmations (`processSubOk`) append to a shared `upConnIds` TVar across the batch. A single `UP` event is emitted after all transmissions in the batch are processed, not per-transmission. Similarly, `serviceRQs` accumulates service-associated receive queues for batch processing via `processRcvServiceAssocs`. +**Batch UP notification accumulation.** Successful subscription confirmations (`processSubOk`) append to a shared `upConnIds` TVar across the batch. A single `UP` event is emitted after all transmissions are processed, not per-transmission. Similarly, `serviceRQs` accumulates service-associated receive queues for batch processing via `processRcvServiceAssocs`. -**Double validation for subscription results.** `isPendingSub` checks two conditions atomically: the queue must be in the pending map AND the client session must still be active. If either fails, the subscription result is counted as ignored (statistics only). This handles the race where a subscription response arrives after the client disconnected and a new client connected. +**Double validation for subscription results.** `isPendingSub` checks two conditions atomically: the queue must be in the pending map AND the client session must still be active (`activeClientSession`). If either fails, the result is counted as ignored (statistics only). This handles the race where a subscription response arrives after reconnection. -**subQ overflow to pendingMsgs.** `processSMP` writes events to `subQ` (bounded TBQueue) but when it's full, events go into a `pendingMsgs` TVar instead. After processing completes, pending messages are drained in reverse order. 
This prevents the message processing thread from blocking on a full queue, which would stall the entire SMP client. +**SUB response piggybacking MSG.** When a SUB response arrives as `Right msg@SMP.MSG {}`, the connection is marked UP (via `processSubOk`) AND the MSG is processed. The UP notification happens even if the MSG processing fails — the connection is up regardless. -**END/ENDS session validation.** Both `END` (single queue) and `ENDS` (service) check `activeClientSession` before removing subscriptions. If the session doesn't match (stale disconnect), the event is logged but ignored. This prevents a delayed END from a disconnected client from removing subscriptions that a new client established. +**subQ overflow to pendingMsgs.** `processSMP` writes events to `subQ` (bounded TBQueue) but when full, events go into a `pendingMsgs` TVar. After processing, pending messages are drained in reverse order (LIFO). This prevents the message processing thread from blocking on a full queue, which would stall the entire SMP client. + +**END/ENDS session validation.** Both check `activeClientSession` before removing subscriptions. If the session doesn't match (stale disconnect), the event is logged but ignored. ## Message processing — processSMP @@ -36,40 +52,44 @@ The subscriber thread reads batches from `msgQ` (filled by SMP protocol clients) ### Four e2e key states -The MSG handler discriminates on `(e2eDhSecret, e2ePubKey_)` — the per-queue shared secret and the incoming public key: +The MSG handler discriminates on `(e2eDhSecret, e2ePubKey_)`: -- `(Nothing, Just key)` — **Handshake phase**: no shared secret yet, public key present. Computes DH, decrypts with per-queue E2E. Dispatches to `smpConfirmation` (if AgentConfirmation) or `smpInvitation` (if AgentInvitation). -- `(Just dh, Nothing)` — **Established phase**: shared secret exists, no new key. This is normal message flow. 
Dispatches to `AgentRatchetKey` (ratchet renegotiation) or `AgentMsgEnvelope` (double-ratchet encrypted message). -- `(Just dh, Just _)` — **Repeated confirmation**: both present. Only AgentConfirmation is accepted (this is a retry because ACK failed), everything else is rejected. +- `(Nothing, Just key)` — **Handshake**: computes DH, decrypts with per-queue E2E. Dispatches to `smpConfirmation` or `smpInvitation`. +- `(Just dh, Nothing)` — **Established**: normal message flow. Dispatches to `AgentRatchetKey` or `AgentMsgEnvelope`. +- `(Just dh, Just _)` — **Repeated confirmation**: only AgentConfirmation is accepted (ACK for previous one failed), everything else is rejected. - `(Nothing, Nothing)` — **Error**: no keys at all. ### ACK semantics -ACK is NOT automatic for `A_MSG` — the function returns `ACKPending` and the user must call `ackMessage`. ACK IS automatic for all control messages (HELLO, QADD, QKEY, QUSE, QTEST, EREADY, A_RCVD). This is because `A_MSG` delivery to the user application must be confirmed before the message is removed from the router. +ACK is NOT automatic for `A_MSG` — the function returns `ACKPending` and the user must call `ackMessage`. ACK IS automatic for all control messages (HELLO, QADD, QKEY, QUSE, QTEST, EREADY, A_RCVD). -`handleNotifyAck` wraps each MSG processing branch: if any error occurs, it sends `ERR` to the client but still ACKs the SMP message. This prevents a processing error from causing infinite re-delivery of the same message. +`handleNotifyAck` wraps the MSG processing: if any error occurs, it sends `ERR` to the client but still ACKs the SMP message. This prevents a processing error from causing infinite re-delivery. ### agentClientMsg — transactional message processing -The inner function `agentClientMsg` performs ratchet decryption, message parsing, and integrity checking inside a single `withStore` transaction with `lockConnForUpdate`. 
This serializes all message processing for a given connection, preventing concurrent ratchet state modifications. The function returns the pre-decryption ratchet state (`rcPrev`) alongside the message — this is needed by `ereadyMsg` to decide whether to send EREADY. +Performs ratchet decryption, message parsing, and integrity checking inside a single `withStore` transaction with `lockConnForUpdate`. This serializes all message processing for a given connection, preventing concurrent ratchet state modifications. Returns the pre-decryption ratchet state (`rcPrev`) alongside the message — needed by `ereadyMsg` to decide whether to send EREADY. + +### Additional queue status transitions on message receipt + +When receiving an `AgentMsgEnvelope` on a non-Active queue, the queue is set to Active. For primary queues during rotation (`dbReplaceQueueId` is set), the new queue is set as primary and the old queue is scheduled for deletion via `ICQDelete`. This is how the receiving side completes queue rotation — any message on the new queue triggers cleanup of the old one. ### Duplicate message handling Three paths for `A_DUPLICATE` errors: -1. **Stored and user-acked**: `getLastMsg` finds it with `userAck = True` → `ackDel` (delete from router). -2. **Stored, A_MSG, not user-acked**: re-notify the user with `MSG` event and return `ACKPending`. The user may not have seen the original notification. -3. **Not stored or non-A_MSG**: verify via `checkDuplicateHash` that the encrypted hash exists in the DB. If it doesn't, the error is re-thrown (it's a real decryption failure, not a duplicate). +1. **Stored and user-acked**: `getLastMsg` finds it with `userAck = True` → `ackDel`. +2. **Stored, A_MSG, not user-acked**: re-notify the user with `MSG` event and return `ACKPending`. The user may not have seen the original. +3. **Not stored or non-A_MSG**: `checkDuplicateHash` verifies the encrypted hash exists in the DB. If not, re-throws (real decryption failure, not duplicate). 
-For crypto errors (`A_CRYPTO`): the encrypted message hash is checked for existence. If the hash already exists, the error is silently suppressed (it's a duplicate that failed decryption differently). If not, `notifySync` classifies the error via `cryptoErrToSyncState` and may trigger ratchet resynchronization. +For crypto errors (`A_CRYPTO`): if the encrypted message hash already exists, the error is suppressed (the message is a duplicate). If not, `notifySync` classifies the error via `cryptoErrToSyncState` (RSAllowed or RSRequired) and updates the connection's ratchet sync state. ### resetRatchetSync on successful decryption -When a double-ratchet message is successfully decrypted and the connection's ratchet sync state is not `RSOk` or `RSStarted`, the state is reset to `RSOk` and `RSYNC RSOk` is notified. This means successful message delivery is the recovery signal for ratchet desynchronization. +When a double-ratchet message is successfully decrypted and the connection's ratchet sync state is not `RSOk` or `RSStarted`, the state is reset to `RSOk` and `RSYNC RSOk` is notified. Successful message delivery is the recovery signal for ratchet desynchronization. -### updateConnVersion on every message +### updateConnVersion — monotonic upgrade -Every received `AgentMsgEnvelope` triggers `updateConnVersion`, which upgrades the connection's agreed agent version if the message's version is higher and compatible. This is a monotonic upgrade — versions only increase. The `safeVersionRange` construction handles the case where the sender's version is higher than the receiver's maximum — it creates a range from `minVersion` to the sender's version. +Every received `AgentMsgEnvelope` triggers `updateConnVersion`. If the message's agent version is higher than the current agreed version and compatible, the agreed version is upgraded. Versions only increase. `safeVersionRange` handles the case where the sender's version exceeds the receiver's maximum — it creates a range from `minVersion` to the sender's version.
## Duplex handshake @@ -81,29 +101,41 @@ Receives AgentConfirmation with `e2eEncryption = Just sndParams`. Initializes th ### Accepting party (DuplexConnection) -Receives AgentConfirmation with `e2eEncryption = Nothing` and `AgentConnInfo` (not `AgentConnInfoReply`). The ratchet was already initialized during `joinConnection`. If `senderKey` is present, enqueues `ICDuplexSecure` (the queue needs to be secured with SKEY). If absent (sender already secured via LKEY), sends `CON` immediately. +Receives AgentConfirmation with `e2eEncryption = Nothing` and `AgentConnInfo` (not `AgentConnInfoReply`). The ratchet was already initialized during `joinConnection`. If `senderKey` is present, enqueues `ICDuplexSecure` (queue needs securing with SKEY). If absent (sender already secured via LKEY), sends `CON` immediately and sets the queue Active. ### HELLO exchange HELLO is processed in `helloMsg`. The key dispatch is on `sndStatus`: - `sndStatus == Active`: this side already sent HELLO, so receiving HELLO means both sides are connected → emit `CON`. -- Otherwise: this side hasn't sent HELLO yet → enqueue HELLO reply via `enqueueDuplexHello`. +- Otherwise: this side hasn't sent HELLO yet → enqueue HELLO reply. + +HELLO is not used in fast duplex connection (v9+ SMP with SKEY). + +### startJoinInvitation — retry-safe ratchet creation + +When retrying a join (existing `SndQueue`), `startJoinInvitation` tries to get the existing ratchet via `getSndRatchet` before creating a new one. If the ratchet exists, it reuses it. If not (error), it logs a non-blocking error via `nonBlockingWriteTBQueue` and creates a fresh ratchet. This prevents a retry from corrupting an already-established ratchet. The same pattern appears in `mkJoinInvitation` for contact URI joins. -HELLO is not used at all in fast duplex connection (v9+ SMP with SKEY — the sender secures the queue directly, skipping the HELLO exchange). 
+### PQ support negotiation + +PQ support is the AND of four conditions: the local client's PQ preference, the peer's agent version (>= `pqdrSMPAgentVersion`), the E2E encryption version (>= `pqRatchetE2EEncryptVersion`), and the connection's current PQ support. This negotiation happens at `joinConn` and `smpConfirmation` time via `versionPQSupport_` and `pqSupportAnd`. ## Queue rotation Four agent messages implement queue rotation. See [agent-protocol.md](../../../../protocol/agent-protocol.md#rotating-messaging-queue) for the protocol. Implementation-specific details: -**QADD** (processed by sender in `qAddMsg`): Creates a new `SndQueue` with DH key exchange. Before creating the new queue, deletes any previous pending replacement (`delSqs` partitioned by `dbReplaceQId`). Responds with `QKEY`. The replacement chain means multiple consecutive rotation requests are handled correctly — only the latest replacement survives. +**QADD** (processed by sender in `qAddMsg`): Creates a new `SndQueue` with DH key exchange. Deletes any previous pending replacement (`delSqs` partitioned by `dbReplaceQId`). Responds with `QKEY`. The replacement chain means consecutive rotation requests are handled correctly — only the latest survives. + +**QKEY** (processed by recipient in `qKeyMsg`): Validates queue is `New` or `Confirmed` and switch status is `RSSendingQADD`. Enqueues `ICQSecure` for async processing. + +**QUSE** (processed by sender in `qUseMsg`): Marks new queue `Secured`. Sends `QTEST` **only to the new queue**. -**QKEY** (processed by recipient in `qKeyMsg`): Validates that the queue is `New` or `Confirmed` and the switch status is `RSSendingQADD`. Enqueues `ICQSecure` to secure the queue asynchronously — the actual KEY command is sent by `runCommandProcessing`. +**QTEST** (no handler in processSMP): Any message on the new queue triggers old queue deletion via `dbReplaceQueueId` logic. QTEST exists only to ensure at least one message traverses the new queue. 
-**QUSE** (processed by sender in `qUseMsg`): Marks the new queue as `Secured`. Sends `QTEST` **only to the new queue**, not the old one. The old queue is deleted after QTEST is successfully delivered (handled in `runSmpQueueMsgDelivery`). +**Sender-side completion in delivery handler.** When `AM_QTEST_` is successfully sent in `runSmpQueueMsgDelivery`, the old send queue is removed from the connection: pending messages are deleted, the queue record is removed, and the old queue's delivery worker is deleted from `smpDeliveryWorkers` (stopping its thread). This happens inside `withConnLockNotify` to prevent deadlock with the subscriber. -**QTEST** (no handler): Comment explains — any message received on the new queue triggers deletion of the old queue via the `dbReplaceQueueId` logic in `processSMP`'s AgentMsgEnvelope branch. QTEST exists only to ensure at least one message traverses the new queue. +**ICQDelete error tolerance.** In `runCommandProcessing`, if deleting the old receive queue fails with a permanent error (e.g., queue already gone on router), `finalizeSwitch` still runs — the local switch completes. Only temporary errors prevent completion. -**Ratchet sync guard**: All four handlers check `ratchetSyncSendProhibited` before proceeding. Queue rotation is blocked during ratchet desynchronization. +**Ratchet sync guard**: All four message handlers check `ratchetSyncSendProhibited` before proceeding. ## Ratchet synchronization — newRatchetKey @@ -111,128 +143,192 @@ When an `AgentRatchetKey` message is received, `newRatchetKey` handles ratchet r ### Hash-ordering for initialization role -Both parties generate key pairs and exchange them. The party whose `rkHash(k1, k2)` is **lower** (lexicographic comparison) initializes as the **receiving** ratchet; the other initializes as **sending** and sends EREADY. This deterministic ordering breaks the symmetry when both parties simultaneously request ratchet sync. +Both parties generate key pairs and exchange them. 
The party whose `rkHash(k1, k2)` is **lower** (lexicographic comparison) initializes the **receiving** ratchet; the other initializes **sending** and sends EREADY. This breaks the symmetry when both parties simultaneously request ratchet sync. ### State machine -The current `ratchetSyncState` determines behavior: - `RSOk`, `RSAllowed`, `RSRequired` → **receiving client**: generate new keys, send `AgentRatchetKey` reply, then proceed with hash-ordering. -- `RSStarted` → **initiating client**: use the keys already stored (from `synchronizeRatchet'`), proceed with hash-ordering. -- `RSAgreed` → **error**: ratchet was already re-established but another key arrived. Sets state to `RSRequired` and throws `RATCHET_SYNC`. This handles the edge case where both parties initiate simultaneously and one has already completed. +- `RSStarted` → **initiating client**: use keys already stored (from `synchronizeRatchet'`), proceed with hash-ordering. +- `RSAgreed` → **error**: sets state to `RSRequired`, throws `RATCHET_SYNC`. Handles the edge case where both parties initiate simultaneously and one has completed. ### Deduplication -`checkRatchetKeyHashExists` prevents processing the same ratchet key message twice. The hash is stored before processing, so a duplicate delivery is detected and short-circuited via `ratchetExists`. +`checkRatchetKeyHashExists` prevents processing the same ratchet key twice. The hash is stored atomically before processing begins. ### EREADY -Sent when the ratchet was initialized as receiving (`rcSnd` is `Nothing` in the pre-decryption ratchet state). Carries `lastExternalSndId` so the other party knows which messages were sent with the old ratchet. Processed by `ereadyMsg`, which checks `rcPrev` (the ratchet state before decrypting the current message) for the same condition — if the pre-decryption ratchet had no send chain, it sends EREADY. +Sent when the ratchet was initialized as receiving (`rcSnd` is `Nothing` in the pre-decryption ratchet state). 
Carries `lastExternalSndId` so the other party knows which messages were sent with the old ratchet. ## Message integrity — checkMsgIntegrity -Sequential external sender ID + previous message hash chain. Five outcomes: -- **MsgOk**: `extSndId == prevExtSndId + 1` AND hashes match. -- **MsgBadId**: `extSndId < prevExtSndId` — message from the past. -- **MsgDuplicate**: `extSndId == prevExtSndId` — same ID as last message. -- **MsgSkipped**: `extSndId > prevExtSndId + 1` — gap in sequence, reports range of skipped IDs. -- **MsgBadHash**: IDs are sequential but hashes don't match — message was modified or a different message was inserted. +Sequential external sender ID + previous message hash chain. Five outcomes: `MsgOk` (sequential + hashes match), `MsgBadId` (ID from the past), `MsgDuplicate` (same ID), `MsgSkipped` (gap in sequence), `MsgBadHash` (sequential but hashes differ). -The integrity result is stored in `MsgMeta` and delivered to the client application. The agent does not reject messages with integrity failures — it reports them and continues processing. This is intentional: the client application decides the policy. +The integrity result is delivered to the client application via `MsgMeta`. The agent does not reject messages with integrity failures — it reports them and continues processing. The client decides the policy. ## Async command processing — runCommandProcessing -Uses the worker framework from [Agent/Client.hs](./Agent/Client.md#worker-framework). The worker body calls `withWork` with `getPendingServerCommand` as the task source. +Uses the worker framework from [Agent/Client.hs](./Agent/Client.md#worker-framework). Keyed by `(connId, server)` — each connection/server combination gets its own command worker. Uses `AOSndNetwork` for operation suspension. 
### Internal commands -The command processor dispatches internal commands that are enqueued by message handlers and other agent operations: - -- **ICAllowSecure / ICDuplexSecure**: Complete the duplex handshake by securing the queue and sending confirmation. `ICAllowSecure` is the user-initiated path (from `allowConnection`), `ICDuplexSecure` is the automatic path (from receiving AgentConnInfo with senderKey). -- **ICQSecure / ICQDelete**: Queue rotation — secure the new queue (KEY command) and delete the old queue. -- **ICAck / ICAckDel**: Send ACK to the SMP router, optionally deleting the internal message record. -- **ICDeleteConn / ICDeleteRcvQueue**: Connection and queue cleanup. +- **ICAllowSecure**: User-initiated handshake completion (from `allowConnection`). On DuplexConnection (SKEY retry), if the error is temporary and the send queue's server differs from the command's server, the command is **moved** to the correct server queue via `updateCommandServer` + `getAsyncCmdWorker`. Returns `CCMoved` instead of `CCCompleted`. +- **ICDuplexSecure**: Automatic handshake completion (from receiving AgentConnInfo with senderKey). Secures queue and sends HELLO. +- **ICQSecure / ICQDelete**: Queue rotation — secure the new queue (KEY) and delete the old queue. +- **ICAck / ICAckDel**: Send ACK to the router, optionally deleting the internal message record. +- **ICDeleteConn**: No longer used, but may exist in old databases — cleaned up by deleting the command record. +- **ICDeleteRcvQueue**: Queue cleanup during rotation. ### Retry semantics -`runCommandProcessing` has two retry intervals: zero (immediate retry via `0`) for commands that fail with temporary errors, and `asyncCmdRetryInterval` for stuck commands. `tryMoveableCommand` attempts to skip a stuck command by marking it with a future `connId` so `getPendingServerCommand` returns the next one instead. 
+`tryMoveableCommand` wraps execution with `withRetryInterval`: it first waits via `waitWhileSuspended` and `waitForUserNetwork`, then executes. Temporary/host errors trigger retry via `retrySndOp`. On success, the command is deleted. On permanent error, the error is notified and the command is deleted. `retrySndOp` runs `endAgentOperation` and `beginAgentOperation` in separate `atomically` blocks — see source comment: if `beginAgentOperation` blocks, `SUSPENDED` won't be sent. -### withConnLockNotify +### withConnLockNotify — deadlock prevention -Wraps command execution with `withConnLock` plus automatic error notification to `subQ`. This ensures that even if a command fails, the client application is notified. +Returns `Maybe ATransmission` and writes to `subQ` **after** releasing the lock. This prevents deadlock: if the lock holder wrote to a full `subQ` while still holding the lock, and the subscriber thread needed that lock to process a message, both would block indefinitely. ## Message delivery — runSmpQueueMsgDelivery -Per-queue delivery loop using the worker framework. Each `SndQueue` has its own delivery worker (keyed by queue address in `smpDeliveryWorkers`). +Per-queue delivery loop. Each `SndQueue` has its own worker keyed by queue address in `smpDeliveryWorkers`, paired with a `TMVar ()` retry lock (via `getAgentWorker'`). + +### Deferred encryption + +Message bodies are NOT encrypted at enqueue time. `enqueueMessageB` advances the ratchet header (`agentRatchetEncryptHeader`) and validates padding (`rcCheckCanPad`), but stores only the body reference (`sndMsgBodyId`) and encryption key (`encryptKey`, `paddedLen`). The actual message body encoding (`encodeAgentMsgStr`) and encryption (`rcEncryptMsg`) happen at delivery time. This allows the same body to be shared across multiple send queues via `sndMsgBodyId` — each delivery encrypts independently with its connection's ratchet.
+ +For confirmation and ratchet key messages (AM_CONN_INFO, AM_CONN_INFO_REPLY, AM_RATCHET_INFO), the body is pre-encrypted and stored in `msgBody` directly — no deferred encryption. ### Per-message-type error handling -Error handling differs by message type and SMP error: +**QUOTA**: Checks `internalTs` against `quotaExceededTimeout`. If the message is older than the timeout, expires it and all subsequent expired messages in the queue (via `getExpiredSndMessages` → bulk `MERRS` notification). If not expired, sends `MWARN` and retries with `RISlow`. For confirmation messages (AM_CONN_INFO/AM_CONN_INFO_REPLY), QUOTA is treated as `NOT_AVAILABLE`. + +**AUTH**: Per message type: +- `AM_CONN_INFO` / `AM_CONN_INFO_REPLY` / `AM_RATCHET_INFO`: connection error `NOT_AVAILABLE` +- `AM_HELLO_` with receive queue (initiating party): `NOT_AVAILABLE`. Without receive queue (joining party): `NOT_ACCEPTED`. +- `AM_A_MSG_` / `AM_A_RCVD_` / `AM_QCONT_` / `AM_EREADY_`: delete message and notify `MERR`. +- Queue rotation messages (`AM_QADD_` through `AM_QTEST_`): queue error with descriptive string. + +**Timeout/network errors**: message-type-aware timeout — `AM_HELLO_` uses `helloTimeout`, all others use `messageTimeout`. If expired, uses `notifyDelMsgs` which expires the current message AND fetches all expired messages for the queue in bulk. If `serverHostError`, sends `MWARN` before retrying. Non-host temporary errors retry silently. + +### Delivery success handling + +On successful send, per message type: +- `AM_CONN_INFO` with `senderCanSecure` (fast handshake): sends `CON` + sets status `Active`. +- `AM_CONN_INFO` without `senderCanSecure`: sets status `Confirmed` only. +- `AM_CONN_INFO_REPLY`: sets status `Confirmed`. +- `AM_HELLO_`: sets status `Active`. If receive queue exists AND its status is `Active`, sends `CON` (accepting party in v2). +- `AM_A_MSG_`: sends `SENT msgId proxySrv_` to notify the client. 
+- `AM_QKEY_`: re-reads connection and sends `SWITCH QDSnd SPConfirmed`. +- `AM_QTEST_`: see "Sender-side completion" under Queue rotation above. +- All other types: no notification. + +After success, the delivery record is deleted. For `AM_A_MSG_`, `keepForReceipt = True` — the record is kept until a receipt is received. -**QUOTA**: The queue has exceeded its message quota. Sets `quotaExceededTs` and starts an expiry timer if `messageExpireInterval` is configured. Does NOT retry — the sender must wait for the recipient to drain messages (signaled by `A_QCONT`). +### withRetryLock2 — external retry signaling -**AUTH**: Different response per message type: -- `A_MSG_` (user message): sends `SENT` with `SndMsgRcvQueued` status to the client. The message was accepted by the router but auth failed on the receive side — likely the queue was replaced during rotation. -- Other types: sends `MERR` error to the client. -- In both cases, if `messageExpireInterval` is configured, expired messages are deleted. +The delivery loop uses `withRetryLock2` which combines the standard retry interval with `qLock` (the `TMVar ()` paired with the worker). When `A_QCONT` is received, the handler puts `()` into the retry lock, causing the retry to fire immediately instead of waiting for the backoff interval. See `continueSending` in `processSMP`. -**Timeout/network errors**: retried with the worker framework's built-in retry. The `retryLock` TMVar (paired with each delivery worker — see `getAgentWorker'` in [Agent/Client.md](./Agent/Client.md#getagentworker--lifecycle-management)) provides external retry signaling from `A_QCONT`. +### submitPendingMsg — operation counting + +`submitPendingMsg` increments `opsInProgress` on `msgDeliveryOp` BEFORE spawning the delivery worker. This means the operation is counted even before the worker starts, ensuring the suspension cascade waits for all enqueued deliveries. 
## Batch message sending — sendMessagesB_ -`sendMessagesB_` sends messages to multiple connections. When multiple messages have the same body (common for group messages), the body is encrypted once and referenced via `VRRef` for subsequent connections. `vrCopyMap` tracks `ByteString → (VRValue encrypted)` mappings. This is a performance optimization — ratchet encryption is expensive, and group messages go to many connections with identical plaintext. +### MsgReq grouping contract + +Messages to the same connection must be contiguous in the traversable, with only the first having a non-empty `connId`. Subsequent messages for the same connection must have empty `connId`. This is validated by `addConnId` which rejects duplicate `connId` values and empty first `connId`. The `getConn_` function uses a `TVar prev` to cache the last connection lookup, avoiding redundant database reads. + +### Connection locking + +`withConnLocks` takes locks for ALL connections in the batch before processing. This prevents concurrent sends to the same connection from interleaving ratchet state updates. + +### PQ support monotonic upgrade + +When `pqEnc == PQEncOn` but the connection has `pqSupport == PQSupportOff`, PQ support is upgraded via `setConnPQSupport`. PQ support can only be enabled, never disabled. The upgrade IDs are accumulated via `mapAccumL` and applied in a single batch database write. -The function partitions connections by send queue and builds per-queue delivery batches. Each connection's message is encrypted with its own ratchet but the plaintext body lookup avoids redundant work. +### VRValue/VRRef — database body deduplication + +VRValue/VRRef deduplication operates at the **database body storage** level, not encryption. 
`enqueueMessageB` tracks an `IntMap (Maybe Int64, AMessage)` mapping integer indices to database body IDs (`sndMsgBodyId`): + +- `VRValue (Just i) body`: stores the body in `snd_message_bodies`, records the `sndMsgBodyId`, and associates it with index `i` for future reference. +- `VRRef i`: looks up index `i` to get the previously stored `sndMsgBodyId`, and creates a new `snd_messages` record linked to the same body. + +Encryption is NOT deduplicated — each connection's ratchet header is independently advanced at enqueue time, and each delivery encrypts the body independently. The optimization is purely about avoiding redundant database storage of identical message bodies (common for group messages). + +### Error propagation constraint + +When a connection type is wrong (e.g., SndConnection, NewConnection), the error is returned per-message but the batch continues. See source comment: "we can't fail here, as it may prevent delivery of subsequent messages that reference the body of the failed message." If a VRValue message fails, subsequent VRRef messages that reference it would break. ## Subscription management +### subscribeConnections_ + +Partitions connections by type. SndConnection with `Confirmed` status returns success (it's not subscribed, just waiting). SndConnection with `Active` status returns `CONN SIMPLEX` (can't subscribe a send-only connection). After subscribing queues, resumes delivery workers for connections with pending deliveries (via `getConnectionsForDelivery`). + +**Multi-queue result combining.** For connections with multiple receive queues, results are combined using a priority system: Active+Success (1) > Active+Error (2) > non-Active+Success (3) > non-Active+Error (4). The highest-priority (lowest number) result is used. This ensures that if at least one Active queue subscribes successfully, the connection reports success. 
+ ### subscribeAllConnections' -Batch subscription with throttling: `maxPending` limits how many pending subscriptions exist simultaneously. When the pending count exceeds the limit, the function waits before enqueuing more. This prevents memory exhaustion on reconnection when thousands of connections need resubscription. +**Active user priority.** If `activeUserId_` is provided, that user's subscriptions are processed first (`sortOn`). + +**Service subscription with fallback.** Service subscriptions are attempted first. If a service subscription fails with `SSErrorServiceId` or zero subscribed queues, the queues are unassociated from the service and subscribed individually. If the error is a client-level error (not a service-specific error), the same fallback applies. -Service subscriptions are attempted first (`subscribeClientServices'`). If a service subscription succeeds, its associated queues don't need individual SUB commands — they're covered by the service subscription. Queues not associated with any service are subscribed individually. +**Pending throttle.** `maxPending` limits concurrent pending subscriptions. The counter is incremented inside the database transaction (before leaving `withStore'`) and decremented in a `finally` block. When the count exceeds the limit, `subscribeUserServer` blocks in STM via `retry`. -### resubscribeConnection' +### resubscribeConnections' -Individual connection resubscription. Checks connection status and queue status before subscribing — deleted or suspended connections are skipped. Used for targeted resubscription after specific operations (e.g., after `allowConnection`). +Filters out connections that already have active subscriptions (via `hasActiveSubscription`). For store errors, returns `True` for `isActiveConn` — this causes the error to be processed by `subscribeConnections_` which will report it. 
## Notification token lifecycle -`registerNtfToken'` → `verifyNtfToken'` → `checkNtfToken'` → `deleteNtfToken'` manage push notification token registration with the NTF server. Token verification uses a challenge-response flow where the NTF server sends a verification code through the push notification channel, and the client confirms receipt. +`registerNtfToken'` is a complex state machine. Key non-obvious behavior: on `NTF AUTH` error during token operations, the token is removed and re-registered from scratch (see `withToken` catch of `NTF AUTH`). Device token changes trigger `replaceToken`, which attempts an in-place replacement; if that fails with a permanent error, the token is removed and recreated. ## Cleanup manager -Runs periodically (configurable interval, typically 1 minute). Operations: -- **Delete marked connections**: connections in "deleted" or "deleted-waiting-delivery" states -- **Delete expired/deleted files**: both receive and send files, with configurable TTLs -- **Clean temp paths**: remove temporary file paths from completed transfers -- **Delete orphaned users**: users with no remaining connections get `DEL_USER` notification +Runs periodically with a `cleanupStepInterval` delay BETWEEN each cleanup operation (not just between cycles). This prevents cleanup from monopolizing database access. -Each cleanup operation catches errors individually (`catchAllErrors`) — a failure in one doesn't prevent others from running. The manager uses `waitActive` to pause during agent suspension, with `tryAny` to handle the case where the agent is being shut down. 
+Cleanup operations include: +- **Expired receive message hashes**: `deleteRcvMsgHashesExpired` +- **Expired send messages**: `deleteSndMsgsExpired` +- **Expired ratchet key hashes**: `deleteRatchetKeyHashesExpired` +- **Expired notification tokens**: `deleteExpiredNtfTokensToDelete` +- **Expired send chunk replicas**: `deleteDeletedSndChunkReplicasExpired` ## Agent suspension -`suspendAgent` triggers the operation suspension cascade defined in [Agent/Client.md](./Agent/Client.md#operation-suspension-cascade). `foregroundAgent` resumes operations. The cascade ordering (RcvNetwork → MsgDelivery → SndNetwork → Database) ensures that receiving stops first, then in-flight message delivery completes, then sending stops, and finally database operations complete. +`suspendAgent` has two modes: +- **Immediate** (`maxDelay = 0`): sets `ASSuspended` and suspends all operations immediately. +- **Gradual** (`maxDelay > 0`): sets `ASSuspending` and triggers the cascade (NtfNetwork independent; RcvNetwork → MsgDelivery → SndNetwork → Database). A timeout thread fires after `maxDelay` and forces suspension of sending and database if still suspending. + +`foregroundAgent` resumes in reverse order: database → sending → delivery → receiving → notifications. ## connectReplyQueues — background duplex upgrade Used during async command processing to complete the duplex handshake. Handles two cases: -- **Fresh connection** (`sq_ = Nothing`): upgrades `RcvConnection` to `DuplexConnection` by creating a new send queue. -- **SKEY retry** (`sq_ = Just sq`): connection is already duplex from a previous attempt. Reuses the existing send queue. - -Both paths then secure the queue and enqueue the confirmation. +- **Fresh connection** (`sq_ = Nothing`): upgrades `RcvConnection` to `DuplexConnection`. +- **SKEY retry** (`sq_ = Just sq`): connection is already duplex. See source comment: "in case of SKEY retry the connection is already duplex."
## secureConfirmQueue vs secureConfirmQueueAsync -Two paths for sending the confirmation message during duplex handshake: -- **secureConfirmQueue** (synchronous): secures the queue and sends confirmation directly via network. Used in `joinConnection` (foreground user-initiated path). -- **secureConfirmQueueAsync** (asynchronous): secures the queue, stores the confirmation in the database, and submits to the delivery worker. Used in `allowConnection` (background path via `ICAllowSecure`). +- **secureConfirmQueue** (synchronous): secures queue and sends confirmation directly via network. Used in `joinConnection`. +- **secureConfirmQueueAsync** (asynchronous): secures queue, stores confirmation, submits to delivery worker. Used in `allowConnection` (via `ICAllowSecure`). -Both call `agentSecureSndQueue` first, which returns whether the initiator's ratchet should be created on confirmation (v7+ behavior). +Both call `agentSecureSndQueue`, which returns `initiatorRatchetOnConf` — whether the initiator's ratchet should be created on confirmation (v7+ behavior). When the queue was already secured (retry), returns the same flag without re-securing. ## smpConfirmation — version compatibility -The confirmation handler accepts messages where the agent version or client version is either within the configured range OR at-or-below the already-agreed version. See source comment: "checking agreed versions to continue connection in case of client/agent version downgrades." This means a downgraded client can still complete in-progress handshakes. +The confirmation handler accepts messages where the agent version or client version is either within the configured range OR at-or-below the already-agreed version. See source comment. This means a downgraded client can still complete in-progress handshakes. ## smpInvitation — contact address handling -Invitation messages received on a contact address connection are passed through even if version-incompatible. 
See source comment: "show connection request even if invitation via contact address is not compatible." The client application sees the `REQ` event with `PQSupportOff` when incompatible, allowing it to display the request to the user (who may choose to respond from a compatible client). +Invitation messages received on a contact address are passed through even if version-incompatible. See source comment. The client application sees `REQ` with `PQSupportOff` when incompatible. + +## ackMessage' — receipt sending + +After ACKing a message, if the user provides receipt info (`rcptInfo_`), a receipt message (`A_RCVD`) is enqueued. Receipts are only allowed for `AM_A_MSG_` type. If the user ACKs without receipt info and the message already has a receipt with `MROk` status, the corresponding sent message is deleted from the database — it's confirmed delivered. + +## acceptContactAsync' — rollback on failure + +See source comment. Unlike the synchronous `acceptContact'` which takes a lock first, `acceptContactAsync'` marks the invitation as accepted before joining. On failure, `unacceptInvitation` rolls back. The comment notes this could be improved with an invitation lock map. + +## prepareConnectionToJoin — race prevention + +See source comment. Creates a connection record without queues, returning a `ConnId`. The caller saves this ID before the peer can send a confirmation. Without this, the sequence "joinConnection → peer sends confirmation → caller saves ConnId" could result in the confirmation arriving before the caller has the ID. 
diff --git a/spec/modules/Simplex/Messaging/Agent/Client.md b/spec/modules/Simplex/Messaging/Agent/Client.md index e5d83675b7..f1f4965b6c 100644 --- a/spec/modules/Simplex/Messaging/Agent/Client.md +++ b/spec/modules/Simplex/Messaging/Agent/Client.md @@ -8,7 +8,7 @@ ## Overview -This module defines `AgentClient`, the central state container for the messaging agent, and all reusable infrastructure that Agent.hs and other consumers (NtfSubSupervisor.hs, FileTransfer/Agent.hs, simplex-chat) build upon. It covers: +This module defines `AgentClient`, the central state container for the SimpleX agent, and all reusable infrastructure that Agent.hs and other consumers (NtfSubSupervisor.hs, FileTransfer/Agent.hs, simplex-chat) build upon. It covers: - **Protocol client lifecycle**: lazy singleton connections to SMP/NTF/XFTP routers via `SessionVar` pattern, with disconnect callbacks and reconnection workers - **Worker framework**: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval with doWork flag atomics) @@ -20,14 +20,17 @@ The module is consumed by Agent.hs (which passes specific worker bodies, task qu ## AgentClient — central state container -`AgentClient` has ~50 fields, almost all TVars or TMaps. Key architectural groupings: +`AgentClient` has ~43 fields, almost all TVars or TMaps. 
Key architectural groupings: - **Event queues**: `subQ` (events to client application), `msgQ` (messages from SMP routers) - **Protocol client pools**: `smpClients`, `ntfClients`, `xftpClients` — all are TMaps of `TransportSession` → `SessionVar`, implementing lazy singletons via `getSessVar` - **Subscription tracking**: `currentSubs` (TSessionSubs, active+pending per transport session), `removedSubs` (failed subscriptions with errors), `subscrConns` (set of connection IDs currently subscribed) -- **Worker pools**: `smpDeliveryWorkers`, `asyncCmdWorkers`, `smpSubWorkers` — TMaps keyed by work address/connection +- **Worker pools**: `smpDeliveryWorkers`, `asyncCmdWorkers` — TMaps keyed by work address/connection. `smpSubWorkers` — TMaps keyed by transport session for resubscription. - **Operation states**: `ntfNetworkOp`, `rcvNetworkOp`, `msgDeliveryOp`, `sndNetworkOp`, `databaseOp` -- **Locking**: `connLocks`, `invLocks`, `deleteLock`, `getMsgLocks` +- **Locking**: `connLocks`, `invLocks`, `deleteLock`, `getMsgLocks`, `clientNoticesLock` +- **Service state**: `useClientServices` (per-user boolean controlling whether service certificates are used) +- **Proxy routing**: `smpProxiedRelays` (maps destination transport session → proxy server used) +- **Network state**: `userNetworkInfo`, `userNetworkUpdated`, `useNetworkConfig` (slow/fast pair) All TVars are initialized in `newAgentClient`. The `active` TVar is the global kill switch — `closeAgentClient` sets it to `False`, and all protocol client getters check it first. @@ -36,22 +39,43 @@ All TVars are initialized in `newAgentClient`. The `active` TVar is the global k Protocol client connections (SMP, NTF, XFTP) use a lazy singleton pattern implemented by [Session.hs](../../../Session.md): 1. **`getSessVar`** atomically checks the TMap. Returns `Left newVar` if absent (caller must connect), `Right existingVar` if present (caller waits for the TMVar). -2. **`newProtocolClient`** wraps the connection attempt. 
On success, fills the `sessionVar` TMVar with `Right client`. On failure, fills with `Left (error, maybeRetryTime)` and re-throws. +2. **`newProtocolClient`** wraps the connection attempt. On success, fills the `sessionVar` TMVar with `Right client` and writes a `CONNECT` event to `subQ`. On failure, fills with `Left (error, maybeRetryTime)` and re-throws. 3. **`waitForProtocolClient`** reads the TMVar with a timeout. If the stored error has an expiry time that has passed, it removes the SessionVar and retries from scratch — this is the `persistErrorInterval` retry mechanism. +### Error caching with persistErrorInterval + +When `newProtocolClient` fails and `persistErrorInterval > 0`, the error is cached with an expiry timestamp (`Just ts`). Future connection attempts during the interval immediately receive the cached error from `waitForProtocolClient` without attempting a connection. When `persistErrorInterval == 0`, the SessionVar is removed immediately on failure, so the next attempt starts a fresh connection. This prevents connection storms to unreachable routers. + ### SessionVar compare-and-swap `removeSessVar` (Session.hs) only removes a SessionVar from the map if its `sessionVarId` matches the current entry. The `sessionVarId` is a monotonically increasing counter from `workerSeq`. This prevents a stale disconnection callback from removing a *new* client that was created after the old one disconnected. Without this, the sequence "client A disconnects → client B connects → client A's callback runs" would incorrectly remove client B. +### SMP connection — service credentials and session setup + +`smpConnectClient` connects an SMP client, with two important post-connection steps: + +1. **Session ID registration**: `SS.setSessionId` records the TLS session ID in `currentSubs`, linking the transport session to the actual TLS connection for later session validation. + +2. 
**Service credential synchronization** (`updateClientService`): After connecting, compares client-side and server-side service state. Four cases: + - Both have service and IDs match → update DB (no-op if same) + - Both have service but IDs differ → update DB and remove old queue-service associations + - Client has service, server doesn't → delete client service (handles server version downgrade) + - Server has service, client doesn't → log error (should not happen in normal flow) + +On connection failure, `smpConnectClient` triggers `resubscribeSMPSession` before re-throwing the error. This ensures pending subscriptions get retry logic even when the initial connection attempt fails. + ### SMP disconnect callback -`smpClientDisconnected` is the most complex disconnect handler (NTF/XFTP have simpler versions that just remove the SessionVar): +`smpClientDisconnected` is the most complex disconnect handler (NTF/XFTP have simpler versions that remove the SessionVar and write a `DISCONNECT` event): 1. `removeSessVar` atomically removes the client if still current 2. If `active`, moves active subscriptions to pending (only those matching the disconnecting client's `sessionId` — see next section) 3. Removes proxied relay sessions that this client created -4. Fires `DOWN` events for affected connections -5. Triggers `resubscribeSMPSession` to spawn a reconnection worker +4. Fires `DISCONNECT`, `DOWN`, and `SERVICE_DOWN` events for affected connections +5. Releases GET locks for affected queues +6. Triggers resubscription (see below) + +**Resubscription mode switching**: The disconnect handler chooses between two resubscription paths based on whether the session mode matches the entity presence: `(mode == TSMEntity) == isJust cId`. When they match, it calls `resubscribeSMPSession` which handles both service and queue resubscription in a single worker. 
When they don't match (e.g., entity-mode session disconnects but there's also a shared session), it separately resubscribes the service and queues, because they belong to different transport sessions. ### Session-aware subscription cleanup @@ -65,6 +89,8 @@ Unifies SMP/NTF/XFTP client management with associated types: SMP is special: `SMPConnectedClient` bundles the protocol client with `proxiedRelays :: TMap SMPServer ProxiedRelayVar`, a per-connection map of relay sessions for proxy routing. +XFTP is special in a different way: its `getProtocolServerClient` ignores the `NetworkRequestMode` parameter and always uses `NRMBackground` for `waitForProtocolClient`. This means XFTP connections always use background timing regardless of the caller's request mode. + ## Worker framework Defined here, consumed by Agent.hs, NtfSubSupervisor.hs, FileTransfer/Agent.hs, and simplex-chat. Two separable parts: @@ -75,8 +101,7 @@ Creates or reuses a worker for a given key. Workers are stored in a TMap keyed b - **Create-or-reuse**: atomically checks the map. If absent, creates a new `Worker` (with `doWork` TMVar pre-filled with `()`). If present and `hasWork=True`, signals the existing worker. - **Fork**: `runWorkerAsync` takes the `action` TMVar. If `Nothing` (worker idle), it starts work. If `Just weakThreadId` (worker running), it puts the value back and returns. This bracket ensures at-most-one concurrent execution. -- **Restart rate limiting**: on worker exit (success or error), checks `restartCount` against `maxWorkerRestartsPerMin`. If under the limit, restarts with `hasWorkToDo` signal. If over the limit, deletes the worker from the map and sends a `CRITICAL True` error. -- **Worker identity**: `workerId` (from `workerSeq`) prevents a stale restart from interfering with a new worker that replaced it in the map. +- **Restart rate limiting**: on worker exit (success or error), `restartOrDelete` checks `restartCount` against `maxWorkerRestartsPerMin`. 
If under the limit, resets `action` to `Nothing` (idle), signals `hasWorkToDo`, and reports `INTERNAL` error. If over the limit, deletes the worker from the map and sends a `CRITICAL True` error. The restart only happens if the worker's `workerId` still matches the map entry — a stale restart from a replaced worker silently no-ops. `getAgentWorker'` is the generic version with custom worker wrapper — used by `smpDeliveryWorkers` which pairs each Worker with a `TMVar ()` retry lock. @@ -90,16 +115,20 @@ Takes `getWork` (fetch next task) and `action` (process it) as separate paramete - **Work item error** (`isWorkItemError`): the worker stops and sends `CRITICAL False`. The next iteration would likely produce the same error, so stopping prevents infinite loops. - **Store error**: the flag is re-set and an `INTERNAL` error is reported. The assumption is that store errors are transient (e.g., DB busy) and retrying may succeed. -`withWorkItems` handles batched work — a list of items where some may have individual errors. If all items are work-item errors, the worker stops. If only some are, the worker continues with the successful items and reports errors. +`withWorkItems` handles batched work — a list of items where some may have individual errors. If all items are work-item errors, the worker stops. If only some are, the worker continues with the successful items and reports errors via `ERRS` event. ### runWorkerAsync — at-most-one execution Uses a bracket on the `action` TMVar: - `takeTMVar action` — blocks if another thread is starting the worker (TMVar empty during start) -- If the taken value is `Nothing` — worker is idle, start it. Store `Just weakThreadId` in the TMVar. +- If the taken value is `Nothing` — worker is idle, start it. Store `Just weakThreadId` in the TMVar via `forkIO`. - If `Just _` — worker is already running, put it back and return. 
-The `Weak ThreadId` in `action` is a weak reference — it doesn't prevent the worker thread from being garbage collected. This is the cleanup mechanism: if the thread dies without explicitly clearing `action`, the weak reference becomes stale and the next `runWorkerAsync` call will detect it as idle. +The `Weak ThreadId` in `action` is a weak reference — it doesn't prevent the worker thread from being garbage collected. It is used by `cancelWorker`, which calls `deRefWeak` to get the thread ID and kills it; if the thread was already GC'd, the kill is a no-op. The primary lifecycle management is through the `restartOrDelete` chain in `getAgentWorker'`, not the weak reference. + +### throwWhenNoDelivery — delivery worker self-termination + +Delivery workers call `throwWhenNoDelivery` to check if their entry still exists in the `smpDeliveryWorkers` map. If the worker was removed (delivery complete), it throws `ThreadKilled` to terminate the worker thread. This is distinct from `throwWhenInactive` (which checks global `active` state) — it allows individual workers to be stopped without shutting down the entire agent. ## Operation suspension cascade @@ -118,10 +147,12 @@ The cascade means: **`beginAgentOperation`** retries (blocks in STM) if the operation is suspended. This provides backpressure: new operations wait until the operation is resumed. -**`agentOperationBracket`** wraps an operation with begin/end. All database access goes through `withStore` which brackets with `AODatabase`. This ensures graceful shutdown propagates: suspending `AORcvNetwork` eventually suspends all downstream operations, and `notifySuspended` only fires when all in-flight operations have completed. +**`agentOperationBracket`** wraps an operation with begin/end. It takes a `check` function that runs before `beginAgentOperation` — typically `throwWhenInactive`, which throws `ThreadKilled` if the agent is inactive. All database access goes through `withStore` which brackets with `AODatabase`. 
This ensures graceful shutdown propagates: suspending `AORcvNetwork` eventually suspends all downstream operations, and `notifySuspended` only fires when all in-flight operations have completed. **`waitWhileSuspended`** vs **`waitUntilForeground`**: `waitWhileSuspended` proceeds during `ASSuspending` (allowing in-flight operations to complete), while `waitUntilForeground` blocks during both `ASSuspending` and `ASSuspended`. +**`waitForUserNetwork`**: bounded wait for network — if the network doesn't come online within `userNetworkInterval`, proceeds anyway. Uses `registerDelay` for the timeout. + ## Subscription management ### subscribeQueues — batch-by-transport-session @@ -133,17 +164,28 @@ The cascade means: 3. `addPendingSubs` marks all queues as pending before the RPC 4. `mapConcurrently` subscribes each session batch in parallel -### subscribeSessQueues_ — post-hoc session validation +### subscribeSessQueues_ — post-hoc session validation and atomicity + +After the subscription RPC completes, `subscribeSessQueues_` validates `activeClientSession` — checking that the SessionVar still holds the same client that was used for the RPC. If the client was replaced during the RPC (reconnection happened), the results are discarded (errors converted to temporary `BROKER NETWORK` to ensure retry) and resubscription is triggered. + +The post-RPC processing runs under `uninterruptibleMask_` for atomicity. The sequence is: +1. **Atomically**: `processSubResults` partitions results and updates subscription state; if there are client notices, takes `clientNoticesLock` TMVar +2. **IO**: `processRcvServiceAssocs` updates service associations in the DB +3. **IO**: `processClientNotices` updates notice state, always releases `clientNoticesLock` in `finally` + +The `clientNoticesLock` TMVar serializes notice processing across concurrent subscription batches. 
+ +**UP events for newly-active connections only**: After processing, UP events are sent only for connections that were NOT already active before this batch — existing active subscriptions (from `SS.getActiveSubs`) are excluded to prevent duplicate notifications. -After the subscription RPC completes, `subscribeSessQueues_` validates `activeClientSession` — checking that the SessionVar still holds the same client that was used for the RPC. If the client was replaced during the RPC (reconnection happened), the results are discarded and resubscription is triggered. This is optimistic execution with post-hoc validation: do the work, then check if it's still valid. +**Client close on all-temporary-error**: When ALL subscription results are temporary errors, no connections were already active, and the session is still current, the SMP client session is closed. This forces a fresh connection on the next attempt rather than reusing a potentially broken one. ### processSubResults — partitioning -Subscription results are partitioned into four categories: -1. **Failed with client notice** — queue has a server-side notice (e.g., queue status change) -2. **Failed permanently** — non-temporary error, queue is removed from pending and added to `removedSubs` -3. **Failed temporarily** — error is transient, queue stays in pending for retry on reconnect -4. **Subscribed** — moved from pending to active. Further split into: queues whose service ID matches the session service (added as service-associated) and others. +Subscription results are partitioned into five categories: +1. **Failed with client notice** — error has an associated server-side notice (e.g., queue status change). Queue is treated as failed (removed from pending, added to `removedSubs`) AND the notice is recorded for processing. +2. **Failed permanently** — non-temporary error without notice, queue is removed from pending and added to `removedSubs` +3. 
**Failed temporarily** — error is transient, queue stays in pending unchanged for retry on reconnect +4. **Subscribed** — moved from pending to active. Further split into: queues whose service ID matches the session service (added as service-associated) and others. If the queue had a tracked `clientNoticeId`, it is cleared (notice resolved by successful subscription). 5. **Ignored** — queue was not in the pending map (already activated by a concurrent path), counted for statistics only ### Resubscription worker @@ -155,6 +197,8 @@ Subscription results are partitioned into four categories: 3. Resubscribes service and queues 4. Loops until no pending subs remain +**Spawn guard**: Before creating a new worker, `resubscribeSMPSession` checks `SS.hasPendingSubs`. If there are no pending subs, it returns without spawning. This prevents creating idle workers. + **Cleanup blocks on TMVar fill** — the `cleanup` STM action retries (`whenM (isEmptyTMVar $ sessionVar v) retry`) until the async handle is inserted. This prevents the race where cleanup runs before the worker async is stored, which would leave a terminated worker in the map. ## Proxy routing — sendOrProxySMPCommand @@ -166,6 +210,21 @@ Implements SMP proxy/direct routing with fallback: 3. If proxying fails with a host error and `smpProxyFallback` allows it: falls back to direct connection 4. `deleteRelaySession` carefully validates that the current relay session matches the one that failed before removing it (prevents removing a concurrently-created replacement session) +**NO_SESSION retry limit**: On `NO_SESSION`, `sendViaProxy` is called recursively with `Just proxySrv` to reuse the same proxy server. If the recursive call also gets `NO_SESSION`, it throws `proxyError` instead of recursing again — `proxySrv_` is `Just`, so the `Nothing` branch (which recurses) is not taken. This limits retry to exactly one attempt. 
+ +**Proxy selection caching** (`smpProxiedRelays`): When `getSMPProxyClient` selects a proxy for a destination, it atomically inserts the destination→proxy mapping into `smpProxiedRelays`. If a mapping already exists (another thread selected a proxy for the same destination), the existing mapping is used. On relay creation failure with non-host errors, both the relay session and proxy mapping are removed. On host errors, they are preserved to allow fallback logic. + +## Service credentials lifecycle + +`getServiceCredentials` manages per-user, per-server service certificate credentials: + +1. Checks `useClientServices` — if the user has services disabled, returns `Nothing` +2. Looks up existing credentials in DB via `getClientServiceCredentials` +3. If none exist, generates new TLS credentials on-the-fly (`genCredentials`) and stores them +4. Extracts the private signing key from the X.509 certificate + +The generated credentials are Ed25519 self-signed certificates with `simplex` organization, valid for ~2740 years. The certificate chain and hash are bundled into `ServiceCredentials` for the SMP handshake. + ## withStore — database access bracket `withStore` wraps database access with `agentOperationBracket c AODatabase`, ensuring the operation suspension cascade is respected. SQLite errors are classified: @@ -174,6 +233,8 @@ Implements SMP proxy/direct routing with fallback: `SEAgentError` is a special wrapper that allows agent-level errors to be threaded through store operations — used when "transaction-like" access is needed but the operation involves agent logic, not just DB queries. See source comment: "network IO should NOT be used inside AgentStoreMonad." +`withStoreBatch` / `withStoreBatch'` run multiple DB operations in a single transaction, catching exceptions per-operation to report individual failures. The entire batch is within one `agentOperationBracket`.
+ ## Server selection — getNextServer / withNextSrv Server selection has two-level diversity: @@ -184,6 +245,14 @@ Server selection has two-level diversity: `withNextSrv` is designed for retry loops — it re-reads user servers on each call (allowing configuration changes during retries) and tracks `triedHosts` across attempts. When all hosts are tried, the tried set is reset (`S.empty`), creating a round-robin effect. +## Locking primitives + +**`withConnLock`**: Per-connection lock via `connLocks` TMap. Non-obvious: `withConnLock'` with empty `ConnId` is a no-op (identity function) — allows agent operations on entities without real connection IDs to skip locking. + +**`withConnLocks`**: Takes a `Set ConnId` and acquires locks for all connections. Uses `withGetLocks` which acquires all locks concurrently via `forConcurrently`. Note: concurrent acquisition of overlapping lock sets from different threads could theoretically deadlock, so callers must ensure non-overlapping lock sets or use higher-level coordination. + +**`getMapLock`**: Creates a lock on first access and caches it in the TMap. Locks are never removed — the TMap grows monotonically. + ## Network configuration — slow/fast selection `getNetworkConfig` selects between slow and fast network configs based on `userNetworkInfo`: @@ -198,11 +267,15 @@ Both configs are stored together in `useNetworkConfig :: TVar (NetworkConfig, Ne 2. Closes all protocol server clients (SMP, NTF, XFTP) by swapping maps to empty and forking close threads 3. Clears proxied relays 4. Cancels resubscription workers — forks cancellation threads (fire-and-forget, `closeAgentClient` may return before all workers are cancelled) -5. Clears delivery and async command workers +5. Clears delivery and async command workers (delivery workers are also cancelled via `cancelWorker`) 6. Clears subscription state The cancellation of resubscription workers reads the TMVar first (to get the Async handle), then calls `uninterruptibleCancel`.
This is wrapped in a forked thread to avoid blocking the shutdown sequence. +**`closeClient_` edge case**: When closing individual clients, `closeClient_` handles `BlockedIndefinitelyOnSTM` — which occurs if the SessionVar TMVar was never filled (connection attempt in progress when shutdown started). The exception is caught and treated as a no-op. + +**`reconnectServerClients` vs `closeProtocolServerClients`**: `closeProtocolServerClients` swaps the map to empty and closes all clients — no new connections can be made to those sessions. `reconnectServerClients` reads the map without clearing it and closes current clients — the disconnect callbacks trigger reconnection, effectively forcing fresh connections while keeping the session entries. + ## Transport session modes `TransportSessionMode` (`TSMEntity` vs other) determines whether the transport session key includes the entity ID (connection/queue ID). When `TSMEntity`, each queue gets its own TLS connection to the router. When not, queues to the same router share a connection. This is controlled by `sessionMode` in the network config. @@ -213,9 +286,15 @@ The cancellation of resubscription workers reads the TMVar first (to get the Asy `getQueueMessage` creates a TMVar lock keyed by `(server, rcvId)` and takes it before sending GET. This prevents concurrent GET and SUB on the same queue (SUB is checked via `hasGetLock` in `checkQueues`). The lock is released by `releaseGetLock` after ACK or on error. +The lock creation uses `TM.alterF` to atomically create-or-reuse: if no lock exists, creates a new `TMVar ()` and immediately takes it; if one exists, takes it. This avoids a race between two concurrent GET attempts on the same queue. + ## Error classification — temporaryAgentError Classifies errors as temporary (retryable) or permanent. 
Notable non-obvious classifications: - `TEHandshake BAD_SERVICE` is temporary — it indicates a DB error on the router, not a permanent rejection -- `CRITICAL True` is temporary — `True` means the error shows a restart button, implying the user should retry +- `CRITICAL True` is temporary — `True` means the error shows a restart button, implying the user should retry. `CRITICAL False` is permanent. - `INACTIVE` is temporary — the agent may be reactivated +- `SMP.PROXY NO_SESSION` via proxy is temporary — session can be re-established +- `SMP.STORE _` is temporary — server-side store error, not a client issue + +`temporaryOrHostError` extends `temporaryAgentError` to also include host-related errors (`HOST`, `TRANSPORT TEVersion`). Used in subscription management where host errors should trigger resubscription rather than permanent failure. From 8557d2ab291d4c191307e87b86ad9d664dd3e627 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:18:29 +0000 Subject: [PATCH 39/91] agent util specs --- .../Simplex/Messaging/Agent/Env/SQLite.md | 9 +++ spec/modules/Simplex/Messaging/Agent/Lock.md | 7 +++ .../Simplex/Messaging/Agent/QueryString.md | 7 +++ .../Simplex/Messaging/Agent/RetryInterval.md | 35 +++++++++++ spec/modules/Simplex/Messaging/Agent/Stats.md | 7 +++ .../Simplex/Messaging/Agent/TSessionSubs.md | 60 +++++++++++++++++++ 6 files changed, 125 insertions(+) create mode 100644 spec/modules/Simplex/Messaging/Agent/Env/SQLite.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Lock.md create mode 100644 spec/modules/Simplex/Messaging/Agent/QueryString.md create mode 100644 spec/modules/Simplex/Messaging/Agent/RetryInterval.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Stats.md create mode 100644 spec/modules/Simplex/Messaging/Agent/TSessionSubs.md diff --git a/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md b/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md new 
file mode 100644 index 0000000000..7bfb10bbc0 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md @@ -0,0 +1,9 @@ +# Simplex.Messaging.Agent.Env.SQLite + +> Agent environment configuration, default values, and worker/supervisor record types. + +**Source**: [`Agent/Env/SQLite.hs`](../../../../../../src/Simplex/Messaging/Agent/Env/SQLite.hs) + +## mkUserServers — silent fallback on all-disabled + +See comment on `mkUserServers`. If filtering servers by `enabled && role` yields an empty list, `fromMaybe srvs` falls back to *all* servers regardless of enabled/role status. This prevents a configuration where all servers are disabled from leaving the user with no servers — but means disabled servers can still be used if every server in a role is disabled. diff --git a/spec/modules/Simplex/Messaging/Agent/Lock.md b/spec/modules/Simplex/Messaging/Agent/Lock.md new file mode 100644 index 0000000000..8300266c77 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Lock.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Lock + +> TMVar-based named mutex with concurrent multi-lock acquisition. + +**Source**: [`Agent/Lock.hs`](../../../../../src/Simplex/Messaging/Agent/Lock.hs) + +No non-obvious behavior. See source. See comment on `getPutLock` for the atomicity argument. diff --git a/spec/modules/Simplex/Messaging/Agent/QueryString.md b/spec/modules/Simplex/Messaging/Agent/QueryString.md new file mode 100644 index 0000000000..cfcd994513 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/QueryString.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.QueryString + +> HTTP query string parsing utilities for connection link URIs. + +**Source**: [`Agent/QueryString.hs`](../../../../../src/Simplex/Messaging/Agent/QueryString.hs) + +No non-obvious behavior. See source. 
diff --git a/spec/modules/Simplex/Messaging/Agent/RetryInterval.md b/spec/modules/Simplex/Messaging/Agent/RetryInterval.md new file mode 100644 index 0000000000..dbc5c35f48 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/RetryInterval.md @@ -0,0 +1,35 @@ +# Simplex.Messaging.Agent.RetryInterval + +> Retry-with-backoff combinators for agent reconnection and worker loops. + +**Source**: [`Agent/RetryInterval.hs`](../../../../../src/Simplex/Messaging/Agent/RetryInterval.hs) + +## Overview + +Four retry combinators with increasing sophistication: basic (`withRetryInterval`), counted (`withRetryIntervalCount`), foreground-aware (`withRetryForeground`), and dual-interval with external wake-up (`withRetryLock2`). All share the same backoff curve via `nextRetryDelay`. + +## Backoff curve — nextRetryDelay + +Delay stays constant at `initialInterval` until `elapsed >= increaseAfter`, then grows by 1.5x per step (`delay * 3 / 2`) up to `maxInterval`. The `delay == maxInterval` guard short-circuits the comparison once the cap is reached. + +## updateRetryInterval2 — resume from saved state + +Sets `increaseAfter = 0` on both intervals. This skips the initial constant-delay phase — the next retry will immediately begin increasing from the saved interval. Used to restore retry state across reconnections without restarting from the initial interval. + +## withRetryForeground — reset on foreground/online transition + +The retry loop resets to `initialInterval` when either: +- The app transitions from background to foreground (`not wasForeground && foreground`) +- The network transitions from offline to online (`not wasOnline && online`) + +The STM transaction blocks on three things simultaneously: the `registerDelay` timer, the `isForeground` TVar, and the `isOnline` TVar. Whichever fires first unblocks the retry. On reset, elapsed time is zeroed. + +The `registerDelay` is capped at `maxBound :: Int` (~36 minutes on 32-bit) to prevent overflow. 
+ +## withRetryLock2 — interruptible dual-interval retry + +Maintains two independent backoff states (slow and fast) that the action toggles between by calling the loop continuation with `RISlow` or `RIFast`. Only the chosen interval advances; the other preserves its state. + +The `wait` function is the non-obvious part: it spawns a timer thread that puts `()` into the `lock` TMVar after the delay, while the main thread blocks on `takeTMVar lock`. This means the retry can be woken early by *external code* putting into the same TMVar — the timer is just a fallback. The `waiting` TVar prevents a stale timer from firing after the main thread has already been woken by an external signal. + +**Consumed by**: [Agent/Client.hs](./Client.md) — `reconnectSMPClient` uses the lock TMVar to allow immediate reconnection when new subscriptions arrive, rather than waiting for the full backoff delay. diff --git a/spec/modules/Simplex/Messaging/Agent/Stats.md b/spec/modules/Simplex/Messaging/Agent/Stats.md new file mode 100644 index 0000000000..d793564e7f --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Stats.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Stats + +> Per-server statistics counters (SMP, XFTP, NTF) with TVar-based live state and serializable snapshots. + +**Source**: [`Agent/Stats.hs`](../../../../../src/Simplex/Messaging/Agent/Stats.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md b/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md new file mode 100644 index 0000000000..0274de59db --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md @@ -0,0 +1,60 @@ +# Simplex.Messaging.Agent.TSessionSubs + +> Per-session subscription state machine tracking active and pending queue subscriptions. 
+ +**Source**: [`Agent/TSessionSubs.hs`](../../../../../src/Simplex/Messaging/Agent/TSessionSubs.hs) + +## Overview + +TSessionSubs manages the two-tier (active/pending) subscription state for SMP queues, keyed by transport session. Every subscription confirmation from a router is validated against the current session ID before being promoted to active — if the session has changed (reconnect happened), the subscription is demoted to pending for resubscription. + +Service subscriptions (aggregate, router-managed) and queue subscriptions (individual, per-recipient-ID) are tracked separately but follow the same active/pending pattern. + +**Consumed by**: [Agent/Client.hs](./Client.md) — `subscribeSMPQueues`, `subscribeSessQueues_`, `resubscribeSMPSession`, `smpClientDisconnected`. + +## Session ID gating + +The central invariant: a subscription is only active if it was confirmed on the *current* TLS session. Every function that promotes subscriptions to active (`addActiveSub'`, `batchAddActiveSubs`, `setActiveServiceSub`) checks `Just sessId == sessId'` (stored session ID). On mismatch, the subscription goes to pending instead — silently, with no error. + +This means subscription RPCs that succeed but return after a reconnect are safely caught: the response carries the old session ID, which won't match the new one stored by `setSessionId`. + +## setSessionId — silent demotion on reconnect + +`setSessionId` has three cases: +- **First call** (stored is `Nothing`): stores the session ID. No side effects. +- **Subsequent call with different ID**: calls `setSubsPending_`, which moves *all* active subscriptions to pending and demotes the active service subscription. The new session ID is stored. +- **Same ID**: no-op (the `unless` guard). + +This is the mechanism by which reconnection invalidates all prior subscriptions. Callers don't need to explicitly move subscriptions — setting the new session ID does it atomically.
+ +## addActiveSub' — service-associated queue elision + +When `serviceId_` is `Just` and `serviceAssoc` is `True`, the queue is **not** added to `activeSubs`. Instead, `updateActiveService` increments the service subscription's count and XORs the queue's `IdsHash`. The queue is also removed from `pendingSubs`. + +This means service-associated queues have no individual representation in `activeSubs` — they exist only as aggregated count + hash in `activeServiceSub`. The router tracks them via the service subscription; the agent doesn't need per-queue state. + +When `serviceAssoc` is `False` (or no service ID), the queue goes to `activeSubs` normally. + +## updateActiveService — accumulative XOR merge + +`updateActiveService` adds to an existing `ServiceSub` rather than replacing it. It increments the queue count (`n + addN`) and combines the IdsHash (`idsHash <> addIdsHash`). The `<>` on `IdsHash` is XOR — this means the hash is order-independent and can be built incrementally as individual subscription confirmations arrive. + +The guard `serviceId == serviceId'` silently drops updates if the service ID has changed (e.g., credential rotation happened between individual queue confirmations). + +## setSubsPending — mode-dependent redistribution + +`setSubsPending` handles two cases based on whether the transport session mode (entity vs shared) matches the session key shape: + +1. **Mode matches key shape** (`entitySession == isJust connId_`): in-place demotion via `setSubsPending_` — active subs move to pending within the same `SessSubs` entry. Session ID is cleared (`Nothing`). + +2. **Mode mismatch** (e.g., switching from shared session to entity mode): the entire `SessSubs` entry is **deleted** from the map (`TM.lookupDelete`), and all subscriptions are redistributed to new per-entity session keys via `addPendingSub (uId, srv, sessEntId (connId rq))`. This changes the map granularity — one shared entry becomes many entity entries.
+ +Both paths check `Just sessId == sessId'` first — if the stored session ID doesn't match the one being invalidated, no work is done (returns empty). + +## getSessSubs — lazy initialization + +`getSessSubs` creates a new `SessSubs` entry if none exists for the transport session. This means any write operation (`addPendingSub`, `setSessionId`, etc.) will create map entries as a side effect. Read operations (`hasActiveSub`, `getActiveSubs`) use `lookupSubs` instead, which returns `Nothing`/empty without creating entries. + +## updateClientNotices + +Adjusts the `clientNoticeId` field on pending subscriptions in bulk. Uses `M.adjust`, so missing recipient IDs are silently skipped. Only modifies pending subs — active subs are not touched because they've already been confirmed. From bde90500ea50ef180ba95abc4b7393f8c37119ef Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:00:22 +0000 Subject: [PATCH 40/91] agent store and notifications specs --- .../Messaging/Agent/NtfSubSupervisor.md | 75 ++++++++++++++++++ spec/modules/Simplex/Messaging/Agent/Store.md | 44 +++++++++++ .../Messaging/Agent/Store/AgentStore.md | 76 ++++++++++++++++++ .../Simplex/Messaging/Agent/Store/Common.md | 7 ++ .../Simplex/Messaging/Agent/Store/DB.md | 7 ++ .../Simplex/Messaging/Agent/Store/Entity.md | 7 ++ .../Messaging/Agent/Store/Interface.md | 7 ++ .../Simplex/Messaging/Agent/Store/Postgres.md | 23 ++++++ .../Simplex/Messaging/Agent/Store/SQLite.md | 26 ++++++ .../Simplex/Messaging/Agent/Store/Shared.md | 7 ++ .../Simplex/Messaging/Notifications/Client.md | 15 ++++ .../Messaging/Notifications/Protocol.md | 43 ++++++++++ .../Simplex/Messaging/Notifications/Server.md | 79 +++++++++++++++++++ .../Messaging/Notifications/Server/Control.md | 7 ++ .../Messaging/Notifications/Server/Env.md | 21 +++++ .../Messaging/Notifications/Server/Main.md | 7 ++ .../Notifications/Server/Push/APNS.md | 35 ++++++++ 
.../Server/Push/APNS/Internal.md | 7 ++ .../Messaging/Notifications/Server/Stats.md | 19 +++++ .../Messaging/Notifications/Server/Store.md | 23 ++++++ .../Notifications/Server/Store/Postgres.md | 54 +++++++++++++ .../Notifications/Server/Store/Types.md | 7 ++ .../Messaging/Notifications/Transport.md | 36 ++++----- .../Simplex/Messaging/Notifications/Types.md | 19 +++++ 24 files changed, 630 insertions(+), 21 deletions(-) create mode 100644 spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/Common.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/DB.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/Entity.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/Interface.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/Postgres.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/SQLite.md create mode 100644 spec/modules/Simplex/Messaging/Agent/Store/Shared.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Client.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Protocol.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Control.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Env.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Main.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS/Internal.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Stats.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Store.md create mode 100644 
spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Server/Store/Types.md create mode 100644 spec/modules/Simplex/Messaging/Notifications/Types.md diff --git a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md new file mode 100644 index 0000000000..33cd3eacba --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md @@ -0,0 +1,75 @@ +# Simplex.Messaging.Agent.NtfSubSupervisor + +> Supervisor-worker architecture for notification subscription lifecycle management. + +**Source**: [`Agent/NtfSubSupervisor.hs`](../../../../../src/Simplex/Messaging/Agent/NtfSubSupervisor.hs) + +## Architecture + +The notification system uses a supervisor with **three worker pools**, each keyed by server address: + +| Pool | Key | Purpose | +|------|-----|---------| +| `ntfWorkers` | NtfServer | Create/check/delete/rotate subscriptions on notification router | +| `ntfSMPWorkers` | SMPServer | Create/delete notifier credentials on messaging router | +| `ntfTknDelWorkers` | NtfServer | Delete tokens on notification router (background cleanup) | + +The supervisor (`runNtfSupervisor`) reads commands from `ntfSubQ` and dispatches work to the appropriate pools. Workers are created lazily via `getAgentWorker` and process batches from the database. + +## Non-obvious behavior + +### 1. 
NSCCreate four-way partition + +`partitionQueueSubActions` classifies each (queue, subscription) pair into one of four buckets: + +- **New sub**: no existing subscription record — create from scratch +- **Reset sub**: credentials mismatch (SMP server changed, notifier ID changed, action was nulled by error, or action is a delete) — wipe and restart from SMP key exchange +- **Continue SMP work**: existing action is `NSASMP` and credentials are consistent — kick the SMP worker +- **Continue NTF work**: existing action is `NSANtf` and credentials are consistent — kick the NTF worker + +The key decision point: when `subAction_` is `Nothing` (set by `workerErrors` after permanent failures), the subscription is treated as needing a full reset. This interacts with the null-action sentinel pattern from `AgentStore`. + +### 2. retrySubActions shrinking retry with TVar + +`retrySubActions` holds the list of subs-to-retry in a `TVar`. Each iteration, the action function returns only the subs that got temporary errors (via `splitResults`). The `TVar` is overwritten with this shrinking list. On success or permanent error, subs drop out. This means retry batches get smaller over time. + +`splitResults` implements a three-way partition: temporary errors → retry, permanent errors → null the action + notify, successes → continue pipeline. + +### 3. rescheduleWork deferred wake-up + +When the NTF worker finds that all pending `NSACheck` actions have future timestamps, it does not spin-wait. Instead it: +1. Takes itself out of the `doWork` TMVar (so the worker blocks on `waitForWork`) +2. Forks a thread that sleeps until the first action's timestamp +3. The forked thread re-signals `doWork` when the time arrives + +This is the mechanism for time-scheduled subscription health checks. + +### 4. 
checkSubs AUTH triggers full recreation + +When the notification router returns `AUTH` for a subscription check, the subscription is not simply marked as failed — it is fully recreated from scratch by resetting to `NSASMP NSASmpKey` state. This handles the case where the notification router has lost its subscription state (restart, data loss). The SMP worker is kicked to re-establish notifier credentials. + +Non-AUTH failure statuses that are not in `subscribeNtfStatuses` also trigger recreation. + +### 5. deleteToken two-phase with restart survival + +Token deletion splits into two phases: +1. **Store phase**: Remove token from active store, persist `(server, privateKey, tokenId)` to a deletion queue via `addNtfTokenToDelete` +2. **Network phase**: `runNtfTknDelWorker` reads from the queue and performs the actual server-side deletion + +On supervisor startup, `startTknDelete` scans for any pending deletion queue entries and launches workers. This ensures token cleanup survives agent restarts. + +If the token has no server-side ID (`ntfTokenId = Nothing`), only the store phase runs — no worker is launched. + +### 6. workerErrors nulls subscription action + +When permanent (non-temporary, non-host) errors occur in batch operations, `workerErrors` sets the subscription's action to `NULL` in the database and notifies the client. The next `NSCCreate` for that connection will see `subAction_ = Nothing` in `contOrReset` and trigger a full subscription reset. + +This null-action sentinel is the bridge between worker failure recovery and supervisor-driven re-creation. + +### 7. NSADelete and NSARotate are deprecated + +These NTF worker actions are no longer generated by current code but are kept for processing legacy database records. They are explicitly not batched (processed one at a time via `mapM`). `NSARotate` deletes the subscription then re-queues `NSCCreate` back to the supervisor. + +### 8. 
Stats counting groups by userId + +`incStatByUserId` groups batch subscriptions by `userId` before incrementing stats counters, ensuring per-user counts are accurate even when a single batch contains subscriptions from multiple users. diff --git a/spec/modules/Simplex/Messaging/Agent/Store.md b/spec/modules/Simplex/Messaging/Agent/Store.md new file mode 100644 index 0000000000..0eecbf8d15 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store.md @@ -0,0 +1,44 @@ +# Simplex.Messaging.Agent.Store + +> Domain entity types for agent persistence — queues, connections, messages, commands, and store errors. + +**Source**: [`Agent/Store.hs`](../../../../../src/Simplex/Messaging/Agent/Store.hs) + +## Overview + +This module defines the data types that represent agent state. It contains no database operations — those are in [AgentStore.hs](./Store/AgentStore.md). The key abstractions are: + +- **Queue types** (`StoredRcvQueue`, `StoredSndQueue`) parameterized by `DBStored` phantom type for new vs persisted distinction +- **Connection GADT** (`Connection'`) encoding the connection state machine at the type level +- **Message containers** (`RcvMsgData`, `SndMsgData`, `PendingMsgData`) for the message lifecycle +- **Store errors** (`StoreError`) including two sentinel errors with special semantics + +## Connection' — type-level state machine + +The `Connection'` GADT encodes connection lifecycle as a type parameter: `CNew` → `CRcv`/`CSnd` → `CDuplex`, plus `CContact` for reusable contact connections. `SomeConn` wraps an existential to store connections of unknown type. + +`TestEquality SConnType` deliberately omits `SCNew` — `testEquality SCNew SCNew` returns `Nothing`. This is intentional: `NewConnection` has no queues and is not a valid target for type-level connection matching in store operations. + +## canAbortRcvSwitch — race condition boundary + +See comments on `canAbortRcvSwitch`. 
The `RSSendingQUSE` and `RSReceivedMessage` states cannot be aborted because the sender may have already deleted the original queue. Aborting (deleting the new queue) at that point would break the connection with no recovery path. + +## ratchetSyncAllowed / ratchetSyncSendProhibited — cross-repo contract + +See comments on `ratchetSyncAllowed`. Both functions carry the comment "this function should be mirrored in the clients" — simplex-chat must implement identical logic. The agent enforces these state checks, but the chat client also needs them for UI decisions (e.g., disabling send when `ratchetSyncSendProhibited`). + +## SEWorkItemError — worker suspension sentinel + +`SEWorkItemError` is a sentinel error that triggers worker suspension when encountered during work item retrieval. The `AnyStoreError` typeclass exposes `isWorkItemError` for the worker framework ([Agent/Client.hs](./Client.md)) to detect this case. The comment "do not use!" means it should not be thrown for normal error conditions — only when the work item itself is corrupt/unreadable and the worker should stop rather than retry. + +## SEAgentError — store-level error wrapping + +`SEAgentError` wraps `AgentErrorType` inside store operations. This allows store functions to return agent-level errors (e.g., connection state violations detected during a DB transaction) without breaking the `ExceptT StoreError` type. The "to avoid race conditions" rationale: checking a condition and acting on it must happen in the same DB transaction, so the agent error is returned through the store error channel. + +## InvShortLink — secure-on-read semantics + +See comment on `InvShortLink`. Stored separately from the connection because 1-time invitation short links have a "secure-on-read" property: accessing the link data on the router marks it as read, preventing undetected observation. The `sndPrivateKey` is persisted to allow retries of the link creation without generating new keys. 
+ +## RcvQueueSub — subscription-optimized projection + +`RcvQueueSub` strips cryptographic fields from `RcvQueue`, keeping only what's needed for subscription tracking in [TSessionSubs](./TSessionSubs.md). This reduces memory pressure when tracking thousands of subscriptions in STM. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md b/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md new file mode 100644 index 0000000000..9fbc2beb32 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md @@ -0,0 +1,76 @@ +# Simplex.Messaging.Agent.Store.AgentStore + +> Core CRUD operations for agent persistence — users, connections, queues, messages, ratchets, notifications, and file transfers. + +**Source**: [`Agent/Store/AgentStore.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/AgentStore.hs) + +## Overview + +At ~3700 lines, this is the largest module in the codebase. It implements all database operations for the agent, compiled with CPP for both SQLite and PostgreSQL backends. Most functions are straightforward SQL CRUD, but several patterns are non-obvious. + +The module re-exports `withConnection`, `withTransaction`, `withTransactionPriority`, `firstRow`, `firstRow'`, `maybeFirstRow`, and `fromOnlyBI` from the backend-specific Common module. + +## Dual-backend compilation + +The module uses `#if defined(dbPostgres)` throughout. Key behavioral differences: +- **Row locking**: PostgreSQL uses `FOR UPDATE` on reads that precede writes (e.g., `getConnForUpdate`, `getRatchetForUpdate`, `retrieveLastIdsAndHashRcv_`). SQLite relies on its single-writer model instead. +- **Batch queries**: PostgreSQL uses `IN ?` with `In` wrapper for batch operations. SQLite falls back to per-row `forM` loops. +- **Constraint handling**: PostgreSQL uses `constraintViolation`, SQLite checks `SQL.ErrorConstraint`. 
+ +## getWorkItem / getWorkItems — worker store pattern + +`getWorkItem` implements the store-side pattern for the [worker framework](../Client.md): `getId → getItem → markFailed`. If `getId` or `getItem` throws an IO exception, `handleWrkErr` wraps it as `SEWorkItemError` (via `mkWorkItemError`), which signals the worker to suspend rather than retry. This prevents crash loops on corrupt data. + +`getWorkItems` extends this to batch work items, where each item failure is independent. + +**Consumed by**: `getPendingQueueMsg`, `getPendingServerCommand`, `getNextNtfSubNTFActions`, `getNextNtfSubSMPActions`, `getNextDeletedSndChunkReplica`, `getNextNtfTokenToDelete`. + +## Notification subscription — supervisor/worker coordination + +`updateNtfSubscription`, `setNullNtfSubscriptionAction`, and `deleteNtfSubscription` all check `updated_by_supervisor` before writing. When `True`, the worker only updates local fields (ntf IDs, status) and skips action/server fields that the supervisor may have changed. This prevents the worker from overwriting supervisor decisions during concurrent execution. + +`markUpdatedByWorker` resets the flag to `False` before each work item is processed, so the worker "claims" the subscription for the duration of its operation. + +## createServer / getServerKeyHash_ — key hash migration + +`createServer` returns `Maybe KeyHash`: `Nothing` means the server was newly created with the passed hash; `Just kh` means the server already existed and the passed hash differs from the stored one. This `Just` value is stored as `server_key_hash` on queues to allow per-queue key hash overrides. + +The `COALESCE(q.server_key_hash, s.key_hash)` pattern appears throughout queries — queues can override the server-level hash, enabling gradual migration when a router's identity key changes. + +## updateRcvMsgHash / updateSndMsgHash — race condition guard + +Both functions include `AND last_internal_*_msg_id = ?` in their UPDATE WHERE clause. 
This prevents a race: if another message was processed between `updateIds` and `updateHash` (incrementing the last ID), the hash update is silently skipped rather than corrupting the chain. See comments on these functions. + +## deleteConn — conditional delivery wait + +Four cases (three deletion paths and one skip): +1. No timeout: immediate delete. +2. Timeout + no pending deliveries: immediate delete. +3. Timeout + pending deliveries + `deleted_at_wait_delivery` expired: delete. +4. Timeout + pending deliveries + not expired: return `Nothing` (skip). + +This allows graceful delivery completion before connection cleanup. + +## createSndConn — confirmed queue guard + +See comment on `createSndConn`. Checks `checkConfirmedSndQueueExists_` before creating, because `insertSndQueue_` uses `ON CONFLICT DO UPDATE` which would silently replace an existing confirmed send queue. The pre-check prevents this destructive upsert. + +## insertRcvQueue_ / insertSndQueue_ — queue ID preservation + +Both functions check if a queue already exists (by server + queue ID) and reuse the existing database `queue_id`. If not found, they generate the next sequential ID (`MAX + 1`). This preserves database IDs across retries of queue creation. + +## createClientService — service_id reset on upsert + +The `ON CONFLICT DO UPDATE` clause sets `service_id = NULL` when credentials are updated. This forces re-registration with the router after credential rotation — the old service ID is invalidated. + +## deleteSndMsgDelivery — conditional message retention + +After removing the delivery record, checks whether any pending deliveries remain for the message. If none remain and the receipt status is `MROk`, the entire message is deleted. Otherwise, if `keepForReceipt` is true, only the message body is cleared (for debugging receipt mismatches). Handles shared `snd_message_bodies` with `FOR UPDATE` locking on PostgreSQL to prevent concurrent deletion races.
+ +## createWithRandomId' — bounded retry + +Generates random 12-byte IDs (base64url encoded) and retries up to 3 times on constraint violations (unique ID collision). Returns `SEUniqueID` if all attempts fail. + +## setRcvQueuePrimary / setSndQueuePrimary — two-step primary swap + +First clears primary flag on all queues in the connection, then sets it on the target queue. Also clears `replace_*_queue_id` on the new primary — this completes the queue rotation by removing the "replacing" marker. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/Common.md b/spec/modules/Simplex/Messaging/Agent/Store/Common.md new file mode 100644 index 0000000000..45db84995c --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/Common.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Store.Common + +> CPP-conditional re-export of backend-specific common utilities (DBStore, withConnection, withTransaction). + +**Source**: [`Agent/Store/Common.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/Common.hs) + +No non-obvious behavior. See source. One of three CPP re-export wrappers (Interface, Common, DB). diff --git a/spec/modules/Simplex/Messaging/Agent/Store/DB.md b/spec/modules/Simplex/Messaging/Agent/Store/DB.md new file mode 100644 index 0000000000..70be997d69 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/DB.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Store.DB + +> CPP-conditional re-export of backend-specific database primitives (Connection, FromField, ToField). + +**Source**: [`Agent/Store/DB.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/DB.hs) + +No non-obvious behavior. See source. One of three CPP re-export wrappers (Interface, Common, DB). 
diff --git a/spec/modules/Simplex/Messaging/Agent/Store/Entity.md b/spec/modules/Simplex/Messaging/Agent/Store/Entity.md new file mode 100644 index 0000000000..801398f1ad --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/Entity.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Store.Entity + +> Phantom-typed database entity IDs distinguishing new (unsaved) from stored records. + +**Source**: [`Agent/Store/Entity.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/Entity.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/Interface.md b/spec/modules/Simplex/Messaging/Agent/Store/Interface.md new file mode 100644 index 0000000000..923cbfca9b --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/Interface.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Store.Interface + +> CPP-conditional re-export of the active database backend (SQLite or PostgreSQL). + +**Source**: [`Agent/Store/Interface.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/Interface.hs) + +No non-obvious behavior. See source. One of three CPP re-export wrappers (Interface, Common, DB) that select the active backend at compile time via `dbPostgres`. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/Postgres.md b/spec/modules/Simplex/Messaging/Agent/Store/Postgres.md new file mode 100644 index 0000000000..8cb29c1b0e --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/Postgres.md @@ -0,0 +1,23 @@ +# Simplex.Messaging.Agent.Store.Postgres + +> PostgreSQL backend — dual-pool connection management, schema lifecycle, and migration. + +**Source**: [`Agent/Store/Postgres.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/Postgres.hs) + +## Dual pool architecture + +`connectPostgresStore` creates two connection pools (`dbPriorityPool` and `dbPool`), each with `poolSize` connections. Priority pool is used by `withTransactionPriority` for operations that shouldn't be blocked by regular queries. 
Both pools are TBQueue-based — connections are taken and returned after use. + +All connections are created eagerly at initialization, not lazily on demand. + +## uninterruptibleMask_ — pool atomicity invariant + +See comment on `connectStore`. `uninterruptibleMask_` prevents async exceptions from interrupting pool filling or draining. The invariant: when `dbClosed = True`, queues are empty; when `False`, queues are full (or connections are in-flight with threads that will return them). Interruption mid-fill would break this invariant. + +## Schema creation — fail-fast on missing + +If the PostgreSQL schema doesn't exist and `createSchema` is `False`, the process logs an error and calls `exitFailure`. This prevents silent operation against the wrong schema. + +## execSQL — not implemented + +`execSQL` throws "not implemented" — the PostgreSQL client doesn't support raw SQL execution via the agent API. The function exists only to satisfy the shared interface. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md b/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md new file mode 100644 index 0000000000..2513882ff5 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md @@ -0,0 +1,26 @@ +# Simplex.Messaging.Agent.Store.SQLite + +> SQLite backend — store creation, encrypted connection management, migration, and custom SQL functions. + +**Source**: [`Agent/Store/SQLite.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/SQLite.hs) + +## Security-relevant PRAGMAs + +`connectDB` sets PRAGMAs at connection time: +- `secure_delete = ON`: data is overwritten (not just unlinked) on DELETE +- `auto_vacuum = FULL`: freed pages are reclaimed immediately +- `foreign_keys = ON`: referential integrity enforced + +These are set per-connection, not per-database — every new connection (including re-opens) gets them. + +## simplex_xor_md5_combine — custom SQLite function + +A C-exported SQLite function registered at connection time. 
Takes an existing `IdsHash` and a `RecipientId`, XORs the hash with the MD5 of the ID. This is the SQLite implementation of the accumulative IdsHash used by service subscriptions (see [TSessionSubs.md](../TSessionSubs.md#updateActiveService--accumulative-xor-merge)). PostgreSQL uses its native `md5()` and `decode()` functions instead. + +## openSQLiteStore_ — connection swap under MVar + +Uses `bracketOnError` with `takeMVar`/`tryPutMVar`: takes the connection MVar, creates a new connection, and puts the new one back. If connection fails, `tryPutMVar` restores the old connection. The `dbClosed` TVar is flipped atomically with the key update. + +## storeKey — conditional key retention + +`storeKey key keepKey` stores the encryption key in the `dbKey` TVar only if `keepKey` is true. This allows `reopenDBStore` to re-open without the caller re-supplying the key. If `keepKey` is false and the store is closed, `reopenDBStore` fails with "no key". diff --git a/spec/modules/Simplex/Messaging/Agent/Store/Shared.md b/spec/modules/Simplex/Messaging/Agent/Store/Shared.md new file mode 100644 index 0000000000..bc60de14e0 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Agent/Store/Shared.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Agent.Store.Shared + +> Migration types, error reporting, and confirmation modes shared across database backends. + +**Source**: [`Agent/Store/Shared.hs`](../../../../../../src/Simplex/Messaging/Agent/Store/Shared.hs) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/Messaging/Notifications/Client.md b/spec/modules/Simplex/Messaging/Notifications/Client.md new file mode 100644 index 0000000000..d2c3eef0e2 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Client.md @@ -0,0 +1,15 @@ +# Simplex.Messaging.Notifications.Client + +> Typed wrappers around `ProtocolClient` for NTF protocol commands. 
+ +**Source**: [`Notifications/Client.hs`](../../../../../src/Simplex/Messaging/Notifications/Client.hs) + +## Non-obvious behavior + +### 1. Subscription operations always use NRMBackground + +`ntfCreateSubscription`, `ntfCheckSubscription`, `ntfDeleteSubscription`, and their batch variants hardcode `NRMBackground` as the network request mode. Token operations (`ntfRegisterToken`, `ntfVerifyToken`, etc.) accept the mode as a parameter. This reflects that subscription management is a background activity driven by the supervisor, while token operations can be user-initiated. + +### 2. Batch operations return per-item errors + +`ntfCreateSubscriptions` and `ntfCheckSubscriptions` return `NonEmpty (Either NtfClientError result)` — individual items in a batch can fail independently. Callers must handle partial success (some created, some failed). The singular variants throw on any error. diff --git a/spec/modules/Simplex/Messaging/Notifications/Protocol.md b/spec/modules/Simplex/Messaging/Notifications/Protocol.md new file mode 100644 index 0000000000..9354e20862 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Protocol.md @@ -0,0 +1,43 @@ +# Simplex.Messaging.Notifications.Protocol + +> NTF protocol entities, commands, responses, and wire encoding for the notification system. + +**Source**: [`Notifications/Protocol.hs`](../../../../../src/Simplex/Messaging/Notifications/Protocol.hs) + +## Non-obvious behavior + +### 1. 
Asymmetric credential validation + +`checkCredentials` enforces different rules per command category: + +| Category | Signature required | Entity ID | +|----------|-------------------|-----------| +| TNEW, SNEW | Yes | Must be empty (new entity) | +| PING | No | Must be empty | +| All others | Yes | Must be present | + +For responses, the rule inverts: `NRTknId`, `NRSubId`, and `NRPong` must NOT have entity IDs (they are returned before/without entity context), while `NRErr` optionally has one (errors can occur with or without entity context). + +### 2. PNMessageData semicolon separator + +`encodePNMessages` uses `;` as the separator between push notification message items instead of the standard `,` used by `NonEmpty` `strEncode`. This is because `SMPQueueNtf` contains an `SMPServer` whose host list encoding already uses commas, which would create ambiguous parsing. + +### 3. NTInvalid reason is version-gated + +When encoding `NRTkn` responses, the `NTInvalid` reason is only included if the negotiated protocol version is >= `invalidReasonNTFVersion` (v3). Older clients receive `NTInvalid Nothing`. This prevents parse failures on clients that don't understand the reason field. + +### 4. subscribeNtfStatuses migration invariant + +The comment on `subscribeNtfStatuses` (`[NSNew, NSPending, NSActive, NSInactive]`) warns that changing these statuses requires a new database migration for queue ID hashes (see `m20250830_queue_ids_hash`). This is a cross-module invariant between protocol types and server storage. + +### 5. allowNtfSubCommands permits NTInvalid and NTExpired + +Token status `NTInvalid` allows subscription commands (SNEW, SCHK, SDEL), which is counterintuitive. The rationale (noted in a TODO comment) is that invalidation can happen after verification, and existing subscriptions should remain manageable. `NTExpired` is also permitted for the same reason. + +### 6. PPApnsNull test provider + +`PPApnsNull` is a push provider that never communicates with APNS. 
It's used for end-to-end testing of the notification server from clients without requiring actual push infrastructure. + +### 7. DeviceToken hex validation + +`DeviceToken` string parsing has two paths: a hardcoded literal match for `"apns_null test_ntf_token"` (test tokens), and hex string validation for real tokens (must be even-length hex). The wire encoding (`smpP`) does not perform this validation — it accepts any `ByteString`. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server.md b/spec/modules/Simplex/Messaging/Notifications/Server.md new file mode 100644 index 0000000000..9c88cf7a03 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server.md @@ -0,0 +1,79 @@ +# Simplex.Messaging.Notifications.Server + +> NTF server: manages tokens, subscriptions, SMP subscriber connections, and push notification delivery. + +**Source**: [`Notifications/Server.hs`](../../../../../src/Simplex/Messaging/Notifications/Server.hs) + +## Architecture + +The NTF server runs several concurrent threads via `raceAny_`: + +| Thread | Purpose | +|--------|---------| +| `ntfSubscriber` | Receives SMP messages (NMSG, END, DELD) and agent events (connect/disconnect/subscribe) | +| `ntfPush` | Reads push queue and delivers via APNS provider | +| `periodicNtfsThread` | Sends periodic "check messages" push notifications (cron) | +| `runServer` (per transport) | Accepts client connections and runs NTF protocol | +| Stats/Prometheus/Control | Optional monitoring and admin threads | + +Each client connection spawns `receive`, `send`, and `client` threads via `raceAny_`. + +## Non-obvious behavior + +### 1. Timing attack mitigation on entity lookup + +When `verifyNtfTransmission` encounters an AUTH error (entity not found), it calls `dummyVerifyCmd` to equalize response timing before returning the error. This prevents attackers from distinguishing "entity doesn't exist" from "signature invalid" based on response latency. + +### 2. 
TNEW idempotent re-registration + +When TNEW is received for an already-registered token, the server: +1. Looks up the existing token via `findNtfTokenRegistration` +2. Verifies the DH secret matches (recomputed from the new `dhPubKey` and stored `tknDhPrivKey`) +3. If DH secrets differ → AUTH error (prevents token hijacking) +4. If they match → re-sends verification push notification + +This makes TNEW safe for client retransmission after connection drops. + +### 3. SNEW idempotent subscription + +When SNEW is received for an existing subscription (same token + SMP queue), the server returns the existing `ntfSubId` if the notifier key matches. If keys differ, AUTH error. New subscriptions are only created when no match exists in `findNtfSubscription`. + +### 4. PPApnsNull suppresses statistics + +`incNtfStatT` skips all stat increments when the device token uses `PPApnsNull` provider. This prevents test tokens from polluting production metrics. + +### 5. END requires active session validation + +SMP END messages are only processed when the originating session is the currently active session for that server (`activeClientSession'` check). This prevents stale END messages from previous (reconnected) sessions from incorrectly marking subscriptions as ended. + +### 6. waitForSMPSubscriber two-phase wait + +`waitForSMPSubscriber` first tries a non-blocking `tryReadTMVar`. If the subscriber isn't ready yet, it falls back to a blocking `readTMVar` with a 10-second timeout. This avoids creating an extra timeout thread in the common case where the subscriber is already available. + +### 7. CAServiceUnavailable triggers individual resubscription + +When a service subscription becomes unavailable (SMP server rejects service credentials), the NTF server: +1. Removes the service association from the database +2. Resubscribes all individual queues for that server via `subscribeSrvSubs` + +This is the fallback path from service-level to queue-level SMP subscriptions. + +### 8. 
Push delivery single retry + +`deliverNotification` retries exactly once on connection errors (`PPConnection`) or `PPRetryLater`: +1. Creates a new push client (`newPushClient`) to get a fresh connection +2. Retries the delivery + +On the second failure, the error is logged and returned. `PPTokenInvalid` marks the token as `NTInvalid` on either the first or retry attempt. + +### 9. TCRN minimum interval enforcement + +Cron notification interval has a hard minimum of 20 minutes. `TCRN 0` disables cron notifications. `TCRN n` where `1 <= n < 20` returns `QUOTA` error. + +### 10. Startup resubscription is concurrent per server + +`resubscribe` uses `mapConcurrently` to resubscribe to all known SMP servers in parallel. Within each server, subscriptions are paginated via `subscribeLoop` using cursor-based pagination (`afterSubId_`). + +### 11. receive separates error responses from commands + +The `receive` function processes incoming transmissions and partitions results: malformed/unauthorized requests are written directly to `sndQ` as error responses, while valid commands go to `rcvQ` for processing. This ensures protocol errors get immediate responses without competing for the command processing queue. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Control.md b/spec/modules/Simplex/Messaging/Notifications/Server/Control.md new file mode 100644 index 0000000000..897f81c16f --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Control.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Notifications.Server.Control + +> Control port command protocol for NTF server administration. + +**Source**: [`Notifications/Server/Control.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Control.hs) + +No non-obvious behavior. See source. 
diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Env.md b/spec/modules/Simplex/Messaging/Notifications/Server/Env.md new file mode 100644 index 0000000000..96221a0124 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Env.md @@ -0,0 +1,21 @@ +# Simplex.Messaging.Notifications.Server.Env + +> NTF server environment: configuration, subscriber state, and push provider management. + +**Source**: [`Notifications/Server/Env.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Env.hs) + +## Non-obvious behavior + +### 1. Service credentials are lazily generated + +`mkDbService` in `newNtfServerEnv` generates service credentials on demand: when `getCredentials` is called for an SMP server, it first checks the database. If credentials exist, they are used. If not (`Nothing`), new credentials are generated via `genCredentials`, stored in the database, and returned. This happens per SMP server on first connection. + +Service credentials are only used when `useServiceCreds` is enabled in the config. + +### 2. PPApnsNull creates a no-op push client + +`newPushClient` checks `apnsProviderHost` for the push provider. `PPApnsNull` returns `Nothing`, which creates a no-op client (`\_ _ -> pure ()`). Real providers create an actual APNS connection. This is the mechanism that allows `PPApnsNull` tokens to function without push infrastructure. + +### 3. getPushClient lazy initialization + +`getPushClient` looks up the push client by provider in `pushClients` TMap. If not found, it calls `newPushClient` to create and register one. Push provider connections are established on first use, not at server startup. 
diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Main.md b/spec/modules/Simplex/Messaging/Notifications/Server/Main.md new file mode 100644 index 0000000000..3719dcd978 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Main.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Notifications.Server.Main + +> CLI interface and INI configuration parsing for the NTF server. + +**Source**: [`Notifications/Server/Main.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Main.hs) + +No non-obvious behavior. Standard CLI/config boilerplate. Notable defaults: `subsBatchSize = 900`, `periodicNtfsInterval = 5 minutes`, `pushQSize = 32768`, `persistErrorInterval = 0` (disables SMP client reconnection error persistence). diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md new file mode 100644 index 0000000000..2a6d8c0b12 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md @@ -0,0 +1,35 @@ +# Simplex.Messaging.Notifications.Server.Push.APNS + +> Apple Push Notification Service (APNS) client: JWT authentication, HTTP/2 delivery, and e2e encryption. + +**Source**: [`Notifications/Server/Push/APNS.hs`](../../../../../../../src/Simplex/Messaging/Notifications/Server/Push/APNS.hs) + +## Non-obvious behavior + +### 1. PNCheckMessages is not encrypted + +`PNVerification` and `PNMessage` notifications are encrypted with the shared DH secret (`C.cbEncrypt`) and padded to `paddedNtfLength` (3072 bytes) to prevent metadata leakage. `PNCheckMessages` is sent as a plain `{"checkMessages": true}` background notification — it carries no sensitive data and doesn't need e2e encryption. + +### 2. Fixed-length encryption padding + +All encrypted notifications are padded to `paddedNtfLength` (3072 bytes) regardless of actual content size. 
This prevents notification size from revealing whether it's a verification code (small) or a message batch (larger). + +### 3. JWT token caching with TTL refresh + +`getApnsJWTToken` caches the signed JWT and only regenerates it when the token age exceeds `tokenTTL` (30 minutes). No locking is used — if two threads race to refresh, last writer wins, which is acceptable since both produce valid tokens. + +### 4. HTTP/2 reconnect-on-use + +`createAPNSPushClient` registers a disconnect callback that sets `https2Client` to `Nothing`. `getApnsHTTP2Client` lazily reconnects on the next push delivery attempt. The connection is not proactively maintained. + +### 5. 503 triggers active disconnect before retry + +When APNS returns 503 (Service Unavailable), the client actively closes the HTTP/2 connection (`disconnectApnsHTTP2Client`) before throwing `PPRetryLater`. This ensures a fresh connection is established on retry rather than reusing a potentially degraded connection. + +### 6. ExpiredProviderToken is permanent + +403 errors for `ExpiredProviderToken` and `InvalidProviderToken` are classified as `PPPermanentError` rather than retryable. Since `getApnsJWTToken` just refreshed the JWT before the request, retrying with the same key would produce the same error. This indicates a configuration problem (wrong key/team ID). + +### 7. EC key type assumption + +`readECPrivateKey` uses a specific pattern match for EC keys (`PrivKeyEC_Named`). It will crash at runtime if the APNS key file contains a different key type. The comment acknowledges this limitation. 
diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS/Internal.md b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS/Internal.md new file mode 100644 index 0000000000..b42753e986 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS/Internal.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Notifications.Server.Push.APNS.Internal + +> APNS HTTP header constants and JSON encoding options. + +**Source**: [`Notifications/Server/Push/APNS/Internal.hs`](../../../../../../../../src/Simplex/Messaging/Notifications/Server/Push/APNS/Internal.hs) + +No non-obvious behavior. See source. Defines APNS header names and JSON options (`UntaggedValue` sum encoding, `camelTo2 '-'` for hyphenated field names like `content-available`, `mutable-content`). diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md b/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md new file mode 100644 index 0000000000..971419abfe --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md @@ -0,0 +1,19 @@ +# Simplex.Messaging.Notifications.Server.Stats + +> NTF server statistics collection with own-server breakdown and backward-compatible persistence. + +**Source**: [`Notifications/Server/Stats.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Stats.hs) + +## Non-obvious behavior + +### 1. incServerStat double lookup + +`incServerStat` performs a non-STM IO lookup first, then only enters an STM transaction on cache miss. The STM block re-checks the map to handle races (another thread may have inserted between the IO lookup and STM entry). This avoids contention on the shared TMap in the common case where the server's counter TVar already exists. + +### 2. setNtfServerStats is not thread safe + +`setNtfServerStats` is explicitly documented as non-thread-safe and intended for server startup only (restoring from backup file). + +### 3. 
Backward-compatible parsing + +The `strP` parser uses `opt` which defaults missing fields to 0. This allows reading stats files from older server versions that don't include newer fields (`ntfReceivedAuth`, `ntfFailed`, `ntfVrf*`, etc.). diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md new file mode 100644 index 0000000000..33acdaad9b --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md @@ -0,0 +1,23 @@ +# Simplex.Messaging.Notifications.Server.Store + +> STM-based in-memory store for notification tokens, subscriptions, and last-notification accumulation. + +**Source**: [`Notifications/Server/Store.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Store.hs) + +## Non-obvious behavior + +### 1. Two-level token registration index + +`tokenRegistrations` uses a nested TMap: `DeviceToken -> TMap ByteString NtfTokenId`, where the inner key is the serialized verify key. This allows **multiple concurrent registrations** per device token (with different keys), protecting against malicious registration attempts if a token is compromised. The inner key is derived via `C.toPubKey C.pubKeyBytes`. + +### 2. stmRemoveInactiveTokenRegistrations cleans up rivals + +When a token is activated, `stmRemoveInactiveTokenRegistrations` removes ALL other registrations for the same device token, including their token records, last notifications, and all subscriptions. Only the activating token's registration survives. + +### 3. stmStoreTokenLastNtf guards against stale tokens + +`stmStoreTokenLastNtf` performs a non-STM IO lookup first, then enters STM. Within the STM block, it re-checks the map to handle the race where another thread modified the map between the IO lookup and STM entry. It only inserts for tokens that exist in the `tokens` map — stale token IDs are silently ignored. + +### 4. 
tokenLastNtfs accumulates via prepend + +New notifications are prepended to the `NonEmpty PNMessageData` list via `(<|)`. The list is unbounded in the STM store — bounding is handled at the push delivery layer (the Postgres store limits to 6). diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md new file mode 100644 index 0000000000..3cb5c9083c --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md @@ -0,0 +1,54 @@ +# Simplex.Messaging.Notifications.Server.Store.Postgres + +> PostgreSQL-backed persistent store for notification tokens, subscriptions, and last-notification delivery. + +**Source**: [`Notifications/Server/Store/Postgres.hs`](../../../../../../../src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs) + +## Non-obvious behavior + +### 1. deleteNtfToken exclusive row lock + +`deleteNtfToken` acquires `FOR UPDATE` on the token row before cascading deletes. This prevents concurrent subscription inserts for this token during the deletion window. The subscriptions are aggregated by SMP server and returned for in-memory subscription cleanup. + +### 2. addTokenLastNtf atomic CTE + +`addTokenLastNtf` executes a single SQL statement with three CTEs that atomically: +1. **Upserts** the new notification into `last_notifications` (one row per token+subscription) +2. **Collects** the most recent notifications for the token (limited to `maxNtfs = 6`) +3. **Deletes** any older notifications beyond the limit + +This ensures the push notification always contains the most recent notifications across all of a token's subscriptions, with bounded storage. + +### 3. setTokenActive cleans duplicate registrations + +After activating a token, `setTokenActive` deletes all other tokens with the same `push_provider` + `push_provider_token` but different `token_id`. This cleans up incomplete or duplicate registration attempts. + +### 4. 
setTknStatusConfirmed conditional update + +Updates to `NTConfirmed` only if the current status is not already `NTConfirmed` or `NTActive`. This prevents downgrading an already-active token back to confirmed state when a delayed verification push arrives. + +### 5. Silent token date tracking + +`updateTokenDate` is called on every token read (`getNtfToken_`, `findNtfSubscription`, `getNtfSubscription`). It updates `updated_at` only when the current date differs from the stored date. This tracks token activity without explicit client action. + +### 6. getServerNtfSubscriptions marks as pending + +After reading subscriptions for resubscription, `getServerNtfSubscriptions` batch-updates their status to `NSPending`. This prevents the same subscriptions from being picked up by a concurrent resubscription pass — it acts as a "claim" mechanism. + +Only non-service-associated subscriptions (`NOT ntf_service_assoc`) are returned for individual resubscription. + +### 7. Approximate subscription count + +`getEntityCounts` uses `pg_class.reltuples` for the subscription count instead of `count(*)`. This returns an approximate value from PostgreSQL's statistics catalog, avoiding a full table scan on potentially large subscription tables. + +### 8. withFastDB vs withDB priority pools + +`withFastDB` uses `withTransactionPriority ... True` to run on the priority connection pool. Client-facing operations (token registration, subscription commands) use the priority pool, while background operations (batch status updates, resubscription) use the regular pool. + +### 9. Server upsert optimization + +`addNtfSubscription` first tries a plain SELECT for the SMP server, then falls back to INSERT with ON CONFLICT only if the server doesn't exist. This avoids the upsert overhead in the common case where the server already exists. + +### 10. Service association tracking + +`batchUpdateSrvSubStatus` atomically updates both subscription status and `ntf_service_assoc` flag. 
When notifications arrive via a service subscription (`newServiceId` is `Just`), all affected subscriptions are marked as service-associated. `removeServiceAndAssociations` resets all subscriptions for a server to `NSInactive` with `ntf_service_assoc = FALSE`. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Types.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Types.md new file mode 100644 index 0000000000..97f0fce465 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Types.md @@ -0,0 +1,7 @@ +# Simplex.Messaging.Notifications.Server.Store.Types + +> Pure record types and STM conversion for notification tokens and subscriptions. + +**Source**: [`Notifications/Server/Store/Types.hs`](../../../../../../../src/Simplex/Messaging/Notifications/Server/Store/Types.hs) + +No non-obvious behavior. `mkTknData`/`mkTknRec` convert between pure records and TVar-based STM data. `tknUpdatedAt` is parsed as optional for backward compatibility with store logs that predate it. diff --git a/spec/modules/Simplex/Messaging/Notifications/Transport.md b/spec/modules/Simplex/Messaging/Notifications/Transport.md index 7c7955154e..263b2459a3 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Transport.md +++ b/spec/modules/Simplex/Messaging/Notifications/Transport.md @@ -1,36 +1,30 @@ # Simplex.Messaging.Notifications.Transport -> Notification Router Protocol transport: manages push notification subscriptions between client and NTF Router. +> NTF protocol version negotiation, TLS handshake, and transport handle setup. **Source**: [`Notifications/Transport.hs`](../../../../../src/Simplex/Messaging/Notifications/Transport.hs) -**Protocol spec**: [`protocol/push-notifications.md`](../../../../../protocol/push-notifications.md) — SimpleX Notification Router protocol. +## Non-obvious behavior -## Overview +### 1. 
ALPN-dependent version range -This module implements the transport layer for the **Notification Router Protocol**. Per the protocol spec: "To manage notification subscriptions to SMP routers, SimpleX Notification Router provides an RPC protocol with a similar design to SimpleX Messaging Protocol router." +`ntfServerHandshake` advertises `legacyServerNTFVRange` (v1 only) when ALPN is not available (`getSessionALPN` returns `Nothing`). When ALPN is present, it advertises the full `supportedServerNTFVRange`. This is the backward-compatibility mechanism for pre-ALPN clients that cannot negotiate newer protocol features. -The protocol spec diagram shows three separate protocols in the notification flow: -1. **Notification Router Protocol** (this module): client ↔ SimpleX Notification Router — subscription management -2. **SMP protocol**: SMP Router → SimpleX Notifications Subscriber — notification signals -3. **Push provider** (e.g., APN): SimpleX Push Router → device — per the spec: "the notifications are e2e encrypted between SimpleX Notification Router and the user's device" +### 2. Version-gated features -## Differences from SMP transport +Two feature gates exist in the NTF protocol: -The NTF protocol reuses SMP's transport infrastructure but with reduced parameters: +| Version | Feature | Effect | +|---------|---------|--------| +| v2 (`authBatchCmdsNTFVersion`) | Auth key exchange + batching | `authPubKey` sent in handshake, `implySessId` and `batch` enabled | +| v3 (`invalidReasonNTFVersion`) | Token invalid reasons | `NTInvalid` responses include the reason enum | -| Property | SMP | NTF | -|----------|-----|-----| -| Block size | 16384 | 512 | -| Block encryption | Yes (v11+) | No (`encryptBlock = Nothing`) | -| Service certificates | Yes (v16+) | No (`serviceAuth = False`) | -| Version range | 6–19 | 1–3 | -| Handshake messages | 2–3 | 2 | +Pre-v2 connections have no command encryption or batching — commands are sent in plaintext within TLS. 
-## Same ALPN/legacy fallback pattern as SMP +### 3. Unused Protocol typeclass parameters -`ntfServerHandshake` uses the same pattern as `smpServerHandshake`: if ALPN is not negotiated (`getSessionALPN` returns `Nothing`), the notification router offers only `legacyServerNTFVRange` (v1 only). +`ntfClientHandshake` accepts `_proxyServer` and `_serviceKeys` parameters that are ignored. These exist because the `Protocol` typeclass (shared with SMP) requires `protocolClientHandshake` to accept them. The NTF protocol does not support proxy routing or service authentication. -## NTF handshake uses SMP shared types +### 4. Block size -The handshake reuses SMP's `THandle`, `THandleParams`, `THandleAuth` types. The `encodeAuthEncryptCmds` and `authEncryptCmdsP` helper functions are defined locally in this module (with NTF-specific version thresholds). NTF never sets `sessSecret` / `sessSecret'`, `peerClientService`, or `clientService` — these are always `Nothing`. +NTF uses a 512-byte block size (`ntfBlockSize`), significantly smaller than SMP. Notification commands and responses are short — the main payload is the `PNMessageData` which contains encrypted message metadata. diff --git a/spec/modules/Simplex/Messaging/Notifications/Types.md b/spec/modules/Simplex/Messaging/Notifications/Types.md new file mode 100644 index 0000000000..bb05ccefb2 --- /dev/null +++ b/spec/modules/Simplex/Messaging/Notifications/Types.md @@ -0,0 +1,19 @@ +# Simplex.Messaging.Notifications.Types + +> Agent-side notification token and subscription types with action state machines. + +**Source**: [`Notifications/Types.hs`](../../../../../src/Simplex/Messaging/Notifications/Types.hs) + +## Non-obvious behavior + +### 1. NASDeleted is a transient race condition artifact + +`NASDeleted` can only exist when the notification supervisor updates a subscription record while a worker is mid-operation on that same subscription. 
The worker's post-operation database update hits a record that was already modified by the supervisor, resulting in an update to `NASDeleted` status instead of a full deletion. This status should not persist — it is cleaned up on the next supervisor pass. + +### 2. Action space split across two worker types + +`NtfSubAction` is an `Either`-like sum of `NtfSubNTFAction` (handled by NTF router workers) and `NtfSubSMPAction` (handled by SMP router workers). The supervisor writes these to the database, and each worker pool only reads its own action type. `isDeleteNtfSubAction` classifies actions across both types for the supervisor's reset logic. + +### 3. NSADelete and NSARotate are deprecated + +These `NtfSubNTFAction` values are no longer generated by current code but are retained in the type for processing legacy database records. `NSARotate` is logically "delete + recreate" while `NSADelete` is "delete notifier on NTF router + delete credentials on SMP router". From 546ee1a0e10db2de4efe3992e02ece7f8de2e416 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:43:02 +0000 Subject: [PATCH 41/91] update specs --- .../Messaging/Agent/NtfSubSupervisor.md | 24 ++++++- spec/modules/Simplex/Messaging/Agent/Store.md | 28 +++++++++ .../Messaging/Agent/Store/AgentStore.md | 62 +++++++++++++++++-- .../Simplex/Messaging/Agent/Store/SQLite.md | 8 ++- .../Simplex/Messaging/Notifications/Client.md | 8 +++ .../Messaging/Notifications/Protocol.md | 12 ++++ .../Simplex/Messaging/Notifications/Server.md | 56 ++++++++++++++++- .../Messaging/Notifications/Server/Env.md | 26 +++++++- .../Notifications/Server/Push/APNS.md | 40 ++++++++++++ .../Messaging/Notifications/Server/Stats.md | 22 ++++++- .../Messaging/Notifications/Server/Store.md | 32 ++++++++++ .../Notifications/Server/Store/Postgres.md | 40 ++++++++++++ .../Messaging/Notifications/Transport.md | 14 ++++- .../Simplex/Messaging/Notifications/Types.md 
| 2 +- 14 files changed, 357 insertions(+), 17 deletions(-) diff --git a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md index 33cd3eacba..d55cfd7469 100644 --- a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md +++ b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md @@ -33,7 +33,7 @@ The key decision point: when `subAction_` is `Nothing` (set by `workerErrors` af `retrySubActions` holds the list of subs-to-retry in a `TVar`. Each iteration, the action function returns only the subs that got temporary errors (via `splitResults`). The `TVar` is overwritten with this shrinking list. On success or permanent error, subs drop out. This means retry batches get smaller over time. -`splitResults` implements a three-way partition: temporary errors → retry, permanent errors → null the action + notify, successes → continue pipeline. +`splitResults` implements a three-way partition: temporary or host errors → retry, permanent errors → null the action + notify, successes → continue pipeline. ### 3. rescheduleWork deferred wake-up @@ -48,7 +48,7 @@ This is the mechanism for time-scheduled subscription health checks. When the notification router returns `AUTH` for a subscription check, the subscription is not simply marked as failed — it is fully recreated from scratch by resetting to `NSASMP NSASmpKey` state. This handles the case where the notification router has lost its subscription state (restart, data loss). The SMP worker is kicked to re-establish notifier credentials. -Non-AUTH failure statuses that are not in `subscribeNtfStatuses` also trigger recreation. +Successful check responses with statuses not in `subscribeNtfStatuses` also trigger recreation via `recreateNtfSub`. ### 5. deleteToken two-phase with restart survival @@ -73,3 +73,23 @@ These NTF worker actions are no longer generated by current code but are kept fo ### 8. 
Stats counting groups by userId `incStatByUserId` groups batch subscriptions by `userId` before incrementing stats counters, ensuring per-user counts are accurate even when a single batch contains subscriptions from multiple users. + +### 9. sendNtfSubCommand — gated on instant mode + +`sendNtfSubCommand` only enqueues work if instant notifications are active (`hasInstantNotifications` checks `NTActive` status + `NMInstant` mode). In periodic mode, the entire subscription creation pipeline is dormant — no commands reach the supervisor. + +### 10. deleteNotifierKeys — credential reset before disable + +`resetCredsGetQueue` clears the queue's notification credentials in the store *before* sending the disable command to the SMP router. This "clean first" ordering means local state is already consistent even if the network call fails. + +### 11. runNtfTknDelWorker — permanent error discards record + +When token deletion gets a permanent (non-temporary, non-host) error, the deletion record is removed from the queue rather than retried. This prevents stuck deletion records from blocking the worker. The error is reported to the client. + +### 12. getNtfServer — random selection from multiple + +When multiple notification routers are configured, one is selected randomly using `randomR` with a session-stable `TVar` generator. Single-server configurations skip the randomness. + +### 13. closeNtfSupervisor — atomic swap then cancel + +`swapTVar` atomically replaces the workers map with empty, then cancels all extracted workers. This ensures all existing workers at the point of shutdown are captured for cancellation. Prevention of new work is handled by the supervisor loop termination and operation bracket lifecycle, not by the swap itself. 
diff --git a/spec/modules/Simplex/Messaging/Agent/Store.md b/spec/modules/Simplex/Messaging/Agent/Store.md index 0eecbf8d15..ca3dabd1fd 100644 --- a/spec/modules/Simplex/Messaging/Agent/Store.md +++ b/spec/modules/Simplex/Messaging/Agent/Store.md @@ -42,3 +42,31 @@ See comment on `InvShortLink`. Stored separately from the connection because 1-t ## RcvQueueSub — subscription-optimized projection `RcvQueueSub` strips cryptographic fields from `RcvQueue`, keeping only what's needed for subscription tracking in [TSessionSubs](./TSessionSubs.md). This reduces memory pressure when tracking thousands of subscriptions in STM. + +## rcvSMPQueueAddress exposes sender-facing ID + +`rcvSMPQueueAddress` constructs the `SMPQueueAddress` from a receive queue using `sndId` (not `rcvId`). The address shared with senders in connection requests contains the sender ID, the public key derived from `e2ePrivKey`, and `queueMode`. The `rcvId` is never exposed externally. + +## enableNtfs is duplicated between queue and connection + +`enableNtfs` exists on both `StoredRcvQueue` and `ConnData`. The comment marks it as "duplicated from ConnData." The queue-level copy enables subscription operations (which work at the queue level) to check notification status without loading the full connection. + +## deleteErrors — queue deletion retry counter + +`StoredRcvQueue` has a `deleteErrors :: Int` field that counts failed deletion attempts. This allows the agent to give up on queue deletion after repeated failures rather than retrying indefinitely. + +## Two-level message preparation + +`SndMsgData` optionally carries `SndMsgPrepData` with a `sndMsgBodyId` reference to a separately stored message body. `PendingMsgData` optionally carries `PendingMsgPrepData` with the actual `AMessage` body. This split allows large message bodies to be stored once and referenced by ID during the send pipeline, avoiding redundant serialization. 
+ +## Per-message retry backoff + +`PendingMsgData` includes `msgRetryState :: Maybe RI2State` — each pending message independently tracks its retry backoff state. This means messages that fail to send don't reset the retry timers of other pending messages in the same connection. + +## Soft deletion and optional contact connection + +`ConnData` has `deleted :: Bool` for soft deletion — connections are marked deleted before queue cleanup completes. `Invitation` has `contactConnId_ :: Maybe ConnId` (note the trailing underscore) — invitations can outlive their originating contact connection. + +## SEBadQueueStatus is vestigial + +`SEBadQueueStatus` is documented in the source as "Currently not used." It was intended for queue status transition validation but was never implemented. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md b/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md index 9fbc2beb32..d1271a6d33 100644 --- a/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md +++ b/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md @@ -8,7 +8,7 @@ At ~3700 lines, this is the largest module in the codebase. It implements all database operations for the agent, compiled with CPP for both SQLite and PostgreSQL backends. Most functions are straightforward SQL CRUD, but several patterns are non-obvious. -The module re-exports `withConnection`, `withTransaction`, `withTransactionPriority`, `firstRow`, `firstRow'`, `maybeFirstRow`, and `fromOnlyBI` from the backend-specific Common module. +The module re-exports `withConnection`, `withTransaction`, `withTransactionPriority`, `firstRow`, `firstRow'`, and `maybeFirstRow` from the backend-specific Common module. It also exports `fromOnlyBI` (a local helper) and `getWorkItem`/`getWorkItems`. ## Dual-backend compilation @@ -19,11 +19,11 @@ The module uses `#if defined(dbPostgres)` throughout. 
Key behavioral differences ## getWorkItem / getWorkItems — worker store pattern -`getWorkItem` implements the store-side pattern for the [worker framework](../Client.md): `getId → getItem → markFailed`. If `getId` or `getItem` throws an IO exception, `handleWrkErr` wraps it as `SEWorkItemError` (via `mkWorkItemError`), which signals the worker to suspend rather than retry. This prevents crash loops on corrupt data. +`getWorkItem` implements the store-side pattern for the [worker framework](../Client.md): `getId → getItem → markFailed`. If `getId` throws an IO exception, `handleWrkErr` wraps it as `SEWorkItemError` (via `mkWorkItemError`), which signals the worker to suspend rather than retry. If `getItem` fails (returning Left or throwing), `tryGetItem` calls `markFailed` (also wrapped by `handleWrkErr`) and rethrows the original error. This prevents crash loops on corrupt data. `getWorkItems` extends this to batch work items, where each item failure is independent. -**Consumed by**: `getPendingQueueMsg`, `getPendingServerCommand`, `getNextNtfSubNTFActions`, `getNextNtfSubSMPActions`, `getNextDeletedSndChunkReplica`, `getNextNtfTokenToDelete`. +**Consumed by**: `getPendingQueueMsg`, `getPendingServerCommand`, `getNextNtfSubNTFActions`, `getNextNtfSubSMPActions`, `getNextDeletedSndChunkReplica`, `getNextNtfTokenToDelete`, `getNextRcvChunkToDownload`, `getNextRcvFileToDecrypt`, `getNextSndChunkToUpload`, `getNextSndFileToPrepare`. ## Notification subscription — supervisor/worker coordination @@ -43,11 +43,11 @@ Both functions include `AND last_internal_*_msg_id = ?` in their UPDATE WHERE cl ## deleteConn — conditional delivery wait -Three deletion paths: +Four paths: 1. No timeout: immediate delete. 2. Timeout + no pending deliveries: immediate delete. 3. Timeout + pending deliveries + `deleted_at_wait_delivery` expired: delete. -4. Timeout + pending deliveries + not expired: return `Nothing` (skip). +4. 
Timeout + pending deliveries + not expired: return `Nothing` (skip deletion). This allows graceful delivery completion before connection cleanup. @@ -74,3 +74,55 @@ Generates random 12-byte IDs (base64url encoded) and retries up to 3 times on co ## setRcvQueuePrimary / setSndQueuePrimary — two-step primary swap First clears primary flag on all queues in the connection, then sets it on the target queue. Also clears `replace_*_queue_id` on the new primary — this completes the queue rotation by removing the "replacing" marker. + +## checkConfirmedSndQueueExists_ — dpPostgres typo + +The CPP guard reads `#if defined(dpPostgres)` (note `dp` instead of `db`). This means the `FOR UPDATE` clause is never included for any backend. The check still works correctly for SQLite (single-writer model) but on PostgreSQL the query runs without row locking, which could allow a TOCTOU race between checking and inserting. + +## createCommand — silent drop for deleted connections + +When `createCommand` encounters a constraint violation (the referenced connection was already deleted), it logs the error and returns successfully rather than throwing. This means commands targeting deleted connections are silently dropped. The rationale: the connection is already gone, so there's nothing useful to do with the error. + +## updateNewConnRcv — retry tolerance + +`updateNewConnRcv` accepts both `NewConnection` and `RcvConnection` connection states. The `RcvConnection` case is explicitly commented as "to allow retries" — if the initial queue insertion succeeded but the caller didn't get the response, a retry would find the connection already upgraded. `updateNewConnSnd` does not have this tolerance. + +## setLastBrokerTs — monotonic advance + +The WHERE clause includes `AND (last_broker_ts IS NULL OR last_broker_ts < ?)`, which ensures the timestamp only moves forward. Out-of-order message processing (e.g., from different queues) cannot regress the broker timestamp. 
+ +## deleteDeliveredSndMsg — FOR UPDATE + count zero check + +On PostgreSQL, acquires a `FOR UPDATE` lock on the message row before counting pending deliveries. This prevents a race where two concurrent delivery completions both see count > 0 before either deletes, then both try to delete. Only deletes the message when the count reaches exactly 0. + +## createWithRandomId' — savepoint-based retry + +Uses `withSavepoint` around each insertion attempt rather than bare execute. This is critical for PostgreSQL: a failed statement within a transaction aborts the entire transaction, but savepoints allow rolling back just the failed INSERT and retrying with a new ID. + +## Explicit row-lock functions + +`lockConnForUpdate`, `lockRcvFileForUpdate`, and `lockSndFileForUpdate` are PostgreSQL-only explicit lock acquisition that compile to no-ops on SQLite. They acquire `FOR UPDATE` locks on rows that need serialized access without modifying them. + +## XFTP work item retry ordering + +`getNextRcvChunkToDownload` and `getNextSndChunkToUpload` order by `retries ASC, created_at ASC`. This prioritizes chunks with fewer retries, ensuring a repeatedly-failing chunk doesn't starve others. Same pattern for `getNextDeletedSndChunkReplica`. + +## getRcvFileRedirects — error resilience + +When loading redirect chains, errors loading individual redirect files are silently swallowed (`either (const $ pure Nothing) (pure . Just)`). This prevents a corrupt redirect from blocking access to the main file. + +## enableNtfs defaults to True when NULL + +Both `toRcvQueue` and `rowToConnData` default `enableNtfs` to `True` when the database value is NULL (`maybe True unBI enableNtfs_`). This is a backward-compatibility default for connections created before the field existed. + +## primaryFirst — queue ordering + +The `primaryFirst` comparator sorts queues with the primary queue first (`Down` on primary flag), then by `dbReplaceQId` to place the "replacing" queue second. 
This ensures all queue lists are consistently ordered for connection reconstruction. + +## getAnyConn_ — connection GADT reconstruction + +Reconstructs the type-level `Connection'` GADT by combining connection mode with the presence/absence of receive and send queues. The `CMContact` mode only maps to `ContactConnection` (receive-only); all other combinations use `CMInvitation` mode. When neither rcv nor snd queues exist, the result is always `NewConnection` regardless of mode. + +## deleteNtfSubscription — soft delete when supervisor active + +When `updated_by_supervisor` is true, `deleteNtfSubscription` doesn't actually delete the row. Instead, it nulls out the IDs and sets status to `NASDeleted`, preserving the row for the supervisor to observe. Only when the supervisor has not intervened does it perform a real DELETE. diff --git a/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md b/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md index 2513882ff5..14bd97f8e8 100644 --- a/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md +++ b/spec/modules/Simplex/Messaging/Agent/Store/SQLite.md @@ -15,7 +15,7 @@ These are set per-connection, not per-database — every new connection (includi ## simplex_xor_md5_combine — custom SQLite function -A C-exported SQLite function registered at connection time. Takes an existing `IdsHash` and a `RecipientId`, XORs the hash with the MD5 of the ID. This is the SQLite implementation of the accumulative IdsHash used by service subscriptions (see [TSessionSubs.md](../TSessionSubs.md#updateActiveService--accumulative-xor-merge)). PostgreSQL uses its native `md5()` and `decode()` functions instead. +A C-exported SQLite function registered at connection time. Takes an existing `IdsHash` and a `RecipientId`, XORs the hash with the MD5 of the ID. This is the SQLite implementation of the accumulative IdsHash used by service subscriptions (see [TSessionSubs.md](../TSessionSubs.md#updateActiveService--accumulative-xor-merge)). 
PostgreSQL uses `pgcrypto`'s `digest()` function for MD5 and a custom `xor_combine` PL/pgSQL function for the XOR. ## openSQLiteStore_ — connection swap under MVar @@ -23,4 +23,8 @@ Uses `bracketOnError` with `takeMVar`/`tryPutMVar`: takes the connection MVar, c ## storeKey — conditional key retention -`storeKey key keepKey` stores the encryption key in the `dbKey` TVar only if `keepKey` is true. This allows `reopenDBStore` to re-open without the caller re-supplying the key. If `keepKey` is false and the store is closed, `reopenDBStore` fails with "no key". +`storeKey key keepKey` stores the encryption key in the `dbKey` TVar if `keepKey` is true or if the key is empty (no encryption). This means unencrypted stores can always be reopened. If `keepKey` is false and the key is non-empty, `reopenDBStore` fails with "no key". + +## dbBusyLoop — initial connection retry + +`connectSQLiteStore` wraps `connectDB` in `dbBusyLoop` to handle database locking during initial connection. All transactions (`withTransactionPriority`) are also wrapped in `dbBusyLoop` as a retry layer on top of the `busy_timeout` PRAGMA. diff --git a/spec/modules/Simplex/Messaging/Notifications/Client.md b/spec/modules/Simplex/Messaging/Notifications/Client.md index d2c3eef0e2..ffecbcad01 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Client.md +++ b/spec/modules/Simplex/Messaging/Notifications/Client.md @@ -13,3 +13,11 @@ ### 2. Batch operations return per-item errors `ntfCreateSubscriptions` and `ntfCheckSubscriptions` return `NonEmpty (Either NtfClientError result)` — individual items in a batch can fail independently. Callers must handle partial success (some created, some failed). The singular variants throw on any error. + +### 3. Default port is 443 + +`defaultNTFClientConfig` sets the default transport to `("443", transport @TLS)`. Unlike the SMP protocol which typically uses port 5223, the NTF protocol defaults to the standard HTTPS port. + +### 4. 
okNtfCommand parameter ordering + +`okNtfCommand` has an unusual parameter order — the command comes first, then client, mode, key, entityId. This enables partial application in the definitions of `ntfDeleteToken`, `ntfVerifyToken`, etc., where the command is fixed and the remaining parameters flow through. diff --git a/spec/modules/Simplex/Messaging/Notifications/Protocol.md b/spec/modules/Simplex/Messaging/Notifications/Protocol.md index 9354e20862..71daf771db 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Protocol.md +++ b/spec/modules/Simplex/Messaging/Notifications/Protocol.md @@ -41,3 +41,15 @@ Token status `NTInvalid` allows subscription commands (SNEW, SCHK, SDEL), which ### 7. DeviceToken hex validation `DeviceToken` string parsing has two paths: a hardcoded literal match for `"apns_null test_ntf_token"` (test tokens), and hex string validation for real tokens (must be even-length hex). The wire encoding (`smpP`) does not perform this validation — it accepts any `ByteString`. + +### 8. SMPQueueNtf parsing applies updateSMPServerHosts + +Both `smpP` and `strP` for `SMPQueueNtf` apply `updateSMPServerHosts` to the parsed SMP server. This normalizes server host addresses on deserialization, ensuring consistent comparison even if the on-wire format uses different host representations. + +### 9. NRTknId response tag comment + +The `NRTknId_` tag encodes as `"IDTKN"` with a source comment: "it should be 'TID', 'SID'". This indicates a naming inconsistency that was preserved for backward compatibility — the tag names don't follow the pattern of other NTF protocol tags. + +### 10. useServiceAuth is False + +The `Protocol` instance explicitly returns `False` for `useServiceAuth`, meaning the NTF protocol never uses service-level authentication. All authentication is entity-level (per token/subscription).
diff --git a/spec/modules/Simplex/Messaging/Notifications/Server.md b/spec/modules/Simplex/Messaging/Notifications/Server.md index 9c88cf7a03..5c74878d73 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server.md @@ -27,12 +27,12 @@ When `verifyNtfTransmission` encounters an AUTH error (entity not found), it cal ### 2. TNEW idempotent re-registration When TNEW is received for an already-registered token, the server: -1. Looks up the existing token via `findNtfTokenRegistration` +1. Looks up the existing token via `findNtfTokenRegistration` (matches on push provider, device token, AND verify key) 2. Verifies the DH secret matches (recomputed from the new `dhPubKey` and stored `tknDhPrivKey`) 3. If DH secrets differ → AUTH error (prevents token hijacking) 4. If they match → re-sends verification push notification -This makes TNEW safe for client retransmission after connection drops. +If the verify key doesn't match in step 1, the lookup returns `Nothing` and a new token is created instead — the DH secret check never runs. This makes TNEW safe for client retransmission after connection drops. ### 3. SNEW idempotent subscription @@ -77,3 +77,55 @@ Cron notification interval has a hard minimum of 20 minutes. `TCRN 0` disables c ### 11. receive separates error responses from commands The `receive` function processes incoming transmissions and partitions results: malformed/unauthorized requests are written directly to `sndQ` as error responses, while valid commands go to `rcvQ` for processing. This ensures protocol errors get immediate responses without competing for the command processing queue. + +### 12. Maintenance mode saves state then exits immediately + +When `maintenance` is set in `startOptions`, the server restores stats, calls `stopServer` (closes DB, saves stats), and exits with `exitSuccess`. It never starts transport listeners, subscriber threads, or resubscription. 
This provides a way to run database migrations without the server serving traffic. + +### 13. Resubscription runs as a detached fork + +`resubscribe` is launched via `forkIO` before `raceAny_` starts — it is **not part of the `raceAny_` group**. Most exceptions are silently lost per `forkIO` semantics. However, `ExitCode` exceptions (like `exitFailure` from pattern 20) are special-cased by GHC's runtime and propagate to the main thread, terminating the process. + +### 14. TNEW re-registration resets status for non-verifiable tokens + +When a re-registration TNEW matches on DH secret but `allowTokenVerification tknStatus` is `False` (token is `NTNew`, `NTInvalid`, or `NTExpired`), the server resets status to `NTRegistered` before sending the verification push. This makes TNEW a "status repair" mechanism — clients with stuck tokens can restart the verification flow by re-registering with the same DH key. + +### 15. DELD unconditionally updates status (no session validation) + +Unlike `SMP.END` which checks `activeClientSession'` to prevent stale session messages from changing state, `SMP.DELD` updates subscription status to `NSDeleted` unconditionally. This is correct because DELD means the queue was permanently deleted on the SMP router — the information is valid regardless of which session reports it. + +### 16. TRPL generates new code but reuses the DH key + +`TRPL` (token replace) creates a new registration code and resets status to `NTRegistered`, but does NOT generate a new server DH key pair. The existing `tknDhPrivKey` and `tknDhSecret` are preserved — only the push provider token and registration code change. The encrypted channel between client and NTF router persists across device token replacements. + +### 17. PNMessage delivery requires NTActive, verification and cron do not + +`ntfPush` applies `checkActiveTkn` only to `PNMessage` notifications. 
Verification pushes (`PNVerification`) and cron check-messages pushes (`PNCheckMessages`) are delivered regardless of token status. This is necessary because verification pushes must be sent before NTActive, and cron pushes are already filtered at the database level. + +### 18. CAServiceSubscribed validates count and hash with warning-only behavior + +When a service subscription is confirmed, the NTF router compares expected and confirmed subscription count and IDs hash. Mismatches in either are logged as warnings but no corrective action is taken. An informational message is logged only when both match. + +### 19. subscribeLoop uses 100x database batch multiplier + +`dbBatchSize = batchSize * 100` reads subscriptions from the database in chunks 100 times larger than the SMP subscription batches. This reduces database round-trips during resubscription while keeping individual SMP batches small enough to avoid overwhelming SMP routers. + +### 20. subscribeLoop calls exitFailure on database error + +If `getServerNtfSubscriptions` returns `Left _` during startup resubscription, the server terminates via `exitFailure`. Since `resubscribe` runs in a forked thread (pattern 13), this `exitFailure` terminates the entire process — a transient database error during startup resubscription kills the server. + +### 21. Stats log aligns to wall-clock time of day + +The stats logging thread calculates an `initialDelay` to synchronize the first flush to `logStatsStartTime`. If the target time has already passed today, it adds 86400 seconds to schedule for the next day. Subsequent flushes occur at exact `logInterval` cadence from that aligned start point. + +### 22. NMSG AUTH errors silently counted, not logged + +When `addTokenLastNtf` returns `Left AUTH` (notification for a queue whose subscription/token association is invalid), the server increments `ntfReceivedAuth` but takes no corrective action. Other error types are silently ignored.
This is expected — subscriptions may be deleted while messages are in-flight. + +### 23. PNVerification delivery transitions token to NTConfirmed + +When a verification push is successfully delivered to the push provider, `setTknStatusConfirmed` transitions the token to `NTConfirmed`, but only if not already `NTConfirmed` or `NTActive`. This creates a two-phase confirmation: push delivery confirms the channel works (`NTConfirmed`), then TVFY confirms the client received it (`NTActive`). + +### 24. disconnectTransport always passes noSubscriptions = True + +Unlike the SMP router which checks active subscriptions before disconnecting idle clients, the NTF router always returns `True` for the "no subscriptions" check. NTF clients are disconnected purely on inactivity timeout — the NTF protocol has no long-lived client subscriptions. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Env.md b/spec/modules/Simplex/Messaging/Notifications/Server/Env.md index 96221a0124..c266390d2d 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Env.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Env.md @@ -8,7 +8,7 @@ ### 1. Service credentials are lazily generated -`mkDbService` in `newNtfServerEnv` generates service credentials on demand: when `getCredentials` is called for an SMP server, it first checks the database. If credentials exist, they are used. If not (`Nothing`), new credentials are generated via `genCredentials`, stored in the database, and returned. This happens per SMP server on first connection. +`mkDbService` in `newNtfServerEnv` generates service credentials on demand: when `getCredentials` is called for an SMP server, it checks the database. If the server is known and already has credentials, they are reused. If the server is known but has no credentials yet (first connection), new credentials are generated via `genCredentials`, stored in the database, and returned. 
If the server is not in the database at all, `PCEServiceUnavailable` is thrown (this case should not occur in practice, as clients only connect to servers already tracked in the database). Service credentials are only used when `useServiceCreds` is enabled in the config. @@ -19,3 +19,27 @@ Service credentials are only used when `useServiceCreds` is enabled in the confi ### 3. getPushClient lazy initialization `getPushClient` looks up the push client by provider in `pushClients` TMap. If not found, it calls `newPushClient` to create and register one. Push provider connections are established on first use, not at server startup. + +### 4. Service credential validity: 25h backdating, ~2700yr forward + +`genCredentials` creates self-signed Ed25519 certificates valid from 25 hours in the past to `24 * 999999` hours (~2,739 years) in the future. The 25-hour backdating protects against clock skew between NTF and SMP routers. The near-permanent forward validity avoids the need for credential rotation infrastructure. + +### 5. newPushClient race creates duplicate clients + +`newPushClient` atomically inserts into `pushClients` after creating the client. A concurrent `getPushClient` call between creation start and TMap insert will see `Nothing`, create a second client, and overwrite the first. This race is tolerable — APNS connections are cheap and the overwritten client is garbage collected. + +### 6. Bidirectional activity timestamps + +`NtfServerClient` has separate `rcvActiveAt` and `sndActiveAt` TVars, both initialized to connection time and updated independently. `disconnectTransport` considers both — a client that only receives (or only sends) is still considered active. + +### 7. pushQ bounded TBQueue creates backpressure + +`pushQ` in `NtfPushServer` is a `TBQueue` sized by `pushQSize`. When full, any thread writing to it (NMSG processing, periodic cron, verification) blocks in STM until space is available. 
This prevents the push delivery pipeline from being overwhelmed. + +### 8. subscriberSeq provides monotonic session variable ordering + +The `subscriberSeq` TVar is used by `getSessVar` to assign monotonically increasing IDs to subscriber session variables. `removeSessVar` uses compare-and-swap with this ID — only the variable with the matching ID can be removed, preventing stale removal when a new subscriber has already replaced the old one. + +### 9. SMPSubscriber holds Weak ThreadId for GC-based cleanup + +`subThreadId` is `Weak ThreadId`, not `ThreadId`. Using `Weak ThreadId` allows the GC to collect thread resources when no strong references remain. `stopSubscriber` uses `deRefWeak` to obtain the `ThreadId` (if the thread hasn't been GC'd) before calling `killThread`. The `Nothing` case (thread already collected) is simply skipped. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md index 2a6d8c0b12..d2a49471d9 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md @@ -33,3 +33,43 @@ When APNS returns 503 (Service Unavailable), the client actively closes the HTTP ### 7. EC key type assumption `readECPrivateKey` uses a specific pattern match for EC keys (`PrivKeyEC_Named`). It will crash at runtime if the APNS key file contains a different key type. The comment acknowledges this limitation. + +### 8. JWT signature uses DER-encoded ASN.1, not raw r||s + +`signedJWTToken` serializes the ECDSA signature as a DER-encoded ASN.1 SEQUENCE of two INTEGERs, then base64url-encodes it. RFC 7518 Section 3.4 requires raw concatenation of fixed-length r and s values instead. This deviation works because Apple's APNS server accepts DER-encoded signatures, but it would break if Apple enforced strict JWS compliance. + +### 9. 
Two different base64url encodings + +The encryption path uses `U.encode` (base64url **with** padding `=`), while the JWT path uses `U.encodeUnpadded` (base64url **without** padding). JWT requires unpadded base64url per RFC 7515, whereas the encrypted notification ciphertext is base64url-encoded with `=` padding before being embedded as a JSON text value. + +### 10. Error response defaults to empty string on parse failure + +If the APNS error response body is empty, malformed, or not JSON, `decodeStrict'` returns `Nothing` and the reason defaults to `""`. This empty string never matches named error patterns, so unparseable error bodies fall through to the catch-all of whichever status code branch matches. For 410, this means a malformed body is treated as `PPRetryLater` rather than a token invalidation. + +### 11. 410 unknown reasons are retryable, unlike 400/403 unknowns + +Unknown 410 (Gone) reasons fall through to `PPRetryLater`, while unknown 400 and 403 reasons fall through to `PPResponseError`. This means an unexpected APNS 410 reason string triggers retry behavior rather than permanent failure. + +### 12. 429 TooManyRequests is not explicitly handled + +There is a commented-out note but no actual 429 handler. A rate-limiting response falls through to the `otherwise` branch and becomes `PPResponseError`, surfacing as a generic error rather than a retryable condition. + +### 13. Nonce generation is STM-atomic, separate from encryption + +The per-notification nonce is generated inside `atomically` using the `ChaChaDRG` TVar, guaranteeing uniqueness under concurrent delivery. The nonce is then used by `cbEncrypt` outside STM. This separation means the nonce is committed to the DRG state even if encryption or send subsequently fails — correct behavior since nonce reuse would be catastrophic. + +### 14. Background notifications use priority 5, alerts use default 10 + +`apnsRequest` conditionally appends `apns-priority: 5` only for `APNSBackground` notifications.
Alert and mutable-content notifications omit the header, relying on APNS's default priority of 10. Apple requires background pushes to use priority 5 — using 10 can cause APNS to reject them. + +### 15. APNSErrorResponse is data, not newtype + +The comment explicitly states `APNSErrorResponse` is `data` rather than `newtype` "to have a correct JSON encoding as a record." With `deriveFromJSON`, a newtype around `Text` would serialize as a bare string, not `{"reason": "..."}`. The `data` wrapper forces record encoding matching APNS's JSON error format. + +### 16. HTTP/2 requests go through a serializing queue + +`sendRequest` routes through the HTTP2Client's `reqQ` (a `TBQueue`), serializing all requests through a single sender thread. Concurrent push deliveries are implicitly serialized at the HTTP/2 layer, meaning high-throughput scenarios bottleneck on this queue rather than utilizing HTTP/2's multiplexing. + +### 17. Connection initialization is fire-and-forget + +`createAPNSPushClient` calls `connectHTTPS2` and discards the result with `void`. If the initial connection fails, the error is only logged — the client is still created. The first push delivery triggers `getApnsHTTP2Client` which reconnects. This means the server can start even if APNS is unreachable. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md b/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md index 971419abfe..4a4439f548 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md @@ -8,7 +8,7 @@ ### 1. incServerStat double lookup -`incServerStat` performs a non-STM IO lookup first, then only enters an STM transaction on cache miss. The STM block re-checks the map to handle races (another thread may have inserted between the IO lookup and STM entry). This avoids contention on the shared TMap in the common case where the server's counter TVar already exists. 
+`incServerStat` performs a non-STM IO lookup first. On cache hit, the STM transaction only touches the per-server `TVar Int` without reading the shared TMap, avoiding contention. On cache miss, the STM block re-checks the map to handle races (another thread may have inserted between the IO lookup and STM entry). ### 2. setNtfServerStats is not thread safe @@ -17,3 +17,23 @@ ### 3. Backward-compatible parsing The `strP` parser uses `opt` which defaults missing fields to 0. This allows reading stats files from older server versions that don't include newer fields (`ntfReceivedAuth`, `ntfFailed`, `ntfVrf*`, etc.). + +### 4. getNtfServerStatsData is a non-atomic snapshot + +`getNtfServerStatsData` reads each `IORef` and `TMap` field sequentially in plain `IO`, not inside a single STM transaction. The returned `NtfServerStatsData` is not a consistent point-in-time snapshot — invariants like "received >= delivered" may not hold. The same applies to `getStatsByServer`, which does one `readTVarIO` for the map root TVar, then a separate `readTVarIO` for each per-server TVar. This is acceptable for periodic reporting where approximate consistency suffices. + +### 5. Mixed IORef/TVar concurrency primitives + +Aggregate counters (`ntfReceived`, `ntfDelivered`, etc.) use `IORef Int` incremented via `atomicModifyIORef'_`, while per-server breakdowns use `TMap Text (TVar Int)` incremented atomically via STM in `incServerStat`. Although both individual operations are atomic, the aggregate and per-server increments are separate operations, so their values can drift: a thread could increment the aggregate `IORef` before `incServerStat` runs, or vice versa. + +### 6. setStatsByServer replaces TMap atomically but orphans old TVars + +`setStatsByServer` builds a fresh `Map Text (TVar Int)` in IO via `newTVarIO`, then atomically replaces the TMap's root TVar. 
Old per-server TVars are not reused — any other thread holding a reference from a prior `TM.lookupIO` would modify an orphaned counter. Safe only because it's called at startup (like `setNtfServerStats`), but lacks the explicit "not thread safe" comment. + +### 7. Positional parser format despite key=value appearance + +The parser is strictly positional: fields must appear in exactly the serialization order. The `opt` alternatives only handle entirely absent fields (defaulting to 0), not reordered fields. Despite the `key=value` on-disk appearance, this is a sequential format — the named prefixes are for human readability, not key-lookup parsing. + +### 8. B.unlines trailing newline asymmetry + +`strEncode` uses `B.unlines`, which appends `\n` after every element including the last. The parser compensates with `optional A.endOfLine` on the last field. The file always ends with `\n`, but the parser tolerates its absence. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md index 33acdaad9b..05a7e70e2d 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md @@ -21,3 +21,35 @@ When a token is activated, `stmRemoveInactiveTokenRegistrations` removes ALL oth ### 4. tokenLastNtfs accumulates via prepend New notifications are prepended to the `NonEmpty PNMessageData` list via `(<|)`. The list is unbounded in the STM store — bounding is handled at the push delivery layer (the Postgres store limits to 6). + +### 5. stmDeleteNtfToken prunes empty registration maps + +When `stmDeleteNtfToken` removes a token, it deletes the entry from the inner `TMap` of `tokenRegistrations`, then checks whether that inner map is now empty via `TM.null`. If empty, it removes the outer `DeviceToken` key entirely, preventing unbounded growth of empty inner maps. 
In contrast, `stmRemoveInactiveTokenRegistrations` does **not** perform this cleanup — the surviving active token's registration always remains. + +### 6. stmRemoveTokenRegistration is identity-guarded + +`stmRemoveTokenRegistration` looks up the registration entry for the token's own verify key and only deletes it if the stored `NtfTokenId` matches the token's own ID. This guard prevents a token from accidentally removing a **different** token's registration that was inserted under the same `(DeviceToken, verifyKey)` pair due to a re-registration race. + +### 7. stmDeleteNtfToken silently succeeds on missing tokens + +`stmDeleteNtfToken` uses `lookupDelete` chained with monadic bind over `Maybe`. If the token ID does not exist in the `tokens` map, the registration-cleanup branch is silently skipped, and the function still proceeds to delete from `tokenLastNtfs` and `deleteTokenSubs`. It returns an empty list rather than signaling an error — the caller cannot distinguish "deleted a token with no subscriptions" from "token never existed." + +### 8. deleteTokenSubs returns SMP queues for upstream unsubscription + +`deleteTokenSubs` atomically collects all `SMPQueueNtf` values from the deleted subscriptions and returns them. This is how the server layer knows which SMP notifier subscriptions to tear down. `stmRemoveInactiveTokenRegistrations` discards this list (`void $`), meaning rival-token cleanup does **not** trigger SMP unsubscription — only explicit token deletion does. + +### 9. stmAddNtfSubscription always returns Just (vestigial Maybe) + +`stmAddNtfSubscription` has return type `STM (Maybe ())` with a comment "return Nothing if subscription existed before," but **unconditionally returns `Just ()`**. `TM.insert` overwrites any existing subscription silently. The `Maybe` return type is vestigial — the function never detects duplicates. + +### 10. 
stmDeleteNtfSubscription leaves empty tokenSubscriptions entries + +When `stmDeleteNtfSubscription` removes a subscription, it deletes the `subId` from the token's `Set NtfSubscriptionId` in `tokenSubscriptions` but never checks whether the set became empty. Tokens with all subscriptions individually deleted accumulate empty set entries — these are only cleaned up when the token itself is deleted via `deleteTokenSubs`. + +### 11. stmSetNtfService — asymmetric cleanup with Postgres store + +`stmSetNtfService` uses `maybe TM.delete TM.insert` to either remove or set the service association for an SMP server. This is purely a key-value update with no cascading effects on subscriptions. The Postgres store's `removeServiceAndAssociations` handles subscription cleanup separately, meaning the STM and Postgres stores have **different cleanup semantics** for service removal. + +### 12. Subscription index triple-write invariant + +`stmAddNtfSubscription` writes to three maps atomically: `subscriptions` (subId → data), `subscriptionLookup` (smpQueue → subId), and `tokenSubscriptions` (tokenId → Set subId). Single-subscription deletion (`stmDeleteNtfSubscription`) cleans the first two but only removes from the Set in the third. Bulk-token deletion (`deleteTokenSubs`) deletes the outer `tokenSubscriptions` entry entirely. Different deletion paths have different completeness guarantees. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md index 3cb5c9083c..440797539a 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md @@ -52,3 +52,43 @@ Only non-service-associated subscriptions (`NOT ntf_service_assoc`) are returned ### 10. Service association tracking `batchUpdateSrvSubStatus` atomically updates both subscription status and `ntf_service_assoc` flag. 
When notifications arrive via a service subscription (`newServiceId` is `Just`), all affected subscriptions are marked as service-associated. `removeServiceAndAssociations` resets all subscriptions for a server to `NSInactive` with `ntf_service_assoc = FALSE`. + +### 11. uninterruptibleMask_ wraps most store operations + +`withDB_` and `withClientDB` wrap the database transaction in `E.uninterruptibleMask_`. This prevents async exceptions from interrupting a PostgreSQL transaction mid-flight, which could leave a connection in a half-committed state and corrupt the pool. Functions that take a raw `DB.Connection` parameter (`getNtfServiceCredentials`, `setNtfServiceCredentials`, `updateNtfServiceId`) operate within a caller-managed transaction and are not independently wrapped. `getUsedSMPServers` uses `withTransaction` directly (intentionally: it is expected to crash on error at startup). + +### 12. Silent error swallowing with sentinel returns + +`withDB_` catches all `SomeException`, logs the error, and returns `Left (STORE msg)` — callers never see database failures as exceptions. Additionally, `batchUpdateSrvSubStatus` and `batchUpdateSrvSubErrors` use `fromRight (-1)` to convert database errors into a `-1` count, and `withPeriodicNtfTokens` uses `fromRight 0`, making database failures indistinguishable from "zero results" at the call site. + +### 13. getUsedSMPServers uncorrelated EXISTS + +The `EXISTS` subquery in `getUsedSMPServers` has no join condition to the outer `smp_servers` table — it returns ALL servers if ANY subscription anywhere has a subscribable status. This is intentional for server startup: the server needs all SMP server records (including `ServiceSub` data) to rebuild in-memory state, and the EXISTS clause is a cheap guard against an empty subscription table. + +### 14. 
Trigger-maintained XOR hash aggregates + +Subscription insert, update, and delete trigger functions incrementally maintain `smp_notifier_count` and `smp_notifier_ids_hash` on `smp_servers` using XOR-based hash aggregation of MD5 digests. Every `batchUpdateSrvSubStatus` or cascade-delete from token deletion implicitly fires these triggers. The XOR hash is self-inverting: adding and removing the same notifier ID restores the previous hash. `updateNtfServiceId` resets these counters to zero when the service ID changes, invalidating the previous aggregate. + +### 15. updateNtfServiceId asymmetric credential cleanup + +Setting a new service ID preserves existing TLS credentials (`ntf_service_cert`, etc.) while only resetting aggregate counters. Setting service ID to `NULL` clears both credentials AND counters. In both cases, if a previous service ID existed, all subscription associations are reset first via `removeServiceAssociation_`, and a `logError` is emitted — treating a service ID change as anomalous. + +### 16. Server upsert no-op DO UPDATE for RETURNING + +The `insertServer` fallback uses `ON CONFLICT ... DO UPDATE SET smp_host = EXCLUDED.smp_host` — a no-op update solely to make `RETURNING smp_server_id` work. PostgreSQL's `ON CONFLICT DO NOTHING` does not support `RETURNING` for conflicting rows, so this pattern forces a row to always be "affected" and thus returnable. This handles races where two concurrent `addNtfSubscription` calls both miss the initial SELECT. + +### 17. getNtfServiceCredentials FOR UPDATE serializes provisioning + +`getNtfServiceCredentials` acquires `FOR UPDATE` on the server row even though it is a read operation. The caller needs to atomically check whether credentials exist and then set them in the same transaction. Without `FOR UPDATE`, two concurrent provisioning attempts could both see `Nothing` and both provision, resulting in credential mismatch. + +### 18. 
deleteNtfToken string_agg with hex parsing + +`deleteNtfToken` uses `string_agg(s.smp_notifier_id :: TEXT, ',')` to aggregate `BYTEA` notifier IDs into comma-separated text, then parses with `parseByteaString` which drops the `\x` prefix and hex-decodes. `mapMaybe` silently drops any IDs that fail hex decoding, which could mask data corruption. + +### 19. withPeriodicNtfTokens streams with DB.fold + +`withPeriodicNtfTokens` uses `DB.fold` to stream token rows one at a time through a callback that performs IO (sending push notifications), meaning the database transaction and connection are held open for the entire duration of all notifications. This is deliberately routed through the non-priority pool to avoid blocking client-facing operations. + +### 20. Cursor-based pagination with byte-ordering + +`getServerNtfSubscriptions` uses `subscription_id > ?` with `ORDER BY subscription_id LIMIT ?`. Since `subscription_id` is `BYTEA`, ordering is by raw byte comparison. The batch status update uses `FROM (VALUES ...)` pattern instead of `WHERE IN (...)`, and the `s.status != upd.status` guard prevents no-op writes from firing XOR hash triggers. diff --git a/spec/modules/Simplex/Messaging/Notifications/Transport.md b/spec/modules/Simplex/Messaging/Notifications/Transport.md index 263b2459a3..df40214752 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Transport.md +++ b/spec/modules/Simplex/Messaging/Notifications/Transport.md @@ -8,7 +8,7 @@ ### 1. ALPN-dependent version range -`ntfServerHandshake` advertises `legacyServerNTFVRange` (v1 only) when ALPN is not available (`getSessionALPN` returns `Nothing`). When ALPN is present, it advertises the full `supportedServerNTFVRange`. This is the backward-compatibility mechanism for pre-ALPN clients that cannot negotiate newer protocol features. +`ntfServerHandshake` advertises `legacyServerNTFVRange` (v1 only) when ALPN is not available (`getSessionALPN` returns `Nothing`). 
When ALPN is present, it advertises the caller-provided `ntfVRange`. This is the backward-compatibility mechanism for pre-ALPN clients that cannot negotiate newer protocol features. ### 2. Version-gated features @@ -23,8 +23,16 @@ Pre-v2 connections have no command encryption or batching — commands are sent ### 3. Unused Protocol typeclass parameters -`ntfClientHandshake` accepts `_proxyServer` and `_serviceKeys` parameters that are ignored. These exist because the `Protocol` typeclass (shared with SMP) requires `protocolClientHandshake` to accept them. The NTF protocol does not support proxy routing or service authentication. +`ntfClientHandshake` accepts `_proxyServer` and `_serviceKeys` parameters that are ignored. These are passed through from the `Protocol` typeclass's `protocolClientHandshake` method for consistency with SMP. A third parameter (`Maybe C.KeyPairX25519` for key agreement) is discarded at the Protocol instance wrapper level. The NTF protocol does not support proxy routing or service authentication. ### 4. Block size -NTF uses a 512-byte block size (`ntfBlockSize`), significantly smaller than SMP. Notification commands and responses are short — the main payload is the `PNMessageData` which contains encrypted message metadata. +NTF uses a 512-byte block size (`ntfBlockSize`), significantly smaller than SMP. This is sufficient because NTF protocol commands (TNEW, SNEW, TCHK, etc.) and their responses are short. `PNMessageData` (which contains encrypted message metadata) is not sent over the NTF transport — it is delivered via APNS push notifications. + +### 5. Initial THandle has version 0 + +`ntfTHandle` creates a THandle with `thVersion = VersionNTF 0` — a version that no real protocol supports. This is a placeholder value that gets overwritten during version negotiation. All feature gates check `v >= authBatchCmdsNTFVersion` (v2), so the v0 placeholder disables all optional features. + +### 6. 
Server handshake always sets authPubKey + +`ntfServerHandshake` always includes `authPubKey = Just sk` in the server handshake, regardless of the advertised version range. The encoding functions (`encodeAuthEncryptCmds`) then decide whether to actually serialize it based on the max version. This means the key is computed even when it won't be sent. diff --git a/spec/modules/Simplex/Messaging/Notifications/Types.md b/spec/modules/Simplex/Messaging/Notifications/Types.md index bb05ccefb2..97cc66913a 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Types.md +++ b/spec/modules/Simplex/Messaging/Notifications/Types.md @@ -16,4 +16,4 @@ ### 3. NSADelete and NSARotate are deprecated -These `NtfSubNTFAction` values are no longer generated by current code but are retained in the type for processing legacy database records. `NSARotate` is logically "delete + recreate" while `NSADelete` is "delete notifier on NTF router + delete credentials on SMP router". +These `NtfSubNTFAction` values are no longer generated by current code but are retained in the type for processing legacy database records. `NSARotate` is logically "delete + recreate" while `NSADelete` is "delete subscription on NTF server + delete notifier credentials on SMP server".
From f131531f5aefe51d0525fab59087affefb1172ee Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:12:20 +0000 Subject: [PATCH 42/91] xftp specs --- spec/modules/Simplex/FileTransfer/Agent.md | 86 +++++++++++++++++++ spec/modules/Simplex/FileTransfer/Client.md | 37 ++++++++ .../Simplex/FileTransfer/Client/Agent.md | 27 ++++++ .../Simplex/FileTransfer/Client/Main.md | 43 ++++++++++ spec/modules/Simplex/FileTransfer/Crypto.md | 31 +++++++ .../Simplex/FileTransfer/Description.md | 43 ++++++++++ spec/modules/Simplex/FileTransfer/Protocol.md | 36 ++++++++ spec/modules/Simplex/FileTransfer/Server.md | 85 ++++++++++++++++++ .../Simplex/FileTransfer/Server/Env.md | 24 ++++++ .../Simplex/FileTransfer/Server/Main.md | 28 ++++++ .../Simplex/FileTransfer/Server/Stats.md | 19 ++++ .../Simplex/FileTransfer/Server/Store.md | 39 +++++++++ .../Simplex/FileTransfer/Server/StoreLog.md | 33 +++++++ spec/modules/Simplex/FileTransfer/Types.md | 27 ++++++ 14 files changed, 558 insertions(+) create mode 100644 spec/modules/Simplex/FileTransfer/Agent.md create mode 100644 spec/modules/Simplex/FileTransfer/Client.md create mode 100644 spec/modules/Simplex/FileTransfer/Client/Agent.md create mode 100644 spec/modules/Simplex/FileTransfer/Client/Main.md create mode 100644 spec/modules/Simplex/FileTransfer/Crypto.md create mode 100644 spec/modules/Simplex/FileTransfer/Description.md create mode 100644 spec/modules/Simplex/FileTransfer/Protocol.md create mode 100644 spec/modules/Simplex/FileTransfer/Server.md create mode 100644 spec/modules/Simplex/FileTransfer/Server/Env.md create mode 100644 spec/modules/Simplex/FileTransfer/Server/Main.md create mode 100644 spec/modules/Simplex/FileTransfer/Server/Stats.md create mode 100644 spec/modules/Simplex/FileTransfer/Server/Store.md create mode 100644 spec/modules/Simplex/FileTransfer/Server/StoreLog.md create mode 100644 spec/modules/Simplex/FileTransfer/Types.md diff 
--git a/spec/modules/Simplex/FileTransfer/Agent.md b/spec/modules/Simplex/FileTransfer/Agent.md new file mode 100644 index 0000000000..fd2a361d0c --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Agent.md @@ -0,0 +1,86 @@ +# Simplex.FileTransfer.Agent + +> XFTP agent: worker-based file send/receive/delete with retry, encryption, redirect chains, and file description generation. + +**Source**: [`FileTransfer/Agent.hs`](../../../../src/Simplex/FileTransfer/Agent.hs) + +## Architecture + +The XFTP agent uses five worker types organized in three categories: + +| Worker | Key (server) | Purpose | +|--------|-------------|---------| +| `xftpRcvWorker` | `Just server` | Download chunks from a specific XFTP server | +| `xftpRcvLocalWorker` | `Nothing` | Decrypt completed downloads locally | +| `xftpSndPrepareWorker` | `Nothing` | Encrypt files and create chunks on servers | +| `xftpSndWorker` | `Just server` | Upload chunks to a specific XFTP server | +| `xftpDelWorker` | `Just server` | Delete chunks from a specific XFTP server | + +Workers are created on-demand via `getAgentWorker` and keyed by server address. The local workers (keyed by `Nothing`) handle CPU-bound operations that don't require network access. + +## Non-obvious behavior + +### 1. startXFTPWorkers vs startXFTPSndWorkers + +`startXFTPWorkers` starts all three worker categories (rcv, snd, del). `startXFTPSndWorkers` starts only snd workers. This distinction exists because receiving and deleting require a full agent context, while sending can operate with a partial setup (used when the agent is in send-only mode). + +### 2. Download completion triggers local worker + +When `downloadFileChunk` determines that all chunks are received (`all chunkReceived chunks`), it calls `getXFTPRcvWorker True c Nothing` to wake the local decryption worker. The `True` parameter signals that work is available. Without this, the local worker would sleep until the next `waitForWork` check. + +### 3. 
Decryption verifies both digest and size before decrypting + +`decryptFile` first computes the total size of all encrypted chunk files, then their SHA-512 digest. If either mismatches the expected values, it throws an error *before* starting decryption. This prevents wasting CPU on corrupted or tampered downloads. + +### 4. Redirect chain with depth limit + +When a received file has a `redirect`, the local worker: +1. Decrypts the redirect file (a YAML file description) +2. Validates the inner description's size and digest against `RedirectFileInfo` +3. Registers the inner file's chunks and starts downloading them + +The redirect chain is implicitly limited to depth 1: `createRcvFileRedirect` creates the destination file entry with `redirect = Nothing`, and `updateRcvFileRedirect` does not update the redirect column. So even if the decoded inner description contains a redirect field, the database record for the destination file has no redirect, preventing further chaining. + +### 5. Decrypting worker resumes from RFSDecrypting + +If the agent restarts while a file is in `RFSDecrypting` status, the local worker detects this and deletes the partially-decrypted output file before restarting decryption. This prevents corrupted output from a previous incomplete decryption attempt. + +### 6. Encryption worker resumes from SFSEncrypting + +Similarly, `prepareFile` checks `status /= SFSEncrypted` and deletes the partial encrypted file if status is `SFSEncrypting`. This allows clean restart of interrupted encryption. + +### 7. Redirect files must be single-chunk + +`encryptFileForUpload` for redirect files calls `singleChunkSize` instead of `prepareChunkSizes`. If the redirect file description doesn't fit in a single chunk, it throws `FILE SIZE`. This ensures redirect files are atomic — they either download completely or not at all. + +### 8. 
addRecipients recursive batching + +During upload, `addRecipients` recursively calls itself if a chunk needs more recipients than `xftpMaxRecipientsPerRequest`. Each iteration sends an FADD command for up to `maxRecipients` new recipients, accumulates the results, and recurses until all recipients are registered. + +### 9. File description generation cross-product + +`createRcvFileDescriptions` (in both `Agent.hs` and `Client/Main.hs`) performs a cross-product transformation: M chunks × R replicas × N recipients → N file descriptions, each containing M chunks with R replicas. The `addRcvChunk` accumulator builds a `Map rcvNo (Map chunkNo FileChunk)` to correctly distribute replicas across recipient descriptions. + +### 10. withRetryIntervalLimit caps consecutive retries + +`withRetryIntervalLimit maxN` allows at most `maxN` total attempts (initial attempt at `n=0` plus `maxN-1` retries). When all attempts are exhausted for temporary errors, the operation is silently abandoned for this work cycle — the chunk remains in pending state and may be retried on the next cycle. Only permanent errors (handled by `retryDone`) mark the file as errored. + +### 11. Retry distinguishes temporary from permanent errors + +`retryOnError` checks `temporaryOrHostError`: temporary/host errors trigger retry with exponential backoff; permanent errors (AUTH, SIZE, etc.) immediately mark the file as failed. On host errors during retry, a warning notification is sent to the client. + +### 12. Delete workers skip files older than rcvFilesTTL + +`runXFTPDelWorker` uses `rcvFilesTTL` (not a dedicated delete TTL) to filter pending deletions. Files older than this TTL would already be expired on the server, so attempting deletion is pointless. This reuses the receive TTL as a proxy for server-side expiration. + +### 13. 
closeXFTPAgent atomically swaps worker maps + +`closeXFTPAgent` uses `swapTVar workers M.empty` to atomically replace each worker map with an empty map, then cancels all retrieved workers. This prevents races where a new worker could be inserted between reading and clearing the map. + +### 14. assertAgentForeground dual check + +`assertAgentForeground` both throws if the agent is inactive (`throwWhenInactive`) and blocks until it's in the foreground (`waitUntilForeground`). This is called before every chunk operation to ensure the agent isn't suspended or backgrounded during file transfers. + +### 15. Per-server stats tracking + +Every chunk download, upload, and delete operation increments per-server statistics (`downloads`, `uploads`, `deletions`, `downloadAttempts`, `uploadAttempts`, `deleteAttempts`, and error variants). Size-based stats (`downloadsSize`, `uploadsSize`) track throughput in kilobytes. diff --git a/spec/modules/Simplex/FileTransfer/Client.md b/spec/modules/Simplex/FileTransfer/Client.md new file mode 100644 index 0000000000..5cf87a5944 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Client.md @@ -0,0 +1,37 @@ +# Simplex.FileTransfer.Client + +> XFTP client: connection management, handshake, chunk upload/download with forward secrecy. + +**Source**: [`FileTransfer/Client.hs`](../../../../src/Simplex/FileTransfer/Client.hs) + +## Non-obvious behavior + +### 1. ALPN-based handshake version selection + +`getXFTPClient` checks the ALPN result after TLS negotiation: +- **`xftpALPNv1` or `httpALPN11`**: performs v1 handshake with key exchange (`httpALPN11` is used for web port connections) +- **No ALPN or unrecognized**: uses legacy v1 transport parameters without handshake + +### 2. Server certificate chain validation + +`xftpClientHandshakeV1` validates the server's identity by checking that the CA fingerprint from the certificate chain matches the expected `keyHash` from the server address. 
The server signs an authentication public key (X25519) with its long-term key. The client verifies this signature against the certificate chain, then extracts the X25519 key for HMAC-based command authentication. This authentication key is distinct from the per-download ephemeral DH keys. + +### 3. Ephemeral DH key pair per download + +`downloadXFTPChunk` generates a fresh X25519 key pair for each chunk download. The public key is sent with the FGET command; the server responds with its own ephemeral key. The derived shared secret encrypts the file data in transit. This provides forward secrecy — compromising a past DH key doesn't decrypt other downloads. + +### 4. Chunk-size-proportional download timeout + +`downloadXFTPChunk` calculates the timeout as `baseTimeout + (sizeInKB * perKbTimeout)`, where `baseTimeout` is the base TCP timeout and `perKbTimeout` is a per-kilobyte timeout from the network config. Larger chunks get proportionally more time. This prevents premature timeouts on large chunks over slow connections. + +### 5. prepareChunkSizes threshold algorithm + +`prepareChunkSizes` selects chunk sizes using a 75% threshold: if the remaining payload exceeds 75% of the next larger chunk size, it uses the larger size. Otherwise, it uses the smaller size. `singleChunkSize` returns `Just size` only if the payload fits in a single chunk (used for redirect files which must be single-chunk). + +### 6. Upload sends command and file body in a single request + +`uploadXFTPChunk` sends the FPUT command and file body in the same streaming HTTP/2 request: the protocol command block is sent first, followed immediately by the raw file data via `hSendFile`. The server response (`FROk` or error) is received only after both the command and file body have been fully sent. This is a single HTTP/2 round trip, not a two-phase interaction. + +### 7. Empty corrId as nonce + +`sendXFTPCommand` uses `""` (empty bytestring) as the correlation ID for all commands.
XFTP is strictly request-response within a single HTTP/2 stream, so correlation IDs are unnecessary. The empty value is passed to `C.cbNonce` to produce a constant nonce for command authentication (HMAC/signing), not encryption — XFTP authenticates commands but does not encrypt them within the TLS tunnel. diff --git a/spec/modules/Simplex/FileTransfer/Client/Agent.md b/spec/modules/Simplex/FileTransfer/Client/Agent.md new file mode 100644 index 0000000000..6ff1eebb77 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Client/Agent.md @@ -0,0 +1,27 @@ +# Simplex.FileTransfer.Client.Agent + +> XFTP client connection management with TMVar-based sharing, async retry, and connection lifecycle. + +**Source**: [`FileTransfer/Client/Agent.hs`](../../../../../src/Simplex/FileTransfer/Client/Agent.hs) + +## Non-obvious behavior + +### 1. TMVar-based connection sharing + +`getXFTPServerClient` first checks the `TMap XFTPServer (TMVar (Either XFTPClientAgentError XFTPClient))`. If no entry exists, it atomically inserts an empty `TMVar` and initiates connection. Other threads requesting the same server block on `readTMVar` until the connection is established or fails. This prevents duplicate connections to the same server. + +### 2. Async retry on temporary errors + +When `newXFTPClient` encounters a temporary error, it launches an async retry loop that attempts reconnection with backoff. The `TMVar` remains in the map but is empty until the retry succeeds. Other threads waiting on `readTMVar` block until either the retry succeeds or a permanent error occurs. + +### 3. Permanent error cleanup + +On permanent error, `newXFTPClient` puts the `Left error` into the `TMVar` (unblocking waiters) AND deletes the entry from the `TMap`. This means the next caller will see no entry and create a fresh connection attempt, rather than reading a stale error. Waiters that already read the `Left` receive the error. + +### 4. 
Connection timeout + +`waitForXFTPClient` wraps `readTMVar` in a timeout. If the connection establishment takes too long (e.g., server unreachable and retry loop is slow), the caller gets a timeout error rather than blocking indefinitely. The underlying connection attempt continues in the background. + +### 5. closeXFTPServerClient removes from TMap + +Closing a server client deletes its entry from the TMap, so the next request will establish a fresh connection. This is called on connection errors during file operations to force reconnection. diff --git a/spec/modules/Simplex/FileTransfer/Client/Main.md b/spec/modules/Simplex/FileTransfer/Client/Main.md new file mode 100644 index 0000000000..5f7b45af4a --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Client/Main.md @@ -0,0 +1,43 @@ +# Simplex.FileTransfer.Client.Main + +> XFTP CLI client: send, receive, delete files with parallel chunk operations and web URI encoding. + +**Source**: [`FileTransfer/Client/Main.hs`](../../../../../src/Simplex/FileTransfer/Client/Main.hs) + +## Non-obvious behavior + +### 1. Web URI encoding: base64url(deflate(YAML)) + +`encodeWebURI` compresses the YAML-encoded file description with raw DEFLATE, then base64url-encodes the result. `decodeWebURI` reverses this. The compressed description goes in the URL fragment (after `#`), which is never sent to the server — the file description stays client-side. + +### 2. CLI receive accepts both file paths and URLs + +`getInputFileDescription` checks if the input starts with `http://` or `https://`. If so, it extracts the URL fragment, decodes it via `decodeWebURI`, and uses the resulting file description. Otherwise, it reads a YAML file from disk. This allows receiving files via web links without a browser. + +### 3. Redirect chain depth limited to 1 + +`receive` tracks a `depth` parameter starting at 1. After following one redirect, `depth` becomes 0. A second redirect throws "Redirect chain too long". 
This prevents infinite redirect loops from malicious file descriptions. + +### 4. Parallel chunk uploads with server grouping + +`uploadFile` groups chunks by server via `groupAllOn`, then uses `pooledForConcurrentlyN 16` to process up to 16 server-groups concurrently. Within each group, chunks are uploaded sequentially (`mapM`). Errors from any chunk are collected and the first one is thrown. + +### 5. Random server selection + +`getXFTPServer` selects a random server from the provided list for each chunk. With a single server, it's deterministic. With multiple servers, it uses `StdGen` in a TVar for thread-safe random selection via `stateTVar`. + +### 6. withReconnect nests retry with reconnection + +`withReconnect` wraps `withRetry` twice: the outer retry reconnects to the server, and the inner operation runs against the connection. On failure, the server connection is explicitly closed before retrying, forcing a fresh connection on the next attempt. + +### 7. withRetry rejects zero retries + +`withRetry' 0` returns an "internal: no retry attempts" error. `withRetry' 1` executes the action once without retry. This off-by-one convention means `retryCount = 3` (the default) gives 3 total attempts (1 initial + 2 retries). + +### 8. File description auto-deletion prompt + +After successful receive or delete, `removeFD` either auto-deletes the file description (if `--yes` flag) or prompts the user. This prevents accidental reuse of one-time file descriptions — each receive consumes the description by ACKing chunks on the server. + +### 9. Sender description uses first replica's server + +`createSndFileDescription` takes the server from the first replica of each chunk for the sender's `FileChunkReplica`. This reflects the current limitation that each chunk is uploaded to exactly one server — the sender description records that single server. 
diff --git a/spec/modules/Simplex/FileTransfer/Crypto.md b/spec/modules/Simplex/FileTransfer/Crypto.md new file mode 100644 index 0000000000..1911de60ec --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Crypto.md @@ -0,0 +1,31 @@ +# Simplex.FileTransfer.Crypto + +> File encryption and decryption with streaming, padding, and auth tag verification. + +**Source**: [`FileTransfer/Crypto.hs`](../../../../src/Simplex/FileTransfer/Crypto.hs) + +## Non-obvious behavior + +### 1. Embedded file header in encrypted stream + +`encryptFile` prepends the `FileHeader` (containing filename and optional `fileExtra`) to the plaintext before encryption. A total data size field (8 bytes, `fileSizeLen`) is prepended before the header, encoding the combined size of header + file content. The decryptor uses this to distinguish real data from padding. The recipient must parse the header after decryption to recover the original filename — the header is not transmitted separately. + +### 2. Fixed-size padding hides actual file size + +The encrypted output is padded to `encSize` (the sum of chunk sizes). Since chunk sizes are fixed powers of 2 (64KB, 256KB, 1MB, 4MB), the encrypted file size reveals only which chunk size bucket the file falls into, not the actual size. The encryption streams data with `LC.sbEncryptChunk` in a loop, pads the remaining space, then manually appends the auth tag via `LC.sbAuth`. This manual streaming approach (rather than using the all-at-once `LC.sbEncryptTailTag`) is necessary because encryption is interleaved with file I/O. + +### 3. 
Dual decrypt paths: single-chunk vs multi-chunk + +`decryptChunks` takes different paths based on chunk count: +- **Single chunk**: reads the entire file into memory via `LB.readFile`, decrypts in-memory with `LC.sbDecryptTailTag` +- **Multiple chunks**: opens the destination file for writing and streams through each chunk file with `LC.sbDecryptChunkLazy` (lazy bytestring variant), verifying the auth tag from the final chunk + +The single-chunk path avoids file handle management overhead for small files. + +### 4. Auth tag failure deletes output file + +In the multi-chunk streaming path, if `BA.constEq` detects an auth tag mismatch after decrypting all chunks, the partially-written output file is deleted before returning `FTCEInvalidAuthTag`. This prevents consumers from using a file whose integrity is unverified. + +### 5. Streaming encryption uses 64KB blocks + +`encryptFile` reads plaintext in 65536-byte blocks (`LC.sbEncryptChunk`), regardless of the XFTP chunk size. These are encryption blocks within a single continuous stream — not to be confused with XFTP protocol chunks, which range from 64KB to 4MB. diff --git a/spec/modules/Simplex/FileTransfer/Description.md b/spec/modules/Simplex/FileTransfer/Description.md new file mode 100644 index 0000000000..b4c7e2fe95 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Description.md @@ -0,0 +1,43 @@ +# Simplex.FileTransfer.Description + +> File description: YAML encoding/decoding, validation, URI format, and replica optimization. + +**Source**: [`FileTransfer/Description.hs`](../../../../src/Simplex/FileTransfer/Description.hs) + +## Non-obvious behavior + +### 1. ValidFileDescription non-exported constructor + +`ValidFileDescription` is a newtype with a non-exported data constructor (`ValidFD`), but the module exports a bidirectional pattern synonym `ValidFileDescription` that can be used as a constructor.
Despite this, `validateFileDescription` provides the canonical validation path, checking: +- Chunk numbers are sequential starting from 1 +- Total chunk sizes equal the declared file size + +Note: an empty chunk list with size 0 passes validation — there is no explicit "at least one chunk" check. + +### 2. First-replica-only digest and chunkSize + +When encoding chunks to YAML via `unfoldChunksToReplicas`, the `digest` and non-default `chunkSize` fields are only included on the first replica of each chunk. Subsequent replicas of the same chunk omit these fields. `foldReplicasToChunks` reconstructs them by carrying forward the digest/size from the first replica. If replicas have conflicting digests or sizes, validation fails. + +### 3. Default chunkSize elision + +The top-level `FileDescription` has a `chunkSize` field. Individual chunk replicas only serialize their `chunkSize` if it differs from this default. This saves space in the common case where most chunks are the same size (only the last chunk may be smaller). + +### 4. YAML encoding groups replicas by server + +`groupReplicasByServer` groups all chunk replicas by their server, producing `FileServerReplica` records. This is the serialization format — replicas are organized by server, not by chunk. The parser (`foldReplicasToChunks`) reverses this grouping back to per-chunk replica lists. + +### 5. FileDescriptionURI uses query-string encoding + +`FileDescriptionURI` serializes file descriptions into a compact query-string format (key=value pairs separated by `&`) with `QEscape` encoding for binary values. This is distinct from the YAML format used for file-based descriptions. The URI format is designed for embedding in links. + +### 6. QR code size limit + +`qrSizeLimit = 1002` bytes limits the maximum size of a file description URI that can be encoded as a QR code. Descriptions exceeding this limit cannot be shared via QR code and require alternative transport. + +### 7. 
Soft and hard file size limits + +Two limits exist: `maxFileSize = 1GB` (soft limit, checked by CLI client) and `maxFileSizeHard = 5GB` (hard limit, checked during agent-side encryption). The soft limit is a user-facing guard; the hard limit prevents resource exhaustion during encryption. + +### 8. Redirect file descriptions + +A `FileDescription` can contain a `redirect` field pointing to another file's metadata (`RedirectFileInfo` with size and digest). The outer description downloads an encrypted YAML file that, once decrypted, yields the actual `FileDescription` for the real file. This adds one level of indirection for privacy — the relay servers hosting the redirect don't know the actual file's servers. diff --git a/spec/modules/Simplex/FileTransfer/Protocol.md b/spec/modules/Simplex/FileTransfer/Protocol.md new file mode 100644 index 0000000000..f31c905616 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Protocol.md @@ -0,0 +1,36 @@ +# Simplex.FileTransfer.Protocol + +> XFTP protocol types, commands, responses, and credential verification. + +**Source**: [`FileTransfer/Protocol.hs`](../../../../src/Simplex/FileTransfer/Protocol.hs) + +## Non-obvious behavior + +### 1. Asymmetric credential checks by command + +`checkCredentials` enforces different rules per command: +- **FNEW**: requires `auth` (signature) but must NOT have a `fileId` — the sender key from the command body is used for verification +- **PING**: must have NEITHER `auth` NOR `fileId` — actively rejects their presence +- **All others** (FADD, FPUT, FDEL, FGET, FACK): require both `fileId` AND auth key + +This asymmetry means FNEW and PING bypass the standard entity-lookup path entirely — they are handled as separate `XFTPRequest` constructors (`XFTPReqNew`, `XFTPReqPing`). + +### 2. BLOCKED response downgraded to AUTH for old clients + +`encodeProtocol` checks the protocol version: if `v < blockedFilesXFTPVersion`, a `BLOCKED` response is encoded as `AUTH` instead. 
This prevents old clients that don't understand `BLOCKED` from receiving an unknown error type. The blocking information is silently lost for these clients. + +### 3. Single-transmission batch enforcement + +`xftpDecodeTServer` calls `xftpDecodeTransmission` which rejects batches containing more than one transmission. Despite using the batch framing format (length-prefixed), XFTP requires exactly one command per request. This differs from SMP where true batching is supported. + +### 4. xftpEncodeBatch1 always uses batch framing + +Even for single transmissions, `xftpEncodeBatch1` wraps the encoded transmission in batch format (1-byte count prefix + 2-byte length-prefixed transmission). There is no "non-batch" mode in XFTP — all protocol messages use the batch wire format regardless of the negotiated version. + +### 5. FileParty GADT partitions command space + +Commands are indexed by `FileParty` (`SFSender` / `SFRecipient`) at the type level via `FileCmd`. This ensures at compile time that sender commands (FNEW, FADD, FPUT, FDEL) and recipient commands (FGET, FACK, PING) cannot be confused. The server pattern-matches on `SFileParty` to determine which index (sender vs recipient) to look up in the file store. + +### 6. Empty corrId and implicit session ID + +`sendXFTPCommand` in the client uses an empty bytestring as `corrId`. This empty value is passed to `C.cbNonce` to produce a constant nonce for command authentication (HMAC/signing). With `implySessId = False` in the default XFTP transport setup, the session ID is not prepended to entity IDs during parsing. Session identity is provided by the TLS connection itself. 
diff --git a/spec/modules/Simplex/FileTransfer/Server.md b/spec/modules/Simplex/FileTransfer/Server.md new file mode 100644 index 0000000000..f3a01314d1 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Server.md @@ -0,0 +1,85 @@ +# Simplex.FileTransfer.Server + +> XFTP server: HTTP/2 request handling, handshake state machine, file operations, and statistics. + +**Source**: [`FileTransfer/Server.hs`](../../../../src/Simplex/FileTransfer/Server.hs) + +## Architecture + +The XFTP server runs several concurrent threads via `raceAny_`: + +| Thread | Purpose | +|--------|---------| +| `runServer` | HTTP/2 server accepting file transfer requests | +| `expireFiles` | Periodic file expiration with throttling | +| `logServerStats` | Periodic stats flush to CSV | +| `savePrometheusMetrics` | Periodic Prometheus metrics dump | +| `runCPServer` | Control port for admin commands | + +## Non-obvious behavior + +### 1. Three-state handshake with session caching + +The server maintains a `TMap SessionId Handshake` with three states: +- **No entry**: first request — for non-SNI or `xftp-web-hello` requests, `processHello` generates DH key pair and sends server handshake; for SNI requests without `xftp-web-hello`, returns `SESSION` error +- **`HandshakeSent pk`**: server hello sent, waiting for client handshake with version negotiation +- **`HandshakeAccepted thParams`**: handshake complete, subsequent requests use cached params + +Web clients can re-send hello (`xftp-web-hello` header) even in `HandshakeSent` or `HandshakeAccepted` states — the server reuses the existing private key rather than generating a new one. + +### 2. Web identity proof via challenge-response + +When a web client sends a hello with a non-empty body, the server parses an `XFTPClientHello` containing a `webChallenge`. The server signs `challenge <> sessionId` with its long-term key and includes the signature in the handshake response. 
This proves server identity to web clients that cannot verify TLS certificates directly. + +### 3. skipCommitted drains request body on re-upload + +If `receiveServerFile` detects the file is already uploaded (`filePath` TVar is `Just`), it cannot simply ignore the request body — the HTTP/2 client would block waiting for the server to consume it. Instead, `skipCommitted` reads and discards the entire body in `fileBlockSize` increments, returning `FROk` when complete. This makes FPUT idempotent from the client's perspective. + +### 4. Atomic quota reservation with rollback + +`receiveServerFile` uses `stateTVar` to atomically check and reserve storage quota before receiving the file. If the upload fails (timeout, size mismatch, IO error), the reserved size is subtracted from `usedStorage` and the partial file is deleted. This prevents failed uploads from permanently consuming quota. + +### 5. retryAdd generates new IDs on collision + +`createFile` and `addRecipient` use `retryAdd` which generates a random ID and makes up to 3 total attempts (initial + 2 retries) on `DUPLICATE_` errors. This handles the astronomically unlikely case of random ID collision without requiring uniqueness checking before insertion. + +### 6. Timing attack mitigation on entity lookup + +`verifyXFTPTransmission` calls `dummyVerifyCmd` (imported from SMP server) when a file entity is not found. This equalizes response timing to prevent attackers from distinguishing "entity doesn't exist" from "signature invalid" based on latency. + +### 7. BLOCKED vs EntityOff distinction + +When `verifyXFTPTransmission` reads `fileStatus`: +- `EntityActive` → proceed with command +- `EntityBlocked info` → return `BLOCKED` with blocking reason +- `EntityOff` → return `AUTH` (same as entity-not-found) + +`EntityOff` is treated identically to missing entities for information-hiding purposes. + +### 8. 
blockServerFile deletes the physical file + +Despite the name suggesting it only marks a file as blocked, `blockServerFile` also deletes the physical file from disk via `deleteOrBlockServerFile_`. The `deleted = True` parameter to `blockFile` in the store adjusts `usedStorage`. A blocked file returns `BLOCKED` errors on access but has no data on disk. + +### 9. Stats restore overrides counts from live store + +`restoreServerStats` loads stats from the backup file but overrides `_filesCount` and `_filesSize` with values computed from the live file store (TMap size and `usedStorage` TVar). If the backup values differ, warnings are logged. This handles cases where files were expired or deleted while the server was down. + +### 10. File expiration with configurable throttling + +`expireServerFiles` accepts an optional `itemDelay` (100ms when called from the periodic thread, `Nothing` at startup). Between each file check, `threadDelay itemDelay` prevents expiration from monopolizing IO. At startup, files are expired without delay to clean up quickly. + +### 11. Stats log aligns to wall-clock midnight + +`logServerStats` computes an `initialDelay` to align the first stats flush to `logStatsStartTime` (default 0 = midnight UTC). If the target time already passed today, it adds 86400 seconds for the next day. Subsequent flushes use exact `logInterval` cadence. + +### 12. Physical file deleted before store cleanup + +`deleteOrBlockServerFile_` removes the physical file first, then runs the STM store action. If the process crashes between these two operations, the store will reference a file that no longer exists on disk. The next access would return `AUTH` (file not found on disk), and eventual expiration would clean the store entry. + +### 13. SNI-dependent CORS and web serving + +CORS headers require both `sniUsed = True` and `addCORSHeaders = True` in the transport config. Static web page serving is enabled when `sniUsed = True`. 
Non-SNI connections (direct TLS without hostname) skip both CORS and web serving. This separates the web-facing and protocol-facing behaviors of the same port. + +### 14. Control port file operations use recipient index + +`CPDelete` and `CPBlock` commands look up files via `getFile fs SFRecipient fileId`, meaning the control port takes a recipient ID, not a sender ID. This is the ID visible to recipients and contained in file descriptions. diff --git a/spec/modules/Simplex/FileTransfer/Server/Env.md b/spec/modules/Simplex/FileTransfer/Server/Env.md new file mode 100644 index 0000000000..e9f509a1a8 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Server/Env.md @@ -0,0 +1,24 @@ +# Simplex.FileTransfer.Server.Env + +> XFTP server environment: configuration, storage quota tracking, and request routing. + +**Source**: [`FileTransfer/Server/Env.hs`](../../../../../src/Simplex/FileTransfer/Server/Env.hs) + +## Non-obvious behavior + +### 1. Startup storage accounting with quota warning + +`newXFTPServerEnv` computes `usedStorage` by summing file sizes from the in-memory store at startup. If the computed usage exceeds the configured `fileSizeQuota`, a warning is logged but the server still starts. This allows the server to come up even if it's over quota (e.g., after a quota reduction), relying on expiration to reclaim space. + +### 2. XFTPRequest ADT separates new files from commands + +`XFTPRequest` has three constructors: +- `XFTPReqNew`: file creation (carries `FileInfo`, recipient keys, optional basic auth) +- `XFTPReqCmd`: command on an existing file (carries file ID, `FileRec`, and the command) +- `XFTPReqPing`: health check + +This separation occurs after credential verification in `Server.hs`. `XFTPReqNew` bypasses entity lookup entirely since the file doesn't exist yet. + +### 3. fileTimeout for upload deadline + +`fileTimeout` in `XFTPServerConfig` sets the maximum time allowed for a single file upload (FPUT). 
The server wraps the receive operation in `timeout fileTimeout`. Default is 5 minutes (for 4MB chunks). This prevents slow or stalled uploads from holding server resources indefinitely. diff --git a/spec/modules/Simplex/FileTransfer/Server/Main.md b/spec/modules/Simplex/FileTransfer/Server/Main.md new file mode 100644 index 0000000000..54a45751f2 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Server/Main.md @@ -0,0 +1,28 @@ +# Simplex.FileTransfer.Server.Main + +> XFTP server CLI: INI configuration parsing, TLS setup, and default constants. + +**Source**: [`FileTransfer/Server/Main.hs`](../../../../../src/Simplex/FileTransfer/Server/Main.hs) + +## Non-obvious behavior + +### 1. Key server constants + +| Constant | Value | Purpose | +|----------|-------|---------| +| `fileIdSize` | 16 bytes | Random file/recipient ID length | +| `fileTimeout` | 5 minutes | Maximum upload duration per chunk | +| `logStatsInterval` | 86400s (daily) | Stats CSV flush interval | +| `logStatsStartTime` | 0 (midnight UTC) | First stats flush time-of-day | + +### 2. allowedChunkSizes defaults to all four sizes + +If not configured, `allowedChunkSizes` defaults to `[kb 64, kb 256, mb 1, mb 4]`. The INI file can restrict this to a subset, controlling which chunk sizes the server accepts. + +### 3. Storage quota from INI with unit parsing + +`fileSizeQuota` is parsed from the INI `[STORE_LOG]` section using `FileSize` parsing, which accepts byte values with optional unit suffixes (KB, MB, GB). Absence means unlimited quota (`Nothing`). + +### 4. Dual TLS credential support + +The server supports both primary TLS credentials (`caCertificateFile`/`certificateFile`/`privateKeyFile`) and optional HTTP-specific credentials (`httpCaCertificateFile`/etc.). When HTTP credentials are present, the server uses `defaultSupportedParamsHTTPS` which enables broader TLS compatibility for web clients. 
diff --git a/spec/modules/Simplex/FileTransfer/Server/Stats.md b/spec/modules/Simplex/FileTransfer/Server/Stats.md new file mode 100644 index 0000000000..7e684c58a1 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Server/Stats.md @@ -0,0 +1,19 @@ +# Simplex.FileTransfer.Server.Stats + +> XFTP server statistics: IORef-based counters with backward-compatible persistence. + +**Source**: [`FileTransfer/Server/Stats.hs`](../../../../../src/Simplex/FileTransfer/Server/Stats.hs) + +## Non-obvious behavior + +### 1. setFileServerStats is not thread safe + +`setFileServerStats` directly writes to IORefs without synchronization. It is explicitly intended for server startup only (restoring from backup file), before any concurrent threads are running. + +### 2. Backward-compatible parsing + +The `strP` parser uses `opt` for newer fields, defaulting missing fields to 0. This allows reading stats files from older server versions that don't include fields like `filesBlocked` or `fileDownloadAcks`. + +### 3. PeriodStats for download tracking + +`filesDownloaded` uses `PeriodStats` (not a simple `IORef Int`) to track unique file downloads over time periods (day/week/month). This enables the CSV stats log to report distinct files downloaded per period, not just total download count. diff --git a/spec/modules/Simplex/FileTransfer/Server/Store.md b/spec/modules/Simplex/FileTransfer/Server/Store.md new file mode 100644 index 0000000000..89b0c3b36c --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Server/Store.md @@ -0,0 +1,39 @@ +# Simplex.FileTransfer.Server.Store + +> STM-based in-memory file store with dual indices, storage accounting, and privacy-preserving expiration. + +**Source**: [`FileTransfer/Server/Store.hs`](../../../../../src/Simplex/FileTransfer/Server/Store.hs) + +## Non-obvious behavior + +### 1. 
Dual-index lookup by sender and recipient + +The file store maintains two indices: `files :: TMap SenderId FileRec` (by sender ID) and `recipients :: TMap RecipientId (SenderId, RcvPublicAuthKey)` (by recipient ID, storing the sender ID and the recipient's public auth key). `getFile` dispatches on `SFileParty`: sender lookups use `files` directly, recipient lookups use `recipients` to find the `SenderId` then look up the `FileRec` in `files`. This means recipient operations require two TMap lookups. + +### 2. addRecipient checks both inner Set and global TMap + +`addRecipient` first checks the per-file `recipientIds` Set for duplicates, then inserts into the global `recipients` TMap. If either has a collision, it returns `DUPLICATE_`. The dual check is necessary because the Set tracks per-file membership while the TMap enforces global uniqueness of recipient IDs. + +### 3. Storage accounting on upload completion + +`setFilePath` adds the file size to `usedStorage` and records the file path in the `filePath` TVar. However, during normal FPUT handling, `Server.hs` does NOT call `setFilePath` — it directly writes `filePath` via `writeTVar`. The quota reservation in `Server.hs` (`stateTVar` on `usedStorage`) is the sole `usedStorage` increment during upload. `setFilePath` IS called during store log replay (`StoreLog.hs`), where it increments `usedStorage`; `newXFTPServerEnv` then overwrites with the correct value computed from the live store. + +### 4. deleteFile removes all recipients atomically + +`deleteFile` atomically removes the sender entry from `files`, all recipient entries from the global `recipients` TMap, and unconditionally subtracts the file size from `usedStorage` (regardless of whether the file was actually uploaded). The entire operation runs in a single STM transaction. + +### 5. RoundedSystemTime for privacy-preserving expiration + +File timestamps use `RoundedFileTime` which is `RoundedSystemTime 3600` — system time rounded to 1-hour precision. 
This means files created within the same hour have identical timestamps. An observer with access to the store cannot determine exact file creation times, only the hour. + +### 6. expiredFilePath returns path only if expired + +`expiredFilePath` returns `STM (Maybe (Maybe FilePath))`. The outer `Maybe` is `Nothing` when the file doesn't exist or isn't expired; the inner `Maybe` is the file path (present only if the file was uploaded). The expiration check adds `fileTimePrecision` (one hour) to the creation timestamp before comparing, providing a grace period. The caller uses the inner path to decide whether to also delete the physical file. + +### 7. ackFile removes single recipient + +`ackFile` removes a specific recipient from both the global `recipients` TMap and the per-file `recipientIds` Set. Unlike `deleteFile` which removes the entire file, `ackFile` only removes one recipient's access. The file and other recipients remain intact. + +### 8. blockFile conditional storage adjustment + +`blockFile` takes a `deleted :: Bool` parameter. When `True` (file blocked with physical deletion), it subtracts the file size from `usedStorage`. When `False` (block without deletion), storage is unchanged. This allows blocking without physical deletion for audit purposes. Currently, both the server's `blockServerFile` and the store log replay path pass `True`. diff --git a/spec/modules/Simplex/FileTransfer/Server/StoreLog.md b/spec/modules/Simplex/FileTransfer/Server/StoreLog.md new file mode 100644 index 0000000000..35a339515f --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Server/StoreLog.md @@ -0,0 +1,33 @@ +# Simplex.FileTransfer.Server.StoreLog + +> Append-only store log for XFTP file operations with error-resilient replay and compaction. + +**Source**: [`FileTransfer/Server/StoreLog.hs`](../../../../../src/Simplex/FileTransfer/Server/StoreLog.hs) + +## Non-obvious behavior + +### 1. Error-resilient replay + +`readFileStore` parses the store log line-by-line. 
Lines that fail to parse or fail to process (e.g., referencing a nonexistent sender ID) are logged as errors but do not halt replay. The store is reconstructed from whatever valid entries exist. This allows the server to recover from partial log corruption. + +### 2. Sender ID validation on recipient writes + +`writeFileStore` during compaction validates that each recipient's sender ID in the `recipients` TMap matches the `senderId` of the corresponding `FileRec`. This guards against in-memory state corruption (e.g., if a bug caused the `recipients` TMap and `FileRec.recipientIds` to get out of sync), not log corruption — the validation happens before writing the compacted log. + +### 3. Backward-compatible status parsing + +`AddFile` log entries include an `EntityStatus` field. The parser uses `<|> pure EntityActive` as a fallback, defaulting to `EntityActive` when the status field is missing. This allows reading store logs from older server versions that didn't record entity status. + +### 4. Compaction on restart + +`readFileStore` replays the full log to rebuild the in-memory store. The caller (in `Server/Env.hs`) then writes a fresh, compacted store log containing only the current state. This eliminates deleted entries and redundant operations, keeping the log size proportional to active state rather than total history. + +### 5. 
Log entry types track operation lifecycle + +Six log entry types capture the complete file lifecycle: +- `AddFile`: file creation with sender ID, file info, timestamp, and status +- `AddRecipients`: recipient registration (batched as `NonEmpty FileRecipient`) with sender ID association +- `PutFile`: upload completion with file path +- `DeleteFile`: file deletion by sender ID +- `AckFile`: single recipient acknowledgment +- `BlockFile`: file blocking with blocking info diff --git a/spec/modules/Simplex/FileTransfer/Types.md b/spec/modules/Simplex/FileTransfer/Types.md new file mode 100644 index 0000000000..814e651955 --- /dev/null +++ b/spec/modules/Simplex/FileTransfer/Types.md @@ -0,0 +1,27 @@ +# Simplex.FileTransfer.Types + +> Agent-side file transfer types: receive/send file records, status state machines, chunk/replica structures. + +**Source**: [`FileTransfer/Types.hs`](../../../../src/Simplex/FileTransfer/Types.hs) + +## Non-obvious behavior + +### 1. Receive file status state machine + +`RcvFileStatus` progresses: `RFSReceiving` → `RFSReceived` → `RFSDecrypting` → `RFSComplete`, with `RFSError` as a terminal state reachable from any non-complete state. The `RFSReceived` → `RFSDecrypting` transition is significant: all chunks are downloaded but decryption hasn't started. The local worker (server=Nothing) picks up files in `RFSReceived` status. + +### 2. Send file status state machine + +`SndFileStatus` progresses: `SFSNew` → `SFSEncrypting` → `SFSEncrypted` → `SFSUploading` → `SFSComplete`, with `SFSError` as terminal. The prepare worker handles `SFSNew` → `SFSEncrypted` (including retry from `SFSEncrypting`), while per-server upload workers handle `SFSUploading` → `SFSComplete`. + +### 3. Encrypted file path convention + +`sndFileEncPath` constructs the path as `prefixPath "xftp.encrypted"`. 
This is a convention shared between the agent (`Agent.hs`) and this module — both must agree on where the encrypted intermediate file lives relative to the prefix directory. + +### 4. FileHeader fileExtra for future extension + +`FileHeader` contains `fileName` and an optional `fileExtra :: Maybe Text` field. Currently unused (`Nothing` in all callers), it provides a forward-compatible extension point embedded in the encrypted file header without requiring protocol version changes. + +### 5. authTagSize = 16 bytes + +`authTagSize` is defined as `fromIntegral C.authTagSize` (16 bytes). This is the size of the authentication tag appended to the encrypted file stream (the tag computed by `LC.sbAuth` during file encryption, not an AES-GCM tag). It is included in the payload size calculation (`payloadSize = fileSize' + fileSizeLen + authTagSize`), which is then passed to `prepareChunkSizes` to determine chunk allocation. From ceeeeec4765d644490dde625bb152bce42ff6bf0 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:33:45 +0000 Subject: [PATCH 43/91] more topics --- spec/TOPICS.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/spec/TOPICS.md b/spec/TOPICS.md index c29e617052..0489d79df2 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -35,3 +35,35 @@ - **Queue rotation protocol**: Four agent messages (QADD → QKEY → QUSE → QTEST) on top of SMP commands, with asymmetric state machines on receiver side (`RcvSwitchStatus`: 4 states) and sender side (`SndSwitchStatus`: 2 states). Receiver initiates, creates new queue, sends QADD. Sender responds with QKEY. Receiver sends QUSE. Sender sends QTEST to complete. State types in Agent/Protocol.hs, orchestration in Agent.hs, queue creation/deletion in Agent/Client.hs. Protocol spec in agent-protocol.md. The fast variant (v9+ SMP with SKEY) skips the KEY command step.
- **Outside-STM lookup pattern**: Multiple modules use the pattern of looking up TVar references outside STM (via readTVarIO/TM.lookupIO), then reading/modifying the TVar contents inside STM. This avoids transaction re-evaluation from unrelated map changes. Used in: Server.hs (serverThread client lookup, tryDeliverMessage subscriber lookup), Env/STM.hs (deleteSubcribedClient), Client/Agent.hs (removeClientAndSubs, reconnectSMPClient). The safety invariant is that the outer map entries (TVars) are never removed — only their contents change. + +- **NTF token lifecycle**: Token registration (TNEW) → verification push → NTConfirmed → TVFY → NTActive, with idempotent re-registration (DH secret check), TRPL (device token replacement reusing DH key), status repair for stuck tokens, and `PPApnsNull` test tokens suppressing stats. The lifecycle spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md) (command handling, verification push delivery), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) (conditional status updates, duplicate registration cleanup), [Types.hs](modules/Simplex/Messaging/Notifications/Types.md) (NtfTknStatus state machine), and [Env.hs](modules/Simplex/Messaging/Notifications/Server/Env.md) (push client lazy initialization). + +- **NTF push delivery pipeline**: Bounded TBQueue (`pushQ`) creates backpressure → `ntfPush` thread reads → `checkActiveTkn` gates PNMessage (but not PNVerification or PNCheckMessages) → APNS delivery with single retry on connection errors (new push client on retry) → PPTokenInvalid marks token NTInvalid. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md), [APNS.hs](modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) (DER JWT signing, HTTP/2 serializing queue, fire-and-forget connection), [Env.hs](modules/Simplex/Messaging/Notifications/Server/Env.md) (push client caching with race tolerance). 
+ +- **NTF service subscription model**: Service-level subscriptions (SUBS/NSUBS on SMP) vs individual queue subscriptions, with fallback from service to individual when `CAServiceUnavailable`. Service credentials are lazily generated per SMP server with 25h backdating and ~2700yr validity. XOR hash triggers on PostgreSQL maintain subscription aggregate counts. Subscription status tracking uses `ntf_service_assoc` flag to distinguish service-associated from individually-subscribed queues. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md) (subscriber thread, service fallback), [Env.hs](modules/Simplex/Messaging/Notifications/Server/Env.md) (lazy credential generation, Weak ThreadId subscriber cleanup), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) (XOR hash triggers, batch status updates, cursor-based pagination). + +- **NTF startup resubscription**: `resubscribe` runs as detached `forkIO` (not in `raceAny_` group), uses `mapConcurrently` across SMP servers, each with `subscribeLoop` using 100x database batch multiplier and cursor-based pagination. `ExitCode` exceptions from `exitFailure` on DB error propagate to main thread despite `forkIO`. `getServerNtfSubscriptions` claims subscriptions by batch-updating to `NSPending`. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md). + +- **XFTP file upload pipeline**: Agent-side encryption (streaming 64KB blocks, fixed-size padding) → chunk size selection (75% threshold algorithm) → per-server chunk creation with ID collision retry (3 attempts) → recipient registration (recursive batching up to `maxRecipients` per FADD) → per-server upload (command + file body in single HTTP/2 streaming request) → file description generation (cross-product: M chunks × R replicas × N recipients → N descriptions). 
Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, description generation), [Client.hs](modules/Simplex/FileTransfer/Client.md) (upload protocol), [Server.hs](modules/Simplex/FileTransfer/Server.md) (quota reservation with rollback, skipCommitted idempotency), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (streaming encryption with embedded header), [Description.hs](modules/Simplex/FileTransfer/Description.md) (validation, first-replica-only digest optimization). + +- **XFTP file download pipeline**: Description parsing (ValidFileDescription validation, YAML or web URI) → per-server chunk download with ephemeral DH key pair per download (forward secrecy) → size and digest verification before decryption → streaming decryption with auth tag verification (output deleted on failure) → redirect resolution (depth-1 chain: decrypt redirect YAML, validate size/digest, download actual file). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, redirect handling), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ephemeral DH, chunk-proportional timeout), [Client/Main.hs](modules/Simplex/FileTransfer/Client/Main.md) (web URI decoding, parallel download with server grouping), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (dual decrypt paths, auth tag deletion), [Description.hs](modules/Simplex/FileTransfer/Description.md) (redirect file descriptions). + +- **XFTP handshake state machine**: Three-state session-cached handshake (`No entry` → `HandshakeSent` → `HandshakeAccepted`) per HTTP/2 session. Web clients use `xftp-web-hello` header and challenge-response identity proof; native clients use standard ALPN. SNI presence gates CORS headers, web serving, and SESSION error for unrecognized connections. Key reuse on re-hello preserves existing DH keys. 
Spans [Server.hs](modules/Simplex/FileTransfer/Server.md) (handshake logic, CORS, web serving), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ALPN selection, cert chain validation), [Transport.hs](modules/Simplex/FileTransfer/Transport.md) (block size, version). + +- **XFTP storage lifecycle**: Quota reservation via atomic `stateTVar` before upload → rollback on failure (subtract + delete partial file) → physical file deleted before store cleanup (crash risk: store references missing file) → `RoundedSystemTime 3600` for privacy-preserving expiration timestamps → expiration with configurable throttling (100ms between files) → startup storage reconciliation (override stats from live store). Spans [Server.hs](modules/Simplex/FileTransfer/Server.md), [Server/Store.hs](modules/Simplex/FileTransfer/Server/Store.md), [Server/Env.hs](modules/Simplex/FileTransfer/Server/Env.md), [Server/StoreLog.hs](modules/Simplex/FileTransfer/Server/StoreLog.md) (error-resilient replay, compaction). + +- **XFTP worker architecture**: Five worker types in three categories: rcv (per-server download + local decryption), snd (local prepare/encrypt + per-server upload), del (per-server delete). TMVar-based connection sharing with async retry on temporary errors, permanent error cleanup (put Left + delete from TMap). `withRetryIntervalLimit` caps consecutive retries; when temporary errors exhaust the retry limit, the work cycle is silently abandoned (chunk stays pending). `assertAgentForeground` dual check (throw if inactive + wait if backgrounded) gates every chunk operation. Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md), [Client/Agent.hs](modules/Simplex/FileTransfer/Client/Agent.md). + +- **SessionVar protocol client lifecycle**: Protocol client connections (SMP, NTF, XFTP) use a lazy singleton pattern: `getSessVar` atomically checks TMap → `newProtocolClient` fills TMVar on success/failure → `waitForProtocolClient` reads with timeout.
Error caching via `persistErrorInterval` prevents connection storms (failed connections cache the error with expiry; callers receive cached error without reconnecting). `removeSessVar` uses monotonic `sessionVarId` compare-and-swap to prevent stale disconnect callbacks from removing newer clients. SMP has additional complexity: `SMPConnectedClient` wraps client with per-connection proxied relay map, `updateClientService` synchronizes service credentials post-connect, disconnect callback moves subscriptions to pending with session-ID matching. XFTP always uses `NRMBackground` timing regardless of caller request. Spans [Session.md](modules/Simplex/Messaging/Session.md), [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (lifecycle, disconnect callbacks, reconnection workers), [Agent.md](modules/Simplex/Messaging/Agent.md) (subscriber loop consuming events). + +- **Dual-backend agent store**: The agent store (~3700 lines in AgentStore.hs) compiles for both SQLite and PostgreSQL via `#if defined(dbPostgres)` CPP guards. Key behavioral differences: PostgreSQL uses `FOR UPDATE` row locking on reads preceding writes (SQLite relies on single-writer model); PostgreSQL uses `IN ?` with `In` wrapper for batch queries (SQLite falls back to per-row `forM` loops); PostgreSQL uses `constraintViolation` (SQLite checks `SQL.ErrorConstraint`); `createWithRandomId'` uses savepoints on PostgreSQL (failed statement aborts entire transaction without them). One known bug: `checkConfirmedSndQueueExists_` uses `#if defined(dpPostgres)` (typo: `dp` not `db`), so the `FOR UPDATE` clause is never included on any backend. Spans [AgentStore.md](modules/Simplex/Messaging/Agent/Store/AgentStore.md), [SQLite.md](modules/Simplex/Messaging/Agent/Store/SQLite.md). + +- **Deferred message encryption**: Message bodies are NOT encrypted at enqueue time. `enqueueMessageB` advances the ratchet header and validates padding, but stores only the body reference (`sndMsgBodyId`) and encryption key. 
Actual encryption (`rcEncryptMsg`) happens at delivery time in `runSmpQueueMsgDelivery`. This enables body deduplication via `VRValue`/`VRRef` — identical bodies (common for group messages) share one database row, but each connection's delivery encrypts independently with its own ratchet. Confirmation and ratchet key messages bypass deferred encryption (pre-encrypted at enqueue time). Spans [Agent.md](modules/Simplex/Messaging/Agent.md) (enqueue + delivery), [AgentStore.md](modules/Simplex/Messaging/Agent/Store/AgentStore.md) (`snd_message_bodies` storage). + +- **NTF agent subscription lifecycle**: The agent-side notification subscription system uses a supervisor-worker architecture with three worker pools (NTF server, SMP server, token deletion). `NSCCreate` triggers a four-way partition (`partitionQueueSubActions`): new sub, reset sub (credential mismatch or null action), continue SMP work, continue NTF work. Workers coordinate with the supervisor via `updated_by_supervisor` flag — workers only update local fields when the flag is set, preventing overwrite of supervisor decisions. The null-action sentinel (`workerErrors` sets action to NULL on permanent failure) bridges worker failure recovery to supervisor-driven re-creation. `retrySubActions` uses a shrinking TVar — each iteration only retries subs with temporary errors, so batches get smaller over time. `rescheduleWork` handles time-scheduled health checks by forking a sleep thread that re-signals `doWork`. Spans [NtfSubSupervisor.md](modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) (supervisor, worker pools), [AgentStore.md](modules/Simplex/Messaging/Agent/Store/AgentStore.md) (updated_by_supervisor, null-action sentinel), [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (worker framework). + +- **Session-aware SMP subscription management**: SMP queue subscriptions are tracked per transport session with session-ID validation at multiple points. 
`subscribeQueues` groups queues by transport session, subscribes concurrently, then validates `activeClientSession` post-RPC — if the client was replaced during the RPC, results are discarded and converted to temporary errors for retry. `removeClientAndSubs` (disconnect cleanup) only demotes subscriptions whose session ID matches the disconnecting client. Batch UP notifications are accumulated across transmissions and deduplicated against already-active subscriptions. When ALL results are temporary errors and no connections were already active, the SMP client is closed to force a fresh connection. `maxPending` throttles concurrent pending subscriptions with STM retry backpressure. Spans [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (subscription state, session validation), [Agent.md](modules/Simplex/Messaging/Agent.md) (subscriber loop, processSMPTransmissions, UP accumulation). + +- **Agent message envelope**: Agent messages use a two-layer format — outer `AgentMsgEnvelope` (version + type tag C/M/I/R + payload) and inner `AgentMessage` (after double-ratchet decryption, tags I/D/R/M + AMessage). Tag characters deliberately overlap between layers (disambiguated by context). `AgentInvitation` uses only per-queue E2E encryption (no ratchet established yet); `AgentRatchetKey` uses per-queue E2E (can't use ratchet to renegotiate ratchet); `AgentConfirmation` uses double ratchet. PQ support *shrinks* message size budgets (ratchet header + reply link grow with SNTRUP761 keys). `AEvent` is a GADT indexed by `AEntity` — prevents file events on connection entities at the type level. Spans [Agent/Protocol.md](modules/Simplex/Messaging/Agent/Protocol.md) (types, encoding, size budgets), [Agent.md](modules/Simplex/Messaging/Agent.md) (four e2e key states dispatch, message processing).
+ +- **Ratchet synchronization protocol**: When the double ratchet gets out of sync (backup restoration, message loss), both parties exchange `AgentRatchetKey` messages with fresh DH keys. Role determination uses hash-ordering: `rkHash(k1, k2)` is computed by both sides — the party with the lower hash initializes the receiving ratchet, the other initializes sending and sends EREADY. This breaks symmetry when both parties simultaneously initiate. State machine: `RSOk`/`RSAllowed`/`RSRequired` → generate keys + reply; `RSStarted` → use stored keys; `RSAgreed` → error (reset to `RSRequired`). EREADY carries `lastExternalSndId` so the peer knows which messages used the old ratchet. `checkRatchetKeyHashExists` prevents processing the same key twice. Successful message decryption resets sync state to `RSOk` (the recovery signal). Spans [Agent.md](modules/Simplex/Messaging/Agent.md) (newRatchetKey, ereadyMsg, resetRatchetSync), [Agent/Protocol.md](modules/Simplex/Messaging/Agent/Protocol.md) (AgentRatchetKey type, cryptoErrToSyncState classification). 
From b9288544ed34994f9d951e56db13ef5d1b6a808c Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:46:36 +0000 Subject: [PATCH 44/91] xrcp specs --- spec/modules/Simplex/RemoteControl/Client.md | 65 +++++++++++++++++++ .../Simplex/RemoteControl/Discovery.md | 25 +++++++ .../RemoteControl/Discovery/Multicast.md | 7 ++ .../Simplex/RemoteControl/Invitation.md | 26 ++++++++ spec/modules/Simplex/RemoteControl/Types.md | 31 +++++++++ 5 files changed, 154 insertions(+) create mode 100644 spec/modules/Simplex/RemoteControl/Client.md create mode 100644 spec/modules/Simplex/RemoteControl/Discovery.md create mode 100644 spec/modules/Simplex/RemoteControl/Discovery/Multicast.md create mode 100644 spec/modules/Simplex/RemoteControl/Invitation.md create mode 100644 spec/modules/Simplex/RemoteControl/Types.md diff --git a/spec/modules/Simplex/RemoteControl/Client.md b/spec/modules/Simplex/RemoteControl/Client.md new file mode 100644 index 0000000000..55fd05bc17 --- /dev/null +++ b/spec/modules/Simplex/RemoteControl/Client.md @@ -0,0 +1,65 @@ +# Simplex.RemoteControl.Client + +> XRCP session establishment: controller-host handshake with KEM hybrid key exchange, multicast discovery, and session encryption. + +**Source**: [`RemoteControl/Client.hs`](../../../../../../src/Simplex/RemoteControl/Client.hs) + +## Overview + +This module implements the two sides of the XRCP remote control protocol: the **controller** side (`connectRCHost`) and the **host** side (`connectRCCtrl`). The naming follows [Types.md](./Types.md) — "host" means connecting **to** the host (controller's perspective). + +The handshake is a multi-step flow using `RCStepTMVar` — a `TMVar (Either RCErrorType a)` that allows each phase to be observed and controlled by the application. The application receives the session code (TLS channel binding) for user verification before the session proceeds. + +## Handshake flow + +1. 
**Controller** starts TLS server, creates invitation with ephemeral session key + DH key + identity key +2. **Host** connects via TLS (with mutual certificate authentication), receives invitation out-of-band or via multicast +3. **Host** sends `RCHostEncHello`: ephemeral DH public key + nonce + encrypted hello body (containing KEM public key, CA fingerprint, app info) +4. **Controller** decrypts hello, verifies CA fingerprint matches TLS certificate, performs KEM encapsulation, derives hybrid key (DH + KEM), sends `RCCtrlEncHello` with KEM ciphertext + encrypted response +5. **Host** decrypts with KEM hybrid key, session established with `TSbChainKeys` + +## KEM hybrid key derivation + +The session key combines DH and post-quantum KEM via `kemHybridSecret`: `SHA3_256(dhSecret || kemSharedKey)`. This is used to initialize `sbcInit` chain keys. The chain keys are **swapped** between controller and host — `prepareCtrlSession` explicitly calls `swap` on the `sbcInit` result so that the controller's send key matches the host's receive key. + +## Two-phase session with user confirmation + +`connectRCCtrl` (host side) splits the session into two phases via `confirmSession` TMVar: + +1. TLS connection established → first `RCStepTMVar` resolved with session code +2. Application displays session code for user verification → calls `confirmCtrlSession` with `True`/`False` +3. If confirmed, `runSession` proceeds with hello exchange → second `RCStepTMVar` resolved with session + +`confirmCtrlSession` does a double `putTMVar` — the first signals the decision, the second blocks until the session thread does `takeTMVar` (synchronization point). See TODO in source: no timeout on this wait. + +## TLS hooks — single-session enforcement + +`tlsHooks` on the controller side enforces at most one TLS session: `onNewHandshake` checks if the result TMVar is still empty (`isNothing <$> tryReadTMVar r`). A second TLS connection attempt is rejected because `r` is already filled. 
Similarly, `onClientCertificate` validates the host's CA certificate chain (must be exactly 2 certs: leaf + CA) and checks the CA fingerprint against the known host pairing. + +## Multicast discovery — prevDhPrivKey fallback + +`findRCCtrlPairing` tries to decrypt the multicast announcement with each known pairing's current DH key, falling back to `prevDhPrivKey` if present. This handles the case where the host rotated its DH key (in `updateCtrlPairing` during `connectRCCtrl`) but the controller still has the old public key — the announcement is encrypted with the host's old DH public key, so the host needs its old private key to decrypt. + +`discoverRCCtrl` wraps this in a 30-second timeout (`timeoutThrow RCENotDiscovered 30000000`) and an error-recovery loop — failed decryption attempts are logged and retried rather than aborting discovery. + +After decryption, the invitation's `dh` field is verified against the announcement's `dhPubKey` to prevent a relay attack where someone re-encrypts a legitimate invitation with a different DH key. + +## announceRC — fire-and-forget loop + +Sends the signed invitation encrypted to the known host's DH key, repeated `maxCount` times (default 60) with 1-second intervals via UDP multicast. The announcement is padded to `encInvitationSize` (900 bytes). The announcer runs as a separate async that is cancelled when the session is established (`uninterruptibleCancel` in `runSession`). + +## Session encryption — no padding + +`rcEncryptBody` / `rcDecryptBody` use `sbEncryptTailTagNoPad` / `sbDecryptTailTagNoPad` — lazy streaming encryption without padding. This is for application-level data after the handshake, where message sizes are variable and padding would be wasteful. The auth tag is appended at the tail (not prepended). + +## putRCError — error propagation to TMVar + +`putRCError` is an error combinator that catches all errors from an `ExceptT` action and writes them to the step TMVar before re-throwing. 
This ensures the application observes the error via the TMVar even if the async thread terminates. Uses `tryPutTMVar` (not `putTMVar`) so the TMVar write is idempotent — if already filled, the write is skipped, but the error is still re-thrown via `throwE`. + +## Asymmetric hello encryption + +The two directions of the hello exchange use different encryption primitives. The host encrypts `RCHostEncHello` with `cbEncrypt` using the DH shared key directly (classical DH only). The controller encrypts `RCCtrlEncHello` with `sbEncrypt` using a key derived from `sbcHkdf` on the KEM-hybrid chain key (post-quantum protected). This asymmetry means the host's initial hello is only protected by classical DH, while the controller's response has post-quantum protection. + +## Packet framing + +`sendRCPacket` / `receiveRCPacket` use fixed-size 16384-byte blocks with `C.pad`/`C.unPad` (2-byte length prefix + '#' padding). The hello exchange uses a smaller 12288-byte block size (`helloBlockSize`) for the encrypted hello bodies within the padded packet. diff --git a/spec/modules/Simplex/RemoteControl/Discovery.md b/spec/modules/Simplex/RemoteControl/Discovery.md new file mode 100644 index 0000000000..52c861c797 --- /dev/null +++ b/spec/modules/Simplex/RemoteControl/Discovery.md @@ -0,0 +1,25 @@ +# Simplex.RemoteControl.Discovery + +> Network discovery: local address enumeration, multicast group management, and TLS server startup. + +**Source**: [`RemoteControl/Discovery.hs`](../../../../../../src/Simplex/RemoteControl/Discovery.hs) + +## getLocalAddress — filtered interface enumeration + +Enumerates network interfaces and filters out non-routable addresses (0.0.0.0, broadcast, link-local 169.254.x.x). Results are sorted: `mkLastLocalHost` moves localhost (127.x.x.x) to the end. If a preferred address is provided, `preferAddress` moves the matching entry to the front — matches by address first, falling back to interface name. 
+ +## Multicast subscriber counting + +`joinMulticast` / `partMulticast` use a shared `TMVar Int` counter to track active listeners. Multicast group membership is per-host (not per-process — see comment in Multicast.hsc), so the counter ensures `IP_ADD_MEMBERSHIP` is called only when transitioning from 0→1 listeners and `IP_DROP_MEMBERSHIP` only when transitioning from 1→0. If `setMembership` fails, the counter is restored to its previous value and the error is logged (not thrown). + +**TMVar hazard**: Both functions take the counter from the TMVar unconditionally but only put it back in the 0-or-1 branches. If `joinMulticast` is called when the counter is already >0, or `partMulticast` when >1, the TMVar is left empty and subsequent accesses will deadlock. In practice this is safe because `withListener` serializes access through a single `TMVar Int`, but the abstraction does not protect against concurrent use. + +## startTLSServer — ephemeral port support + +When `port_` is `Nothing`, `startTLSServer` passes `"0"` to `startTCPServer`, which causes the OS to assign an ephemeral port. The assigned port is read via `socketPort` and communicated back through the `startedOnPort` TMVar. On any startup error, `setPort Nothing` is signalled so callers don't block indefinitely on the TMVar. + +The TLS server requires client certificates (`serverWantClientCert = True`) and delegates certificate validation to the caller-provided `TLS.ServerHooks`. + +## withListener — bracket with subscriber tracking + +`openListener` increments the multicast subscriber counter; `closeListener` decrements it in a `finally` block (ensuring cleanup even on exception). The `UDP.stop` call that closes the socket runs after the `partMulticast` call — if `partMulticast` fails, the socket is still closed.
diff --git a/spec/modules/Simplex/RemoteControl/Discovery/Multicast.md b/spec/modules/Simplex/RemoteControl/Discovery/Multicast.md new file mode 100644 index 0000000000..97b9886b80 --- /dev/null +++ b/spec/modules/Simplex/RemoteControl/Discovery/Multicast.md @@ -0,0 +1,7 @@ +# Simplex.RemoteControl.Discovery.Multicast + +> FFI binding for IPv4 multicast group membership via `setsockopt`. + +**Source**: [`Discovery/Multicast.hsc`](../../../../../../../src/Simplex/RemoteControl/Discovery/Multicast.hsc) + +No non-obvious behavior. See source. diff --git a/spec/modules/Simplex/RemoteControl/Invitation.md b/spec/modules/Simplex/RemoteControl/Invitation.md new file mode 100644 index 0000000000..3f65ec46cb --- /dev/null +++ b/spec/modules/Simplex/RemoteControl/Invitation.md @@ -0,0 +1,26 @@ +# Simplex.RemoteControl.Invitation + +> XRCP invitation creation, dual-signature scheme, and URI encoding. + +**Source**: [`RemoteControl/Invitation.hs`](../../../../../../src/Simplex/RemoteControl/Invitation.hs) + +## Dual-signature chain + +`signInvitation` applies two Ed25519 signatures in a specific order that creates a chain: + +1. `ssig` signs the invitation URI with the **session** private key +2. `idsig` signs the URI **with `ssig` appended** using the **identity** private key + +Verification in `verifySignedInvitation` mirrors this: `ssig` is verified against the bare URI, `idsig` against the URI+ssig concatenation. This chain means `idsig` covers both the invitation content and the session key's signature — a compromised session key cannot forge an identity-valid invitation. + +## Invitation URI format + +The `xrcp:/` scheme uses the SMP-style pattern: CA fingerprint as userinfo (`ca@host:port`), query parameters after `#/?`. The `app` field is raw JSON encoded in a query parameter. 
`RCInvitation`'s parser uses `parseSimpleQuery` + `lookup` (order-independent), but `RCSignedInvitation`'s parser uses `B.breakSubstring "&ssig="` which assumes the signatures appear at a fixed position — see TODO in source on `RCSignedInvitation`'s `strP`. + +## RCVerifiedInvitation — newtype trust boundary + +`RCVerifiedInvitation` is a newtype wrapper. The constructor is exported (via `RCVerifiedInvitation (..)`), so it can be constructed without validation — the trust boundary is conventional, not enforced by the type system. `verifySignedInvitation` is the intended smart constructor. [Client.hs](./Client.md) accepts only `RCVerifiedInvitation` for `connectRCCtrl`. + +## RCEncInvitation — multicast envelope + +`RCEncInvitation` wraps a signed invitation for UDP multicast: ephemeral DH public key + nonce + encrypted body. The encryption uses a DH shared secret between the host's DH public key (known to the controller from the pairing) and the controller's ephemeral DH private key. Uses `Tail` encoding for the ciphertext (no length prefix — consumes remaining bytes). diff --git a/spec/modules/Simplex/RemoteControl/Types.md b/spec/modules/Simplex/RemoteControl/Types.md new file mode 100644 index 0000000000..ad165f4426 --- /dev/null +++ b/spec/modules/Simplex/RemoteControl/Types.md @@ -0,0 +1,31 @@ +# Simplex.RemoteControl.Types + +> Type definitions for the XRCP remote control protocol: pairing records, session state, hello messages, and error taxonomy. + +**Source**: [`RemoteControl/Types.hs`](../../../../../../src/Simplex/RemoteControl/Types.hs) + +## Overview + +This module defines the data types for the XRCP (remote control) protocol, which connects a "host" (mobile device) to a "controller" (desktop). 
Key architectural point: the naming is from the **controller's perspective** — the controller connects to the host, so: +- `RCHostPairing` / `RCHostSession` are the controller-side records (connecting **to** the host) +- `RCCtrlPairing` / `RCCtrlSession` are the host-side records (connecting **from** the controller) + +## Asymmetric pairing records + +`RCHostPairing` (controller side) stores the CA key pair (private key + certificate), identity private key, and optionally a `KnownHostPairing` (fingerprint + last DH public key of the host). `RCCtrlPairing` (host side) stores the CA key pair, controller's fingerprint and identity public key, current DH private key, and `prevDhPrivKey` — the previous DH key retained so that announcements encrypted with the old key can still be decrypted during key rotation. + +## Asymmetric session keys + +`HostSessKeys` stores private keys (identity + session) — the controller needs to sign commands. `CtrlSessKeys` stores public keys (identity + session) — the host needs to verify commands. Both store `TSbChainKeys` for the symmetric session encryption, but note that the chain key direction is swapped between the two sides (see `prepareCtrlSession` in [Client.md](./Client.md)). + +## RCCtrlEncHello — two variants + +`RCCtrlEncHello` is a sum type with two variants: `RCCtrlEncHello` (success: KEM ciphertext + encrypted hello body) and `RCCtrlEncError` (failure: nonce + encrypted error message). The error variant uses the original DH shared key for encryption (not the KEM hybrid key), since the error occurs before KEM exchange completes. + +## AnyError instance — TLS UnknownCa promotion + +`fromSomeException` promotes TLS `Terminated` / `Error_Protocol` / `UnknownCa` to `RCEIdentity` rather than the generic `RCEException`. This maps a TLS-level certificate rejection (either side's CA not recognized by the peer) to a meaningful XRCP error. 
+ +## IpProbe — unused discovery type + +`IpProbe` is defined with `Encoding` instance but not used anywhere in the current codebase. It appears to be a placeholder for a planned IP discovery mechanism. Note: the `smpP` parser has a precedence bug — `IpProbe <$> (smpP <* "I") *> smpP` parses as `(IpProbe <$> (smpP <* "I")) *> smpP`, which discards the `IpProbe` wrapper. This has never manifested because the type is unused. From 3bde77da10a428f66264692f76a7b604aa23bfc7 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:17:31 +0000 Subject: [PATCH 45/91] update terms --- spec/TOPICS.md | 20 ++++++------- spec/modules/Simplex/FileTransfer/Agent.md | 18 +++++------ spec/modules/Simplex/FileTransfer/Client.md | 8 ++--- .../Simplex/FileTransfer/Client/Agent.md | 6 ++-- .../Simplex/FileTransfer/Client/Main.md | 18 +++++------ .../Simplex/FileTransfer/Description.md | 6 ++-- spec/modules/Simplex/FileTransfer/Protocol.md | 2 +- spec/modules/Simplex/FileTransfer/Server.md | 28 ++++++++--------- .../Simplex/FileTransfer/Server/Env.md | 6 ++-- .../Simplex/FileTransfer/Server/Main.md | 8 ++--- .../Simplex/FileTransfer/Server/Stats.md | 6 ++-- .../Simplex/FileTransfer/Server/Store.md | 2 +- .../Simplex/FileTransfer/Server/StoreLog.md | 6 ++-- spec/modules/Simplex/FileTransfer/Types.md | 2 +- spec/modules/Simplex/Messaging/Agent.md | 2 +- .../modules/Simplex/Messaging/Agent/Client.md | 30 +++++++++---------- .../Simplex/Messaging/Agent/Env/SQLite.md | 2 +- .../Messaging/Agent/NtfSubSupervisor.md | 10 +++---- .../Simplex/Messaging/Agent/Protocol.md | 4 +-- spec/modules/Simplex/Messaging/Agent/Stats.md | 2 +- .../Messaging/Notifications/Protocol.md | 6 ++-- .../Simplex/Messaging/Notifications/Server.md | 28 ++++++++--------- .../Messaging/Notifications/Server/Control.md | 2 +- .../Messaging/Notifications/Server/Env.md | 6 ++-- .../Messaging/Notifications/Server/Main.md | 2 +- 
.../Notifications/Server/Push/APNS.md | 2 +- .../Messaging/Notifications/Server/Stats.md | 14 ++++----- .../Messaging/Notifications/Server/Store.md | 4 +-- .../Notifications/Server/Store/Postgres.md | 8 ++--- .../Messaging/Notifications/Transport.md | 4 +-- .../Simplex/Messaging/Notifications/Types.md | 2 +- 31 files changed, 132 insertions(+), 132 deletions(-) diff --git a/spec/TOPICS.md b/spec/TOPICS.md index 0489d79df2..a62e23c297 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -2,7 +2,7 @@ > Cross-cutting patterns noticed during module documentation. Each entry may become a topic doc in `spec/` after all module docs are complete. -- **Exception handling strategy**: `catchOwn`/`catchAll`/`tryAllErrors` pattern (defined in Util.hs) used across server, client, and agent modules. The three-category classification (synchronous, own-async, cancellation) and when to use which catch variant is not obvious from any single call site. +- **Exception handling strategy**: `catchOwn`/`catchAll`/`tryAllErrors` pattern (defined in Util.hs) used across router, client, and agent modules. The three-category classification (synchronous, own-async, cancellation) and when to use which catch variant is not obvious from any single call site. - **Padding schemes**: Three different padding formats across the codebase — Crypto.hs uses 2-byte Word16 length prefix (max ~65KB), Crypto/Lazy.hs uses 8-byte Int64 prefix (file-sized), and both use '#' fill character. Ratchet header padding uses fixed sizes (88 or 2310 bytes). All use `pad`/`unPad` but with incompatible formats. The relationship between padding, encryption, and message size limits spans Crypto, Lazy, Ratchet, and the protocol layer. @@ -16,17 +16,17 @@ - **Service certificate subscription model**: Service subscriptions (SUBS/NSUBS) and per-queue subscriptions (SUB/NSUB) coexist with complex state transitions. Client/Agent.hs manages dual active/pending subscription maps with session-aware cleanup. 
Protocol.hs defines useServiceAuth (only NEW/SUB/NSUB). Client.hs implements authTransmission with dual signing (entity key over cert hash + transmission, service key over transmission only). Transport.hs handles the service certificate handshake extension (v16+). The full subscription lifecycle — from DBService credentials through handshake to service subscription to disconnect/reconnect — spans all four modules. -- **Two agent layers**: Client/Agent.hs ("small agent") is used only in servers — SMP proxy and notification server — to manage client connections to other SMP servers. Agent.hs + Agent/Client.hs ("big agent") is used in client applications. Both manage SMP client connections with subscription tracking and reconnection, but the big agent adds the full messaging agent layer (connections, double ratchet, file transfer). When documenting Agent/Client.hs, Client/Agent.hs should be reviewed for shared patterns and differences. +- **Two agent layers**: Client/Agent.hs ("small agent") is used only in routers — SMP proxy and notification router — to manage client connections to other SMP routers. Agent.hs + Agent/Client.hs ("big agent") is used in client applications. Both manage SMP client connections with subscription tracking and reconnection, but the big agent adds the full messaging agent layer (connections, double ratchet, file transfer). When documenting Agent/Client.hs, Client/Agent.hs should be reviewed for shared patterns and differences. - **Handshake protocol family**: SMP (Transport.hs), NTF (Notifications/Transport.hs), and XFTP (FileTransfer/Transport.hs) all have handshake protocols with the same structure (version negotiation + session binding + key exchange) but different feature sets. NTF is a strict subset. XFTP doesn't use the TLS handshake at all (HTTP2 layer). The shared types (THandle, THandleParams, THandleAuth) mean changes to the handshake infrastructure affect all three protocols. 
-- **Server subscription architecture**: The SMP server's subscription model spans Server.hs (serverThread split-STM lifecycle, tryDeliverMessage sync/async, ProhibitSub/ServerSub state machine), Env/STM.hs (SubscribedClients TVar-of-Maybe continuity, Client three-queue architecture), and Client/Agent.hs (small agent dual subscription model). The interaction between service subscriptions, direct queue subscriptions, notification subscriptions, and the serverThread subQ processing is not visible from any single module. +- **Router subscription architecture**: The SMP router's subscription model spans Server.hs (serverThread split-STM lifecycle, tryDeliverMessage sync/async, ProhibitSub/ServerSub state machine), Env/STM.hs (SubscribedClients TVar-of-Maybe continuity, Client three-queue architecture), and Client/Agent.hs (small agent dual subscription model). The interaction between service subscriptions, direct queue subscriptions, notification subscriptions, and the serverThread subQ processing is not visible from any single module. - **Duplex connection handshake**: The SMP duplex connection procedure (standard 10-step and fast 7-step) spans Agent.hs (orchestration, state machine), Agent/Protocol.hs (message types: AgentConfirmation/AgentConnInfoReply/AgentInvitation/HELLO, queue status types), Client.hs (SMP command dispatch), Protocol.hs (SMP-level KEY/SKEY commands). The handshake involves two-layer encryption (per-queue E2E + double ratchet), version-dependent paths (v2+ duplex, v6+ sender auth key, v7+ ratchet on confirmation, v9+ fast handshake with SKEY), and the asymmetry between initiating and accepting parties (different message types, different confirmation processing). The protocol spec (`agent-protocol.md`) defines the procedure but the implementation details — error handling, state persistence across restarts, race conditions between confirmation and message delivery — are only visible by reading the code across these modules. 
- **Connection links**: Full connection links (URI format with `#/?` query parameters) and binary-encoded links (`Encoding` instances) serve different contexts — URIs for out-of-band sharing, binary for agent-to-agent messages. Each has independent version-conditional encoding with different backward-compat rules (URI parser adjusts agent version ranges for old contact links, binary parser patches `queueMode` for forward compat). The `VersionI`/`VersionRangeI` typeclasses convert between `SMPQueueInfo` (versioned, in confirmations) and `SMPQueueUri` (version-ranged, in links). Full picture requires Agent/Protocol.hs, Protocol.hs, and agent-protocol.md. -- **Short links**: Short links are a compact representation for sharing via URLs, not a replacement for full connection links — both are used. Short links store encrypted link data on the router and encode only a server hostname, link type character, and key hash in the URL. The link data lifecycle (creation, encryption with key derivation, owner chain-of-trust validation, mutable user data updates) spans Agent/Protocol.hs (types, serialization, owner validation, server shortening/restoration), Agent.hs (link creation and resolution API), and the router-side link storage. The `FixedLinkData`/`ConnLinkData` split (immutable vs mutable), `OwnerAuth` chain validation, and `PreparedLinkParams` pre-computation are not visible from any single module. +- **Short links**: Short links are a compact representation for sharing via URLs, not a replacement for full connection links — both are used. Short links store encrypted link data on the router and encode only a router hostname, link type character, and key hash in the URL. The link data lifecycle (creation, encryption with key derivation, owner chain-of-trust validation, mutable user data updates) spans Agent/Protocol.hs (types, serialization, owner validation, router shortening/restoration), Agent.hs (link creation and resolution API), and the router-side link storage. 
The `FixedLinkData`/`ConnLinkData` split (immutable vs mutable), `OwnerAuth` chain validation, and `PreparedLinkParams` pre-computation are not visible from any single module. - **Agent worker framework**: `getAgentWorker` (lifecycle, restart rate limiting, crash recovery) + `withWork`/`withWork_`/`withWorkItems` (task retrieval with doWork flag atomics) defined in Agent/Client.hs, consumed by Agent.hs (async commands, message delivery), NtfSubSupervisor.hs (notification workers), FileTransfer/Agent.hs (XFTP workers), and simplex-chat. The framework separates two concerns: worker lifecycle (create-or-reuse, fork async, rate-limit restarts, escalate to CRITICAL) and task pattern (get next task, do task, as separate parameters). The doWork TMVar flag choreography (clear before query to prevent race) and the work-item-error vs store-error distinction are not obvious from any single consumer. @@ -40,19 +40,19 @@ - **NTF push delivery pipeline**: Bounded TBQueue (`pushQ`) creates backpressure → `ntfPush` thread reads → `checkActiveTkn` gates PNMessage (but not PNVerification or PNCheckMessages) → APNS delivery with single retry on connection errors (new push client on retry) → PPTokenInvalid marks token NTInvalid. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md), [APNS.hs](modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) (DER JWT signing, HTTP/2 serializing queue, fire-and-forget connection), [Env.hs](modules/Simplex/Messaging/Notifications/Server/Env.md) (push client caching with race tolerance). -- **NTF service subscription model**: Service-level subscriptions (SUBS/NSUBS on SMP) vs individual queue subscriptions, with fallback from service to individual when `CAServiceUnavailable`. Service credentials are lazily generated per SMP server with 25h backdating and ~2700yr validity. XOR hash triggers on PostgreSQL maintain subscription aggregate counts. 
Subscription status tracking uses `ntf_service_assoc` flag to distinguish service-associated from individually-subscribed queues. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md) (subscriber thread, service fallback), [Env.hs](modules/Simplex/Messaging/Notifications/Server/Env.md) (lazy credential generation, Weak ThreadId subscriber cleanup), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) (XOR hash triggers, batch status updates, cursor-based pagination). +- **NTF service subscription model**: Service-level subscriptions (SUBS/NSUBS on SMP) vs individual queue subscriptions, with fallback from service to individual when `CAServiceUnavailable`. Service credentials are lazily generated per SMP router with 25h backdating and ~2700yr validity. XOR hash triggers on PostgreSQL maintain subscription aggregate counts. Subscription status tracking uses `ntf_service_assoc` flag to distinguish service-associated from individually-subscribed queues. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md) (subscriber thread, service fallback), [Env.hs](modules/Simplex/Messaging/Notifications/Server/Env.md) (lazy credential generation, Weak ThreadId subscriber cleanup), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) (XOR hash triggers, batch status updates, cursor-based pagination). -- **NTF startup resubscription**: `resubscribe` runs as detached `forkIO` (not in `raceAny_` group), uses `mapConcurrently` across SMP servers, each with `subscribeLoop` using 100x database batch multiplier and cursor-based pagination. `ExitCode` exceptions from `exitFailure` on DB error propagate to main thread despite `forkIO`. `getServerNtfSubscriptions` claims subscriptions by batch-updating to `NSPending`. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md). 
+- **NTF startup resubscription**: `resubscribe` runs as detached `forkIO` (not in `raceAny_` group), uses `mapConcurrently` across SMP routers, each with `subscribeLoop` using 100x database batch multiplier and cursor-based pagination. `ExitCode` exceptions from `exitFailure` on DB error propagate to main thread despite `forkIO`. `getServerNtfSubscriptions` claims subscriptions by batch-updating to `NSPending`. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md). -- **XFTP file upload pipeline**: Agent-side encryption (streaming 64KB blocks, fixed-size padding) → chunk size selection (75% threshold algorithm) → per-server chunk creation with ID collision retry (3 attempts) → recipient registration (recursive batching up to `maxRecipients` per FADD) → per-server upload (command + file body in single HTTP/2 streaming request) → file description generation (cross-product: M chunks × R replicas × N recipients → N descriptions). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, description generation), [Client.hs](modules/Simplex/FileTransfer/Client.md) (upload protocol), [Server.hs](modules/Simplex/FileTransfer/Server.md) (quota reservation with rollback, skipCommitted idempotency), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (streaming encryption with embedded header), [Description.hs](modules/Simplex/FileTransfer/Description.md) (validation, first-replica-only digest optimization). 
+- **XFTP file upload pipeline**: Agent-side encryption (streaming 64KB blocks, fixed-size padding) → chunk size selection (75% threshold algorithm) → per-router chunk creation with ID collision retry (3 attempts) → recipient registration (recursive batching up to `maxRecipients` per FADD) → per-router upload (command + file body in single HTTP/2 streaming request) → file description generation (cross-product: M chunks × R replicas × N recipients → N descriptions). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, description generation), [Client.hs](modules/Simplex/FileTransfer/Client.md) (upload protocol), [Server.hs](modules/Simplex/FileTransfer/Server.md) (quota reservation with rollback, skipCommitted idempotency), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (streaming encryption with embedded header), [Description.hs](modules/Simplex/FileTransfer/Description.md) (validation, first-replica-only digest optimization). -- **XFTP file download pipeline**: Description parsing (ValidFileDescription validation, YAML or web URI) → per-server chunk download with ephemeral DH key pair per download (forward secrecy) → size and digest verification before decryption → streaming decryption with auth tag verification (output deleted on failure) → redirect resolution (depth-1 chain: decrypt redirect YAML, validate size/digest, download actual file). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, redirect handling), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ephemeral DH, chunk-proportional timeout), [Client/Main.hs](modules/Simplex/FileTransfer/Client/Main.md) (web URI decoding, parallel download with server grouping), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (dual decrypt paths, auth tag deletion), [Description.hs](modules/Simplex/FileTransfer/Description.md) (redirect file descriptions). 
+- **XFTP file download pipeline**: Description parsing (ValidFileDescription validation, YAML or web URI) → per-router chunk download with ephemeral DH key pair per download (forward secrecy) → size and digest verification before decryption → streaming decryption with auth tag verification (output deleted on failure) → redirect resolution (depth-1 chain: decrypt redirect YAML, validate size/digest, download actual file). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, redirect handling), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ephemeral DH, chunk-proportional timeout), [Client/Main.hs](modules/Simplex/FileTransfer/Client/Main.md) (web URI decoding, parallel download with router grouping), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (dual decrypt paths, auth tag deletion), [Description.hs](modules/Simplex/FileTransfer/Description.md) (redirect file descriptions). - **XFTP handshake state machine**: Three-state session-cached handshake (`No entry` → `HandshakeSent` → `HandshakeAccepted`) per HTTP/2 session. Web clients use `xftp-web-hello` header and challenge-response identity proof; native clients use standard ALPN. SNI presence gates CORS headers, web serving, and SESSION error for unrecognized connections. Key reuse on re-hello preserves existing DH keys. Spans [Server.hs](modules/Simplex/FileTransfer/Server.md) (handshake logic, CORS, web serving), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ALPN selection, cert chain validation), [Transport.hs](modules/Simplex/FileTransfer/Transport.md) (block size, version). 
- **XFTP storage lifecycle**: Quota reservation via atomic `stateTVar` before upload → rollback on failure (subtract + delete partial file) → physical file deleted before store cleanup (crash risk: store references missing file) → `RoundedSystemTime 3600` for privacy-preserving expiration timestamps → expiration with configurable throttling (100ms between files) → startup storage reconciliation (override stats from live store). Spans [Server.hs](modules/Simplex/FileTransfer/Server.md), [Server/Store.hs](modules/Simplex/FileTransfer/Server/Store.md), [Server/Env.hs](modules/Simplex/FileTransfer/Server/Env.md), [Server/StoreLog.hs](modules/Simplex/FileTransfer/Server/StoreLog.md) (error-resilient replay, compaction). -- **XFTP worker architecture**: Five worker types in three categories: rcv (per-server download + local decryption), snd (local prepare/encrypt + per-server upload), del (per-server delete). TMVar-based connection sharing with async retry on temporary errors, permanent error cleanup (put Left + delete from TMap). `withRetryIntervalLimit` caps consecutive retries; exhausted temporary errors silently abandon work cycle (chunk stays pending). `assertAgentForeground` dual check (throw if inactive + wait if backgrounded) gates every chunk operation. Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md), [Client/Agent.hs](modules/Simplex/FileTransfer/Client/Agent.md). +- **XFTP worker architecture**: Five worker types in three categories: rcv (per-router download + local decryption), snd (local prepare/encrypt + per-router upload), del (per-router delete). TMVar-based connection sharing with async retry on temporary errors, permanent error cleanup (put Left + delete from TMap). `withRetryIntervalLimit` caps consecutive retries; exhausted temporary errors silently abandon work cycle (chunk stays pending). `assertAgentForeground` dual check (throw if inactive + wait if backgrounded) gates every chunk operation. 
Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md), [Client/Agent.hs](modules/Simplex/FileTransfer/Client/Agent.md). - **SessionVar protocol client lifecycle**: Protocol client connections (SMP, NTF, XFTP) use a lazy singleton pattern: `getSessVar` atomically checks TMap → `newProtocolClient` fills TMVar on success/failure → `waitForProtocolClient` reads with timeout. Error caching via `persistErrorInterval` prevents connection storms (failed connections cache the error with expiry; callers receive cached error without reconnecting). `removeSessVar` uses monotonic `sessionVarId` compare-and-swap to prevent stale disconnect callbacks from removing newer clients. SMP has additional complexity: `SMPConnectedClient` wraps client with per-connection proxied relay map, `updateClientService` synchronizes service credentials post-connect, disconnect callback moves subscriptions to pending with session-ID matching. XFTP always uses `NRMBackground` timing regardless of caller request. Spans [Session.md](modules/Simplex/Messaging/Session.md), [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (lifecycle, disconnect callbacks, reconnection workers), [Agent.md](modules/Simplex/Messaging/Agent.md) (subscriber loop consuming events). @@ -60,7 +60,7 @@ - **Deferred message encryption**: Message bodies are NOT encrypted at enqueue time. `enqueueMessageB` advances the ratchet header and validates padding, but stores only the body reference (`sndMsgBodyId`) and encryption key. Actual encryption (`rcEncryptMsg`) happens at delivery time in `runSmpQueueMsgDelivery`. This enables body deduplication via `VRValue`/`VRRef` — identical bodies (common for group messages) share one database row, but each connection's delivery encrypts independently with its own ratchet. Confirmation and ratchet key messages bypass deferred encryption (pre-encrypted at enqueue time). 
Spans [Agent.md](modules/Simplex/Messaging/Agent.md) (enqueue + delivery), [AgentStore.md](modules/Simplex/Messaging/Agent/Store/AgentStore.md) (`snd_message_bodies` storage). -- **NTF agent subscription lifecycle**: The agent-side notification subscription system uses a supervisor-worker architecture with three worker pools (NTF server, SMP server, token deletion). `NSCCreate` triggers a four-way partition (`partitionQueueSubActions`): new sub, reset sub (credential mismatch or null action), continue SMP work, continue NTF work. Workers coordinate with the supervisor via `updated_by_supervisor` flag — workers only update local fields when the flag is set, preventing overwrite of supervisor decisions. The null-action sentinel (`workerErrors` sets action to NULL on permanent failure) bridges worker failure recovery to supervisor-driven re-creation. `retrySubActions` uses a shrinking TVar — each iteration only retries subs with temporary errors, so batches get smaller over time. `rescheduleWork` handles time-scheduled health checks by forking a sleep thread that re-signals `doWork`. Spans [NtfSubSupervisor.md](modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) (supervisor, worker pools), [AgentStore.md](modules/Simplex/Messaging/Agent/Store/AgentStore.md) (updated_by_supervisor, null-action sentinel), [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (worker framework). +- **NTF agent subscription lifecycle**: The agent-side notification subscription system uses a supervisor-worker architecture with three worker pools (NTF router, SMP router, token deletion). `NSCCreate` triggers a four-way partition (`partitionQueueSubActions`): new sub, reset sub (credential mismatch or null action), continue SMP work, continue NTF work. Workers coordinate with the supervisor via `updated_by_supervisor` flag — workers only update local fields when the flag is set, preventing overwrite of supervisor decisions. 
The null-action sentinel (`workerErrors` sets action to NULL on permanent failure) bridges worker failure recovery to supervisor-driven re-creation. `retrySubActions` uses a shrinking TVar — each iteration only retries subs with temporary errors, so batches get smaller over time. `rescheduleWork` handles time-scheduled health checks by forking a sleep thread that re-signals `doWork`. Spans [NtfSubSupervisor.md](modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) (supervisor, worker pools), [AgentStore.md](modules/Simplex/Messaging/Agent/Store/AgentStore.md) (updated_by_supervisor, null-action sentinel), [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (worker framework). - **Session-aware SMP subscription management**: SMP queue subscriptions are tracked per transport session with session-ID validation at multiple points. `subscribeQueues` groups queues by transport session, subscribes concurrently, then validates `activeClientSession` post-RPC — if the client was replaced during the RPC, results are discarded and converted to temporary errors for retry. `removeClientAndSubs` (disconnect cleanup) only demotes subscriptions whose session ID matches the disconnecting client. Batch UP notifications are accumulated across transmissions and deduplicated against already-active subscriptions. When ALL results are temporary errors and no connections were already active, the SMP client is closed to force fresh connection. `maxPending` throttles concurrent pending subscriptions with STM retry backpressure. Spans [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (subscription state, session validation), [Agent.md](modules/Simplex/Messaging/Agent.md) (subscriber loop, processSMPTransmissions, UP accumulation). 
diff --git a/spec/modules/Simplex/FileTransfer/Agent.md b/spec/modules/Simplex/FileTransfer/Agent.md index fd2a361d0c..e5f58e9964 100644 --- a/spec/modules/Simplex/FileTransfer/Agent.md +++ b/spec/modules/Simplex/FileTransfer/Agent.md @@ -8,15 +8,15 @@ The XFTP agent uses five worker types organized in three categories: -| Worker | Key (server) | Purpose | +| Worker | Key (router) | Purpose | |--------|-------------|---------| -| `xftpRcvWorker` | `Just server` | Download chunks from a specific XFTP server | +| `xftpRcvWorker` | `Just server` | Download chunks from a specific XFTP router | | `xftpRcvLocalWorker` | `Nothing` | Decrypt completed downloads locally | -| `xftpSndPrepareWorker` | `Nothing` | Encrypt files and create chunks on servers | -| `xftpSndWorker` | `Just server` | Upload chunks to a specific XFTP server | -| `xftpDelWorker` | `Just server` | Delete chunks from a specific XFTP server | +| `xftpSndPrepareWorker` | `Nothing` | Encrypt files and create chunks on routers | +| `xftpSndWorker` | `Just server` | Upload chunks to a specific XFTP router | +| `xftpDelWorker` | `Just server` | Delete chunks from a specific XFTP router | -Workers are created on-demand via `getAgentWorker` and keyed by server address. The local workers (keyed by `Nothing`) handle CPU-bound operations that don't require network access. +Workers are created on-demand via `getAgentWorker` and keyed by router address. The local workers (keyed by `Nothing`) handle CPU-bound operations that don't require network access. ## Non-obvious behavior @@ -71,7 +71,7 @@ During upload, `addRecipients` recursively calls itself if a chunk needs more re ### 12. Delete workers skip files older than rcvFilesTTL -`runXFTPDelWorker` uses `rcvFilesTTL` (not a dedicated delete TTL) to filter pending deletions. Files older than this TTL would already be expired on the server, so attempting deletion is pointless. This reuses the receive TTL as a proxy for server-side expiration. 
+`runXFTPDelWorker` uses `rcvFilesTTL` (not a dedicated delete TTL) to filter pending deletions. Files older than this TTL would already be expired on the router, so attempting deletion is pointless. This reuses the receive TTL as a proxy for router-side expiration. ### 13. closeXFTPAgent atomically swaps worker maps @@ -81,6 +81,6 @@ During upload, `addRecipients` recursively calls itself if a chunk needs more re `assertAgentForeground` both throws if the agent is inactive (`throwWhenInactive`) and blocks until it's in the foreground (`waitUntilForeground`). This is called before every chunk operation to ensure the agent isn't suspended or backgrounded during file transfers. -### 15. Per-server stats tracking +### 15. Per-router stats tracking -Every chunk download, upload, and delete operation increments per-server statistics (`downloads`, `uploads`, `deletions`, `downloadAttempts`, `uploadAttempts`, `deleteAttempts`, and error variants). Size-based stats (`downloadsSize`, `uploadsSize`) track throughput in kilobytes. +Every chunk download, upload, and delete operation increments per-router statistics (`downloads`, `uploads`, `deletions`, `downloadAttempts`, `uploadAttempts`, `deleteAttempts`, and error variants). Size-based stats (`downloadsSize`, `uploadsSize`) track throughput in kilobytes. diff --git a/spec/modules/Simplex/FileTransfer/Client.md b/spec/modules/Simplex/FileTransfer/Client.md index 5cf87a5944..27fb50bc35 100644 --- a/spec/modules/Simplex/FileTransfer/Client.md +++ b/spec/modules/Simplex/FileTransfer/Client.md @@ -12,13 +12,13 @@ - **`xftpALPNv1` or `httpALPN11`**: performs v1 handshake with key exchange (`httpALPN11` is used for web port connections) - **No ALPN or unrecognized**: uses legacy v1 transport parameters without handshake -### 2. Server certificate chain validation +### 2. 
Router certificate chain validation -`xftpClientHandshakeV1` validates the server's identity by checking that the CA fingerprint from the certificate chain matches the expected `keyHash` from the server address. The server signs an authentication public key (X25519) with its long-term key. The client verifies this signature against the certificate chain, then extracts the X25519 key for HMAC-based command authentication. This authentication key is distinct from the per-download ephemeral DH keys. +`xftpClientHandshakeV1` validates the router's identity by checking that the CA fingerprint from the certificate chain matches the expected `keyHash` from the router address. The router signs an authentication public key (X25519) with its long-term key. The client verifies this signature against the certificate chain, then extracts the X25519 key for HMAC-based command authentication. This authentication key is distinct from the per-download ephemeral DH keys. ### 3. Ephemeral DH key pair per download -`downloadXFTPChunk` generates a fresh X25519 key pair for each chunk download. The public key is sent with the FGET command; the server responds with its own ephemeral key. The derived shared secret encrypts the file data in transit. This provides forward secrecy — compromising a past DH key doesn't decrypt other downloads. +`downloadXFTPChunk` generates a fresh X25519 key pair for each chunk download. The public key is sent with the FGET command; the router responds with its own ephemeral key. The derived shared secret encrypts the file data in transit. This provides forward secrecy — compromising a past DH key doesn't decrypt other downloads. ### 4. Chunk-size-proportional download timeout @@ -30,7 +30,7 @@ ### 6. Upload sends file body after command response -`uploadXFTPChunk` sends the FPUT command and file body in the same streaming HTTP/2 request: the protocol command block is sent first, followed immediately by the raw file data via `hSendFile`. 
The server response (`FROk` or error) is received only after both the command and file body have been fully sent. This is a single HTTP/2 round trip, not a two-phase interaction. +`uploadXFTPChunk` sends the FPUT command and file body in the same streaming HTTP/2 request: the protocol command block is sent first, followed immediately by the raw file data via `hSendFile`. The router response (`FROk` or error) is received only after both the command and file body have been fully sent. This is a single HTTP/2 round trip, not a two-phase interaction. ### 7. Empty corrId as nonce diff --git a/spec/modules/Simplex/FileTransfer/Client/Agent.md b/spec/modules/Simplex/FileTransfer/Client/Agent.md index 6ff1eebb77..c03400d908 100644 --- a/spec/modules/Simplex/FileTransfer/Client/Agent.md +++ b/spec/modules/Simplex/FileTransfer/Client/Agent.md @@ -8,7 +8,7 @@ ### 1. TMVar-based connection sharing -`getXFTPServerClient` first checks the `TMap XFTPServer (TMVar (Either XFTPClientAgentError XFTPClient))`. If no entry exists, it atomically inserts an empty `TMVar` and initiates connection. Other threads requesting the same server block on `readTMVar` until the connection is established or fails. This prevents duplicate connections to the same server. +`getXFTPServerClient` first checks the `TMap XFTPServer (TMVar (Either XFTPClientAgentError XFTPClient))`. If no entry exists, it atomically inserts an empty `TMVar` and initiates connection. Other threads requesting the same router block on `readTMVar` until the connection is established or fails. This prevents duplicate connections to the same router. ### 2. Async retry on temporary errors @@ -20,8 +20,8 @@ On permanent error, `newXFTPClient` puts the `Left error` into the `TMVar` (unbl ### 4. Connection timeout -`waitForXFTPClient` wraps `readTMVar` in a timeout. If the connection establishment takes too long (e.g., server unreachable and retry loop is slow), the caller gets a timeout error rather than blocking indefinitely. 
The underlying connection attempt continues in the background. +`waitForXFTPClient` wraps `readTMVar` in a timeout. If the connection establishment takes too long (e.g., router unreachable and retry loop is slow), the caller gets a timeout error rather than blocking indefinitely. The underlying connection attempt continues in the background. ### 5. closeXFTPServerClient removes from TMap -Closing a server client deletes its entry from the TMap, so the next request will establish a fresh connection. This is called on connection errors during file operations to force reconnection. +Closing a router client deletes its entry from the TMap, so the next request will establish a fresh connection. This is called on connection errors during file operations to force reconnection. diff --git a/spec/modules/Simplex/FileTransfer/Client/Main.md b/spec/modules/Simplex/FileTransfer/Client/Main.md index 5f7b45af4a..abb9eceb5e 100644 --- a/spec/modules/Simplex/FileTransfer/Client/Main.md +++ b/spec/modules/Simplex/FileTransfer/Client/Main.md @@ -8,7 +8,7 @@ ### 1. Web URI encoding: base64url(deflate(YAML)) -`encodeWebURI` compresses the YAML-encoded file description with raw DEFLATE, then base64url-encodes the result. `decodeWebURI` reverses this. The compressed description goes in the URL fragment (after `#`), which is never sent to the server — the file description stays client-side. +`encodeWebURI` compresses the YAML-encoded file description with raw DEFLATE, then base64url-encodes the result. `decodeWebURI` reverses this. The compressed description goes in the URL fragment (after `#`), which is never sent to the router — the file description stays client-side. ### 2. CLI receive accepts both file paths and URLs @@ -18,17 +18,17 @@ `receive` tracks a `depth` parameter starting at 1. After following one redirect, `depth` becomes 0. A second redirect throws "Redirect chain too long". This prevents infinite redirect loops from malicious file descriptions. -### 4. 
Parallel chunk uploads with server grouping +### 4. Parallel chunk uploads with router grouping -`uploadFile` groups chunks by server via `groupAllOn`, then uses `pooledForConcurrentlyN 16` to process up to 16 server-groups concurrently. Within each group, chunks are uploaded sequentially (`mapM`). Errors from any chunk are collected and the first one is thrown. +`uploadFile` groups chunks by router via `groupAllOn`, then uses `pooledForConcurrentlyN 16` to process up to 16 router-groups concurrently. Within each group, chunks are uploaded sequentially (`mapM`). Errors from any chunk are collected and the first one is thrown. -### 5. Random server selection +### 5. Random router selection -`getXFTPServer` selects a random server from the provided list for each chunk. With a single server, it's deterministic. With multiple servers, it uses `StdGen` in a TVar for thread-safe random selection via `stateTVar`. +`getXFTPServer` selects a random router from the provided list for each chunk. With a single router, it's deterministic. With multiple routers, it uses `StdGen` in a TVar for thread-safe random selection via `stateTVar`. ### 6. withReconnect nests retry with reconnection -`withReconnect` wraps `withRetry` twice: the outer retry reconnects to the server, and the inner operation runs against the connection. On failure, the server connection is explicitly closed before retrying, forcing a fresh connection on the next attempt. +`withReconnect` wraps `withRetry` twice: the outer retry reconnects to the router, and the inner operation runs against the connection. On failure, the router connection is explicitly closed before retrying, forcing a fresh connection on the next attempt. ### 7. withRetry rejects zero retries @@ -36,8 +36,8 @@ ### 8. File description auto-deletion prompt -After successful receive or delete, `removeFD` either auto-deletes the file description (if `--yes` flag) or prompts the user. 
This prevents accidental reuse of one-time file descriptions — each receive consumes the description by ACKing chunks on the server. +After successful receive or delete, `removeFD` either auto-deletes the file description (if `--yes` flag) or prompts the user. This prevents accidental reuse of one-time file descriptions — each receive consumes the description by ACKing chunks on the router. -### 9. Sender description uses first replica's server +### 9. Sender description uses first replica's router -`createSndFileDescription` takes the server from the first replica of each chunk for the sender's `FileChunkReplica`. This reflects the current limitation that each chunk is uploaded to exactly one server — the sender description records that single server. +`createSndFileDescription` takes the router from the first replica of each chunk for the sender's `FileChunkReplica`. This reflects the current limitation that each chunk is uploaded to exactly one router — the sender description records that single router. diff --git a/spec/modules/Simplex/FileTransfer/Description.md b/spec/modules/Simplex/FileTransfer/Description.md index b4c7e2fe95..0edd0bee8d 100644 --- a/spec/modules/Simplex/FileTransfer/Description.md +++ b/spec/modules/Simplex/FileTransfer/Description.md @@ -22,9 +22,9 @@ When encoding chunks to YAML via `unfoldChunksToReplicas`, the `digest` and non- The top-level `FileDescription` has a `chunkSize` field. Individual chunk replicas only serialize their `chunkSize` if it differs from this default. This saves space in the common case where most chunks are the same size (only the last chunk may be smaller). -### 4. YAML encoding groups replicas by server +### 4. YAML encoding groups replicas by router -`groupReplicasByServer` groups all chunk replicas by their server, producing `FileServerReplica` records. This is the serialization format — replicas are organized by server, not by chunk. 
The parser (`foldReplicasToChunks`) reverses this grouping back to per-chunk replica lists. +`groupReplicasByServer` groups all chunk replicas by their router, producing `FileServerReplica` records. This is the serialization format — replicas are organized by router, not by chunk. The parser (`foldReplicasToChunks`) reverses this grouping back to per-chunk replica lists. ### 5. FileDescriptionURI uses query-string encoding @@ -40,4 +40,4 @@ Two limits exist: `maxFileSize = 1GB` (soft limit, checked by CLI client) and `m ### 8. Redirect file descriptions -A `FileDescription` can contain a `redirect` field pointing to another file's metadata (`RedirectFileInfo` with size and digest). The outer description downloads an encrypted YAML file that, once decrypted, yields the actual `FileDescription` for the real file. This adds one level of indirection for privacy — the relay servers hosting the redirect don't know the actual file's servers. +A `FileDescription` can contain a `redirect` field pointing to another file's metadata (`RedirectFileInfo` with size and digest). The outer description downloads an encrypted YAML file that, once decrypted, yields the actual `FileDescription` for the real file. This adds one level of indirection for privacy — the relay routers hosting the redirect don't know the actual file's routers. diff --git a/spec/modules/Simplex/FileTransfer/Protocol.md b/spec/modules/Simplex/FileTransfer/Protocol.md index f31c905616..4bbcb87262 100644 --- a/spec/modules/Simplex/FileTransfer/Protocol.md +++ b/spec/modules/Simplex/FileTransfer/Protocol.md @@ -29,7 +29,7 @@ Even for single transmissions, `xftpEncodeBatch1` wraps the encoded transmission ### 5. FileParty GADT partitions command space -Commands are indexed by `FileParty` (`SFSender` / `SFRecipient`) at the type level via `FileCmd`. This ensures at compile time that sender commands (FNEW, FADD, FPUT, FDEL) and recipient commands (FGET, FACK, PING) cannot be confused. 
The server pattern-matches on `SFileParty` to determine which index (sender vs recipient) to look up in the file store. +Commands are indexed by `FileParty` (`SFSender` / `SFRecipient`) at the type level via `FileCmd`. This ensures at compile time that sender commands (FNEW, FADD, FPUT, FDEL) and recipient commands (FGET, FACK, PING) cannot be confused. The router pattern-matches on `SFileParty` to determine which index (sender vs recipient) to look up in the file store. ### 6. Empty corrId and implicit session ID diff --git a/spec/modules/Simplex/FileTransfer/Server.md b/spec/modules/Simplex/FileTransfer/Server.md index f3a01314d1..99e17a4277 100644 --- a/spec/modules/Simplex/FileTransfer/Server.md +++ b/spec/modules/Simplex/FileTransfer/Server.md @@ -1,16 +1,16 @@ # Simplex.FileTransfer.Server -> XFTP server: HTTP/2 request handling, handshake state machine, file operations, and statistics. +> XFTP router: HTTP/2 request handling, handshake state machine, file operations, and statistics. **Source**: [`FileTransfer/Server.hs`](../../../../src/Simplex/FileTransfer/Server.hs) ## Architecture -The XFTP server runs several concurrent threads via `raceAny_`: +The XFTP router runs several concurrent threads via `raceAny_`: | Thread | Purpose | |--------|---------| -| `runServer` | HTTP/2 server accepting file transfer requests | +| `runServer` | HTTP/2 router accepting file transfer requests | | `expireFiles` | Periodic file expiration with throttling | | `logServerStats` | Periodic stats flush to CSV | | `savePrometheusMetrics` | Periodic Prometheus metrics dump | @@ -20,24 +20,24 @@ The XFTP server runs several concurrent threads via `raceAny_`: ### 1. 
Three-state handshake with session caching -The server maintains a `TMap SessionId Handshake` with three states: -- **No entry**: first request — for non-SNI or `xftp-web-hello` requests, `processHello` generates DH key pair and sends server handshake; for SNI requests without `xftp-web-hello`, returns `SESSION` error -- **`HandshakeSent pk`**: server hello sent, waiting for client handshake with version negotiation +The router maintains a `TMap SessionId Handshake` with three states: +- **No entry**: first request — for non-SNI or `xftp-web-hello` requests, `processHello` generates DH key pair and sends router handshake; for SNI requests without `xftp-web-hello`, returns `SESSION` error +- **`HandshakeSent pk`**: router hello sent, waiting for client handshake with version negotiation - **`HandshakeAccepted thParams`**: handshake complete, subsequent requests use cached params -Web clients can re-send hello (`xftp-web-hello` header) even in `HandshakeSent` or `HandshakeAccepted` states — the server reuses the existing private key rather than generating a new one. +Web clients can re-send hello (`xftp-web-hello` header) even in `HandshakeSent` or `HandshakeAccepted` states — the router reuses the existing private key rather than generating a new one. ### 2. Web identity proof via challenge-response -When a web client sends a hello with a non-empty body, the server parses an `XFTPClientHello` containing a `webChallenge`. The server signs `challenge <> sessionId` with its long-term key and includes the signature in the handshake response. This proves server identity to web clients that cannot verify TLS certificates directly. +When a web client sends a hello with a non-empty body, the router parses an `XFTPClientHello` containing a `webChallenge`. The router signs `challenge <> sessionId` with its long-term key and includes the signature in the handshake response. This proves router identity to web clients that cannot verify TLS certificates directly. ### 3. 
skipCommitted drains request body on re-upload -If `receiveServerFile` detects the file is already uploaded (`filePath` TVar is `Just`), it cannot simply ignore the request body — the HTTP/2 client would block waiting for the server to consume it. Instead, `skipCommitted` reads and discards the entire body in `fileBlockSize` increments, returning `FROk` when complete. This makes FPUT idempotent from the client's perspective. +If `receiveServerFile` detects the file is already uploaded (`filePath` TVar is `Just`), it cannot simply ignore the request body — the HTTP/2 client would block waiting for the router to consume it. Instead, `skipCommitted` reads and discards the entire body in `fileBlockSize` increments, returning `FROk` when complete. This makes FPUT idempotent from the client's perspective. ### 4. Atomic quota reservation with rollback -`receiveServerFile` uses `stateTVar` to atomically check and reserve storage quota before receiving the file. If the upload fails (timeout, size mismatch, IO error), the reserved size is subtracted from `usedStorage` and the partial file is deleted. This prevents failed uploads from permanently consuming quota. +`receiveServerFile` uses `stateTVar` to atomically check and reserve storage quota before receiving the file. If the upload fails (timeout, size mismatch, IO error), the reserved size is subtracted from `usedStorage` and the partial file is deleted on the router. This prevents failed uploads from permanently consuming quota. ### 5. retryAdd generates new IDs on collision @@ -45,7 +45,7 @@ If `receiveServerFile` detects the file is already uploaded (`filePath` TVar is ### 6. Timing attack mitigation on entity lookup -`verifyXFTPTransmission` calls `dummyVerifyCmd` (imported from SMP server) when a file entity is not found. This equalizes response timing to prevent attackers from distinguishing "entity doesn't exist" from "signature invalid" based on latency. 
+`verifyXFTPTransmission` calls `dummyVerifyCmd` (imported from SMP router) when a file entity is not found. This equalizes response timing to prevent attackers from distinguishing "entity doesn't exist" from "signature invalid" based on latency. ### 7. BLOCKED vs EntityOff distinction @@ -62,11 +62,11 @@ Despite the name suggesting it only marks a file as blocked, `blockServerFile` a ### 9. Stats restore overrides counts from live store -`restoreServerStats` loads stats from the backup file but overrides `_filesCount` and `_filesSize` with values computed from the live file store (TMap size and `usedStorage` TVar). If the backup values differ, warnings are logged. This handles cases where files were expired or deleted while the server was down. +`restoreServerStats` loads stats from the backup file but overrides `_filesCount` and `_filesSize` with values computed from the live file store (TMap size and `usedStorage` TVar). If the backup values differ, warnings are logged. This handles cases where files were expired or deleted while the router was down. ### 10. File expiration with configurable throttling -`expireServerFiles` accepts an optional `itemDelay` (100ms when called from the periodic thread, `Nothing` at startup). Between each file check, `threadDelay itemDelay` prevents expiration from monopolizing IO. At startup, files are expired without delay to clean up quickly. +`expireServerFiles` accepts an optional `itemDelay` (100ms when called from the periodic thread, `Nothing` at router startup). Between each file check, `threadDelay itemDelay` prevents expiration from monopolizing IO. At startup, files are expired without delay to clean up quickly. ### 11. Stats log aligns to wall-clock midnight @@ -78,7 +78,7 @@ Despite the name suggesting it only marks a file as blocked, `blockServerFile` a ### 13. SNI-dependent CORS and web serving -CORS headers require both `sniUsed = True` and `addCORSHeaders = True` in the transport config. 
Static web page serving is enabled when `sniUsed = True`. Non-SNI connections (direct TLS without hostname) skip both CORS and web serving. This separates the web-facing and protocol-facing behaviors of the same port. +CORS headers require both `sniUsed = True` and `addCORSHeaders = True` in the transport config. Static web page serving is enabled when `sniUsed = True`. Non-SNI connections (direct TLS without hostname) skip both CORS and web serving. This separates the web-facing and protocol-facing behaviors of the same router port. ### 14. Control port file operations use recipient index diff --git a/spec/modules/Simplex/FileTransfer/Server/Env.md b/spec/modules/Simplex/FileTransfer/Server/Env.md index e9f509a1a8..0b3bba3ff3 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Env.md +++ b/spec/modules/Simplex/FileTransfer/Server/Env.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Server.Env -> XFTP server environment: configuration, storage quota tracking, and request routing. +> XFTP router environment: configuration, storage quota tracking, and request routing. **Source**: [`FileTransfer/Server/Env.hs`](../../../../../src/Simplex/FileTransfer/Server/Env.hs) @@ -8,7 +8,7 @@ ### 1. Startup storage accounting with quota warning -`newXFTPServerEnv` computes `usedStorage` by summing file sizes from the in-memory store at startup. If the computed usage exceeds the configured `fileSizeQuota`, a warning is logged but the server still starts. This allows the server to come up even if it's over quota (e.g., after a quota reduction), relying on expiration to reclaim space. +`newXFTPServerEnv` computes `usedStorage` by summing file sizes from the in-memory store at startup. If the computed usage exceeds the configured `fileSizeQuota`, a warning is logged but the router still starts. This allows the router to come up even if it's over quota (e.g., after a quota reduction), relying on expiration to reclaim space. ### 2. 
XFTPRequest ADT separates new files from commands @@ -21,4 +21,4 @@ This separation occurs after credential verification in `Server.hs`. `XFTPReqNew ### 3. fileTimeout for upload deadline -`fileTimeout` in `XFTPServerConfig` sets the maximum time allowed for a single file upload (FPUT). The server wraps the receive operation in `timeout fileTimeout`. Default is 5 minutes (for 4MB chunks). This prevents slow or stalled uploads from holding server resources indefinitely. +`fileTimeout` in `XFTPServerConfig` sets the maximum time allowed for a single file upload (FPUT). The router wraps the receive operation in `timeout fileTimeout`. Default is 5 minutes (for 4MB chunks). This prevents slow or stalled uploads from holding router resources indefinitely. diff --git a/spec/modules/Simplex/FileTransfer/Server/Main.md b/spec/modules/Simplex/FileTransfer/Server/Main.md index 54a45751f2..c892e6bf5d 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Main.md +++ b/spec/modules/Simplex/FileTransfer/Server/Main.md @@ -1,12 +1,12 @@ # Simplex.FileTransfer.Server.Main -> XFTP server CLI: INI configuration parsing, TLS setup, and default constants. +> XFTP router CLI: INI configuration parsing, TLS setup, and default constants. **Source**: [`FileTransfer/Server/Main.hs`](../../../../../src/Simplex/FileTransfer/Server/Main.hs) ## Non-obvious behavior -### 1. Key server constants +### 1. Key router constants | Constant | Value | Purpose | |----------|-------|---------| @@ -17,7 +17,7 @@ ### 2. allowedChunkSizes defaults to all four sizes -If not configured, `allowedChunkSizes` defaults to `[kb 64, kb 256, mb 1, mb 4]`. The INI file can restrict this to a subset, controlling which chunk sizes the server accepts. +If not configured, `allowedChunkSizes` defaults to `[kb 64, kb 256, mb 1, mb 4]`. The INI file can restrict this to a subset, controlling which chunk sizes the router accepts. ### 3. 
Storage quota from INI with unit parsing @@ -25,4 +25,4 @@ If not configured, `allowedChunkSizes` defaults to `[kb 64, kb 256, mb 1, mb 4]` ### 4. Dual TLS credential support -The server supports both primary TLS credentials (`caCertificateFile`/`certificateFile`/`privateKeyFile`) and optional HTTP-specific credentials (`httpCaCertificateFile`/etc.). When HTTP credentials are present, the server uses `defaultSupportedParamsHTTPS` which enables broader TLS compatibility for web clients. +The router supports both primary TLS credentials (`caCertificateFile`/`certificateFile`/`privateKeyFile`) and optional HTTP-specific credentials (`httpCaCertificateFile`/etc.). When HTTP credentials are present, the router uses `defaultSupportedParamsHTTPS` which enables broader TLS compatibility for web clients. diff --git a/spec/modules/Simplex/FileTransfer/Server/Stats.md b/spec/modules/Simplex/FileTransfer/Server/Stats.md index 7e684c58a1..7eb2ad47bb 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Stats.md +++ b/spec/modules/Simplex/FileTransfer/Server/Stats.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Server.Stats -> XFTP server statistics: IORef-based counters with backward-compatible persistence. +> XFTP router statistics: IORef-based counters with backward-compatible persistence. **Source**: [`FileTransfer/Server/Stats.hs`](../../../../../src/Simplex/FileTransfer/Server/Stats.hs) @@ -8,11 +8,11 @@ ### 1. setFileServerStats is not thread safe -`setFileServerStats` directly writes to IORefs without synchronization. It is explicitly intended for server startup only (restoring from backup file), before any concurrent threads are running. +`setFileServerStats` directly writes to IORefs without synchronization. It is explicitly intended for router startup only (restoring from backup file), before any concurrent threads are running. ### 2. Backward-compatible parsing -The `strP` parser uses `opt` for newer fields, defaulting missing fields to 0. 
This allows reading stats files from older server versions that don't include fields like `filesBlocked` or `fileDownloadAcks`. +The `strP` parser uses `opt` for newer fields, defaulting missing fields to 0. This allows reading stats files from older router versions that don't include fields like `filesBlocked` or `fileDownloadAcks`. ### 3. PeriodStats for download tracking diff --git a/spec/modules/Simplex/FileTransfer/Server/Store.md b/spec/modules/Simplex/FileTransfer/Server/Store.md index 89b0c3b36c..f2ded441eb 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Store.md +++ b/spec/modules/Simplex/FileTransfer/Server/Store.md @@ -36,4 +36,4 @@ File timestamps use `RoundedFileTime` which is `RoundedSystemTime 3600` — syst ### 8. blockFile conditional storage adjustment -`blockFile` takes a `deleted :: Bool` parameter. When `True` (file blocked with physical deletion), it subtracts the file size from `usedStorage`. When `False` (block without deletion), storage is unchanged. This allows blocking without physical deletion for audit purposes. Currently, both the server's `blockServerFile` and the store log replay path pass `True`. +`blockFile` takes a `deleted :: Bool` parameter. When `True` (file blocked with physical deletion), it subtracts the file size from `usedStorage`. When `False` (block without deletion), storage is unchanged. This allows blocking without physical deletion for audit purposes. Currently, both the router's `blockServerFile` and the store log replay path pass `True`. diff --git a/spec/modules/Simplex/FileTransfer/Server/StoreLog.md b/spec/modules/Simplex/FileTransfer/Server/StoreLog.md index 35a339515f..6549c3666b 100644 --- a/spec/modules/Simplex/FileTransfer/Server/StoreLog.md +++ b/spec/modules/Simplex/FileTransfer/Server/StoreLog.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Server.StoreLog -> Append-only store log for XFTP file operations with error-resilient replay and compaction. 
+> Append-only store log for XFTP router file operations with error-resilient replay and compaction. **Source**: [`FileTransfer/Server/StoreLog.hs`](../../../../../src/Simplex/FileTransfer/Server/StoreLog.hs) @@ -8,7 +8,7 @@ ### 1. Error-resilient replay -`readFileStore` parses the store log line-by-line. Lines that fail to parse or fail to process (e.g., referencing a nonexistent sender ID) are logged as errors but do not halt replay. The store is reconstructed from whatever valid entries exist. This allows the server to recover from partial log corruption. +`readFileStore` parses the store log line-by-line. Lines that fail to parse or fail to process (e.g., referencing a nonexistent sender ID) are logged as errors but do not halt replay. The store is reconstructed from whatever valid entries exist. This allows the router to recover from partial log corruption. ### 2. Sender ID validation on recipient writes @@ -16,7 +16,7 @@ ### 3. Backward-compatible status parsing -`AddFile` log entries include an `EntityStatus` field. The parser uses `<|> pure EntityActive` as a fallback, defaulting to `EntityActive` when the status field is missing. This allows reading store logs from older server versions that didn't record entity status. +`AddFile` log entries include an `EntityStatus` field. The parser uses `<|> pure EntityActive` as a fallback, defaulting to `EntityActive` when the status field is missing. This allows reading store logs from older router versions that didn't record entity status. ### 4. Compaction on restart diff --git a/spec/modules/Simplex/FileTransfer/Types.md b/spec/modules/Simplex/FileTransfer/Types.md index 814e651955..14abc7b214 100644 --- a/spec/modules/Simplex/FileTransfer/Types.md +++ b/spec/modules/Simplex/FileTransfer/Types.md @@ -12,7 +12,7 @@ ### 2. Send file status state machine -`SndFileStatus` progresses: `SFSNew` → `SFSEncrypting` → `SFSEncrypted` → `SFSUploading` → `SFSComplete`, with `SFSError` as terminal. 
The prepare worker handles `SFSNew` → `SFSEncrypted` (including retry from `SFSEncrypting`), while per-server upload workers handle `SFSUploading` → `SFSComplete`. +`SndFileStatus` progresses: `SFSNew` → `SFSEncrypting` → `SFSEncrypted` → `SFSUploading` → `SFSComplete`, with `SFSError` as terminal. The prepare worker handles `SFSNew` → `SFSEncrypted` (including retry from `SFSEncrypting`), while per-router upload workers handle `SFSUploading` → `SFSComplete`. ### 3. Encrypted file path convention diff --git a/spec/modules/Simplex/Messaging/Agent.md b/spec/modules/Simplex/Messaging/Agent.md index a52be21561..e2cac06384 100644 --- a/spec/modules/Simplex/Messaging/Agent.md +++ b/spec/modules/Simplex/Messaging/Agent.md @@ -15,7 +15,7 @@ This module is the top-level SimpleX agent, consumed by simplex-chat and other c ### Agent startup — backgroundMode `getSMPAgentClient_` accepts a `backgroundMode` flag that fundamentally changes agent capabilities: -- **Normal mode** (`backgroundMode = False`): starts four threads raced via `raceAny_` — `subscriber` (main event loop), `runNtfSupervisor` (notification management), `cleanupManager` (garbage collection), `logServersStats` (statistics). Also restores persisted server statistics. If any thread crashes, all are cancelled; statistics are saved in a `finally` block. +- **Normal mode** (`backgroundMode = False`): starts four threads raced via `raceAny_` — `subscriber` (main event loop), `runNtfSupervisor` (notification management), `cleanupManager` (garbage collection), `logServersStats` (statistics). Also restores persisted router statistics. If any thread crashes, all are cancelled; statistics are saved in a `finally` block. - **Background mode** (`backgroundMode = True`): starts only the `subscriber` thread. No cleanup, no notifications, no stats persistence. Used when the agent needs minimal receive-only operation. 
Thread crashes are caught by the `run` wrapper: if the agent is still active (`acThread` is set), the exception is reported as `CRITICAL True` to `subQ`. If the agent is being disposed, crashes are silently ignored. diff --git a/spec/modules/Simplex/Messaging/Agent/Client.md b/spec/modules/Simplex/Messaging/Agent/Client.md index f1f4965b6c..0177b4f708 100644 --- a/spec/modules/Simplex/Messaging/Agent/Client.md +++ b/spec/modules/Simplex/Messaging/Agent/Client.md @@ -29,7 +29,7 @@ The module is consumed by Agent.hs (which passes specific worker bodies, task qu - **Operation states**: `ntfNetworkOp`, `rcvNetworkOp`, `msgDeliveryOp`, `sndNetworkOp`, `databaseOp` - **Locking**: `connLocks`, `invLocks`, `deleteLock`, `getMsgLocks`, `clientNoticesLock` - **Service state**: `useClientServices` (per-user boolean controlling whether service certificates are used) -- **Proxy routing**: `smpProxiedRelays` (maps destination transport session → proxy server used) +- **Proxy routing**: `smpProxiedRelays` (maps destination transport session → proxy router used) - **Network state**: `userNetworkInfo`, `userNetworkUpdated`, `useNetworkConfig` (slow/fast pair) All TVars are initialized in `newAgentClient`. The `active` TVar is the global kill switch — `closeAgentClient` sets it to `False`, and all protocol client getters check it first. @@ -56,11 +56,11 @@ When `newProtocolClient` fails and `persistErrorInterval > 0`, the error is cach 1. **Session ID registration**: `SS.setSessionId` records the TLS session ID in `currentSubs`, linking the transport session to the actual TLS connection for later session validation. -2. **Service credential synchronization** (`updateClientService`): After connecting, compares client-side and server-side service state. Four cases: +2. **Service credential synchronization** (`updateClientService`): After connecting, compares client-side and router-side service state. 
Four cases: - Both have service and IDs match → update DB (no-op if same) - Both have service but IDs differ → update DB and remove old queue-service associations - - Client has service, server doesn't → delete client service (handles server version downgrade) - - Server has service, client doesn't → log error (should not happen in normal flow) + - Client has service, router doesn't → delete client service (handles router version downgrade) + - Router has service, client doesn't → log error (should not happen in normal flow) On connection failure, `smpConnectClient` triggers `resubscribeSMPSession` before re-throwing the error. This ensures pending subscriptions get retry logic even when the initial connection attempt fails. @@ -182,7 +182,7 @@ The `clientNoticesLock` TMVar serializes notice processing across concurrent sub ### processSubResults — partitioning Subscription results are partitioned into five categories: -1. **Failed with client notice** — error has an associated server-side notice (e.g., queue status change). Queue is treated as failed (removed from pending, added to `removedSubs`) AND the notice is recorded for processing. +1. **Failed with client notice** — error has an associated router-side notice (e.g., queue status change). Queue is treated as failed (removed from pending, added to `removedSubs`) AND the notice is recorded for processing. 2. **Failed permanently** — non-temporary error without notice, queue is removed from pending and added to `removedSubs` 3. **Failed temporarily** — error is transient, queue stays in pending unchanged for retry on reconnect 4. **Subscribed** — moved from pending to active. Further split into: queues whose service ID matches the session service (added as service-associated) and others. If the queue had a tracked `clientNoticeId`, it is cleared (notice resolved by successful subscription). 
@@ -205,18 +205,18 @@ Subscription results are partitioned into five categories: Implements SMP proxy/direct routing with fallback: -1. `shouldUseProxy` checks `smpProxyMode` (Always/Unknown/Unprotected/Never) and whether the destination server is "known" (in the user's server list) +1. `shouldUseProxy` checks `smpProxyMode` (Always/Unknown/Unprotected/Never) and whether the destination router is "known" (in the user's router list) 2. If proxying: `getSMPProxyClient` creates or reuses a proxy connection, then `connectSMPProxiedRelay` establishes the relay session. On `NO_SESSION` error, re-creates the relay session through the same proxy. 3. If proxying fails with a host error and `smpProxyFallback` allows it: falls back to direct connection 4. `deleteRelaySession` carefully validates that the current relay session matches the one that failed before removing it (prevents removing a concurrently-created replacement session) -**NO_SESSION retry limit**: On `NO_SESSION`, `sendViaProxy` is called recursively with `Just proxySrv` to reuse the same proxy server. If the recursive call also gets `NO_SESSION`, it throws `proxyError` instead of recursing again — `proxySrv_` is `Just`, so the `Nothing` branch (which recurses) is not taken. This limits retry to exactly one attempt. +**NO_SESSION retry limit**: On `NO_SESSION`, `sendViaProxy` is called recursively with `Just proxySrv` to reuse the same proxy router. If the recursive call also gets `NO_SESSION`, it throws `proxyError` instead of recursing again — `proxySrv_` is `Just`, so the `Nothing` branch (which recurses) is not taken. This limits retry to exactly one attempt. **Proxy selection caching** (`smpProxiedRelays`): When `getSMPProxyClient` selects a proxy for a destination, it atomically inserts the proxy→destination mapping into `smpProxiedRelays`. If a mapping already exists (another thread selected a proxy for the same destination), the existing mapping is used. 
On relay creation failure with non-host errors, both the relay session and proxy mapping are removed. On host errors, they are preserved to allow fallback logic. ## Service credentials lifecycle -`getServiceCredentials` manages per-user, per-server service certificate credentials: +`getServiceCredentials` manages per-user, per-router service certificate credentials: 1. Checks `useClientServices` — if the user has services disabled, returns `Nothing` 2. Looks up existing credentials in DB via `getClientServiceCredentials` @@ -235,15 +235,15 @@ The generated credentials are Ed25519 self-signed certificates with `simplex` or `withStoreBatch` / `withStoreBatch'` run multiple DB operations in a single transaction, catching exceptions per-operation to report individual failures. The entire batch is within one `agentOperationBracket`. -## Server selection — getNextServer / withNextSrv +## Router selection — getNextServer / withNextSrv -Server selection has two-level diversity: -1. **Operator diversity**: prefer servers from operators not already used (tracked by `usedOperators` set) -2. **Host diversity**: prefer servers with hosts not already used (tracked by `usedHosts` set) +Router selection has two-level diversity: +1. **Operator diversity**: prefer routers from operators not already used (tracked by `usedOperators` set) +2. **Host diversity**: prefer routers with hosts not already used (tracked by `usedHosts` set) -`filterOrAll` ensures that if all servers are "used," the full list is returned rather than an empty one. +`filterOrAll` ensures that if all routers are "used," the full list is returned rather than an empty one. -`withNextSrv` is designed for retry loops — it re-reads user servers on each call (allowing configuration changes during retries) and tracks `triedHosts` across attempts. When all hosts are tried, the tried set is reset (`S.empty`), creating a round-robin effect. 
+`withNextSrv` is designed for retry loops — it re-reads user routers on each call (allowing configuration changes during retries) and tracks `triedHosts` across attempts. When all hosts are tried, the tried set is reset (`S.empty`), creating a round-robin effect. ## Locking primitives @@ -295,6 +295,6 @@ Classifies errors as temporary (retryable) or permanent. Notable non-obvious cla - `CRITICAL True` is temporary — `True` means the error shows a restart button, implying the user should retry. `CRITICAL False` is permanent. - `INACTIVE` is temporary — the agent may be reactivated - `SMP.PROXY NO_SESSION` via proxy is temporary — session can be re-established -- `SMP.STORE _` is temporary — server-side store error, not a client issue +- `SMP.STORE _` is temporary — router-side store error, not a client issue `temporaryOrHostError` extends `temporaryAgentError` to also include host-related errors (`HOST`, `TRANSPORT TEVersion`). Used in subscription management where host errors should trigger resubscription rather than permanent failure. diff --git a/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md b/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md index 7bfb10bbc0..ec7852acf2 100644 --- a/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md +++ b/spec/modules/Simplex/Messaging/Agent/Env/SQLite.md @@ -6,4 +6,4 @@ ## mkUserServers — silent fallback on all-disabled -See comment on `mkUserServers`. If filtering servers by `enabled && role` yields an empty list, `fromMaybe srvs` falls back to *all* servers regardless of enabled/role status. This prevents a configuration where all servers are disabled from leaving the user with no servers — but means disabled servers can still be used if every server in a role is disabled. +See comment on `mkUserServers`. If filtering routers by `enabled && role` yields an empty list, `fromMaybe srvs` falls back to *all* routers regardless of enabled/role status. 
This prevents a configuration where all routers are disabled from leaving the user with no routers — but means disabled routers can still be used if every router in a role is disabled. diff --git a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md index d55cfd7469..ac591c1927 100644 --- a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md +++ b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md @@ -6,7 +6,7 @@ ## Architecture -The notification system uses a supervisor with **three worker pools**, each keyed by server address: +The notification system uses a supervisor with **three worker pools**, each keyed by router address: | Pool | Key | Purpose | |------|-----|---------| @@ -23,7 +23,7 @@ The supervisor (`runNtfSupervisor`) reads commands from `ntfSubQ` and dispatches `partitionQueueSubActions` classifies each (queue, subscription) pair into one of four buckets: - **New sub**: no existing subscription record — create from scratch -- **Reset sub**: credentials mismatch (SMP server changed, notifier ID changed, action was nulled by error, or action is a delete) — wipe and restart from SMP key exchange +- **Reset sub**: credentials mismatch (SMP router changed, notifier ID changed, action was nulled by error, or action is a delete) — wipe and restart from SMP key exchange - **Continue SMP work**: existing action is `NSASMP` and credentials are consistent — kick the SMP worker - **Continue NTF work**: existing action is `NSANtf` and credentials are consistent — kick the NTF worker @@ -54,11 +54,11 @@ Successful check responses with statuses not in `subscribeNtfStatuses` also trig Token deletion splits into two phases: 1. **Store phase**: Remove token from active store, persist `(server, privateKey, tokenId)` to a deletion queue via `addNtfTokenToDelete` -2. **Network phase**: `runNtfTknDelWorker` reads from the queue and performs the actual server-side deletion +2. 
**Network phase**: `runNtfTknDelWorker` reads from the queue and performs the actual router-side deletion On supervisor startup, `startTknDelete` scans for any pending deletion queue entries and launches workers. This ensures token cleanup survives agent restarts. -If the token has no server-side ID (`ntfTokenId = Nothing`), only the store phase runs — no worker is launched. +If the token has no router-side ID (`ntfTokenId = Nothing`), only the store phase runs — no worker is launched. ### 6. workerErrors nulls subscription action @@ -88,7 +88,7 @@ When token deletion gets a permanent (non-temporary, non-host) error, the deleti ### 12. getNtfServer — random selection from multiple -When multiple notification routers are configured, one is selected randomly using `randomR` with a session-stable `TVar` generator. Single-server configurations skip the randomness. +When multiple notification routers are configured, one is selected randomly using `randomR` with a session-stable `TVar` generator. Single-router configurations skip the randomness. ### 13. closeNtfSupervisor — atomic swap then cancel diff --git a/spec/modules/Simplex/Messaging/Agent/Protocol.md b/spec/modules/Simplex/Messaging/Agent/Protocol.md index ad95df8094..c6e65fbdfe 100644 --- a/spec/modules/Simplex/Messaging/Agent/Protocol.md +++ b/spec/modules/Simplex/Messaging/Agent/Protocol.md @@ -64,9 +64,9 @@ The semicolon separator for SMP queues in the URI query string is deliberate — Short links encode `ContactConnType` as a single lowercase letter in the URL path: `a` (contact), `c` (channel), `g` (group), `r` (relay). Invitation links use `i`. The parser uses `toUpper` before dispatching to `ctTypeP` (which expects uppercase), while the encoder uses `toLower` on `ctTypeChar` output. This case dance happens because the wire format wants lowercase URLs but the internal representation uses uppercase. 
-## Short link server shortening +## Short link router shortening -`shortenShortLink` strips port and key hash from preset servers, leaving only the hostname (`SMPServerOnlyHost` pattern). This makes short links shorter for well-known servers. `restoreShortLink` reverses this by looking up the full server definition from the preset list. Both functions match on primary hostname only (first in the `NonEmpty` list). +`shortenShortLink` strips port and key hash from preset routers, leaving only the hostname (`SMPServerOnlyHost` pattern). This makes short links shorter for well-known routers. `restoreShortLink` reverses this by looking up the full router definition from the preset list. Both functions match on primary hostname only (first in the `NonEmpty` list). `isPresetServer` has a non-obvious port matching rule: empty port in the preset matches `"443"` or `"5223"` in the link. This handles servers that use default ports without explicitly listing them. diff --git a/spec/modules/Simplex/Messaging/Agent/Stats.md b/spec/modules/Simplex/Messaging/Agent/Stats.md index d793564e7f..d501c3f7e1 100644 --- a/spec/modules/Simplex/Messaging/Agent/Stats.md +++ b/spec/modules/Simplex/Messaging/Agent/Stats.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Agent.Stats -> Per-server statistics counters (SMP, XFTP, NTF) with TVar-based live state and serializable snapshots. +> Per-router statistics counters (SMP, XFTP, NTF) with TVar-based live state and serializable snapshots. **Source**: [`Agent/Stats.hs`](../../../../../src/Simplex/Messaging/Agent/Stats.hs) diff --git a/spec/modules/Simplex/Messaging/Notifications/Protocol.md b/spec/modules/Simplex/Messaging/Notifications/Protocol.md index 71daf771db..fb718fd804 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Protocol.md +++ b/spec/modules/Simplex/Messaging/Notifications/Protocol.md @@ -28,7 +28,7 @@ When encoding `NRTkn` responses, the `NTInvalid` reason is only included if the ### 4. 
subscribeNtfStatuses migration invariant -The comment on `subscribeNtfStatuses` (`[NSNew, NSPending, NSActive, NSInactive]`) warns that changing these statuses requires a new database migration for queue ID hashes (see `m20250830_queue_ids_hash`). This is a cross-module invariant between protocol types and server storage. +The comment on `subscribeNtfStatuses` (`[NSNew, NSPending, NSActive, NSInactive]`) warns that changing these statuses requires a new database migration for queue ID hashes (see `m20250830_queue_ids_hash`). This is a cross-module invariant between protocol types and router storage. ### 5. allowNtfSubCommands permits NTInvalid and NTExpired @@ -36,7 +36,7 @@ Token status `NTInvalid` allows subscription commands (SNEW, SCHK, SDEL), which ### 6. PPApnsNull test provider -`PPApnsNull` is a push provider that never communicates with APNS. It's used for end-to-end testing of the notification server from clients without requiring actual push infrastructure. +`PPApnsNull` is a push provider that never communicates with APNS. It's used for end-to-end testing of the notification router from clients without requiring actual push infrastructure. ### 7. DeviceToken hex validation @@ -44,7 +44,7 @@ Token status `NTInvalid` allows subscription commands (SNEW, SCHK, SDEL), which ### 8. SMPQueueNtf parsing applies updateSMPServerHosts -Both `smpP` and `strP` for `SMPQueueNtf` apply `updateSMPServerHosts` to the parsed SMP server. This normalizes server host addresses on deserialization, ensuring consistent comparison even if the on-wire format uses different host representations. +Both `smpP` and `strP` for `SMPQueueNtf` apply `updateSMPServerHosts` to the parsed SMP router. This normalizes router host addresses on deserialization, ensuring consistent comparison even if the on-wire format uses different host representations. ### 9.
NRTknId response tag comment diff --git a/spec/modules/Simplex/Messaging/Notifications/Server.md b/spec/modules/Simplex/Messaging/Notifications/Server.md index 5c74878d73..d77a30a00a 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server.md @@ -1,12 +1,12 @@ # Simplex.Messaging.Notifications.Server -> NTF server: manages tokens, subscriptions, SMP subscriber connections, and push notification delivery. +> NTF router: manages tokens, subscriptions, SMP subscriber connections, and push notification delivery. **Source**: [`Notifications/Server.hs`](../../../../../src/Simplex/Messaging/Notifications/Server.hs) ## Architecture -The NTF server runs several concurrent threads via `raceAny_`: +The NTF router runs several concurrent threads via `raceAny_`: | Thread | Purpose | |--------|---------| @@ -26,7 +26,7 @@ When `verifyNtfTransmission` encounters an AUTH error (entity not found), it cal ### 2. TNEW idempotent re-registration -When TNEW is received for an already-registered token, the server: +When TNEW is received for an already-registered token, the router: 1. Looks up the existing token via `findNtfTokenRegistration` (matches on push provider, device token, AND verify key) 2. Verifies the DH secret matches (recomputed from the new `dhPubKey` and stored `tknDhPrivKey`) 3. If DH secrets differ → AUTH error (prevents token hijacking) @@ -36,7 +36,7 @@ If the verify key doesn't match in step 1, the lookup returns `Nothing` and a ne ### 3. SNEW idempotent subscription -When SNEW is received for an existing subscription (same token + SMP queue), the server returns the existing `ntfSubId` if the notifier key matches. If keys differ, AUTH error. New subscriptions are only created when no match exists in `findNtfSubscription`. +When SNEW is received for an existing subscription (same token + SMP queue), the router returns the existing `ntfSubId` if the notifier key matches. If keys differ, AUTH error. 
New subscriptions are only created when no match exists in `findNtfSubscription`. ### 4. PPApnsNull suppresses statistics @@ -44,7 +44,7 @@ When SNEW is received for an existing subscription (same token + SMP queue), the ### 5. END requires active session validation -SMP END messages are only processed when the originating session is the currently active session for that server (`activeClientSession'` check). This prevents stale END messages from previous (reconnected) sessions from incorrectly marking subscriptions as ended. +SMP END messages are only processed when the originating session is the currently active session for that router (`activeClientSession'` check). This prevents stale END messages from previous (reconnected) sessions from incorrectly marking subscriptions as ended. ### 6. waitForSMPSubscriber two-phase wait @@ -52,9 +52,9 @@ SMP END messages are only processed when the originating session is the currentl ### 7. CAServiceUnavailable triggers individual resubscription -When a service subscription becomes unavailable (SMP server rejects service credentials), the NTF server: +When a service subscription becomes unavailable (SMP router rejects service credentials), the NTF router: 1. Removes the service association from the database -2. Resubscribes all individual queues for that server via `subscribeSrvSubs` +2. Resubscribes all individual queues for that router via `subscribeSrvSubs` This is the fallback path from service-level to queue-level SMP subscriptions. @@ -70,9 +70,9 @@ On the second failure, the error is logged and returned. `PPTokenInvalid` marks Cron notification interval has a hard minimum of 20 minutes. `TCRN 0` disables cron notifications. `TCRN n` where `1 <= n < 20` returns `QUOTA` error. -### 10. Startup resubscription is concurrent per server +### 10. Startup resubscription is concurrent per router -`resubscribe` uses `mapConcurrently` to resubscribe to all known SMP servers in parallel. 
Within each server, subscriptions are paginated via `subscribeLoop` using cursor-based pagination (`afterSubId_`). +`resubscribe` uses `mapConcurrently` to resubscribe to all known SMP routers in parallel. Within each router, subscriptions are paginated via `subscribeLoop` using cursor-based pagination (`afterSubId_`). ### 11. receive separates error responses from commands @@ -80,7 +80,7 @@ The `receive` function processes incoming transmissions and partitions results: ### 12. Maintenance mode saves state then exits immediately -When `maintenance` is set in `startOptions`, the server restores stats, calls `stopServer` (closes DB, saves stats), and exits with `exitSuccess`. It never starts transport listeners, subscriber threads, or resubscription. This provides a way to run database migrations without the server serving traffic. +When `maintenance` is set in `startOptions`, the router restores stats, calls `stopServer` (closes DB, saves stats), and exits with `exitSuccess`. It never starts transport listeners, subscriber threads, or resubscription. This provides a way to run database migrations without the router serving traffic. ### 13. Resubscription runs as a detached fork @@ -88,7 +88,7 @@ When `maintenance` is set in `startOptions`, the server restores stats, calls `s ### 14. TNEW re-registration resets status for non-verifiable tokens -When a re-registration TNEW matches on DH secret but `allowTokenVerification tknStatus` is `False` (token is `NTNew`, `NTInvalid`, or `NTExpired`), the server resets status to `NTRegistered` before sending the verification push. This makes TNEW a "status repair" mechanism — clients with stuck tokens can restart the verification flow by re-registering with the same DH key. +When a re-registration TNEW matches on DH secret but `allowTokenVerification tknStatus` is `False` (token is `NTNew`, `NTInvalid`, or `NTExpired`), the router resets status to `NTRegistered` before sending the verification push. 
This makes TNEW a "status repair" mechanism — clients with stuck tokens can restart the verification flow by re-registering with the same DH key. ### 15. DELD unconditionally updates status (no session validation) @@ -96,7 +96,7 @@ Unlike `SMP.END` which checks `activeClientSession'` to prevent stale session me ### 16. TRPL generates new code but reuses the DH key -`TRPL` (token replace) creates a new registration code and resets status to `NTRegistered`, but does NOT generate a new server DH key pair. The existing `tknDhPrivKey` and `tknDhSecret` are preserved — only the push provider token and registration code change. The encrypted channel between client and NTF router persists across device token replacements. +`TRPL` (token replace) creates a new registration code and resets status to `NTRegistered`, but does NOT generate a new router DH key pair. The existing `tknDhPrivKey` and `tknDhSecret` are preserved — only the push provider token and registration code change. The encrypted channel between client and NTF router persists across device token replacements. ### 17. PNMessage delivery requires NTActive, verification and cron do not @@ -112,7 +112,7 @@ When a service subscription is confirmed, the NTF router compares expected and c ### 20. subscribeLoop calls exitFailure on database error -If `getServerNtfSubscriptions` returns `Left _` during startup resubscription, the server terminates via `exitFailure`. Since `resubscribe` runs in a forked thread (pattern 13), this `exitFailure` terminates the entire process — a transient database error during startup resubscription kills the server. +If `getServerNtfSubscriptions` returns `Left _` during startup resubscription, the router terminates via `exitFailure`. Since `resubscribe` runs in a forked thread (pattern 13), this `exitFailure` terminates the entire process — a transient database error during startup resubscription kills the router. ### 21. 
Stats log aligns to wall-clock time of day @@ -120,7 +120,7 @@ The stats logging thread calculates an `initialDelay` to synchronize the first f ### 22. NMSG AUTH errors silently counted, not logged -When `addTokenLastNtf` returns `Left AUTH` (notification for a queue whose subscription/token association is invalid), the server increments `ntfReceivedAuth` but takes no corrective action. Other error types are silently ignored. This is expected — subscriptions may be deleted while messages are in-flight. +When `addTokenLastNtf` returns `Left AUTH` (notification for a queue whose subscription/token association is invalid), the router increments `ntfReceivedAuth` but takes no corrective action. Other error types are silently ignored. This is expected — subscriptions may be deleted while messages are in-flight. ### 23. PNVerification delivery transitions token to NTConfirmed diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Control.md b/spec/modules/Simplex/Messaging/Notifications/Server/Control.md index 897f81c16f..cbdb5b4161 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Control.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Control.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Notifications.Server.Control -> Control port command protocol for NTF server administration. +> Control port command protocol for NTF router administration. **Source**: [`Notifications/Server/Control.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Control.hs) diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Env.md b/spec/modules/Simplex/Messaging/Notifications/Server/Env.md index c266390d2d..17ae63862f 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Env.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Env.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Notifications.Server.Env -> NTF server environment: configuration, subscriber state, and push provider management. 
+> NTF router environment: configuration, subscriber state, and push provider management. **Source**: [`Notifications/Server/Env.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Env.hs) @@ -8,7 +8,7 @@ ### 1. Service credentials are lazily generated -`mkDbService` in `newNtfServerEnv` generates service credentials on demand: when `getCredentials` is called for an SMP server, it checks the database. If the server is known and already has credentials, they are reused. If the server is known but has no credentials yet (first connection), new credentials are generated via `genCredentials`, stored in the database, and returned. If the server is not in the database at all, `PCEServiceUnavailable` is thrown (this case should not occur in practice, as clients only connect to servers already tracked in the database). +`mkDbService` in `newNtfServerEnv` generates service credentials on demand: when `getCredentials` is called for an SMP router, it checks the database. If the router is known and already has credentials, they are reused. If the router is known but has no credentials yet (first connection), new credentials are generated via `genCredentials`, stored in the database, and returned. If the router is not in the database at all, `PCEServiceUnavailable` is thrown (this case should not occur in practice, as clients only connect to routers already tracked in the database). Service credentials are only used when `useServiceCreds` is enabled in the config. @@ -18,7 +18,7 @@ Service credentials are only used when `useServiceCreds` is enabled in the confi ### 3. getPushClient lazy initialization -`getPushClient` looks up the push client by provider in `pushClients` TMap. If not found, it calls `newPushClient` to create and register one. Push provider connections are established on first use, not at server startup. +`getPushClient` looks up the push client by provider in `pushClients` TMap. If not found, it calls `newPushClient` to create and register one. 
Push provider connections are established on first use, not at router startup. ### 4. Service credential validity: 25h backdating, ~2700yr forward diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Main.md b/spec/modules/Simplex/Messaging/Notifications/Server/Main.md index 3719dcd978..54136f1c3a 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Main.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Main.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Notifications.Server.Main -> CLI interface and INI configuration parsing for the NTF server. +> CLI interface and INI configuration parsing for the NTF router. **Source**: [`Notifications/Server/Main.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Main.hs) diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md index d2a49471d9..3fd2bd8803 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md @@ -72,4 +72,4 @@ The comment explicitly states `APNSErrorResponse` is `data` rather than `newtype ### 17. Connection initialization is fire-and-forget -`createAPNSPushClient` calls `connectHTTPS2` and discards the result with `void`. If the initial connection fails, the error is only logged — the client is still created. The first push delivery triggers `getApnsHTTP2Client` which reconnects. This means the server can start even if APNS is unreachable. +`createAPNSPushClient` calls `connectHTTPS2` and discards the result with `void`. If the initial connection fails, the error is only logged — the client is still created. The first push delivery triggers `getApnsHTTP2Client` which reconnects. This means the router can start even if APNS is unreachable. 
diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md b/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md index 4a4439f548..d954f03d13 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Stats.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Notifications.Server.Stats -> NTF server statistics collection with own-server breakdown and backward-compatible persistence. +> NTF router statistics collection with own-router breakdown and backward-compatible persistence. **Source**: [`Notifications/Server/Stats.hs`](../../../../../../src/Simplex/Messaging/Notifications/Server/Stats.hs) @@ -8,27 +8,27 @@ ### 1. incServerStat double lookup -`incServerStat` performs a non-STM IO lookup first. On cache hit, the STM transaction only touches the per-server `TVar Int` without reading the shared TMap, avoiding contention. On cache miss, the STM block re-checks the map to handle races (another thread may have inserted between the IO lookup and STM entry). +`incServerStat` performs a non-STM IO lookup first. On cache hit, the STM transaction only touches the per-router `TVar Int` without reading the shared TMap, avoiding contention. On cache miss, the STM block re-checks the map to handle races (another thread may have inserted between the IO lookup and STM entry). ### 2. setNtfServerStats is not thread safe -`setNtfServerStats` is explicitly documented as non-thread-safe and intended for server startup only (restoring from backup file). +`setNtfServerStats` is explicitly documented as non-thread-safe and intended for router startup only (restoring from backup file). ### 3. Backward-compatible parsing -The `strP` parser uses `opt` which defaults missing fields to 0. This allows reading stats files from older server versions that don't include newer fields (`ntfReceivedAuth`, `ntfFailed`, `ntfVrf*`, etc.). +The `strP` parser uses `opt` which defaults missing fields to 0. 
This allows reading stats files from older router versions that don't include newer fields (`ntfReceivedAuth`, `ntfFailed`, `ntfVrf*`, etc.). ### 4. getNtfServerStatsData is a non-atomic snapshot -`getNtfServerStatsData` reads each `IORef` and `TMap` field sequentially in plain `IO`, not inside a single STM transaction. The returned `NtfServerStatsData` is not a consistent point-in-time snapshot — invariants like "received >= delivered" may not hold. The same applies to `getStatsByServer`, which does one `readTVarIO` for the map root TVar, then a separate `readTVarIO` for each per-server TVar. This is acceptable for periodic reporting where approximate consistency suffices. +`getNtfServerStatsData` reads each `IORef` and `TMap` field sequentially in plain `IO`, not inside a single STM transaction. The returned `NtfServerStatsData` is not a consistent point-in-time snapshot — invariants like "received >= delivered" may not hold. The same applies to `getStatsByServer`, which does one `readTVarIO` for the map root TVar, then a separate `readTVarIO` for each per-router TVar. This is acceptable for periodic reporting where approximate consistency suffices. ### 5. Mixed IORef/TVar concurrency primitives -Aggregate counters (`ntfReceived`, `ntfDelivered`, etc.) use `IORef Int` incremented via `atomicModifyIORef'_`, while per-server breakdowns use `TMap Text (TVar Int)` incremented atomically via STM in `incServerStat`. Although both individual operations are atomic, the aggregate and per-server increments are separate operations, so their values can drift: a thread could increment the aggregate `IORef` before `incServerStat` runs, or vice versa. +Aggregate counters (`ntfReceived`, `ntfDelivered`, etc.) use `IORef Int` incremented via `atomicModifyIORef'_`, while per-router breakdowns use `TMap Text (TVar Int)` incremented atomically via STM in `incServerStat`. 
Although both individual operations are atomic, the aggregate and per-router increments are separate operations, so their values can drift: a thread could increment the aggregate `IORef` before `incServerStat` runs, or vice versa. ### 6. setStatsByServer replaces TMap atomically but orphans old TVars -`setStatsByServer` builds a fresh `Map Text (TVar Int)` in IO via `newTVarIO`, then atomically replaces the TMap's root TVar. Old per-server TVars are not reused — any other thread holding a reference from a prior `TM.lookupIO` would modify an orphaned counter. Safe only because it's called at startup (like `setNtfServerStats`), but lacks the explicit "not thread safe" comment. +`setStatsByServer` builds a fresh `Map Text (TVar Int)` in IO via `newTVarIO`, then atomically replaces the TMap's root TVar. Old per-router TVars are not reused — any other thread holding a reference from a prior `TM.lookupIO` would modify an orphaned counter. Safe only because it's called at startup (like `setNtfServerStats`), but lacks the explicit "not thread safe" comment. ### 7. Positional parser format despite key=value appearance diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md index 05a7e70e2d..d9deedbf48 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md @@ -36,7 +36,7 @@ When `stmDeleteNtfToken` removes a token, it deletes the entry from the inner `T ### 8. deleteTokenSubs returns SMP queues for upstream unsubscription -`deleteTokenSubs` atomically collects all `SMPQueueNtf` values from the deleted subscriptions and returns them. This is how the server layer knows which SMP notifier subscriptions to tear down. `stmRemoveInactiveTokenRegistrations` discards this list (`void $`), meaning rival-token cleanup does **not** trigger SMP unsubscription — only explicit token deletion does. 
+`deleteTokenSubs` atomically collects all `SMPQueueNtf` values from the deleted subscriptions and returns them. This is how the router layer knows which SMP notifier subscriptions to tear down. `stmRemoveInactiveTokenRegistrations` discards this list (`void $`), meaning rival-token cleanup does **not** trigger SMP unsubscription — only explicit token deletion does. ### 9. stmAddNtfSubscription always returns Just (vestigial Maybe) @@ -48,7 +48,7 @@ When `stmDeleteNtfSubscription` removes a subscription, it deletes the `subId` f ### 11. stmSetNtfService — asymmetric cleanup with Postgres store -`stmSetNtfService` uses `maybe TM.delete TM.insert` to either remove or set the service association for an SMP server. This is purely a key-value update with no cascading effects on subscriptions. The Postgres store's `removeServiceAndAssociations` handles subscription cleanup separately, meaning the STM and Postgres stores have **different cleanup semantics** for service removal. +`stmSetNtfService` uses `maybe TM.delete TM.insert` to either remove or set the service association for an SMP router. This is purely a key-value update with no cascading effects on subscriptions. The Postgres store's `removeServiceAndAssociations` handles subscription cleanup separately, meaning the STM and Postgres stores have **different cleanup semantics** for service removal. ### 12. Subscription index triple-write invariant diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md index 440797539a..bde863eb61 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md @@ -8,7 +8,7 @@ ### 1. deleteNtfToken exclusive row lock -`deleteNtfToken` acquires `FOR UPDATE` on the token row before cascading deletes. This prevents concurrent subscription inserts for this token during the deletion window. 
The subscriptions are aggregated by SMP server and returned for in-memory subscription cleanup. +`deleteNtfToken` acquires `FOR UPDATE` on the token row before cascading deletes. This prevents concurrent subscription inserts for this token during the deletion window. The subscriptions are aggregated by SMP router and returned for in-memory subscription cleanup. ### 2. addTokenLastNtf atomic CTE @@ -47,11 +47,11 @@ Only non-service-associated subscriptions (`NOT ntf_service_assoc`) are returned ### 9. Server upsert optimization -`addNtfSubscription` first tries a plain SELECT for the SMP server, then falls back to INSERT with ON CONFLICT only if the server doesn't exist. This avoids the upsert overhead in the common case where the server already exists. +`addNtfSubscription` first tries a plain SELECT for the SMP router, then falls back to INSERT with ON CONFLICT only if the router doesn't exist. This avoids the upsert overhead in the common case where the router already exists. ### 10. Service association tracking -`batchUpdateSrvSubStatus` atomically updates both subscription status and `ntf_service_assoc` flag. When notifications arrive via a service subscription (`newServiceId` is `Just`), all affected subscriptions are marked as service-associated. `removeServiceAndAssociations` resets all subscriptions for a server to `NSInactive` with `ntf_service_assoc = FALSE`. +`batchUpdateSrvSubStatus` atomically updates both subscription status and `ntf_service_assoc` flag. When notifications arrive via a service subscription (`newServiceId` is `Just`), all affected subscriptions are marked as service-associated. `removeServiceAndAssociations` resets all subscriptions for a router to `NSInactive` with `ntf_service_assoc = FALSE`. ### 11. uninterruptibleMask_ wraps most store operations @@ -63,7 +63,7 @@ Only non-service-associated subscriptions (`NOT ntf_service_assoc`) are returned ### 13. 
getUsedSMPServers uncorrelated EXISTS -The `EXISTS` subquery in `getUsedSMPServers` has no join condition to the outer `smp_servers` table — it returns ALL servers if ANY subscription anywhere has a subscribable status. This is intentional for server startup: the server needs all SMP server records (including `ServiceSub` data) to rebuild in-memory state, and the EXISTS clause is a cheap guard against an empty subscription table. +The `EXISTS` subquery in `getUsedSMPServers` has no join condition to the outer `smp_servers` table — it returns ALL SMP routers if ANY subscription anywhere has a subscribable status. This is intentional for NTF router startup: the NTF router needs all SMP router records (including `ServiceSub` data) to rebuild in-memory state, and the EXISTS clause is a cheap guard against an empty subscription table. ### 14. Trigger-maintained XOR hash aggregates diff --git a/spec/modules/Simplex/Messaging/Notifications/Transport.md b/spec/modules/Simplex/Messaging/Notifications/Transport.md index df40214752..9b94d7e0d9 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Transport.md +++ b/spec/modules/Simplex/Messaging/Notifications/Transport.md @@ -33,6 +33,6 @@ NTF uses a 512-byte block size (`ntfBlockSize`), significantly smaller than SMP. `ntfTHandle` creates a THandle with `thVersion = VersionNTF 0` — a version that no real protocol supports. This is a placeholder value that gets overwritten during version negotiation. All feature gates check `v >= authBatchCmdsNTFVersion` (v2), so the v0 placeholder disables all optional features. -### 6. Server handshake always sends authPubKey +### 6. Router handshake always sends authPubKey -`ntfServerHandshake` always includes `authPubKey = Just sk` in the server handshake, regardless of the advertised version range. The encoding functions (`encodeAuthEncryptCmds`) then decide whether to actually serialize it based on the max version. This means the key is computed even when it won't be sent.
+`ntfServerHandshake` always includes `authPubKey = Just sk` in the router handshake, regardless of the advertised version range. The encoding functions (`encodeAuthEncryptCmds`) then decide whether to actually serialize it based on the max version. This means the key is computed even when it won't be sent. diff --git a/spec/modules/Simplex/Messaging/Notifications/Types.md b/spec/modules/Simplex/Messaging/Notifications/Types.md index 97cc66913a..576d9c0882 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Types.md +++ b/spec/modules/Simplex/Messaging/Notifications/Types.md @@ -16,4 +16,4 @@ ### 3. NSADelete and NSARotate are deprecated -These `NtfSubNTFAction` values are no longer generated by current code but are retained in the type for processing legacy database records. `NSARotate` is logically "delete + recreate" while `NSADelete` is "delete subscription on NTF server + delete notifier credentials on SMP server". +These `NtfSubNTFAction` values are no longer generated by current code but are retained in the type for processing legacy database records. `NSARotate` is logically "delete + recreate" while `NSADelete` is "delete subscription on NTF router + delete notifier credentials on SMP router". 
From 1cc4d98dd082ffa0ab9b5b153f8d7d97768c2134 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:56:14 +0000 Subject: [PATCH 46/91] terms 2 --- spec/TOPICS.md | 8 ++--- spec/modules/Simplex/FileTransfer/Agent.md | 18 +++++++---- spec/modules/Simplex/FileTransfer/Client.md | 14 ++++---- .../Simplex/FileTransfer/Client/Agent.md | 4 +-- .../Simplex/FileTransfer/Client/Main.md | 8 ++--- spec/modules/Simplex/FileTransfer/Crypto.md | 4 +-- .../Simplex/FileTransfer/Description.md | 6 ++-- spec/modules/Simplex/FileTransfer/Protocol.md | 6 ++-- spec/modules/Simplex/FileTransfer/Server.md | 32 +++++++++---------- .../Simplex/FileTransfer/Server/Env.md | 12 +++---- .../Simplex/FileTransfer/Server/Main.md | 2 +- .../Simplex/FileTransfer/Server/Stats.md | 2 +- .../Simplex/FileTransfer/Server/Store.md | 12 +++---- .../Simplex/FileTransfer/Server/StoreLog.md | 10 +++--- spec/modules/Simplex/FileTransfer/Types.md | 4 +-- spec/modules/Simplex/Messaging/Agent.md | 4 +-- .../Messaging/Agent/NtfSubSupervisor.md | 2 +- .../Simplex/Messaging/Agent/TSessionSubs.md | 2 +- spec/modules/Simplex/Messaging/Client.md | 26 +++++++-------- .../modules/Simplex/Messaging/Client/Agent.md | 4 +-- .../Messaging/Notifications/Protocol.md | 8 ++--- .../Simplex/Messaging/Notifications/Server.md | 6 ++-- .../Messaging/Notifications/Transport.md | 4 +-- spec/modules/Simplex/Messaging/Protocol.md | 4 +-- .../Simplex/Messaging/Protocol/Types.md | 2 +- spec/modules/Simplex/Messaging/Server.md | 10 +++--- spec/modules/Simplex/Messaging/Transport.md | 4 +-- 27 files changed, 111 insertions(+), 107 deletions(-) diff --git a/spec/TOPICS.md b/spec/TOPICS.md index a62e23c297..8ce45e8007 100644 --- a/spec/TOPICS.md +++ b/spec/TOPICS.md @@ -44,15 +44,15 @@ - **NTF startup resubscription**: `resubscribe` runs as detached `forkIO` (not in `raceAny_` group), uses `mapConcurrently` across SMP routers, each with `subscribeLoop` using 100x 
database batch multiplier and cursor-based pagination. `ExitCode` exceptions from `exitFailure` on DB error propagate to main thread despite `forkIO`. `getServerNtfSubscriptions` claims subscriptions by batch-updating to `NSPending`. Spans [Server.hs](modules/Simplex/Messaging/Notifications/Server.md), [Store/Postgres.hs](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md). -- **XFTP file upload pipeline**: Agent-side encryption (streaming 64KB blocks, fixed-size padding) → chunk size selection (75% threshold algorithm) → per-router chunk creation with ID collision retry (3 attempts) → recipient registration (recursive batching up to `maxRecipients` per FADD) → per-router upload (command + file body in single HTTP/2 streaming request) → file description generation (cross-product: M chunks × R replicas × N recipients → N descriptions). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, description generation), [Client.hs](modules/Simplex/FileTransfer/Client.md) (upload protocol), [Server.hs](modules/Simplex/FileTransfer/Server.md) (quota reservation with rollback, skipCommitted idempotency), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (streaming encryption with embedded header), [Description.hs](modules/Simplex/FileTransfer/Description.md) (validation, first-replica-only digest optimization). +- **XFTP file upload pipeline**: Agent-side encryption (streaming 64KB blocks, fixed-size padding) → chunk size selection (75% threshold algorithm) → per-router data packet creation with ID collision retry (3 attempts) → recipient registration (recursive batching up to `maxRecipients` per FADD) → per-router data packet upload (command + data in single HTTP/2 streaming request) → file description generation (cross-product: M chunks × R replicas × N recipients → N descriptions). 
Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, description generation), [Client.hs](modules/Simplex/FileTransfer/Client.md) (upload protocol), [Server.hs](modules/Simplex/FileTransfer/Server.md) (quota reservation with rollback, skipCommitted idempotency), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (streaming encryption with embedded header), [Description.hs](modules/Simplex/FileTransfer/Description.md) (validation, first-replica-only digest optimization). -- **XFTP file download pipeline**: Description parsing (ValidFileDescription validation, YAML or web URI) → per-router chunk download with ephemeral DH key pair per download (forward secrecy) → size and digest verification before decryption → streaming decryption with auth tag verification (output deleted on failure) → redirect resolution (depth-1 chain: decrypt redirect YAML, validate size/digest, download actual file). Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, redirect handling), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ephemeral DH, chunk-proportional timeout), [Client/Main.hs](modules/Simplex/FileTransfer/Client/Main.md) (web URI decoding, parallel download with router grouping), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (dual decrypt paths, auth tag deletion), [Description.hs](modules/Simplex/FileTransfer/Description.md) (redirect file descriptions). +- **XFTP file download pipeline**: Description parsing (ValidFileDescription validation, YAML or web URI) → per-router data packet download with ephemeral DH key pair per download (forward secrecy) → size and digest verification before decryption → streaming decryption with auth tag verification (output deleted on failure) → redirect resolution (depth-1 chain: decrypt redirect YAML, validate size/digest, download actual file). 
Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md) (worker orchestration, redirect handling), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ephemeral DH, size-proportional timeout), [Client/Main.hs](modules/Simplex/FileTransfer/Client/Main.md) (web URI decoding, parallel download with router grouping), [Crypto.hs](modules/Simplex/FileTransfer/Crypto.md) (dual decrypt paths, auth tag deletion), [Description.hs](modules/Simplex/FileTransfer/Description.md) (redirect file descriptions). - **XFTP handshake state machine**: Three-state session-cached handshake (`No entry` → `HandshakeSent` → `HandshakeAccepted`) per HTTP/2 session. Web clients use `xftp-web-hello` header and challenge-response identity proof; native clients use standard ALPN. SNI presence gates CORS headers, web serving, and SESSION error for unrecognized connections. Key reuse on re-hello preserves existing DH keys. Spans [Server.hs](modules/Simplex/FileTransfer/Server.md) (handshake logic, CORS, web serving), [Client.hs](modules/Simplex/FileTransfer/Client.md) (ALPN selection, cert chain validation), [Transport.hs](modules/Simplex/FileTransfer/Transport.md) (block size, version). -- **XFTP storage lifecycle**: Quota reservation via atomic `stateTVar` before upload → rollback on failure (subtract + delete partial file) → physical file deleted before store cleanup (crash risk: store references missing file) → `RoundedSystemTime 3600` for privacy-preserving expiration timestamps → expiration with configurable throttling (100ms between files) → startup storage reconciliation (override stats from live store). Spans [Server.hs](modules/Simplex/FileTransfer/Server.md), [Server/Store.hs](modules/Simplex/FileTransfer/Server/Store.md), [Server/Env.hs](modules/Simplex/FileTransfer/Server/Env.md), [Server/StoreLog.hs](modules/Simplex/FileTransfer/Server/StoreLog.md) (error-resilient replay, compaction). 
+- **XFTP storage lifecycle**: Quota reservation via atomic `stateTVar` before upload → rollback on failure (subtract + delete partial data packet) → stored data packet deleted before store cleanup (crash risk: store references missing data packet) → `RoundedSystemTime 3600` for privacy-preserving expiration timestamps → expiration with configurable throttling (100ms between data packets) → startup storage reconciliation (override stats from live store). Spans [Server.hs](modules/Simplex/FileTransfer/Server.md), [Server/Store.hs](modules/Simplex/FileTransfer/Server/Store.md), [Server/Env.hs](modules/Simplex/FileTransfer/Server/Env.md), [Server/StoreLog.hs](modules/Simplex/FileTransfer/Server/StoreLog.md) (error-resilient replay, compaction). -- **XFTP worker architecture**: Five worker types in three categories: rcv (per-router download + local decryption), snd (local prepare/encrypt + per-router upload), del (per-router delete). TMVar-based connection sharing with async retry on temporary errors, permanent error cleanup (put Left + delete from TMap). `withRetryIntervalLimit` caps consecutive retries; exhausted temporary errors silently abandon work cycle (chunk stays pending). `assertAgentForeground` dual check (throw if inactive + wait if backgrounded) gates every chunk operation. Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md), [Client/Agent.hs](modules/Simplex/FileTransfer/Client/Agent.md). +- **XFTP worker architecture**: Five worker types in three categories: rcv (per-router data packet download + local decryption), snd (local prepare/encrypt + per-router data packet upload), del (per-router data packet delete). TMVar-based connection sharing with async retry on temporary errors, permanent error cleanup (put Left + delete from TMap). `withRetryIntervalLimit` caps consecutive retries; exhausted temporary errors silently abandon work cycle (chunk stays pending). 
`assertAgentForeground` dual check (throw if inactive + wait if backgrounded) gates every data packet operation. Spans [Agent.hs](modules/Simplex/FileTransfer/Agent.md), [Client/Agent.hs](modules/Simplex/FileTransfer/Client/Agent.md). - **SessionVar protocol client lifecycle**: Protocol client connections (SMP, NTF, XFTP) use a lazy singleton pattern: `getSessVar` atomically checks TMap → `newProtocolClient` fills TMVar on success/failure → `waitForProtocolClient` reads with timeout. Error caching via `persistErrorInterval` prevents connection storms (failed connections cache the error with expiry; callers receive cached error without reconnecting). `removeSessVar` uses monotonic `sessionVarId` compare-and-swap to prevent stale disconnect callbacks from removing newer clients. SMP has additional complexity: `SMPConnectedClient` wraps client with per-connection proxied relay map, `updateClientService` synchronizes service credentials post-connect, disconnect callback moves subscriptions to pending with session-ID matching. XFTP always uses `NRMBackground` timing regardless of caller request. Spans [Session.md](modules/Simplex/Messaging/Session.md), [Agent/Client.md](modules/Simplex/Messaging/Agent/Client.md) (lifecycle, disconnect callbacks, reconnection workers), [Agent.md](modules/Simplex/Messaging/Agent.md) (subscriber loop consuming events). diff --git a/spec/modules/Simplex/FileTransfer/Agent.md b/spec/modules/Simplex/FileTransfer/Agent.md index e5f58e9964..84238e1d0a 100644 --- a/spec/modules/Simplex/FileTransfer/Agent.md +++ b/spec/modules/Simplex/FileTransfer/Agent.md @@ -4,17 +4,21 @@ **Source**: [`FileTransfer/Agent.hs`](../../../../src/Simplex/FileTransfer/Agent.hs) +## Terminology + +The agent splits a **file** into **chunks** determined by the chunking algorithm. Each chunk is stored on an XFTP router as a **data packet** — the router has no concept of files or chunks, only directly addressable data packets. 
This document uses "chunk" for the agent's internal tracking and "data packet" when referring to what is transferred to/from or stored on routers. + ## Architecture The XFTP agent uses five worker types organized in three categories: | Worker | Key (router) | Purpose | |--------|-------------|---------| -| `xftpRcvWorker` | `Just server` | Download chunks from a specific XFTP router | +| `xftpRcvWorker` | `Just server` | Download data packets from a specific XFTP router | | `xftpRcvLocalWorker` | `Nothing` | Decrypt completed downloads locally | -| `xftpSndPrepareWorker` | `Nothing` | Encrypt files and create chunks on routers | -| `xftpSndWorker` | `Just server` | Upload chunks to a specific XFTP router | -| `xftpDelWorker` | `Just server` | Delete chunks from a specific XFTP router | +| `xftpSndPrepareWorker` | `Nothing` | Encrypt files and create data packets on routers | +| `xftpSndWorker` | `Just server` | Upload data packets to a specific XFTP router | +| `xftpDelWorker` | `Just server` | Delete data packets from a specific XFTP router | Workers are created on-demand via `getAgentWorker` and keyed by router address. The local workers (keyed by `Nothing`) handle CPU-bound operations that don't require network access. @@ -55,7 +59,7 @@ Similarly, `prepareFile` checks `status /= SFSEncrypted` and deletes the partial ### 8. addRecipients recursive batching -During upload, `addRecipients` recursively calls itself if a chunk needs more recipients than `xftpMaxRecipientsPerRequest`. Each iteration sends an FADD command for up to `maxRecipients` new recipients, accumulates the results, and recurses until all recipients are registered. +During upload, `addRecipients` recursively calls itself if a data packet needs more recipients than `xftpMaxRecipientsPerRequest`. Each iteration sends an FADD command for up to `maxRecipients` new recipients, accumulates the results, and recurses until all recipients are registered. ### 9. 
File description generation cross-product @@ -71,7 +75,7 @@ During upload, `addRecipients` recursively calls itself if a chunk needs more re ### 12. Delete workers skip files older than rcvFilesTTL -`runXFTPDelWorker` uses `rcvFilesTTL` (not a dedicated delete TTL) to filter pending deletions. Files older than this TTL would already be expired on the router, so attempting deletion is pointless. This reuses the receive TTL as a proxy for router-side expiration. +`runXFTPDelWorker` uses `rcvFilesTTL` (not a dedicated delete TTL) to filter pending deletions. Data packets older than this TTL would already be expired on the router, so attempting deletion is pointless. This reuses the receive TTL as a proxy for router-side expiration. ### 13. closeXFTPAgent atomically swaps worker maps @@ -83,4 +87,4 @@ During upload, `addRecipients` recursively calls itself if a chunk needs more re ### 15. Per-router stats tracking -Every chunk download, upload, and delete operation increments per-router statistics (`downloads`, `uploads`, `deletions`, `downloadAttempts`, `uploadAttempts`, `deleteAttempts`, and error variants). Size-based stats (`downloadsSize`, `uploadsSize`) track throughput in kilobytes. +Every data packet download, upload, and delete operation increments per-router statistics (`downloads`, `uploads`, `deletions`, `downloadAttempts`, `uploadAttempts`, `deleteAttempts`, and error variants). Size-based stats (`downloadsSize`, `uploadsSize`) track throughput in kilobytes. diff --git a/spec/modules/Simplex/FileTransfer/Client.md b/spec/modules/Simplex/FileTransfer/Client.md index 27fb50bc35..da659cf49a 100644 --- a/spec/modules/Simplex/FileTransfer/Client.md +++ b/spec/modules/Simplex/FileTransfer/Client.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Client -> XFTP client: connection management, handshake, chunk upload/download with forward secrecy. +> XFTP client: connection management, handshake, data packet upload/download with forward secrecy. 
**Source**: [`FileTransfer/Client.hs`](../../../../src/Simplex/FileTransfer/Client.hs) @@ -18,19 +18,19 @@ ### 3. Ephemeral DH key pair per download -`downloadXFTPChunk` generates a fresh X25519 key pair for each chunk download. The public key is sent with the FGET command; the router responds with its own ephemeral key. The derived shared secret encrypts the file data in transit. This provides forward secrecy — compromising a past DH key doesn't decrypt other downloads. +`downloadXFTPChunk` generates a fresh X25519 key pair for each data packet download. The public key is sent with the FGET command; the router returns its own ephemeral key. The derived shared secret encrypts the data packet in transit. This provides forward secrecy — compromising a past DH key doesn't decrypt other downloads. -### 4. Chunk-size-proportional download timeout +### 4. Size-proportional download timeout -`downloadXFTPChunk` calculates the timeout as `baseTimeout + (sizeInKB * perKbTimeout)`, where `baseTimeout` is the base TCP timeout and `perKbTimeout` is a per-kilobyte timeout from the network config. Larger chunks get proportionally more time. This prevents premature timeouts on large chunks over slow connections. +`downloadXFTPChunk` calculates the timeout as `baseTimeout + (sizeInKB * perKbTimeout)`, where `baseTimeout` is the base TCP timeout and `perKbTimeout` is a per-kilobyte timeout from the network config. Larger data packets get proportionally more time. This prevents premature timeouts on large data packets over slow connections. ### 5. prepareChunkSizes threshold algorithm -`prepareChunkSizes` selects chunk sizes using a 75% threshold: if the remaining payload exceeds 75% of the next larger chunk size, it uses the larger size. Otherwise, it uses the smaller size. `singleChunkSize` returns `Just size` only if the payload fits in a single chunk (used for redirect files which must be single-chunk). 
+`prepareChunkSizes` selects data packet sizes using a 75% threshold: if the remaining payload exceeds 75% of the next larger size, it uses the larger size. Otherwise, it uses the smaller size. `singleChunkSize` returns `Just size` only if the payload fits in a single data packet (used for redirect files which must be single-packet). -### 6. Upload sends file body after command response +### 6. Upload sends data packet after command block -`uploadXFTPChunk` sends the FPUT command and file body in the same streaming HTTP/2 request: the protocol command block is sent first, followed immediately by the raw file data via `hSendFile`. The router response (`FROk` or error) is received only after both the command and file body have been fully sent. This is a single HTTP/2 round trip, not a two-phase interaction. +`uploadXFTPChunk` sends the FPUT command and data packet body in the same streaming HTTP/2 request: the protocol command block is sent first, followed immediately by the raw encrypted data via `hSendFile`. The command result (`FROk` or error) is received only after both the command and data have been fully sent. This is a single HTTP/2 round trip, not a two-phase interaction. ### 7. Empty corrId as nonce diff --git a/spec/modules/Simplex/FileTransfer/Client/Agent.md b/spec/modules/Simplex/FileTransfer/Client/Agent.md index c03400d908..0a8f17bcf5 100644 --- a/spec/modules/Simplex/FileTransfer/Client/Agent.md +++ b/spec/modules/Simplex/FileTransfer/Client/Agent.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Client.Agent -> XFTP client connection management with TMVar-based sharing, async retry, and connection lifecycle. +> XFTP client: router connection management with TMVar-based sharing, async retry, and connection lifecycle. **Source**: [`FileTransfer/Client/Agent.hs`](../../../../../src/Simplex/FileTransfer/Client/Agent.hs) @@ -24,4 +24,4 @@ On permanent error, `newXFTPClient` puts the `Left error` into the `TMVar` (unbl ### 5. 
closeXFTPServerClient removes from TMap -Closing a router client deletes its entry from the TMap, so the next request will establish a fresh connection. This is called on connection errors during file operations to force reconnection. +Closing a router client deletes its entry from the TMap, so the next request will establish a fresh connection. This is called on connection errors during data packet operations to force reconnection. diff --git a/spec/modules/Simplex/FileTransfer/Client/Main.md b/spec/modules/Simplex/FileTransfer/Client/Main.md index abb9eceb5e..a7589c05e2 100644 --- a/spec/modules/Simplex/FileTransfer/Client/Main.md +++ b/spec/modules/Simplex/FileTransfer/Client/Main.md @@ -18,9 +18,9 @@ `receive` tracks a `depth` parameter starting at 1. After following one redirect, `depth` becomes 0. A second redirect throws "Redirect chain too long". This prevents infinite redirect loops from malicious file descriptions. -### 4. Parallel chunk uploads with router grouping +### 4. Parallel data packet uploads with router grouping -`uploadFile` groups chunks by router via `groupAllOn`, then uses `pooledForConcurrentlyN 16` to process up to 16 router-groups concurrently. Within each group, chunks are uploaded sequentially (`mapM`). Errors from any chunk are collected and the first one is thrown. +`uploadFile` groups data packets by router via `groupAllOn`, then uses `pooledForConcurrentlyN 16` to process up to 16 router-groups concurrently. Within each group, data packets are uploaded sequentially (`mapM`). Errors from any upload are collected and the first one is thrown. ### 5. Random router selection @@ -36,8 +36,8 @@ ### 8. File description auto-deletion prompt -After successful receive or delete, `removeFD` either auto-deletes the file description (if `--yes` flag) or prompts the user. This prevents accidental reuse of one-time file descriptions — each receive consumes the description by ACKing chunks on the router. 
+After successful receive or delete, `removeFD` either auto-deletes the file description (if `--yes` flag) or prompts the user. This prevents accidental reuse of one-time file descriptions — each receive consumes the description by ACKing data packets on the router. ### 9. Sender description uses first replica's router -`createSndFileDescription` takes the router from the first replica of each chunk for the sender's `FileChunkReplica`. This reflects the current limitation that each chunk is uploaded to exactly one router — the sender description records that single router. +`createSndFileDescription` takes the router from the first replica of each chunk for the sender's `FileChunkReplica`. This reflects the current limitation that each data packet is uploaded to exactly one router — the sender description records that single router. diff --git a/spec/modules/Simplex/FileTransfer/Crypto.md b/spec/modules/Simplex/FileTransfer/Crypto.md index 1911de60ec..a3e625a8e6 100644 --- a/spec/modules/Simplex/FileTransfer/Crypto.md +++ b/spec/modules/Simplex/FileTransfer/Crypto.md @@ -12,7 +12,7 @@ ### 2. Fixed-size padding hides actual file size -The encrypted output is padded to `encSize` (the sum of chunk sizes). Since chunk sizes are fixed powers of 2 (64KB, 256KB, 1MB, 4MB), the encrypted file size reveals only which chunk size bucket the file falls into, not the actual size. The encryption streams data with `LC.sbEncryptChunk` in a loop, pads the remaining space, then manually appends the auth tag via `LC.sbAuth`. This manual streaming approach (rather than using the all-at-once `LC.sbEncryptTailTag`) is necessary because encryption is interleaved with file I/O. +The encrypted output is padded to `encSize` (the sum of data packet sizes). Since data packet sizes are fixed powers of 2 (64KB, 256KB, 1MB, 4MB), the encrypted file size reveals only which size bucket the file falls into, not the actual size. 
The encryption streams data with `LC.sbEncryptChunk` in a loop, pads the remaining space, then manually appends the auth tag via `LC.sbAuth`. This manual streaming approach (rather than using the all-at-once `LC.sbEncryptTailTag`) is necessary because encryption is interleaved with file I/O. ### 3. Dual decrypt paths: single-chunk vs multi-chunk @@ -28,4 +28,4 @@ In the multi-chunk streaming path, if `BA.constEq` detects an auth tag mismatch ### 5. Streaming encryption uses 64KB blocks -`encryptFile` reads plaintext in 65536-byte blocks (`LC.sbEncryptChunk`), regardless of the XFTP chunk size. These are encryption blocks within a single continuous stream — not to be confused with XFTP protocol chunks which are much larger (64KB–4MB). +`encryptFile` reads plaintext in 65536-byte blocks (`LC.sbEncryptChunk`), regardless of the XFTP data packet size. These are encryption blocks within a single continuous stream — not to be confused with XFTP data packets which are much larger (64KB–4MB). diff --git a/spec/modules/Simplex/FileTransfer/Description.md b/spec/modules/Simplex/FileTransfer/Description.md index 0edd0bee8d..835ca081a5 100644 --- a/spec/modules/Simplex/FileTransfer/Description.md +++ b/spec/modules/Simplex/FileTransfer/Description.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Description -> File description: YAML encoding/decoding, validation, URI format, and replica optimization. +> File description: YAML encoding/decoding, validation, URI format, and replica optimization. A file description maps a file's chunks to data packets stored on XFTP routers — each chunk corresponds to one data packet, and each data packet may have multiple replicas on different routers. **Source**: [`FileTransfer/Description.hs`](../../../../src/Simplex/FileTransfer/Description.hs) @@ -24,7 +24,7 @@ The top-level `FileDescription` has a `chunkSize` field. Individual chunk replic ### 4. 
YAML encoding groups replicas by router -`groupReplicasByServer` groups all chunk replicas by their router, producing `FileServerReplica` records. This is the serialization format — replicas are organized by router, not by chunk. The parser (`foldReplicasToChunks`) reverses this grouping back to per-chunk replica lists. +`groupReplicasByServer` groups all data packet replicas by their router, producing `FileServerReplica` records. This is the serialization format — replicas are organized by router, not by chunk. The parser (`foldReplicasToChunks`) reverses this grouping back to per-chunk replica lists. ### 5. FileDescriptionURI uses query-string encoding @@ -40,4 +40,4 @@ Two limits exist: `maxFileSize = 1GB` (soft limit, checked by CLI client) and `m ### 8. Redirect file descriptions -A `FileDescription` can contain a `redirect` field pointing to another file's metadata (`RedirectFileInfo` with size and digest). The outer description downloads an encrypted YAML file that, once decrypted, yields the actual `FileDescription` for the real file. This adds one level of indirection for privacy — the relay routers hosting the redirect don't know the actual file's routers. +A `FileDescription` can contain a `redirect` field pointing to another file's metadata (`RedirectFileInfo` with size and digest). The outer description downloads an encrypted YAML data packet that, once decrypted, yields the actual `FileDescription` for the real file. This adds one level of indirection for privacy — the routers hosting the redirect data packet don't know the actual file's routers. diff --git a/spec/modules/Simplex/FileTransfer/Protocol.md b/spec/modules/Simplex/FileTransfer/Protocol.md index 4bbcb87262..8b99e6849d 100644 --- a/spec/modules/Simplex/FileTransfer/Protocol.md +++ b/spec/modules/Simplex/FileTransfer/Protocol.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Protocol -> XFTP protocol types, commands, responses, and credential verification. 
+> XFTP protocol types, commands, command results, and credential verification. **Source**: [`FileTransfer/Protocol.hs`](../../../../src/Simplex/FileTransfer/Protocol.hs) @@ -15,9 +15,9 @@ This asymmetry means FNEW and PING bypass the standard entity-lookup path entirely — they are handled as separate `XFTPRequest` constructors (`XFTPReqNew`, `XFTPReqPing`). -### 2. BLOCKED response downgraded to AUTH for old clients +### 2. BLOCKED result downgraded to AUTH for old clients -`encodeProtocol` checks the protocol version: if `v < blockedFilesXFTPVersion`, a `BLOCKED` response is encoded as `AUTH` instead. This prevents old clients that don't understand `BLOCKED` from receiving an unknown error type. The blocking information is silently lost for these clients. +`encodeProtocol` checks the protocol version: if `v < blockedFilesXFTPVersion`, a `BLOCKED` result is encoded as `AUTH` instead. This prevents old clients that don't understand `BLOCKED` from receiving an unknown error type. The blocking information is silently lost for these clients. ### 3. Single-transmission batch enforcement diff --git a/spec/modules/Simplex/FileTransfer/Server.md b/spec/modules/Simplex/FileTransfer/Server.md index 99e17a4277..cb64adad22 100644 --- a/spec/modules/Simplex/FileTransfer/Server.md +++ b/spec/modules/Simplex/FileTransfer/Server.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Server -> XFTP router: HTTP/2 request handling, handshake state machine, file operations, and statistics. +> XFTP router: HTTP/2 request handling, handshake state machine, data packet operations, and statistics. 
**Source**: [`FileTransfer/Server.hs`](../../../../src/Simplex/FileTransfer/Server.hs) @@ -10,8 +10,8 @@ The XFTP router runs several concurrent threads via `raceAny_`: | Thread | Purpose | |--------|---------| -| `runServer` | HTTP/2 router accepting file transfer requests | -| `expireFiles` | Periodic file expiration with throttling | +| `runServer` | HTTP/2 router accepting data packet transfer requests | +| `expireFiles` | Periodic data packet expiration with throttling | | `logServerStats` | Periodic stats flush to CSV | | `savePrometheusMetrics` | Periodic Prometheus metrics dump | | `runCPServer` | Control port for admin commands | @@ -29,15 +29,15 @@ Web clients can re-send hello (`xftp-web-hello` header) even in `HandshakeSent` ### 2. Web identity proof via challenge-response -When a web client sends a hello with a non-empty body, the router parses an `XFTPClientHello` containing a `webChallenge`. The router signs `challenge <> sessionId` with its long-term key and includes the signature in the handshake response. This proves router identity to web clients that cannot verify TLS certificates directly. +When a web client sends a hello with a non-empty body, the router parses an `XFTPClientHello` containing a `webChallenge`. The router signs `challenge <> sessionId` with its long-term key and includes the signature in the handshake result. This proves router identity to web clients that cannot verify TLS certificates directly. ### 3. skipCommitted drains request body on re-upload -If `receiveServerFile` detects the file is already uploaded (`filePath` TVar is `Just`), it cannot simply ignore the request body — the HTTP/2 client would block waiting for the router to consume it. Instead, `skipCommitted` reads and discards the entire body in `fileBlockSize` increments, returning `FROk` when complete. This makes FPUT idempotent from the client's perspective. 
+If `receiveServerFile` detects the data packet is already uploaded (`filePath` TVar is `Just`), it cannot simply ignore the request body — the HTTP/2 client would block waiting for the router to consume it. Instead, `skipCommitted` reads and discards the entire body in `fileBlockSize` increments, returning `FROk` when complete. This makes FPUT idempotent from the client's perspective. ### 4. Atomic quota reservation with rollback -`receiveServerFile` uses `stateTVar` to atomically check and reserve storage quota before receiving the file. If the upload fails (timeout, size mismatch, IO error), the reserved size is subtracted from `usedStorage` and the partial file is deleted on the router. This prevents failed uploads from permanently consuming quota. +`receiveServerFile` uses `stateTVar` to atomically check and reserve storage quota before receiving the data packet. If the upload fails (timeout, size mismatch, IO error), the reserved size is subtracted from `usedStorage` and the partial data packet is deleted on the router. This prevents failed uploads from permanently consuming quota. ### 5. retryAdd generates new IDs on collision @@ -45,7 +45,7 @@ If `receiveServerFile` detects the file is already uploaded (`filePath` TVar is ### 6. Timing attack mitigation on entity lookup -`verifyXFTPTransmission` calls `dummyVerifyCmd` (imported from SMP router) when a file entity is not found. This equalizes response timing to prevent attackers from distinguishing "entity doesn't exist" from "signature invalid" based on latency. +`verifyXFTPTransmission` calls `dummyVerifyCmd` (imported from SMP router) when a data packet entity is not found. This equalizes result timing to prevent attackers from distinguishing "entity doesn't exist" from "signature invalid" based on latency. ### 7. BLOCKED vs EntityOff distinction @@ -56,30 +56,30 @@ When `verifyXFTPTransmission` reads `fileStatus`: `EntityOff` is treated identically to missing entities for information-hiding purposes. 
-### 8. blockServerFile deletes the physical file +### 8. blockServerFile deletes the stored data packet -Despite the name suggesting it only marks a file as blocked, `blockServerFile` also deletes the physical file from disk via `deleteOrBlockServerFile_`. The `deleted = True` parameter to `blockFile` in the store adjusts `usedStorage`. A blocked file returns `BLOCKED` errors on access but has no data on disk. +Despite the name suggesting it only marks a data packet as blocked, `blockServerFile` also deletes the stored data packet from disk via `deleteOrBlockServerFile_`. The `deleted = True` parameter to `blockFile` in the store adjusts `usedStorage`. A blocked data packet returns `BLOCKED` errors on access but has no data on disk. ### 9. Stats restore overrides counts from live store -`restoreServerStats` loads stats from the backup file but overrides `_filesCount` and `_filesSize` with values computed from the live file store (TMap size and `usedStorage` TVar). If the backup values differ, warnings are logged. This handles cases where files were expired or deleted while the router was down. +`restoreServerStats` loads stats from the backup file but overrides `_filesCount` and `_filesSize` with values computed from the live file store (TMap size and `usedStorage` TVar). If the backup values differ, warnings are logged. This handles cases where data packets were expired or deleted while the router was down. -### 10. File expiration with configurable throttling +### 10. Data packet expiration with configurable throttling -`expireServerFiles` accepts an optional `itemDelay` (100ms when called from the periodic thread, `Nothing` at router startup). Between each file check, `threadDelay itemDelay` prevents expiration from monopolizing IO. At startup, files are expired without delay to clean up quickly. +`expireServerFiles` accepts an optional `itemDelay` (100ms when called from the periodic thread, `Nothing` at router startup). 
Between each data packet check, `threadDelay itemDelay` prevents expiration from monopolizing IO. At startup, data packets are expired without delay to clean up quickly. ### 11. Stats log aligns to wall-clock midnight `logServerStats` computes an `initialDelay` to align the first stats flush to `logStatsStartTime` (default 0 = midnight UTC). If the target time already passed today, it adds 86400 seconds for the next day. Subsequent flushes use exact `logInterval` cadence. -### 12. Physical file deleted before store cleanup +### 12. Stored data packet deleted before store cleanup -`deleteOrBlockServerFile_` removes the physical file first, then runs the STM store action. If the process crashes between these two operations, the store will reference a file that no longer exists on disk. The next access would return `AUTH` (file not found on disk), and eventual expiration would clean the store entry. +`deleteOrBlockServerFile_` removes the stored data packet first, then runs the STM store action. If the process crashes between these two operations, the store will reference a data packet that no longer exists on disk. The next access would return `AUTH` (data packet not found on disk), and eventual expiration would clean the store entry. ### 13. SNI-dependent CORS and web serving CORS headers require both `sniUsed = True` and `addCORSHeaders = True` in the transport config. Static web page serving is enabled when `sniUsed = True`. Non-SNI connections (direct TLS without hostname) skip both CORS and web serving. This separates the web-facing and protocol-facing behaviors of the same router port. -### 14. Control port file operations use recipient index +### 14. Control port data packet operations use recipient index -`CPDelete` and `CPBlock` commands look up files via `getFile fs SFRecipient fileId`, meaning the control port takes a recipient ID, not a sender ID. This is the ID visible to recipients and contained in file descriptions. 
+`CPDelete` and `CPBlock` commands look up data packets via `getFile fs SFRecipient fileId`, meaning the control port takes a recipient ID, not a sender ID. This is the ID visible to recipients and contained in file descriptions. diff --git a/spec/modules/Simplex/FileTransfer/Server/Env.md b/spec/modules/Simplex/FileTransfer/Server/Env.md index 0b3bba3ff3..161bcd4877 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Env.md +++ b/spec/modules/Simplex/FileTransfer/Server/Env.md @@ -8,17 +8,17 @@ ### 1. Startup storage accounting with quota warning -`newXFTPServerEnv` computes `usedStorage` by summing file sizes from the in-memory store at startup. If the computed usage exceeds the configured `fileSizeQuota`, a warning is logged but the router still starts. This allows the router to come up even if it's over quota (e.g., after a quota reduction), relying on expiration to reclaim space. +`newXFTPServerEnv` computes `usedStorage` by summing data packet sizes from the in-memory store at startup. If the computed usage exceeds the configured `fileSizeQuota`, a warning is logged but the router still starts. This allows the router to come up even if it's over quota (e.g., after a quota reduction), relying on expiration to reclaim space. -### 2. XFTPRequest ADT separates new files from commands +### 2. XFTPRequest ADT separates new data packets from commands `XFTPRequest` has three constructors: -- `XFTPReqNew`: file creation (carries `FileInfo`, recipient keys, optional basic auth) -- `XFTPReqCmd`: command on an existing file (carries file ID, `FileRec`, and the command) +- `XFTPReqNew`: data packet creation (carries `FileInfo`, recipient keys, optional basic auth) +- `XFTPReqCmd`: command on an existing data packet (carries file ID, `FileRec`, and the command) - `XFTPReqPing`: health check -This separation occurs after credential verification in `Server.hs`. `XFTPReqNew` bypasses entity lookup entirely since the file doesn't exist yet.
+This separation occurs after credential verification in `Server.hs`. `XFTPReqNew` bypasses entity lookup entirely since the data packet doesn't exist yet. ### 3. fileTimeout for upload deadline -`fileTimeout` in `XFTPServerConfig` sets the maximum time allowed for a single file upload (FPUT). The router wraps the receive operation in `timeout fileTimeout`. Default is 5 minutes (for 4MB chunks). This prevents slow or stalled uploads from holding router resources indefinitely. +`fileTimeout` in `XFTPServerConfig` sets the maximum time allowed for a single data packet upload (FPUT). The router wraps the receive operation in `timeout fileTimeout`. Default is 5 minutes (for 4MB data packets). This prevents slow or stalled uploads from holding router resources indefinitely. diff --git a/spec/modules/Simplex/FileTransfer/Server/Main.md b/spec/modules/Simplex/FileTransfer/Server/Main.md index c892e6bf5d..2a5c782886 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Main.md +++ b/spec/modules/Simplex/FileTransfer/Server/Main.md @@ -10,7 +10,7 @@ | Constant | Value | Purpose | |----------|-------|---------| -| `fileIdSize` | 16 bytes | Random file/recipient ID length | +| `fileIdSize` | 16 bytes | Random data packet/recipient ID length | | `fileTimeout` | 5 minutes | Maximum upload duration per chunk | | `logStatsInterval` | 86400s (daily) | Stats CSV flush interval | | `logStatsStartTime` | 0 (midnight UTC) | First stats flush time-of-day | diff --git a/spec/modules/Simplex/FileTransfer/Server/Stats.md b/spec/modules/Simplex/FileTransfer/Server/Stats.md index 7eb2ad47bb..30b04c496f 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Stats.md +++ b/spec/modules/Simplex/FileTransfer/Server/Stats.md @@ -16,4 +16,4 @@ The `strP` parser uses `opt` for newer fields, defaulting missing fields to 0. T ### 3. PeriodStats for download tracking -`filesDownloaded` uses `PeriodStats` (not a simple `IORef Int`) to track unique file downloads over time periods (day/week/month).
This enables the CSV stats log to report distinct files downloaded per period, not just total download count. +`filesDownloaded` uses `PeriodStats` (not a simple `IORef Int`) to track unique data packet downloads over time periods (day/week/month). This enables the CSV stats log to report distinct data packets downloaded per period, not just total download count. diff --git a/spec/modules/Simplex/FileTransfer/Server/Store.md b/spec/modules/Simplex/FileTransfer/Server/Store.md index f2ded441eb..bbcc419f61 100644 --- a/spec/modules/Simplex/FileTransfer/Server/Store.md +++ b/spec/modules/Simplex/FileTransfer/Server/Store.md @@ -16,24 +16,24 @@ The file store maintains two indices: `files :: TMap SenderId FileRec` (by sende ### 3. Storage accounting on upload completion -`setFilePath` adds the file size to `usedStorage` and records the file path in the `filePath` TVar. However, during normal FPUT handling, `Server.hs` does NOT call `setFilePath` — it directly writes `filePath` via `writeTVar`. The quota reservation in `Server.hs` (`stateTVar` on `usedStorage`) is the sole `usedStorage` increment during upload. `setFilePath` IS called during store log replay (`StoreLog.hs`), where it increments `usedStorage`; `newXFTPServerEnv` then overwrites with the correct value computed from the live store. +`setFilePath` adds the data packet size to `usedStorage` and records the file path in the `filePath` TVar. However, during normal FPUT handling, `Server.hs` does NOT call `setFilePath` — it directly writes `filePath` via `writeTVar`. The quota reservation in `Server.hs` (`stateTVar` on `usedStorage`) is the sole `usedStorage` increment during upload. `setFilePath` IS called during store log replay (`StoreLog.hs`), where it increments `usedStorage`; `newXFTPServerEnv` then overwrites with the correct value computed from the live store. ### 4. 
deleteFile removes all recipients atomically -`deleteFile` atomically removes the sender entry from `files`, all recipient entries from the global `recipients` TMap, and unconditionally subtracts the file size from `usedStorage` (regardless of whether the file was actually uploaded). The entire operation runs in a single STM transaction. +`deleteFile` atomically removes the sender entry from `files`, all recipient entries from the global `recipients` TMap, and unconditionally subtracts the data packet size from `usedStorage` (regardless of whether the data packet was actually uploaded). The entire operation runs in a single STM transaction. ### 5. RoundedSystemTime for privacy-preserving expiration -File timestamps use `RoundedFileTime` which is `RoundedSystemTime 3600` — system time rounded to 1-hour precision. This means files created within the same hour have identical timestamps. An observer with access to the store cannot determine exact file creation times, only the hour. +Data packet timestamps use `RoundedFileTime` which is `RoundedSystemTime 3600` — system time rounded to 1-hour precision. This means data packets created within the same hour have identical timestamps. An observer with access to the store cannot determine exact data packet creation times, only the hour. ### 6. expiredFilePath returns path only if expired -`expiredFilePath` returns `STM (Maybe (Maybe FilePath))`. The outer `Maybe` is `Nothing` when the file doesn't exist or isn't expired; the inner `Maybe` is the file path (present only if the file was uploaded). The expiration check adds `fileTimePrecision` (one hour) to the creation timestamp before comparing, providing a grace period. The caller uses the inner path to decide whether to also delete the physical file. +`expiredFilePath` returns `STM (Maybe (Maybe FilePath))`. The outer `Maybe` is `Nothing` when the data packet doesn't exist or isn't expired; the inner `Maybe` is the file path (present only if the data packet was uploaded). 
The expiration check adds `fileTimePrecision` (one hour) to the creation timestamp before comparing, providing a grace period. The caller uses the inner path to decide whether to also delete the stored data packet. ### 7. ackFile removes single recipient -`ackFile` removes a specific recipient from both the global `recipients` TMap and the per-file `recipientIds` Set. Unlike `deleteFile` which removes the entire file, `ackFile` only removes one recipient's access. The file and other recipients remain intact. +`ackFile` removes a specific recipient from both the global `recipients` TMap and the per-file `recipientIds` Set. Unlike `deleteFile` which removes the entire data packet, `ackFile` only removes one recipient's access. The data packet and other recipients remain intact. ### 8. blockFile conditional storage adjustment -`blockFile` takes a `deleted :: Bool` parameter. When `True` (file blocked with physical deletion), it subtracts the file size from `usedStorage`. When `False` (block without deletion), storage is unchanged. This allows blocking without physical deletion for audit purposes. Currently, both the router's `blockServerFile` and the store log replay path pass `True`. +`blockFile` takes a `deleted :: Bool` parameter. When `True` (data packet blocked with physical deletion), it subtracts the data packet size from `usedStorage`. When `False` (block without deletion), storage is unchanged. This allows blocking without physical deletion for audit purposes. Currently, both the router's `blockServerFile` and the store log replay path pass `True`. 
diff --git a/spec/modules/Simplex/FileTransfer/Server/StoreLog.md b/spec/modules/Simplex/FileTransfer/Server/StoreLog.md index 6549c3666b..5514cbd272 100644 --- a/spec/modules/Simplex/FileTransfer/Server/StoreLog.md +++ b/spec/modules/Simplex/FileTransfer/Server/StoreLog.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Server.StoreLog -> Append-only store log for XFTP router file operations with error-resilient replay and compaction. +> Append-only store log for XFTP router data packet operations with error-resilient replay and compaction. **Source**: [`FileTransfer/Server/StoreLog.hs`](../../../../../src/Simplex/FileTransfer/Server/StoreLog.hs) @@ -24,10 +24,10 @@ ### 5. Log entry types track operation lifecycle -Six log entry types capture the complete file lifecycle: -- `AddFile`: file creation with sender ID, file info, timestamp, and status +Six log entry types capture the complete data packet lifecycle: +- `AddFile`: data packet creation with sender ID, file info, timestamp, and status - `AddRecipients`: recipient registration (batched as `NonEmpty FileRecipient`) with sender ID association - `PutFile`: upload completion with file path -- `DeleteFile`: file deletion by sender ID +- `DeleteFile`: data packet deletion by sender ID - `AckFile`: single recipient acknowledgment -- `BlockFile`: file blocking with blocking info +- `BlockFile`: data packet blocking with blocking info diff --git a/spec/modules/Simplex/FileTransfer/Types.md b/spec/modules/Simplex/FileTransfer/Types.md index 14abc7b214..0cd889dcf0 100644 --- a/spec/modules/Simplex/FileTransfer/Types.md +++ b/spec/modules/Simplex/FileTransfer/Types.md @@ -1,6 +1,6 @@ # Simplex.FileTransfer.Types -> Agent-side file transfer types: receive/send file records, status state machines, chunk/replica structures. +> Agent-side file transfer types: receive/send file records, status state machines, and chunk/replica structures. Chunks are the agent's view of file pieces; each chunk maps to a data packet on an XFTP router. 
**Source**: [`FileTransfer/Types.hs`](../../../../src/Simplex/FileTransfer/Types.hs) @@ -24,4 +24,4 @@ ### 5. authTagSize = 16 bytes -`authTagSize` is defined as `fromIntegral C.authTagSize` (16 bytes). This is the AES-GCM authentication tag appended to the encrypted file stream. It is included in the payload size calculation (`payloadSize = fileSize' + fileSizeLen + authTagSize`), which is then passed to `prepareChunkSizes` to determine chunk allocation. +`authTagSize` is defined as `fromIntegral C.authTagSize` (16 bytes). This is the AES-GCM authentication tag appended to the encrypted file stream. It is included in the payload size calculation (`payloadSize = fileSize' + fileSizeLen + authTagSize`), which is then passed to `prepareChunkSizes` to determine data packet allocation. diff --git a/spec/modules/Simplex/Messaging/Agent.md b/spec/modules/Simplex/Messaging/Agent.md index e2cac06384..1b87694169 100644 --- a/spec/modules/Simplex/Messaging/Agent.md +++ b/spec/modules/Simplex/Messaging/Agent.md @@ -38,9 +38,9 @@ The subscriber thread reads batches from `msgQ` (filled by SMP protocol clients) **Batch UP notification accumulation.** Successful subscription confirmations (`processSubOk`) append to a shared `upConnIds` TVar across the batch. A single `UP` event is emitted after all transmissions are processed, not per-transmission. Similarly, `serviceRQs` accumulates service-associated receive queues for batch processing via `processRcvServiceAssocs`. -**Double validation for subscription results.** `isPendingSub` checks two conditions atomically: the queue must be in the pending map AND the client session must still be active (`activeClientSession`). If either fails, the result is counted as ignored (statistics only). This handles the race where a subscription response arrives after reconnection. 
+**Double validation for subscription results.** `isPendingSub` checks two conditions atomically: the queue must be in the pending map AND the client session must still be active (`activeClientSession`). If either fails, the result is counted as ignored (statistics only). This handles the race where a subscription result arrives after reconnection. -**SUB response piggybacking MSG.** When a SUB response arrives as `Right msg@SMP.MSG {}`, the connection is marked UP (via `processSubOk`) AND the MSG is processed. The UP notification happens even if the MSG processing fails — the connection is up regardless. +**SUB result piggybacking MSG.** When a SUB result arrives as `Right msg@SMP.MSG {}`, the connection is marked UP (via `processSubOk`) AND the MSG is processed. The UP notification happens even if the MSG processing fails — the connection is up regardless. **subQ overflow to pendingMsgs.** `processSMP` writes events to `subQ` (bounded TBQueue) but when full, events go into a `pendingMsgs` TVar. After processing, pending messages are drained in reverse order (LIFO). This prevents the message processing thread from blocking on a full queue, which would stall the entire SMP client. diff --git a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md index ac591c1927..ae4c803e32 100644 --- a/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md +++ b/spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md @@ -48,7 +48,7 @@ This is the mechanism for time-scheduled subscription health checks. When the notification router returns `AUTH` for a subscription check, the subscription is not simply marked as failed — it is fully recreated from scratch by resetting to `NSASMP NSASmpKey` state. This handles the case where the notification router has lost its subscription state (restart, data loss). The SMP worker is kicked to re-establish notifier credentials. 
-Successful check responses with statuses not in `subscribeNtfStatuses` also trigger recreation via `recreateNtfSub`. +Successful check results with statuses not in `subscribeNtfStatuses` also trigger recreation via `recreateNtfSub`. ### 5. deleteToken two-phase with restart survival diff --git a/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md b/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md index 0274de59db..68337208ce 100644 --- a/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md +++ b/spec/modules/Simplex/Messaging/Agent/TSessionSubs.md @@ -16,7 +16,7 @@ Service subscriptions (aggregate, router-managed) and queue subscriptions (indiv The central invariant: a subscription is only active if it was confirmed on the *current* TLS session. Every function that promotes subscriptions to active (`addActiveSub'`, `batchAddActiveSubs`, `setActiveServiceSub`) checks `Just sessId == sessId'` (stored session ID). On mismatch, the subscription goes to pending instead — silently, with no error. -This means subscription RPCs that succeed but return after a reconnect are safely caught: the response carries the old session ID, which won't match the new one stored by `setSessionId`. +This means subscription RPCs that succeed but return after a reconnect are safely caught: the result carries the old session ID, which won't match the new one stored by `setSessionId`. ## setSessionId — silent demotion on reconnect diff --git a/spec/modules/Simplex/Messaging/Client.md b/spec/modules/Simplex/Messaging/Client.md index 35fee92262..f23d5a0056 100644 --- a/spec/modules/Simplex/Messaging/Client.md +++ b/spec/modules/Simplex/Messaging/Client.md @@ -8,37 +8,37 @@ ## Overview -This module implements the client side of the `Protocol` typeclass — connecting to SMP routers, sending commands, receiving responses, and managing connection lifecycle. It is generic over `Protocol v err msg`, instantiated for SMP as `SMPClient` (= `ProtocolClient SMPVersion ErrorType BrokerMsg`). 
The SMP proxy protocol (PRXY/PFWD/RFWD) is also implemented here. +This module implements the client side of the `Protocol` typeclass — connecting to SMP routers, sending commands, receiving command results, and managing connection lifecycle. It is generic over `Protocol v err msg`, instantiated for SMP as `SMPClient` (= `ProtocolClient SMPVersion ErrorType BrokerMsg`). The SMP proxy protocol (PRXY/PFWD/RFWD) is also implemented here. ## Four concurrent threads — teardown semantics `getProtocolClient` launches four threads via `raceAny_`: - `send`: reads from `sndQ` (TBQueue) and writes to TLS - `receive`: reads from TLS and writes to `rcvQ` (TBQueue), updates `lastReceived` -- `process`: reads from `rcvQ` and dispatches to response vars or `msgQ` +- `process`: reads from `rcvQ` and dispatches to result vars or `msgQ` - `monitor`: periodic ping loop (only when `smpPingInterval > 0`) When ANY thread exits (normally or exceptionally), `raceAny_` cancels all others. `E.finally` ensures the `disconnected` callback always fires. Implication: a single stuck thread (e.g., TLS read blocked on a half-open connection) keeps the entire client alive until `monitor` drops it. There is no per-thread health check — liveness depends entirely on the monitor's timeout logic. ## Request lifecycle and leak risk -`mkRequest` inserts a `Request` into `sentCommands` TMap BEFORE the transmission is written to TLS. If the TLS write fails silently or the connection drops before the response, the entry remains in `sentCommands` until the monitor's timeout counter exceeds `maxCnt` and drops the entire client. There is no per-request cleanup on send failure — individual request entries are only removed by `processMsg` (on response) or by `getResponse` timeout (which sets `pending = False` but doesn't remove the entry). +`mkRequest` inserts a `Request` into `sentCommands` TMap BEFORE the transmission is written to TLS. 
If the TLS write fails silently or the connection drops before the result arrives, the entry remains in `sentCommands` until the monitor's timeout counter exceeds `maxCnt` and drops the entire client. There is no per-request cleanup on send failure — individual request entries are only removed by `processMsg` (on result) or by `getResponse` timeout (which sets `pending = False` but doesn't remove the entry). ## getResponse — pending flag race contract -This is the core concurrency contract between timeout and response processing: +This is the core concurrency contract between timeout and result processing: 1. `getResponse` waits with `timeout` for `takeTMVar responseVar` 2. Regardless of result, atomically sets `pending = False` and tries `tryTakeTMVar` again (see comment on `getResponse`) -3. In `processMsg`, when a response arrives for a request where `pending` is already `False` (timeout won), `wasPending` is `False` and the response is forwarded to `msgQ` as `STResponse` rather than discarded +3. In `processMsg`, when a result arrives for a request where `pending` is already `False` (timeout won), `wasPending` is `False` and the result is forwarded to `msgQ` as `STResponse` rather than discarded -The double-check pattern (`swapTVar pending False` + `tryTakeTMVar`) handles the race window where a response arrives between timeout firing and `pending` being set to `False`. Without this, responses arriving in that gap would be silently lost. +The double-check pattern (`swapTVar pending False` + `tryTakeTMVar`) handles the race window where a result arrives between timeout firing and `pending` being set to `False`. Without this, results arriving in that gap would be silently lost. -`timeoutErrorCount` is reset to 0 in three places: in `getResponse` when a response arrives, in `receive` on every TLS read, and the monitor uses this count to decide when to drop the connection. 
+`timeoutErrorCount` is reset to 0 in `getResponse` when a result arrives and in `receive` on every TLS read. The monitor reads this count to decide when to drop the connection. -## processMsg — router events vs expired responses +## processMsg — router events vs expired results -When `corrId` is empty, the message is an `STEvent` (router-initiated). When non-empty and the request was already expired (`wasPending` is `False`), the response becomes `STResponse` — not discarded, but forwarded to `msgQ` with the original command context. Entity ID mismatch is `STUnexpectedError`. +When `corrId` is empty, the message is an `STEvent` (router-initiated). When non-empty and the request was already expired (`wasPending` is `False`), the result becomes `STResponse` — not discarded, but forwarded to `msgQ` with the original command context. Entity ID mismatch is `STUnexpectedError`. ## nonBlockingWriteTBQueue — fork on full @@ -46,13 +46,13 @@ If `tryWriteTBQueue` returns `False`, a new thread is forked for the blocking wr ## Batch commands do not expire -See comment on `sendBatch`. Batched commands are written with `Nothing` as the request parameter — the send thread skips the `pending` flag check. Individual commands use `Just r` and the send thread checks `pending` after dequeue. The coupling: if the router stops responding, batched commands can block the send queue indefinitely since they have no timeout-based expiry. +See comment on `sendBatch`. Batched commands are written with `Nothing` as the request parameter — the send thread skips the `pending` flag check. Individual commands use `Just r` and the send thread checks `pending` after dequeue. The coupling: if the router stops returning results, batched commands can block the send queue indefinitely since they have no timeout-based expiry. ## monitor — quasi-periodic adaptive ping The ping loop sleeps for `smpPingInterval`, then checks elapsed time since `lastReceived`. 
If significant time remains in the interval (> 1 second), it re-sleeps for just the remaining time rather than sending a ping. This means ping frequency adapts to actual receive activity — frequent receives suppress pings. -Pings are only sent when `sendPings` is `True`, set by `enablePings` (called from `subscribeSMPQueue`, `subscribeSMPQueues`, `subscribeSMPQueueNotifications`, `subscribeSMPQueuesNtfs`, `subscribeService`). The client drops the connection when `maxCnt` commands have timed out in sequence AND at least `recoverWindow` (15 minutes) has passed since the last received response. +Pings are only sent when `sendPings` is `True`, set by `enablePings` (called from `subscribeSMPQueue`, `subscribeSMPQueues`, `subscribeSMPQueueNotifications`, `subscribeSMPQueuesNtfs`, `subscribeService`). The client drops the connection when `maxCnt` commands have timed out in sequence AND at least `recoverWindow` (15 minutes) has passed since the last received result. ## clientCorrId — dual-purpose random values @@ -68,7 +68,7 @@ See comment above `proxySMPCommand` for the 9 error scenarios (0-9) mapping each ## forwardSMPTransmission — proxy-side forwarding -Used by the proxy router to forward `RFWD` to the destination relay. Uses `cbEncryptNoPad`/`cbDecryptNoPad` (no padding) with the session secret from the proxy-relay connection. Response nonce is `reverseNonce` of the request nonce. +Used by the proxy router to forward `RFWD` to the destination relay. Uses `cbEncryptNoPad`/`cbDecryptNoPad` (no padding) with the session secret from the proxy-relay connection. Result nonce is `reverseNonce` of the request nonce. ## authTransmission — dual auth with service signature @@ -82,4 +82,4 @@ The service signature is only added when the entity authenticator is non-empty. ## writeSMPMessage — router-side event injection -`writeSMPMessage` writes directly to `msgQ` as `STEvent`, bypassing the entire command/response pipeline. 
This is used by the router to inject MSG events into the subscription response path. +`writeSMPMessage` writes directly to `msgQ` as `STEvent`, bypassing the entire command/result pipeline. This is used by the router to inject MSG events into the subscription result path. diff --git a/spec/modules/Simplex/Messaging/Client/Agent.md b/spec/modules/Simplex/Messaging/Client/Agent.md index 30fbe2ac23..7c62dce82f 100644 --- a/spec/modules/Simplex/Messaging/Client/Agent.md +++ b/spec/modules/Simplex/Messaging/Client/Agent.md @@ -45,9 +45,9 @@ When `connectClient` calls `newSMPClient` and it fails, the error is stored with Both `smpSubscribeQueues` and `smpSubscribeService` validate `activeClientSession` AFTER the subscription RPC completes, before committing results to state. If the session changed during the RPC (client reconnected), results are discarded and reconnection is triggered. This is optimistic execution with post-hoc validation — the RPC may succeed but its results are thrown away if the session is stale. -## groupSub — subscription response classification +## groupSub — subscription result classification -Each queue response is classified by a `foldr` over the (subs, responses) zip: +Each queue result is classified by a `foldr` over the (subs, results) zip: - **Success with matching serviceId**: counted as service-subscribed (`sQs` list) - **Success without matching serviceId**: counted as queue-only (`qOks` list with SessionId and key) diff --git a/spec/modules/Simplex/Messaging/Notifications/Protocol.md b/spec/modules/Simplex/Messaging/Notifications/Protocol.md index fb718fd804..6347aef117 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Protocol.md +++ b/spec/modules/Simplex/Messaging/Notifications/Protocol.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Notifications.Protocol -> NTF protocol entities, commands, responses, and wire encoding for the notification system. 
+> NTF protocol entities, commands, command results, and wire encoding for the notification system. **Source**: [`Notifications/Protocol.hs`](../../../../../src/Simplex/Messaging/Notifications/Protocol.hs) @@ -16,7 +16,7 @@ | PING | No | Must be empty | | All others | Yes | Must be present | -For responses, the rule inverts: `NRTknId`, `NRSubId`, and `NRPong` must NOT have entity IDs (they are returned before/without entity context), while `NRErr` optionally has one (errors can occur with or without entity context). +For command results, the rule inverts: `NRTknId`, `NRSubId`, and `NRPong` must NOT have entity IDs (they are returned before/without entity context), while `NRErr` optionally has one (errors can occur with or without entity context). ### 2. PNMessageData semicolon separator @@ -24,7 +24,7 @@ For responses, the rule inverts: `NRTknId`, `NRSubId`, and `NRPong` must NOT hav ### 3. NTInvalid reason is version-gated -When encoding `NRTkn` responses, the `NTInvalid` reason is only included if the negotiated protocol version is >= `invalidReasonNTFVersion` (v3). Older clients receive `NTInvalid Nothing`. This prevents parse failures on clients that don't understand the reason field. +When encoding `NRTkn` results, the `NTInvalid` reason is only included if the negotiated protocol version is >= `invalidReasonNTFVersion` (v3). Older clients receive `NTInvalid Nothing`. This prevents parse failures on clients that don't understand the reason field. ### 4. subscribeNtfStatuses migration invariant @@ -46,7 +46,7 @@ Token status `NTInvalid` allows subscription commands (SNEW, SCHK, SDEL), which Both `smpP` and `strP` for `SMPQueueNtf` apply `updateSMPServerHosts` to the parsed SMP server. This normalizes router host addresses on deserialization, ensuring consistent comparison even if the on-wire format uses different host representations. -### 9. NRTknId response tag comment +### 9. 
NRTknId result tag comment The `NRTknId_` tag encodes as `"IDTKN"` with a source comment: "it should be 'TID', 'SID'". This indicates a naming inconsistency that was preserved for backward compatibility — the tag names don't follow the pattern of other NTF protocol tags. diff --git a/spec/modules/Simplex/Messaging/Notifications/Server.md b/spec/modules/Simplex/Messaging/Notifications/Server.md index d77a30a00a..b87f64ce87 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server.md @@ -22,7 +22,7 @@ Each client connection spawns `receive`, `send`, and `client` threads via `raceA ### 1. Timing attack mitigation on entity lookup -When `verifyNtfTransmission` encounters an AUTH error (entity not found), it calls `dummyVerifyCmd` to equalize response timing before returning the error. This prevents attackers from distinguishing "entity doesn't exist" from "signature invalid" based on response latency. +When `verifyNtfTransmission` encounters an AUTH error (entity not found), it calls `dummyVerifyCmd` to equalize result timing before returning the error. This prevents attackers from distinguishing "entity doesn't exist" from "signature invalid" based on result latency. ### 2. TNEW idempotent re-registration @@ -74,9 +74,9 @@ Cron notification interval has a hard minimum of 20 minutes. `TCRN 0` disables c `resubscribe` uses `mapConcurrently` to resubscribe to all known SMP routers in parallel. Within each router, subscriptions are paginated via `subscribeLoop` using cursor-based pagination (`afterSubId_`). -### 11. receive separates error responses from commands +### 11. receive separates error results from commands -The `receive` function processes incoming transmissions and partitions results: malformed/unauthorized requests are written directly to `sndQ` as error responses, while valid commands go to `rcvQ` for processing. 
This ensures protocol errors get immediate responses without competing for the command processing queue. +The `receive` function processes incoming transmissions and partitions results: malformed/unauthorized requests are written directly to `sndQ` as error results, while valid commands go to `rcvQ` for processing. This ensures protocol errors get immediate results without competing for the command processing queue. ### 12. Maintenance mode saves state then exits immediately diff --git a/spec/modules/Simplex/Messaging/Notifications/Transport.md b/spec/modules/Simplex/Messaging/Notifications/Transport.md index 9b94d7e0d9..dfc4cdb5e1 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Transport.md +++ b/spec/modules/Simplex/Messaging/Notifications/Transport.md @@ -17,7 +17,7 @@ Two feature gates exist in the NTF protocol: | Version | Feature | Effect | |---------|---------|--------| | v2 (`authBatchCmdsNTFVersion`) | Auth key exchange + batching | `authPubKey` sent in handshake, `implySessId` and `batch` enabled | -| v3 (`invalidReasonNTFVersion`) | Token invalid reasons | `NTInvalid` responses include the reason enum | +| v3 (`invalidReasonNTFVersion`) | Token invalid reasons | `NTInvalid` results include the reason enum | Pre-v2 connections have no command encryption or batching — commands are sent in plaintext within TLS. @@ -27,7 +27,7 @@ Pre-v2 connections have no command encryption or batching — commands are sent ### 4. Block size -NTF uses a 512-byte block size (`ntfBlockSize`), significantly smaller than SMP. This is sufficient because NTF protocol commands (TNEW, SNEW, TCHK, etc.) and their responses are short. `PNMessageData` (which contains encrypted message metadata) is not sent over the NTF transport — it is delivered via APNS push notifications. +NTF uses a 512-byte block size (`ntfBlockSize`), significantly smaller than SMP. This is sufficient because NTF protocol commands (TNEW, SNEW, TCHK, etc.) and their results are short. 
`PNMessageData` (which contains encrypted message metadata) is not sent over the NTF transport — it is delivered via APNS push notifications. ### 5. Initial THandle has version 0 diff --git a/spec/modules/Simplex/Messaging/Protocol.md b/spec/modules/Simplex/Messaging/Protocol.md index 2ed7113c8b..082e7d0e8a 100644 --- a/spec/modules/Simplex/Messaging/Protocol.md +++ b/spec/modules/Simplex/Messaging/Protocol.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Protocol -> SMP protocol types, commands, responses, encoding/decoding, and transport functions. +> SMP protocol types, commands, command results, encoding/decoding, and transport functions. **Source**: [`Protocol.hs`](../../../../src/Simplex/Messaging/Protocol.hs) @@ -65,4 +65,4 @@ The `NETWORK` variant of `BrokerErrorType` encodes as just `"NETWORK"` (detail d ## SUBS/NSUBS — asymmetric defaulting -When the router parses `SUBS`/`NSUBS` from a client using a version older than `rcvServiceSMPVersion`, both count and hash default (`-1` and `mempty`). For the response side (`SOKS`/`ENDS` via `serviceRespP`), count is still parsed from the wire — only hash defaults to `mempty`. This asymmetry means command-side and response-side parsing have different fallback behavior for the same version boundary. +When the router parses `SUBS`/`NSUBS` from a client using a version older than `rcvServiceSMPVersion`, both count and hash default (`-1` and `mempty`). For the result side (`SOKS`/`ENDS` via `serviceRespP`), count is still parsed from the wire — only hash defaults to `mempty`. This asymmetry means command-side and result-side parsing have different fallback behavior for the same version boundary. 
diff --git a/spec/modules/Simplex/Messaging/Protocol/Types.md b/spec/modules/Simplex/Messaging/Protocol/Types.md index 0797bc185f..06e60adaa5 100644 --- a/spec/modules/Simplex/Messaging/Protocol/Types.md +++ b/spec/modules/Simplex/Messaging/Protocol/Types.md @@ -1,6 +1,6 @@ # Simplex.Messaging.Protocol.Types -> Client notice type with optional TTL, used in BLOCKED error responses. +> Client notice type with optional TTL, used in BLOCKED error results. **Source**: [`Protocol/Types.hs`](../../../../../src/Simplex/Messaging/Protocol/Types.hs) diff --git a/spec/modules/Simplex/Messaging/Server.md b/spec/modules/Simplex/Messaging/Server.md index 8d23404c99..5cfdfa24a3 100644 --- a/spec/modules/Simplex/Messaging/Server.md +++ b/spec/modules/Simplex/Messaging/Server.md @@ -59,7 +59,7 @@ Stats classification: exactly one of `srvSubOk`/`srvSubMore`/`srvSubFewer`/`srvS See comment on `processForwardedCommand`. Only single forwarded transmissions are allowed — batches are rejected with `BLOCK`. The synthetic `THandleAuth` has `peerClientService = Nothing`, preventing forwarded clients from claiming service identity. Only SEND, SKEY, LKEY, and LGET are allowed through `rejectOrVerify`. -Double encryption: response is encrypted first to the client (with `C.cbEncrypt` using `reverseNonce clientNonce`), then wrapped and encrypted to the proxy (with `C.cbEncryptNoPad` using `reverseNonce proxyNonce`). Using reversed nonces ensures request and response directions use distinct nonces. +Double encryption: the result is encrypted first to the client (with `C.cbEncrypt` using `reverseNonce clientNonce`), then wrapped and encrypted to the proxy (with `C.cbEncryptNoPad` using `reverseNonce proxyNonce`). Using reversed nonces ensures command and result directions use distinct nonces. ## Proxy concurrency limiter @@ -73,13 +73,13 @@ See `wait`/`signal` around `forkProxiedCmd`. `procThreads` TVar implements a cou See `withSubscribed`. 
When a service client unsubscribes between the TVar read and the flush, `throwSTM (userError "service unsubscribed")` aborts the STM transaction. This is caught by `tryAny` and logged as "cancelled" — it's a successful path, not an error. The `flushSubscribedNtfs` function also cancels via `throwSTM` if the client is no longer current or sndQ is full. -## Batch subscription responses — SOK grouped with MSG +## Batch subscription results — SOK grouped with MSG -See comment on `processSubBatch`. When batched SUB commands produce SOK responses plus messages, the first message is appended to the SOK batch (up to 4 SOKs per block) in a single transmission. Remaining messages go to `msgQ` for separate delivery. This ensures the client receives at least one message quickly with its subscription acknowledgments. +See comment on `processSubBatch`. When batched SUB commands produce SOK results plus messages, the first message is appended to the SOK batch (up to 4 SOKs per block) in a single transmission. Remaining messages go to `msgQ` for separate delivery. This ensures the client receives at least one message quickly with its subscription acknowledgments. ## send thread — MVar fair lock -The TLS handle is wrapped in an `MVar` (`newMVar h`). Both `send` (command responses from `sndQ`) and `sendMsg` (messages from `msgQ`) acquire this lock via `withMVar`. This ensures fair interleaving between response batches and individual messages, preventing either from starving the other. +The TLS handle is wrapped in an `MVar` (`newMVar h`). Both `send` (command results from `sndQ`) and `sendMsg` (messages from `msgQ`) acquire this lock via `withMVar`. This ensures fair interleaving between result batches and individual messages, preventing either from starving the other. 
## Queue creation — ID oracle prevention @@ -103,4 +103,4 @@ Every queue command calls `withQueue_` which checks if `updatedAt` matches today ## foldrM in client command processing -`foldrM process ([], [])` processes a batch of verified commands right-to-left, accumulating responses and messages. The responses list is built with `(:)`, so the final order matches the original command order. Messages from SUB are collected separately and passed as the second element of the `sndQ` tuple. +`foldrM process ([], [])` processes a batch of verified commands right-to-left, accumulating results and messages. The results list is built with `(:)`, so the final order matches the original command order. Messages from SUB are collected separately and passed as the second element of the `sndQ` tuple. diff --git a/spec/modules/Simplex/Messaging/Transport.md b/spec/modules/Simplex/Messaging/Transport.md index 1b4656071b..4daa5b23fa 100644 --- a/spec/modules/Simplex/Messaging/Transport.md +++ b/spec/modules/Simplex/Messaging/Transport.md @@ -24,7 +24,7 @@ The version history jumps from 12 (`blockedEntitySMPVersion`) to 14 (`proxyServe `proxiedSMPRelayVersion = 18`, one below `currentClientSMPRelayVersion = 19`. The code comment states: "SMP proxy sets it to lower than its current version to prevent client version fingerprinting by the destination relays when clients upgrade at different times." -In practice (Server.hs), the SMP proxy uses `proxiedSMPRelayVRange` to cap the destination relay's version range in the `PKEY` response sent to the client, so the client sees a capped version range rather than the relay's actual range. +In practice (Server.hs), the SMP proxy uses `proxiedSMPRelayVRange` to cap the destination relay's version range in the `PKEY` result sent to the client, so the client sees a capped version range rather than the relay's actual range. 
## withTlsUnique — different API calls yield same value @@ -67,7 +67,7 @@ When `clientService` is present in the client handshake, the router performs add - On success, the router sends `SMPServerHandshakeResponse` with a `serviceId` - On failure, the router sends `SMPServerHandshakeError` before raising the error -Per the protocol spec (v16+): "`clientService` provides long-term service client certificate for high-volume services using SMP router (chat relays, notification routers, high traffic bots). The router responds with a third handshake message containing the assigned service ID." +Per the protocol spec (v16+): "`clientService` provides long-term service client certificate for high-volume services using SMP router (chat relays, notification routers, high traffic bots). The router returns a third handshake message containing the assigned service ID." The client only includes service credentials when `v >= serviceCertsSMPVersion && certificateSent c` (the TLS client certificate was actually sent). 
From fc5b601cb43b5065c58bfc93cf8386a96f4b2b7d Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 21:45:24 +0000 Subject: [PATCH 47/91] notes --- spec/modules/NOTES.md | 155 ++++++++++++++++++ .../Messaging/Agent/Store/AgentStore.md | 4 - spec/modules/Simplex/Messaging/Client.md | 2 +- spec/modules/Simplex/Messaging/Encoding.md | 6 - .../Simplex/Messaging/Encoding/String.md | 2 +- .../Messaging/Notifications/Server/Store.md | 6 +- .../Notifications/Server/Store/Postgres.md | 2 +- .../Messaging/Server/MsgStore/Postgres.md | 3 - .../Messaging/Server/QueueStore/Postgres.md | 8 +- spec/modules/Simplex/Messaging/Util.md | 3 - spec/modules/Simplex/RemoteControl/Client.md | 2 +- .../Simplex/RemoteControl/Discovery.md | 2 - .../Simplex/RemoteControl/Invitation.md | 2 +- spec/modules/Simplex/RemoteControl/Types.md | 2 +- 14 files changed, 166 insertions(+), 33 deletions(-) create mode 100644 spec/modules/NOTES.md diff --git a/spec/modules/NOTES.md b/spec/modules/NOTES.md new file mode 100644 index 0000000000..0fad99561b --- /dev/null +++ b/spec/modules/NOTES.md @@ -0,0 +1,155 @@ +# Design Notes + +Non-bug observations from module specs that are worth tracking. These remain documented in their respective module specs — this file serves as an index. + +## Backend Observations + +### N-01: SNotifier path doesn't cache + +**Location**: `Simplex.Messaging.Server.QueueStore.Postgres` — `getQueues_` SNotifier branch +**Description**: The SRecipient path caches loaded queues via `cacheRcvQueue` with double-check locking. The SNotifier path does NOT cache — it uses a stale TMap snapshot and `maybe (mkQ False rId qRec) pure`, so concurrent loads for the same notifier can create duplicate ephemeral queue objects. Functionally correct but wasteful. 
+**Module spec**: [QueueStore/Postgres.md](Simplex/Messaging/Server/QueueStore/Postgres.md) + +### N-02: assertUpdated error conflation + +**Location**: `Simplex.Messaging.Server.QueueStore.Postgres` — `assertUpdated` +**Description**: `assertUpdated` returns `AUTH` for zero-rows-affected. This is the same error code used for "not found" (via `readQueueRecIO`) and "duplicate" (via `handleDuplicate`). The actual cause — stale cache, deleted queue, or constraint violation — is indistinguishable in logs. +**Module spec**: [QueueStore/Postgres.md](Simplex/Messaging/Server/QueueStore/Postgres.md) + +## Design Characteristics + +### N-03: RCVerifiedInvitation constructor exported + +**Location**: `Simplex.RemoteControl.Invitation` — `RCVerifiedInvitation` +**Description**: `RCVerifiedInvitation` is a newtype with constructor exported via `(..)`. It can be constructed without calling `verifySignedInvitation`, bypassing signature verification. The trust boundary is conventional, not enforced by the type system. `connectRCCtrl` accepts only `RCVerifiedInvitation`. +**Module spec**: [RemoteControl/Invitation.md](Simplex/RemoteControl/Invitation.md) + +### N-04: smpEncode ByteString length prefix silently wraps + +**Location**: `Simplex.Messaging.Encoding` — `Encoding ByteString` instance +**Description**: `smpEncode` for ByteString uses a 1-byte length prefix. Maximum encodable length is 255 bytes. The encoded length of longer values silently wraps via `w2c . fromIntegral` (e.g. a 300-byte string encodes its length as 44). Callers must ensure ByteStrings fit or use `Large`. 
+**Module spec**: [Server/Stats.md](Simplex/Messaging/Server/Stats.md) + +### N-06: setStatsByServer orphans old TVars + +**Location**: `Simplex.Messaging.Notifications.Server.Stats` — `setStatsByServer` +**Description**: Builds a fresh `Map Text (TVar Int)` in IO, then atomically replaces the TMap's root TVar. Old per-router TVars are not reused — any other thread holding a reference from a prior `TM.lookupIO` would modify an orphaned counter. Called at startup, but lacks the explicit "not thread safe" comment. +**Module spec**: [Notifications/Server/Stats.md](Simplex/Messaging/Notifications/Server/Stats.md) + +### N-07: Lazy.unPad doesn't validate data length + +**Location**: `Simplex.Messaging.Crypto.Lazy` — `unPad` / `splitLen` +**Description**: `splitLen` does not validate that the remaining data is at least `len` bytes — `LB.take len` silently returns a shorter result. The source comment notes this is intentional to avoid consuming all lazy chunks for validation. +**Module spec**: [Crypto/Lazy.md](Simplex/Messaging/Crypto/Lazy.md) + +### N-08: Batched commands have no timeout-based expiry + +**Location**: `Simplex.Messaging.Client` — `sendBatch` +**Description**: Batched commands are written with `Nothing` as the request parameter — the send thread skips the `pending` flag check. Individual commands have timeout-based expiry. If the router stops returning results, batched commands can block the send queue indefinitely. +**Module spec**: [Client.md](Simplex/Messaging/Client.md) + +### N-09: Postgres MsgStore nanosecond precision + +**Location**: `Simplex.Messaging.Server.MsgStore.Postgres` — `toMessage` +**Description**: `MkSystemTime ts 0` constructs timestamps with zero nanoseconds. Only whole seconds are stored. Messages read from Postgres have coarser timestamps than STM/Journal stores. Not a practical issue — timestamps are typically rounded to hours or days. 
+**Module spec**: [Server/MsgStore/Postgres.md](Simplex/Messaging/Server/MsgStore/Postgres.md) + +### N-10: MsgStore Postgres — error stubs crash at runtime + +**Location**: `Simplex.Messaging.Server.MsgStore.Postgres` — multiple `MsgStoreClass` methods +**Description**: Multiple `MsgStoreClass` methods are `error "X not used"`. Required by the type class but not applicable to Postgres. Calling any at runtime crashes. Safe because Postgres overrides the relevant default methods, but a new caller using the wrong method would crash with no compile-time warning. +**Module spec**: [Server/MsgStore/Postgres.md](Simplex/Messaging/Server/MsgStore/Postgres.md) + +### N-11: strP default assumes base64url for all types + +**Location**: `Simplex.Messaging.Encoding.String` — `StrEncoding` class default +**Description**: The `MINIMAL` pragma allows defining only `strDecode` without `strP`. The default `strP = strDecode <$?> base64urlP` assumes input is base64url-encoded for any type. A new `StrEncoding` instance that defines only `strDecode` for non-base64 data would get a broken parser. +**Module spec**: [Encoding/String.md](Simplex/Messaging/Encoding/String.md) + +## Silent Behaviors + +Intentional design choices that are correct but non-obvious. A code modifier who doesn't know these could introduce bugs. + +### N-12: Service signing silently skipped on empty authenticator + +**Location**: `Simplex.Messaging.Client` — service signature path +**Description**: The service signature is only added when the entity authenticator is non-empty. If authenticator generation fails silently (returns empty bytes), service signing is silently skipped. 
+**Module spec**: [Client.md](Simplex/Messaging/Client.md) + +### N-13: stmDeleteNtfToken — nonexistent token indistinguishable from empty + +**Location**: `Simplex.Messaging.Notifications.Server.Store` — `stmDeleteNtfToken` +**Description**: If the token ID doesn't exist in the `tokens` map, the registration-cleanup branch is skipped and the function returns an empty list. The caller cannot distinguish "deleted a token with no subscriptions" from "token never existed." +**Module spec**: [Notifications/Server/Store.md](Simplex/Messaging/Notifications/Server/Store.md) + +### N-14: createCommand silently drops commands for deleted connections + +**Location**: `Simplex.Messaging.Agent.Store.AgentStore` — `createCommand` +**Description**: When `createCommand` encounters a constraint violation (the referenced connection was already deleted), it logs the error and returns successfully. Commands targeting deleted connections are silently dropped. +**Module spec**: [Agent/Store/AgentStore.md](Simplex/Messaging/Agent/Store/AgentStore.md) + +### N-15: Redirect chain loading errors silently swallowed + +**Location**: `Simplex.Messaging.Agent.Store.AgentStore` +**Description**: When loading redirect chains, errors loading individual redirect files are silently swallowed via `either (const $ pure Nothing) (pure . Just)`. Prevents a corrupt redirect from blocking access to the main file. +**Module spec**: [Agent/Store/AgentStore.md](Simplex/Messaging/Agent/Store/AgentStore.md) + +### N-16: BLOCKED encoded as AUTH for old XFTP clients + +**Location**: `Simplex.FileTransfer.Protocol` — `encodeProtocol` +**Description**: If the protocol version is below `blockedFilesXFTPVersion`, a `BLOCKED` result is encoded as `AUTH` instead. The blocking information (reason) is permanently lost for these clients. 
+**Module spec**: [FileTransfer/Protocol.md](Simplex/FileTransfer/Protocol.md) + +### N-17: restore_messages three-valued logic with implicit default + +**Location**: `Simplex.Messaging.Server.Main` — INI config +**Description**: The `restore_messages` INI setting has three-valued logic: explicit "on" → restore, explicit "off" → skip, missing → inherits from `enable_store_log`. This implicit default is not captured in the type system — callers see `Maybe Bool`. +**Module spec**: [Server/Main.md](Simplex/Messaging/Server/Main.md) + +### N-18: Stats format migration permanently loses precision + +**Location**: `Simplex.Messaging.Server.Stats` — `strP` for `ServerStatsData` +**Description**: The parser handles multiple format generations. Old format `qDeleted=` is read as `(value, 0, 0)`. `qSubNoMsg` is parsed and discarded. `subscribedQueues` is parsed but replaced with empty data. Data loaded from old formats is coerced — precision is permanently lost. +**Module spec**: [Server/Stats.md](Simplex/Messaging/Server/Stats.md) + +### N-19: resubscribe exceptions silently lost + +**Location**: `Simplex.Messaging.Notifications.Server` — `resubscribe` +**Description**: `resubscribe` is launched via `forkIO` before `raceAny_` starts — not part of the `raceAny_` group. Most exceptions are silently lost per `forkIO` semantics. `ExitCode` exceptions are special-cased by GHC's runtime and do propagate. +**Module spec**: [Notifications/Server.md](Simplex/Messaging/Notifications/Server.md) + +### N-20: closeSMPClientAgent worker cancellation is fire-and-forget + +**Location**: `Simplex.Messaging.Client.Agent` — `closeSMPClientAgent` +**Description**: Executes in order: set `active = False`, close all client connections, swap workers map to empty and fork cancellation threads. Cancel threads use `uninterruptibleCancel` but are fire-and-forget — the function may return before all workers are cancelled. 
+**Module spec**: [Client/Agent.md](Simplex/Messaging/Client/Agent.md) + +### N-21: APNS unknown 410 reasons trigger retry instead of permanent failure + +**Location**: `Simplex.Messaging.Notifications.Server.Push.APNS` +**Description**: Unknown 410 (Gone) reasons fall through to `PPRetryLater`, while unknown 400 and 403 reasons fall through to `PPResponseError`. An unexpected APNS 410 reason string triggers retry rather than permanent failure. +**Module spec**: [Notifications/Server/Push/APNS.md](Simplex/Messaging/Notifications/Server/Push/APNS.md) + +### N-22: NTInvalid/NTExpired tokens can create subscriptions + +**Location**: `Simplex.Messaging.Notifications.Protocol` — token status permissions +**Description**: Token status `NTInvalid` allows subscription commands (SNEW, SCHK, SDEL). A TODO comment explains: invalidation can happen after verification, and existing subscriptions should remain manageable. `NTExpired` is also permitted. +**Module spec**: [Notifications/Protocol.md](Simplex/Messaging/Notifications/Protocol.md) + +### N-23: removeInactiveTokenRegistrations doesn't clean up empty inner maps + +**Location**: `Simplex.Messaging.Notifications.Server.Store` — `stmRemoveInactiveTokenRegistrations` +**Description**: `stmDeleteNtfToken` checks whether inner TMap is empty after removal and cleans up the outer key. `stmRemoveInactiveTokenRegistrations` does not — surviving active tokens' registrations remain, but empty inner maps can persist. +**Module spec**: [Notifications/Server/Store.md](Simplex/Messaging/Notifications/Server/Store.md) + +### N-24: cbNonce silently truncates or pads + +**Location**: `Simplex.Messaging.Crypto` — `cbNonce` +**Description**: If the input is longer than 24 bytes, it is silently truncated. If shorter, it is silently padded. No error is raised. Callers must ensure correct length. 
+**Module spec**: [Crypto.md](Simplex/Messaging/Crypto.md) diff --git a/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md b/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md index d1271a6d33..59d7c20105 100644 --- a/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md +++ b/spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md @@ -75,10 +75,6 @@ Generates random 12-byte IDs (base64url encoded) and retries up to 3 times on co First clears primary flag on all queues in the connection, then sets it on the target queue. Also clears `replace_*_queue_id` on the new primary — this completes the queue rotation by removing the "replacing" marker. -## checkConfirmedSndQueueExists_ — dpPostgres typo - -The CPP guard reads `#if defined(dpPostgres)` (note `dp` instead of `db`). This means the `FOR UPDATE` clause is never included for any backend. The check still works correctly for SQLite (single-writer model) but on PostgreSQL the query runs without row locking, which could allow a TOCTOU race between checking and inserting. - ## createCommand — silent drop for deleted connections When `createCommand` encounters a constraint violation (the referenced connection was already deleted), it logs the error and returns successfully rather than throwing. This means commands targeting deleted connections are silently dropped. The rationale: the connection is already gone, so there's nothing useful to do with the error. diff --git a/spec/modules/Simplex/Messaging/Client.md b/spec/modules/Simplex/Messaging/Client.md index f23d5a0056..4e97c9a5cd 100644 --- a/spec/modules/Simplex/Messaging/Client.md +++ b/spec/modules/Simplex/Messaging/Client.md @@ -42,7 +42,7 @@ When `corrId` is empty, the message is an `STEvent` (router-initiated). When non ## nonBlockingWriteTBQueue — fork on full -If `tryWriteTBQueue` returns `False`, a new thread is forked for the blocking write. No backpressure mechanism — under sustained overload, thread count grows without bound. 
This is a deliberate tradeoff: the caller never blocks (preventing deadlock between send and process threads), at the cost of potential unbounded thread creation. +If `tryWriteTBQueue` returns `False` (queue full), a new thread is forked for the blocking write. The caller never blocks, preventing deadlock between send and process threads. ## Batch commands do not expire diff --git a/spec/modules/Simplex/Messaging/Encoding.md b/spec/modules/Simplex/Messaging/Encoding.md index 8db63d0cc2..984498dd4d 100644 --- a/spec/modules/Simplex/Messaging/Encoding.md +++ b/spec/modules/Simplex/Messaging/Encoding.md @@ -14,8 +14,6 @@ The two encoding classes share some instances (`Char`, `Bool`, `SystemTime`) but **Length prefix is 1 byte.** Maximum encodable length is 255 bytes. If a ByteString exceeds 255 bytes, the length silently wraps via `w2c . fromIntegral` — a 300-byte string encodes length as 44 (300 mod 256). Callers must ensure ByteStrings fit in 255 bytes, or use `Large` for longer values. -**Security**: silent truncation means a caller encoding untrusted input without length validation could produce a malformed message where the decoder reads fewer bytes than were intended, then misparses the remainder as the next field. - ## Large 2-byte length prefix (`Word16`). Use for ByteStrings that may exceed 255 bytes. Maximum 65535 bytes. @@ -36,10 +34,6 @@ Sequential concatenation with no separators. Works because each element's encodi Only seconds are encoded (as Int64); nanoseconds are discarded on encode and set to 0 on decode. -## String instance - -`smpEncode` goes through `B.pack`, which silently truncates any Unicode character above codepoint 255 to its lowest byte. A String containing non-Latin-1 characters is silently corrupted on encode with no error. Same issue exists in the `StrEncoding String` instance — see [Simplex.Messaging.Encoding.String](./Encoding/String.md#string-instance). 
- ## smpEncodeList / smpListP 1-byte length prefix for lists — same 255-item limit as ByteString's 255-byte limit. diff --git a/spec/modules/Simplex/Messaging/Encoding/String.md b/spec/modules/Simplex/Messaging/Encoding/String.md index 1e60295b81..378bed11f0 100644 --- a/spec/modules/Simplex/Messaging/Encoding/String.md +++ b/spec/modules/Simplex/Messaging/Encoding/String.md @@ -21,7 +21,7 @@ Encodes as base64url. The parser (`strP`) only accepts non-empty strings — emp ## String instance -Inherits from ByteString via `B.pack` / `B.unpack`. Only Char8 (Latin-1) characters round-trip; `B.pack` truncates unicode codepoints above 255. The source comment warns about this. +Inherits from ByteString via `B.pack` / `B.unpack`. Only Char8 (Latin-1) characters round-trip. ## strToJSON / strParseJSON diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md index d9deedbf48..4259b44c73 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Store.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store.md @@ -20,7 +20,7 @@ When a token is activated, `stmRemoveInactiveTokenRegistrations` removes ALL oth ### 4. tokenLastNtfs accumulates via prepend -New notifications are prepended to the `NonEmpty PNMessageData` list via `(<|)`. The list is unbounded in the STM store — bounding is handled at the push delivery layer (the Postgres store limits to 6). +New notifications are prepended to the `NonEmpty PNMessageData` list via `(<|)`. ### 5. stmDeleteNtfToken prunes empty registration maps @@ -46,9 +46,9 @@ When `stmDeleteNtfToken` removes a token, it deletes the entry from the inner `T When `stmDeleteNtfSubscription` removes a subscription, it deletes the `subId` from the token's `Set NtfSubscriptionId` in `tokenSubscriptions` but never checks whether the set became empty. 
Tokens with all subscriptions individually deleted accumulate empty set entries — these are only cleaned up when the token itself is deleted via `deleteTokenSubs`. -### 11. stmSetNtfService — asymmetric cleanup with Postgres store +### 11. stmSetNtfService — key-value service association -`stmSetNtfService` uses `maybe TM.delete TM.insert` to either remove or set the service association for an SMP router. This is purely a key-value update with no cascading effects on subscriptions. The Postgres store's `removeServiceAndAssociations` handles subscription cleanup separately, meaning the STM and Postgres stores have **different cleanup semantics** for service removal. +`stmSetNtfService` uses `maybe TM.delete TM.insert` to either remove or set the service association for an SMP router. This is purely a key-value update with no cascading effects on subscriptions. ### 12. Subscription index triple-write invariant diff --git a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md index bde863eb61..1950ee1e15 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md @@ -83,7 +83,7 @@ The `insertServer` fallback uses `ON CONFLICT ... DO UPDATE SET smp_host = EXCLU ### 18. deleteNtfToken string_agg with hex parsing -`deleteNtfToken` uses `string_agg(s.smp_notifier_id :: TEXT, ',')` to aggregate `BYTEA` notifier IDs into comma-separated text, then parses with `parseByteaString` which drops the `\x` prefix and hex-decodes. `mapMaybe` silently drops any IDs that fail hex decoding, which could mask data corruption. +`deleteNtfToken` uses `string_agg(s.smp_notifier_id :: TEXT, ',')` to aggregate `BYTEA` notifier IDs into comma-separated text, then parses with `parseByteaString` which drops the `\x` prefix and hex-decodes. ### 19. 
withPeriodicNtfTokens streams with DB.fold diff --git a/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md b/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md index eaeca3b903..a69e2e9ee7 100644 --- a/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md +++ b/spec/modules/Simplex/Messaging/Server/MsgStore/Postgres.md @@ -52,6 +52,3 @@ Creates a temp table with aggregated message stats, then updates `msg_queues` in `deleteQueueSize` calls `getQueueSize` BEFORE `deleteStoreQueue`. The returned size is the count at query time — a concurrent `writeMsg` between the size query and the delete means the reported size is stale. This is acceptable because the size is used for statistics, not for correctness. -## unsafeMaxLenBS - -`toMessage` uses `C.unsafeMaxLenBS` to bypass the `MaxLen` length check on message bodies read from the database. A TODO comment questions this choice. If the database contains oversized data, the length invariant is silently violated. diff --git a/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md b/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md index f97acaa2d3..39d833169e 100644 --- a/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md +++ b/spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md @@ -6,7 +6,7 @@ ## addQueue_ — no in-memory duplicate check, relies on DB constraint -See comment on `addQueue_`: "Not doing duplicate checks in maps as the probability of duplicates is very low." The STM implementation checks all four ID maps before insertion and returns `DUPLICATE_`. The Postgres implementation skips this and relies on `UniqueViolation` from the DB, which `handleDuplicate` maps to `AUTH`, not `DUPLICATE_`. The same logical error produces different error codes depending on the store backend. +See comment on `addQueue_`: "Not doing duplicate checks in maps as the probability of duplicates is very low." 
The Postgres implementation relies on `UniqueViolation` from the DB rather than pre-checking in-memory maps. ## addQueue_ — non-atomic cache updates @@ -56,17 +56,13 @@ Re-securing with the same key falls through the verify function to `pure ()`, th (1) **Cache check**: `checkCachedNotifier` acquires a per-notifier-ID lock via `notifierLocks`, then checks `TM.memberIO`. Returns `DUPLICATE_`. (2) **Queue lock**: Via `withQueueRec`, prevents concurrent modifications to the same queue. (3) **Database constraint**: `handleDuplicate` catches `UniqueViolation`, returns `AUTH`. Same duplicate, different error codes depending on whether cache was warm. The `notifierLocks` map grows unboundedly — locks are never removed except when the queue is deleted. -## addQueueNotifier — always clears notification service - -The SQL UPDATE always sets `ntf_service_id = NULL` when adding/replacing a notifier. The previous notifier's service association is silently lost. The STM implementation additionally calls `removeServiceQueue` to update service-level tracking; the Postgres version does not. - ## rowToQueueRec — link data replaced with empty stubs The standard `queueRecQuery` does NOT select `fixed_data` and `user_data` columns. When converting to `QueueRec`, link data is stubbed: `(,(EncDataBytes "", EncDataBytes "")) <$> linkId_`. Actual link data is loaded on demand via `getQueueLinkData`. Any code reading `queueData` from a cached `QueueRec` without going through `getQueueLinkData` sees empty bytes. The separate `rowToQueueRecWithData` (used by `foldQueueRecs` with `withData = True`) includes real data. ## getCreateService — serialization via serviceLocks -Entire operation wrapped in `withLockMap (serviceLocks st) fp`, serializing all creation/lookup for the same certificate fingerprint. Inside the lock: SELECT by `service_cert_hash`, if not found attempt INSERT catching `UniqueViolation`. The `serviceLocks` map grows unboundedly — no cleanup mechanism. 
+Entire operation wrapped in `withLockMap (serviceLocks st) fp`, serializing all creation/lookup for the same certificate fingerprint. Inside the lock: SELECT by `service_cert_hash`, if not found attempt INSERT catching `UniqueViolation`. ## batchInsertQueues — COPY protocol with manual CSV serialization diff --git a/spec/modules/Simplex/Messaging/Util.md b/spec/modules/Simplex/Messaging/Util.md index 3b9fd37777..d89e27bf16 100644 --- a/spec/modules/Simplex/Messaging/Util.md +++ b/spec/modules/Simplex/Messaging/Util.md @@ -47,6 +47,3 @@ Runs all actions concurrently, waits for any one to complete, then cancels all o Handles `Int64` delays exceeding `maxBound :: Int` (~2147 seconds on 32-bit) by looping in chunks. Necessary because `threadDelay` takes `Int`, not `Int64`. -## toChunks - -Precondition: `n > 0` (comment-only, not enforced). Passing `n = 0` causes infinite loop. diff --git a/spec/modules/Simplex/RemoteControl/Client.md b/spec/modules/Simplex/RemoteControl/Client.md index 55fd05bc17..84a5d2dcaa 100644 --- a/spec/modules/Simplex/RemoteControl/Client.md +++ b/spec/modules/Simplex/RemoteControl/Client.md @@ -30,7 +30,7 @@ The session key combines DH and post-quantum KEM via `kemHybridSecret`: `SHA3_25 2. Application displays session code for user verification → calls `confirmCtrlSession` with `True`/`False` 3. If confirmed, `runSession` proceeds with hello exchange → second `RCStepTMVar` resolved with session -`confirmCtrlSession` does a double `putTMVar` — the first signals the decision, the second blocks until the session thread does `takeTMVar` (synchronization point). See TODO in source: no timeout on this wait. +`confirmCtrlSession` does a double `putTMVar` — the first signals the decision, the second blocks until the session thread does `takeTMVar` (synchronization point). 
## TLS hooks — single-session enforcement diff --git a/spec/modules/Simplex/RemoteControl/Discovery.md b/spec/modules/Simplex/RemoteControl/Discovery.md index 52c861c797..22fd9d6d66 100644 --- a/spec/modules/Simplex/RemoteControl/Discovery.md +++ b/spec/modules/Simplex/RemoteControl/Discovery.md @@ -12,8 +12,6 @@ Enumerates network interfaces and filters out non-routable addresses (0.0.0.0, b `joinMulticast` / `partMulticast` use a shared `TMVar Int` counter to track active listeners. Multicast group membership is per-host (not per-process — see comment in Multicast.hsc), so the counter ensures `IP_ADD_MEMBERSHIP` is called only when transitioning from 0→1 listeners and `IP_DROP_MEMBERSHIP` only when transitioning from 1→0. If `setMembership` fails, the counter is restored to its previous value and the error is logged (not thrown). -**TMVar hazard**: Both functions take the counter from the TMVar unconditionally but only put it back in the 0-or-1 branches. If `joinMulticast` is called when the counter is already >0, or `partMulticast` when >1, the TMVar is left empty and subsequent accesses will deadlock. In practice this is safe because `withListener` serializes access through a single `TMVar Int`, but the abstraction does not protect against concurrent use. - ## startTLSServer — ephemeral port support When `port_` is `Nothing`, passes `"0"` to `startTCPServer`, which causes the OS to assign an ephemeral port. The assigned port is read via `socketPort` and communicated back through the `startedOnPort` TMVar. On any startup error, `setPort Nothing` is signalled so callers don't block indefinitely on the TMVar. 
diff --git a/spec/modules/Simplex/RemoteControl/Invitation.md b/spec/modules/Simplex/RemoteControl/Invitation.md index 3f65ec46cb..a12a12f992 100644 --- a/spec/modules/Simplex/RemoteControl/Invitation.md +++ b/spec/modules/Simplex/RemoteControl/Invitation.md @@ -15,7 +15,7 @@ Verification in `verifySignedInvitation` mirrors this: `ssig` is verified agains ## Invitation URI format -The `xrcp:/` scheme uses the SMP-style pattern: CA fingerprint as userinfo (`ca@host:port`), query parameters after `#/?`. The `app` field is raw JSON encoded in a query parameter. `RCInvitation`'s parser uses `parseSimpleQuery` + `lookup` (order-independent), but `RCSignedInvitation`'s parser uses `B.breakSubstring "&ssig="` which assumes the signatures appear at a fixed position — see TODO in source on `RCSignedInvitation`'s `strP`. +The `xrcp:/` scheme uses the SMP-style pattern: CA fingerprint as userinfo (`ca@host:port`), query parameters after `#/?`. The `app` field is raw JSON encoded in a query parameter. `RCInvitation`'s parser uses `parseSimpleQuery` + `lookup` (order-independent). ## RCVerifiedInvitation — newtype trust boundary diff --git a/spec/modules/Simplex/RemoteControl/Types.md b/spec/modules/Simplex/RemoteControl/Types.md index ad165f4426..f752d465f5 100644 --- a/spec/modules/Simplex/RemoteControl/Types.md +++ b/spec/modules/Simplex/RemoteControl/Types.md @@ -28,4 +28,4 @@ This module defines the data types for the XRCP (remote control) protocol, which ## IpProbe — unused discovery type -`IpProbe` is defined with `Encoding` instance but not used anywhere in the current codebase. It appears to be a placeholder for a planned IP discovery mechanism. Note: the `smpP` parser has a precedence bug — `IpProbe <$> (smpP <* "I") *> smpP` parses as `(IpProbe <$> (smpP <* "I")) *> smpP`, which discards the `IpProbe` wrapper. This has never manifested because the type is unused. +`IpProbe` is defined with `Encoding` instance but not used anywhere in the current codebase. 
It appears to be a placeholder for a planned IP discovery mechanism. From 388b13d417d1a88d4411ec0949b01cbb3e4350cf Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 22:16:49 +0000 Subject: [PATCH 48/91] docs --- README.md | 319 ++++++++---------------------------------------- docs/AGENT.md | 75 ++++++++++++ docs/CLIENT.md | 75 ++++++++++++ docs/ROUTERS.md | 194 +++++++++++++++++++++++++++++ 4 files changed, 392 insertions(+), 271 deletions(-) create mode 100644 docs/AGENT.md create mode 100644 docs/CLIENT.md create mode 100644 docs/ROUTERS.md diff --git a/README.md b/README.md index b138c8cc9a..8d6333f3a4 100644 --- a/README.md +++ b/README.md @@ -3,84 +3,78 @@ [![GitHub build](https://github.com/simplex-chat/simplexmq/actions/workflows/build.yml/badge.svg)](https://github.com/simplex-chat/simplexmq/actions/workflows/build.yml) [![GitHub release](https://img.shields.io/github/v/release/simplex-chat/simplexmq)](https://github.com/simplex-chat/simplexmq/releases) -📢 SimpleXMQ v1 is released - with many security, privacy and efficiency improvements, new functionality - see [release notes](https://github.com/simplex-chat/simplexmq/releases/tag/v1.0.0). +## SimpleX Network software -**Please note**: v1 is not backwards compatible, but it has the version negotiation built into all protocol layers for forwards compatibility of this version and backwards compatibility of the future versions, that will be backwards compatible for at least two versions back. +SimpleXMQ provides the software for [SimpleX Network](./protocol/overview-tjr.md) — a general-purpose packet routing network where endpoints exchange data through independently operated routers using resource-based addressing. Unlike IP networks, SimpleX addresses identify resources on routers (queues, data packets), not endpoint devices. Participants do not need globally unique identifiers to communicate. 
-If you have a server deployed please deploy a new server to a new host and retire the previous version once it is no longer used. +The software is organized in three layers: -## Message broker for unidirectional (simplex) queues - -SimpleXMQ is a message broker for managing message queues and sending messages over public network. It consists of SMP server, SMP client library and SMP agent that implement [SMP protocol](./protocol/simplex-messaging.md) for client-server communication and [SMP agent protocol](./protocol/agent-protocol.md) to manage duplex connections via simplex queues on multiple SMP servers. - -SMP protocol is inspired by [Redis serialization protocol](https://redis.io/topics/protocol), but it is much simpler - it currently has only 10 client commands and 8 server responses. - -SimpleXMQ is implemented in Haskell - it benefits from robust software transactional memory (STM) and concurrency primitives that Haskell provides. - -## SimpleXMQ roadmap +``` + Application (e.g. SimpleX Chat) ++----------------------------------+ +| SimpleX Agent | Layer 3 — bidirectional connections, e2e encryption ++----------------------------------+ +| SimpleX Client Libraries | Layer 2 — protocol clients for SMP, XFTP ++----------------------------------+ +| SimpleX Routers | Layer 1 — network infrastructure (SMP, XFTP, NTF) ++----------------------------------+ +``` -- SimpleX service protocol and application template - to enable users building services and chat bots that work over SimpleX protocol stack. The first such service will be a notification service for a mobile app. -- SMP queue redundancy and rotation in SMP agent connections. -- SMP agents synchronization to share connections and messages between multiple agents (it would allow using multiple devices for [simplex-chat](https://github.com/simplex-chat/simplex-chat)). +[SimpleX Chat](https://github.com/simplex-chat/simplex-chat) is one application built on Layer 3. 
IoT devices, AI services, monitoring systems, and automated services are other applications that can use Layers 2 or 3 directly. -## Components +SimpleXMQ is implemented in Haskell, benefiting from robust software transactional memory (STM) and concurrency primitives. -### SMP server +See the [SimpleX Network overview](./protocol/overview-tjr.md) for the full protocol architecture, trust model, and security analysis. -[SMP server](./apps/smp-server/Main.hs) can be run on any Linux distribution, including low power/low memory devices. OpenSSL library is required for initialization. +## Architecture -To initialize the server use `smp-server init -n ` (or `smp-server init --ip ` for IP based address) command - it will generate keys and certificates for TLS transport. The fingerprint of offline certificate is used as part of the server address to protect client/server connection against man-in-the-middle attacks: `smp://@[:5223]`. +### SimpleX Routers -SMP server uses in-memory persistence with an optional append-only log of created queues that allows to re-start the server without losing the connections. This log is compacted on every server restart, permanently removing suspended and removed queues. +Routers are the network infrastructure — they accept, buffer, and deliver packets. Three router types serve different purposes: -To enable store log, initialize server using `smp-server -l` command, or modify `smp-server.ini` created during initialization (uncomment `enable: on` option in the store log section). Use `smp-server --help` for other usage tips. +- **SMP routers** provide messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes). Protocol: [SMP](./protocol/simplex-messaging.md). +- **XFTP routers** provide data packet storage — individually addressed blocks in fixed sizes (64KB–4MB) for larger payloads. Protocol: [XFTP](./protocol/xftp.md). 
+- **NTF routers** bridge to platform push services (APNS) for mobile notification delivery. Protocol: [Push Notifications](./protocol/push-notifications.md). -Starting from version 2.3.0, when store log is enabled, the server would also enable saving undelivered messages on exit and restoring them on start. This can be disabled via a separate setting `restore_messages` in `smp-server.ini` file. Saving messages would only work if the server is stopped with SIGINT signal (keyboard interrupt), if it is stopped with SIGTERM signal the messages would not be saved. +#### Running an SMP router -> **Please note:** On initialization SMP server creates a chain of two certificates: a self-signed CA certificate ("offline") and a server certificate used for TLS handshake ("online"). **You should store CA certificate private key securely and delete it from the server. If server TLS credential is compromised this key can be used to sign a new one, keeping the same server identity and established connections.** CA private key location by default is `/etc/opt/simplex/ca.key`. +[SMP server](./apps/smp-server/Main.hs) runs on any Linux distribution. OpenSSL is required for initialization. -SMP server implements [SMP protocol](./protocol/simplex-messaging.md). +Initialize: `smp-server init -n <fqdn>` (or `--ip <ip>`). This generates TLS certificates. The CA certificate fingerprint becomes part of the server address: `smp://<fingerprint>@<hostname>[:5223]`. -#### Running SMP server on MacOS +The server uses in-memory persistence with an optional append-only store log for queue persistence across restarts. Enable with `smp-server init -l` or in `smp-server.ini`. The log is compacted on every restart. -SMP server requires OpenSSL library for initialization. On MacOS OpenSSL library may be replaced with LibreSSL, which doesn't support required algorithms.
Before initializing SMP server verify you have OpenSSL installed: +When store log is enabled, undelivered messages are saved on exit (SIGINT only, not SIGTERM) and restored on start. Control this independently with the `restore_messages` setting. -```sh -openssl version -``` +> **Please note:** On initialization, SMP server creates a certificate chain: a self-signed CA certificate ("offline") and a server certificate for TLS ("online"). **Store the CA private key securely and delete it from the server.** If the server TLS credential is compromised, this key can sign a new one while keeping the same server identity. Default location: `/etc/opt/simplex/ca.key`. -If it says "LibreSSL", please install original OpenSSL: +See [docs/ROUTERS.md](./docs/ROUTERS.md) for XFTP/NTF router setup, advanced configuration, MacOS notes, and all deployment options (Docker, installation script, building from source, Linode, DigitalOcean). -```sh -brew update -brew install openssl -echo 'PATH="/opt/homebrew/opt/openssl@3/bin:$PATH"' >> ~/.zprofile # or follow whatever instructions brew suggests -. ~/.zprofile # or restart your terminal to start a new session -``` +### SimpleX Client Libraries -Now `openssl version` should be saying "OpenSSL". You can now run `smp-server init` to initialize your SMP server. +[Client libraries](./docs/CLIENT.md) provide low-level protocol access to SimpleX routers. They implement the wire protocols (SMP, XFTP) and handle connection lifecycle, command authentication, and keep-alive. -### SMP client library +The [SMP client](./src/Simplex/Messaging/Client.hs) offers a functional Haskell API with STM queues for asynchronous event delivery. The [XFTP client](./src/Simplex/FileTransfer/Client.hs) handles data packet upload/download with per-download forward secrecy. 
-[SMP client](./src/Simplex/Messaging/Client.hs) is a Haskell library to connect to SMP servers that allows to: +Applications that manage their own encryption and connection logic — IoT devices, sensors, simple data pipelines — can use this layer directly. See [docs/CLIENT.md](./docs/CLIENT.md). -- execute commands with a functional API. -- receive messages and other notifications via STM queue. -- automatically send keep-alive commands. +### SimpleX Agent -### SMP agent +The [Agent](./docs/AGENT.md) builds bidirectional encrypted connections on top of the client libraries. It manages: -[SMP agent library](./src/Simplex/Messaging/Agent.hs) can be used to run SMP agent as part of another application and to communicate with the agent via STM queues, without serializing and parsing commands and responses. +- Duplex connections from unidirectional queue pairs +- End-to-end encryption with double ratchet and post-quantum extensions +- File transfer with chunking, encryption, and multi-router distribution +- Queue rotation for metadata privacy +- Push notification subscriptions -Haskell type [ACommand](./src/Simplex/Messaging/Agent/Protocol.hs) represents SMP agent protocol to communicate via STM queues. +The [Agent library](./src/Simplex/Messaging/Agent.hs) communicates via STM queues using the [ACommand](./src/Simplex/Messaging/Agent/Protocol.hs) type — no serialization needed. -See [simplex-chat](https://github.com/simplex-chat/simplex-chat) terminal UI for the example of integrating SMP agent into another application. +See [docs/AGENT.md](./docs/AGENT.md). -[SMP agent executable](./apps/smp-agent/Main.hs) can be used to run a standalone SMP agent process that implements plaintext [SMP agent protocol](./protocol/agent-protocol.md) via TCP port 5224, so it can be used via telnet. It can be deployed in private networks to share access to the connections between multiple applications and services. 
+## Quick start -## Using SMP server and SMP agent - -You can either run your own SMP server locally or deploy using [Linode StackScript](https://cloud.linode.com/stackscripts/748014), or try local SMP agent with the deployed servers: +Public SMP routers for testing: `smp://u2dS9sG8nMNURyZwqASV4yROM28Er0luVTx5X1CsMrU=@smp4.simplex.im` @@ -88,233 +82,16 @@ You can either run your own SMP server locally or deploy using [Linode StackScri `smp://PQUV2eL0t7OStZOoAsPEV2QYWt4-xilbakvGUGOItUo=@smp6.simplex.im` -It's the easiest to try SMP agent via a prototype [simplex-chat](https://github.com/simplex-chat/simplex-chat) terminal UI. - -## Deploy SMP/XFTP servers on Linux - -You can run your SMP/XFTP server as a Linux process, optionally using a service manager for booting and restarts. +## Deploy routers -Notice that `smp-server` and `xftp-server` requires `openssl` as run-time dependency (it is used to generate server certificates during initialization). Install it with your packet manager: +You can run SMP/XFTP routers on any Linux distribution. OpenSSL is required: ```sh -# For Ubuntu +# Ubuntu apt update && apt install openssl ``` -### Install binaries - -#### Using Docker - -On Linux, you can deploy smp and xftp server using Docker. This will download image from [Docker Hub](https://hub.docker.com/r/simplexchat). - -1. Create directories for persistent Docker configuration: - - ```sh - mkdir -p $HOME/simplex/{xftp,smp}/{config,logs} && mkdir -p $HOME/simplex/xftp/files - ``` - -2. Run your Docker container. - - - `smp-server` - - You must change **your_ip_or_domain**. `-e "pass=password"` is optional variable to password-protect your `smp` server: - ```sh - docker run -d \ - -e "ADDR=your_ip_or_domain" \ - -e "PASS=password" \ - -p 5223:5223 \ - -v $HOME/simplex/smp/config:/etc/opt/simplex:z \ - -v $HOME/simplex/smp/logs:/var/opt/simplex:z \ - simplexchat/smp-server:latest - ``` - - - `xftp-server` - - You must change **your_ip_or_domain** and **maximum_storage**. 
- ```sh - docker run -d \ - -e "ADDR=your_ip_or_domain" \ - -e "QUOTA=maximum_storage" \ - -p 443:443 \ - -v $HOME/simplex/xftp/config:/etc/opt/simplex-xftp:z \ - -v $HOME/simplex/xftp/logs:/var/opt/simplex-xftp:z \ - -v $HOME/simplex/xftp/files:/srv/xftp:z \ - simplexchat/xftp-server:latest - ``` - -#### Using installation script - -**Please note** that currently, only Ubuntu distribution is supported. - -You can install and setup servers automatically using our script: - -```sh -curl --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/simplex-chat/simplexmq/stable/install.sh -o simplex-server-install.sh &&\ -if echo '53fcdb4ceab324316e2c4cda7e84dbbb344f32550a65975a7895425e5a1be757 simplex-server-install.sh' | sha256sum -c; then - chmod +x ./simplex-server-install.sh - ./simplex-server-install.sh - rm ./simplex-server-install.sh -else - echo "SHA-256 checksum is incorrect!" - rm ./simplex-server-install.sh -fi -``` - -### Build from source - -#### Using Docker - -> **Please note:** to build the app use source code from [stable branch](https://github.com/simplex-chat/simplexmq/tree/stable). - -On Linux, you can build smp server using Docker. - -1. Build your images: - - ```sh - git clone https://github.com/simplex-chat/simplexmq - cd simplexmq - git checkout stable - DOCKER_BUILDKIT=1 docker build -t local/smp-server --build-arg APP="smp-server" --build-arg APP_PORT="5223" . # For xmp-server - DOCKER_BUILDKIT=1 docker build -t local/xftp-server --build-arg APP="xftp-server" --build-arg APP_PORT="443" . # For xftp-server - ``` - -2. Create directories for persistent Docker configuration: - - ```sh - mkdir -p $HOME/simplex/{xftp,smp}/{config,logs} && mkdir -p $HOME/simplex/xftp/files - ``` - -3. Run your Docker container. - - - `smp-server` - - You must change **your_ip_or_domain**. 
`-e "pass=password"` is optional variable to password-protect your `smp` server: - ```sh - docker run -d \ - -e "ADDR=your_ip_or_domain" \ - -e "PASS=password" \ - -p 5223:5223 \ - -v $HOME/simplex/smp/config:/etc/opt/simplex:z \ - -v $HOME/simplex/smp/logs:/var/opt/simplex:z \ - simplexchat/smp-server:latest - ``` - - - `xftp-server` - - You must change **your_ip_or_domain** and **maximum_storage**. - ```sh - docker run -d \ - -e "ADDR=your_ip_or_domain" \ - -e "QUOTA=maximum_storage" \ - -p 443:443 \ - -v $HOME/simplex/xftp/config:/etc/opt/simplex-xftp:z \ - -v $HOME/simplex/xftp/logs:/var/opt/simplex-xftp:z \ - -v $HOME/simplex/xftp/files:/srv/xftp:z \ - simplexchat/xftp-server:latest - ``` - -#### Using your distribution - -1. Install dependencies and build tools (`GHC`, `cabal` and dev libs): - - ```sh - # On Ubuntu. Depending on your distribution, use your package manager to determine package names. - sudo apt-get update && apt-get install -y build-essential curl libffi-dev libffi7 libgmp3-dev libgmp10 libncurses-dev libncurses5 libtinfo5 pkg-config zlib1g-dev libnuma-dev libssl-dev - export BOOTSTRAP_HASKELL_GHC_VERSION=9.6.3 - export BOOTSTRAP_HASKELL_CABAL_VERSION=3.10.3.0 - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 sh - ghcup set ghc "${BOOTSTRAP_HASKELL_GHC_VERSION}" - ghcup set cabal "${BOOTSTRAP_HASKELL_CABAL_VERSION}" - source ~/.ghcup/env - ``` - -2. Build the project: - - ```sh - git clone https://github.com/simplex-chat/simplexmq - cd simplexmq - git checkout stable - cabal update - cabal build exe:smp-server exe:xftp-server - ``` - -3. List compiled binaries: - - `smp-server` - ```sh - cabal list-bin exe:smp-server - ``` - - `xftp-server` - ```sh - cabal list-bin exe:xftp-server - ``` - -- Initialize SMP server with `smp-server init [-l] -n ` or `smp-server init [-l] --ip ` - depending on how you initialize it, either FQDN or IP will be used for server's address. 
- -- Run `smp-server start` to start SMP server, or you can configure a service manager to run it as a service. - -- Optionally, `smp-server` can be setup for having an onion address in `tor` network. See: [`scripts/tor`](./scripts/tor/). In this case, the server address can have both public and onion hostname pointing to the same server, to allow two people connect when only one of them is using Tor. The server address would be: `smp://@,` - -See [this section](#smp-server) for more information. Run `smp-server -h` and `smp-server init -h` for explanation of commands and options. - -[Linode](https://cloud.linode.com/stackscripts/748014) - -## Deploy SMP server on Linode - -\* You can use free credit Linode offers when [creating a new account](https://www.linode.com/) to deploy an SMP server. - -Deployment on Linode is performed via StackScripts, which serve as recipes for Linode instances, also called Linodes. To deploy SMP server on Linode: - -- Create a Linode account or login with an already existing one. -- Open [SMP server StackScript](https://cloud.linode.com/stackscripts/748014) and click "Deploy New Linode". -- You can optionally configure the following parameters: - - SMP Server store log flag for queue persistence on server restart, recommended. - - [Linode API token](https://www.linode.com/docs/guides/getting-started-with-the-linode-api#get-an-access-token) to attach server address etc. as tags to Linode and to add A record to your 2nd level domain (e.g. `example.com` [domain should be created](https://cloud.linode.com/domains/create) in your account prior to deployment). The API token access scopes: - - read/write for "linodes" - - read/write for "domains" - - Domain name to use instead of Linode IP address, e.g. `smp1.example.com`. -- Choose the region and plan, Shared CPU Nanode with 1Gb is sufficient. -- Provide ssh key to be able to connect to your Linode via ssh. 
If you haven't provided a Linode API token this step is required to login to your Linode and get the server's fingerprint either from the welcome message or from the file `/etc/opt/simplex/fingerprint` after server starts. See [Linode's guide on ssh](https://www.linode.com/docs/guides/use-public-key-authentication-with-ssh/) . -- Deploy your Linode. After it starts wait for SMP server to start and for tags to appear (if a Linode API token was provided). It may take up to 5 minutes depending on the connection speed on the Linode. Connecting Linode IP address to provided domain name may take some additional time. -- Get `address` and `fingerprint` either from Linode tags (click on a tag and copy it's value from the browser search panel) or via ssh. -- Great, your own SMP server is ready! If you provided FQDN use `smp://@` as SMP server address in the client, otherwise use `smp://@`. - -Please submit an [issue](https://github.com/simplex-chat/simplexmq/issues) if any problems occur. - -[DigitalOcean](https://marketplace.digitalocean.com/apps/simplex-server) - -## Deploy SMP server on DigitalOcean - -> 🚧 DigitalOcean snapshot is currently not up to date, it will soon be updated 🏗️ - -\* When creating a DigitalOcean account you can use [this link](https://try.digitalocean.com/freetrialoffer/) to get free credit. (You would still be required either to provide your credit card details or make a confirmation pre-payment with PayPal) - -To deploy SMP server use [SimpleX Server 1-click app](https://marketplace.digitalocean.com/apps/simplex-server) from DigitalOcean marketplace: - -- Create a DigitalOcean account or login with an already existing one. -- Click 'Create SimpleX server Droplet' button. -- Choose the region and plan according to your requirements (Basic plan should be sufficient). -- Finalize Droplet creation. -- Open "Console" on your Droplet management page to get SMP server fingerprint - either from the welcome message or from `/etc/opt/simplex/fingerprint`. 
Alternatively you can manually SSH to created Droplet, see [DigitalOcean instruction](https://docs.digitalocean.com/products/droplets/how-to/connect-with-ssh/). -- Great, your own SMP server is ready! Use `smp://@` as SMP server address in the client. - -Please submit an [issue](https://github.com/simplex-chat/simplexmq/issues) if any problems occur. - -> **Please note:** SMP server uses server address as a Common Name for server certificate generated during initialization. If you would like your server address to be FQDN instead of IP address, you can log in to your Droplet and run the commands below to re-initialize the server. Alternatively you can use [Linode StackScript](https://cloud.linode.com/stackscripts/748014) which allows this parameterization. - -```sh -smp-server delete -smp-server init [-l] -n -``` - -## SMP server design - -![SMP server design](./design/server.svg) - -## SMP agent design - -![SMP agent design](./design/agent2.svg) +See [docs/ROUTERS.md](./docs/ROUTERS.md) for Docker, binary installation, building from source, and cloud deployment (Linode, DigitalOcean). ## License diff --git a/docs/AGENT.md b/docs/AGENT.md new file mode 100644 index 0000000000..128edc76be --- /dev/null +++ b/docs/AGENT.md @@ -0,0 +1,75 @@ +# SimpleX Agent + +The SimpleX Agent builds bidirectional encrypted connections on top of [SimpleX client libraries](CLIENT.md). It manages the full lifecycle of secure communication: connection establishment, end-to-end encryption, queue rotation, file transfer, and push notifications. + +This is **Layer 3** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers; Layer 2 is the [client libraries](CLIENT.md) that speak the wire protocols. The Agent adds the connection semantics that applications need. 
+ +**Source**: [`Simplex.Messaging.Agent`](../src/Simplex/Messaging/Agent.hs) + +## Connections + +The Agent turns unidirectional SMP queues into bidirectional connections: + +- **Duplex connections**: each connection uses a pair of SMP queues — one for each direction. The queues can be on different routers chosen independently by each party. +- **Connection establishment**: one party creates a connection and generates an invitation (containing router address, queue ID, and public keys). The invitation is passed out-of-band (QR code, link, etc.). The other party joins by creating a reverse queue and completing the handshake. +- **Connection links**: the Agent supports connection links (long and short) for sharing connection invitations via URLs. Short links use a separate SMP queue to store the full invitation, allowing compact QR codes. +- **Queue rotation**: the Agent periodically rotates the underlying SMP queues, limiting the window for metadata correlation. Rotation is transparent to the application — the connection identity is stable while the underlying queues change. +- **Redundant queues**: connections can use multiple queues for reliability. If one router becomes unreachable, messages flow through the remaining queues. + +## Encryption + +The Agent provides end-to-end encryption with forward secrecy and break-in recovery: + +- **Double ratchet**: messages are encrypted using a double ratchet protocol derived from the Signal protocol. Each message uses a unique key; compromising one key does not reveal past or future messages. +- **Post-quantum extensions**: the ratchet supports hybrid key exchange using SNTRUP761 (a lattice-based KEM) combined with X25519 DH. This provides protection against future quantum computers that could break classical DH. +- **Ratchet synchronization**: if the ratchet state becomes desynchronized (e.g., due to message loss or device restore), the Agent detects this and can negotiate resynchronization with the peer. 
+- **Per-queue encryption**: in addition to end-to-end encryption, each queue has a separate encryption layer between sender and router, preventing traffic correlation even if TLS is compromised. + +## File Transfer + +The Agent handles file transfer over [XFTP](../protocol/xftp.md) routers: + +- **Chunking**: files are split into chunks, each stored as a data packet on an XFTP router. Chunk sizes are fixed powers of 2 (64KB to 4MB), hiding the actual file size. +- **Client-side encryption**: files are encrypted and padded before upload. The recipient decrypts after downloading all chunks. The encryption key and file metadata are sent through the SMP connection, not through XFTP. +- **Multi-router distribution**: chunks can be uploaded to different XFTP routers, and each chunk can have multiple replicas on different routers for redundancy. +- **Redirect chains**: for metadata privacy, file descriptors can be stored as XFTP data packets themselves, creating an indirection layer between the SMP message and the actual file location. + +## Notifications + +The Agent manages push notification subscriptions for mobile devices: + +- **Token registration**: registers device push tokens with NTF (notification) routers, which bridge to platform push services (APNS). +- **Notification subscriptions**: creates NTF subscriptions for SMP queues so that incoming messages trigger push notifications without requiring persistent connections. +- **Privacy preservation**: push notifications contain only a notification ID, not message content. The device wakes, connects to the SMP router, and retrieves the actual message. + +## Integration + +The Agent is designed to be embedded as a Haskell library: + +- **STM queues**: the application communicates with the Agent via STM queues. Commands go in (`ACommand`), events come out (`AEvent`). No serialization or parsing — direct Haskell values. +- **Async operation**: all network operations are asynchronous. 
The Agent manages internal worker threads for each router connection, message processing, and background tasks (cleanup, statistics, notification supervision). +- **Background mode**: on mobile platforms, the Agent can run in a reduced mode with only the message receiver active, minimizing resource usage when the app is backgrounded. +- **Dual database backends**: the Agent supports both SQLite (for mobile/desktop) and PostgreSQL (for server deployments) as persistence backends, selected at compile time. + +## Use cases + +- **Chat applications**: [SimpleX Chat](https://github.com/simplex-chat/simplex-chat) is the reference application, using the full Agent API for messaging, file sharing, groups, and calls. +- **Bots and automated services**: services that need bidirectional encrypted communication with SimpleX Chat users or other Agent-based applications. +- **Any application needing secure bidirectional communication** over the SimpleX Network without implementing the connection management, encryption, and queue rotation logic directly. 
+ +## What this layer adds over client libraries + +| Capability | Client (Layer 2) | Agent (Layer 3) | +|---|---|---| +| Queue operations | Direct | Managed transparently | +| Connection model | Unidirectional queues | Bidirectional connections | +| Encryption | Application's responsibility | Double ratchet with PQ extensions | +| File transfer | Raw data packet upload/download | Chunking, encryption, reassembly | +| Identity | Per-queue keys | Per-connection, rotatable | +| Notifications | Not available | NTF router integration | + +## Protocol references + +- [Agent Protocol](../protocol/agent-protocol.md) — duplex connection procedure, message format +- [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model +- [PQDR](../protocol/pqdr.md) — post-quantum double ratchet specification diff --git a/docs/CLIENT.md b/docs/CLIENT.md new file mode 100644 index 0000000000..6e2ca0c537 --- /dev/null +++ b/docs/CLIENT.md @@ -0,0 +1,75 @@ +# SimpleX Client Libraries + +SimpleX client libraries provide low-level protocol access to SimpleX routers. They implement the wire protocols ([SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md)) and handle connection lifecycle, but leave encryption, identity management, and connection orchestration to the application. + +This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers themselves; Layer 3 is the [Agent](AGENT.md), which builds bidirectional encrypted connections on top of these libraries. + +## SMP Client + +**Source**: [`Simplex.Messaging.Client`](../src/Simplex/Messaging/Client.hs) + +The SMP client connects to SMP routers and manages messaging queues — the fundamental addressing primitive of the SimpleX Network. Each queue is a unidirectional, ordered sequence of fixed-size packets (16,384 bytes) with separate cryptographic credentials for sending and receiving. 
+ +### Capabilities + +- **Queue management**: create, secure, subscribe to, and delete queues on any SMP router +- **Message sending and receiving**: send messages to a queue's sender address; receive messages from a queue's recipient address +- **Command authentication**: each queue operation is authenticated with per-queue cryptographic keys (Ed25519, Ed448, or X25519) +- **Keep-alive**: automatic ping loop detects and recovers from half-open connections +- **Proxy forwarding**: send messages through a proxy router via 2-hop onion routing (PRXY/PFWD/RFWD commands), protecting the sender's IP address from the destination router +- **Batched commands**: multiple commands can be sent in a single transmission for efficiency + +### API model + +The client uses a functional Haskell API with STM queues for asynchronous event delivery: + +- **Commands** are sent via `sendProtocolCommand` (single) or `sendBatch` (multiple). Each returns a result synchronously or via timeout. +- **Router events** (incoming messages, subscription notifications) arrive on `msgQ`, an STM `TBQueue` that the application reads from its own thread. +- **Connection lifecycle** is managed automatically: the client maintains send, receive, process, and monitor threads internally. When any thread fails, all are torn down and the `disconnected` callback fires. + +### Router identity + +Routers are identified by the SHA-256 hash of their CA certificate fingerprint, not by hostname. The client validates the full X.509 certificate chain on every TLS connection and compares the CA fingerprint against the expected hash from the queue address. This means a DNS or IP-level attacker who cannot produce the correct certificate is detected at connection time. + +## XFTP Client + +**Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs) + +The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. 
Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. + +### Capabilities + +- **Data packet creation**: create data packets on routers with sender, recipient, and optional additional recipient credentials +- **Upload**: send encrypted data in a single HTTP/2 streaming request (command + body) +- **Download**: retrieve data packets with per-download ephemeral Diffie-Hellman key exchange, providing forward secrecy — compromising one download key does not reveal other downloads +- **Acknowledgment and deletion**: recipients acknowledge receipt; senders delete data packets after delivery + +### Size selection + +`prepareChunkSizes` selects data packet sizes using a threshold algorithm: if the remaining payload exceeds 75% of the next larger size, it uses the larger size. This balances storage efficiency against the number of round trips. Single-chunk payloads (e.g., redirect descriptors) can use `singleChunkSize` to verify they fit in one data packet. + +## Use cases + +These libraries are appropriate when the application manages its own encryption and connection logic: + +- **IoT sensor data collection**: a sensor creates an SMP queue and sends readings; a collector subscribes and receives them. The queue address (router + queue ID + keys) is provisioned once, out-of-band. +- **Device control**: a controller sends commands to an actuator's queue. Separate queues for commands and telemetry provide unidirectional isolation. +- **Bulk data delivery**: an application encrypts and chunks a file, uploads data packets to XFTP routers, and shares the packet addresses with the recipient out-of-band. +- **Custom protocols**: any application that needs unidirectional, router-mediated packet delivery without the overhead of the Agent's connection management. 
+ +## What this layer does NOT provide + +The following capabilities require the [Agent](AGENT.md) (Layer 3): + +- **Bidirectional connections** — the Agent pairs two unidirectional queues into a duplex connection +- **End-to-end encryption** — the Agent manages double ratchet with post-quantum extensions +- **File transfer** — the Agent handles chunking, encryption, padding, multi-router upload, and reassembly +- **Queue rotation** — the Agent transparently rotates queues to limit metadata correlation +- **Connection discovery** — connection links, short links, and contact addresses are Agent-level abstractions +- **Push notifications** — notification token management and subscription is Agent-level + +## Protocol references + +- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP wire format, commands, and security properties +- [XFTP Protocol](../protocol/xftp.md) — XFTP wire format, data packet lifecycle +- [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model, and design rationale diff --git a/docs/ROUTERS.md b/docs/ROUTERS.md new file mode 100644 index 0000000000..3169d480eb --- /dev/null +++ b/docs/ROUTERS.md @@ -0,0 +1,194 @@ +# SimpleX Routers — Deployment and Configuration + +SimpleX routers are the network infrastructure of the [SimpleX Network](../protocol/overview-tjr.md). They accept, buffer, and deliver data packets between endpoints. Each router operates independently and can be run by any party on standard computing hardware. + +This document covers deployment and advanced configuration. For an overview of the router architecture and trust model, see the [SimpleX Network overview](../protocol/overview-tjr.md). + +## SMP Router + +The SMP router provides messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). 
+ +### Advanced configuration + +`smp-server.ini` is created during initialization and controls all runtime behavior. + +**Message persistence**: when store log is enabled (`enable: on`), the server saves undelivered messages on exit and restores them on start. This only works with SIGINT (keyboard interrupt); SIGTERM does not trigger message saving. The `restore_messages` setting can be used to override this behavior independently of the store log setting. + +**Tor onion addresses**: the server can have both a public hostname and an onion hostname, allowing two users to connect when only one is using Tor. Configure as: `smp://<fingerprint>@<public_hostname>,<onion_hostname>`. See [`scripts/tor/`](../scripts/tor/) for setup instructions. + +### Running on MacOS + +SMP server requires OpenSSL for initialization. MacOS may ship LibreSSL instead, which doesn't support the required algorithms. + +```sh +openssl version +``` + +If it says "LibreSSL", install OpenSSL: + +```sh +brew update +brew install openssl +echo 'PATH="/opt/homebrew/opt/openssl@3/bin:$PATH"' >> ~/.zprofile +. ~/.zprofile +``` + +## XFTP Router + +The XFTP router provides data packet storage — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. + +Initialize with `xftp-server init` and configure storage quota in `xftp-server.ini`. + +## NTF Router + +The NTF router bridges SimpleX Network to platform push notification services (APNS). It implements the [Push Notifications protocol](../protocol/push-notifications.md). Mobile clients register push tokens with the NTF router, which subscribes to their SMP queues and sends push notifications when messages arrive. The push notification contains only a notification ID, not message content. + +Initialize with `ntf-server init` and configure APNS credentials in `ntf-server.ini`. 
+ +## Deployment methods + +All routers require `openssl` as a runtime dependency for certificate generation during initialization: + +```sh +# Ubuntu +apt update && apt install openssl +``` + +### Docker (prebuilt images) + +Prebuilt images are available from [Docker Hub](https://hub.docker.com/r/simplexchat). + +1. Create directories for persistent configuration: + + ```sh + mkdir -p $HOME/simplex/{xftp,smp}/{config,logs} && mkdir -p $HOME/simplex/xftp/files + ``` + +2. Run: + + **SMP router** — change `your_ip_or_domain`; `-e "PASS=password"` is optional: + ```sh + docker run -d \ + -e "ADDR=your_ip_or_domain" \ + -e "PASS=password" \ + -p 5223:5223 \ + -v $HOME/simplex/smp/config:/etc/opt/simplex:z \ + -v $HOME/simplex/smp/logs:/var/opt/simplex:z \ + simplexchat/smp-server:latest + ``` + + **XFTP router** — change `your_ip_or_domain` and `maximum_storage`: + ```sh + docker run -d \ + -e "ADDR=your_ip_or_domain" \ + -e "QUOTA=maximum_storage" \ + -p 443:443 \ + -v $HOME/simplex/xftp/config:/etc/opt/simplex-xftp:z \ + -v $HOME/simplex/xftp/logs:/var/opt/simplex-xftp:z \ + -v $HOME/simplex/xftp/files:/srv/xftp:z \ + simplexchat/xftp-server:latest + ``` + +### Installation script (Ubuntu) + +```sh +curl --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/simplex-chat/simplexmq/stable/install.sh -o simplex-server-install.sh &&\ +if echo '53fcdb4ceab324316e2c4cda7e84dbbb344f32550a65975a7895425e5a1be757 simplex-server-install.sh' | sha256sum -c; then + chmod +x ./simplex-server-install.sh + ./simplex-server-install.sh + rm ./simplex-server-install.sh +else + echo "SHA-256 checksum is incorrect!" 
+ rm ./simplex-server-install.sh +fi +``` + +### Build from source + +#### Using Docker + +Build from the [stable branch](https://github.com/simplex-chat/simplexmq/tree/stable): + +```sh +git clone https://github.com/simplex-chat/simplexmq +cd simplexmq +git checkout stable +DOCKER_BUILDKIT=1 docker build -t local/smp-server --build-arg APP="smp-server" --build-arg APP_PORT="5223" . +DOCKER_BUILDKIT=1 docker build -t local/xftp-server --build-arg APP="xftp-server" --build-arg APP_PORT="443" . +``` + +Then run with the same Docker commands as above, replacing `simplexchat/smp-server:latest` with `local/smp-server` (and similarly for XFTP). + +#### Native build + +1. Install dependencies: + + ```sh + # Ubuntu + sudo apt-get update && sudo apt-get install -y build-essential curl libffi-dev libffi7 libgmp3-dev libgmp10 libncurses-dev libncurses5 libtinfo5 pkg-config zlib1g-dev libnuma-dev libssl-dev + export BOOTSTRAP_HASKELL_GHC_VERSION=9.6.3 + export BOOTSTRAP_HASKELL_CABAL_VERSION=3.10.3.0 + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 sh + ghcup set ghc "${BOOTSTRAP_HASKELL_GHC_VERSION}" + ghcup set cabal "${BOOTSTRAP_HASKELL_CABAL_VERSION}" + source ~/.ghcup/env + ``` + +2. Build: + + ```sh + git clone https://github.com/simplex-chat/simplexmq + cd simplexmq + git checkout stable + cabal update + cabal build exe:smp-server exe:xftp-server + ``` + +3. Find binaries: + + ```sh + cabal list-bin exe:smp-server + cabal list-bin exe:xftp-server + ``` + +4. Initialize and run: + + ```sh + smp-server init [-l] -n <fqdn> # or --ip <ip> + smp-server start + ``` + +### Linode StackScript + +[Deploy via Linode StackScript](https://cloud.linode.com/stackscripts/748014) — Shared CPU Nanode with 1GB is sufficient. 
+ +Configuration options: +- SMP Server store log flag for queue persistence (recommended) +- [Linode API token](https://www.linode.com/docs/guides/getting-started-with-the-linode-api#get-an-access-token) for automatic DNS and tagging (scopes: read/write for "linodes" and "domains") +- Domain name (e.g., `smp1.example.com`) — the [domain must exist](https://cloud.linode.com/domains/create) in your Linode account + +After deployment (up to 5 minutes), get the server address from Linode tags or SSH: `smp://<fingerprint>@<hostname>`. + +### DigitalOcean 1-click + +[SimpleX Server 1-click app](https://marketplace.digitalocean.com/apps/simplex-server) from DigitalOcean marketplace. + +After deployment, get the fingerprint from the Droplet console (`/etc/opt/simplex/fingerprint`). Server address: `smp://<fingerprint>@<ip_address>`. + +To use FQDN instead of IP: + +```sh +smp-server delete +smp-server init [-l] -n <fqdn> +``` + +## Monitoring + +SMP and XFTP routers expose Prometheus metrics via a control port. The control port also supports commands for runtime inspection (queue counts, client counts, statistics). See module specs for details on available metrics and control commands. 
+ +## Protocol references + +- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP wire format and security properties +- [XFTP Protocol](../protocol/xftp.md) — data packet protocol +- [Push Notifications Protocol](../protocol/push-notifications.md) — NTF protocol +- [SimpleX Network overview](../protocol/overview-tjr.md) — architecture and trust model From 13e8b7b41189a758921464cca06cb9a4b47a6ddc Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 22:48:09 +0000 Subject: [PATCH 49/91] links --- README.md | 26 ++++++++++----------- docs/AGENT.md | 62 +++++++++++++++++++++++++++++++------------------ docs/CLIENT.md | 42 +++++++++++++++++++-------------- docs/ROUTERS.md | 35 ++++++++++++++++++++++++---- 4 files changed, 106 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 8d6333f3a4..4e3327c186 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,16 @@ -# SimpleXMQ +# SimpleX Network [![GitHub build](https://github.com/simplex-chat/simplexmq/actions/workflows/build.yml/badge.svg)](https://github.com/simplex-chat/simplexmq/actions/workflows/build.yml) [![GitHub release](https://img.shields.io/github/v/release/simplex-chat/simplexmq)](https://github.com/simplex-chat/simplexmq/releases) -## SimpleX Network software - -SimpleXMQ provides the software for [SimpleX Network](./protocol/overview-tjr.md) — a general-purpose packet routing network where endpoints exchange data through independently operated routers using resource-based addressing. Unlike IP networks, SimpleX addresses identify resources on routers (queues, data packets), not endpoint devices. Participants do not need globally unique identifiers to communicate. +The simplexmq package provides the software for [SimpleX Network](./protocol/overview-tjr.md) — a general-purpose packet routing network where endpoints exchange data through independently operated routers using resource-based addressing. 
Unlike IP networks, SimpleX addresses identify resources on routers (queues, data packets), not endpoint devices. Participants do not need globally unique identifiers to communicate. The software is organized in three layers: ``` Application (e.g. SimpleX Chat) +----------------------------------+ -| SimpleX Agent | Layer 3 — bidirectional connections, e2e encryption +| SimpleX Agent | Layer 3 — duplex connections, e2e encryption +----------------------------------+ | SimpleX Client Libraries | Layer 2 — protocol clients for SMP, XFTP +----------------------------------+ @@ -22,9 +20,9 @@ The software is organized in three layers: [SimpleX Chat](https://github.com/simplex-chat/simplex-chat) is one application built on Layer 3. IoT devices, AI services, monitoring systems, and automated services are other applications that can use Layers 2 or 3 directly. -SimpleXMQ is implemented in Haskell, benefiting from robust software transactional memory (STM) and concurrency primitives. +The simplexmq package is implemented in Haskell, benefiting from robust software transactional memory (STM) and concurrency primitives. -See the [SimpleX Network overview](./protocol/overview-tjr.md) for the full protocol architecture, trust model, and security analysis. +See the [SimpleX Network overview](./protocol/overview-tjr.md) for the full protocol architecture, trust model, and [security analysis](./protocol/security.md). ## Architecture @@ -32,9 +30,9 @@ See the [SimpleX Network overview](./protocol/overview-tjr.md) for the full prot Routers are the network infrastructure — they accept, buffer, and deliver packets. Three router types serve different purposes: -- **SMP routers** provide messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes). Protocol: [SMP](./protocol/simplex-messaging.md). -- **XFTP routers** provide data packet storage — individually addressed blocks in fixed sizes (64KB–4MB) for larger payloads. 
Protocol: [XFTP](./protocol/xftp.md). -- **NTF routers** bridge to platform push services (APNS) for mobile notification delivery. Protocol: [Push Notifications](./protocol/push-notifications.md). +- **SMP routers** provide messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes). Protocol: [SMP](./protocol/simplex-messaging.md). Module spec: [`Simplex.Messaging.Server`](./spec/modules/Simplex/Messaging/Server.md). +- **XFTP routers** accept and deliver data packets — individually addressed blocks in fixed sizes (64KB–4MB) for larger payloads. Protocol: [XFTP](./protocol/xftp.md). Module spec: [`Simplex.FileTransfer.Server`](./spec/modules/Simplex/FileTransfer/Server.md). +- **NTF routers** bridge to platform push services (APNS) for mobile notification delivery. Protocol: [Push Notifications](./protocol/push-notifications.md). Module spec: [`Simplex.Messaging.Notifications.Server`](./spec/modules/Simplex/Messaging/Notifications/Server.md). #### Running an SMP router @@ -54,21 +52,21 @@ See [docs/ROUTERS.md](./docs/ROUTERS.md) for XFTP/NTF router setup, advanced con [Client libraries](./docs/CLIENT.md) provide low-level protocol access to SimpleX routers. They implement the wire protocols (SMP, XFTP) and handle connection lifecycle, command authentication, and keep-alive. -The [SMP client](./src/Simplex/Messaging/Client.hs) offers a functional Haskell API with STM queues for asynchronous event delivery. The [XFTP client](./src/Simplex/FileTransfer/Client.hs) handles data packet upload/download with per-download forward secrecy. +The [SMP client](./src/Simplex/Messaging/Client.hs) ([module spec](./spec/modules/Simplex/Messaging/Client.md)) offers a functional Haskell API with STM queues for asynchronous event delivery. The [XFTP client](./src/Simplex/FileTransfer/Client.hs) ([module spec](./spec/modules/Simplex/FileTransfer/Client.md)) sends and receives data packets with per-request forward secrecy. 
Applications that manage their own encryption and connection logic — IoT devices, sensors, simple data pipelines — can use this layer directly. See [docs/CLIENT.md](./docs/CLIENT.md). ### SimpleX Agent -The [Agent](./docs/AGENT.md) builds bidirectional encrypted connections on top of the client libraries. It manages: +The [Agent](./docs/AGENT.md) builds duplex encrypted connections on top of the client libraries. It manages: -- Duplex connections from unidirectional queue pairs +- Duplex connections from simplex queue pairs - End-to-end encryption with double ratchet and post-quantum extensions - File transfer with chunking, encryption, and multi-router distribution - Queue rotation for metadata privacy - Push notification subscriptions -The [Agent library](./src/Simplex/Messaging/Agent.hs) communicates via STM queues using the [ACommand](./src/Simplex/Messaging/Agent/Protocol.hs) type — no serialization needed. +The [Agent library](./src/Simplex/Messaging/Agent.hs) ([module spec](./spec/modules/Simplex/Messaging/Agent.md)) communicates via STM queues using the [ACommand](./src/Simplex/Messaging/Agent/Protocol.hs) type — no serialization needed. The Agent implements the [Agent protocol](./protocol/agent-protocol.md) for duplex connections and uses the [PQDR protocol](./protocol/pqdr.md) for end-to-end encryption. Cross-device remote control uses the [XRCP protocol](./protocol/xrcp.md). See [docs/AGENT.md](./docs/AGENT.md). diff --git a/docs/AGENT.md b/docs/AGENT.md index 128edc76be..8079736e86 100644 --- a/docs/AGENT.md +++ b/docs/AGENT.md @@ -1,16 +1,16 @@ # SimpleX Agent -The SimpleX Agent builds bidirectional encrypted connections on top of [SimpleX client libraries](CLIENT.md). It manages the full lifecycle of secure communication: connection establishment, end-to-end encryption, queue rotation, file transfer, and push notifications. +The SimpleX Agent builds duplex encrypted connections on top of [SimpleX client libraries](CLIENT.md). 
It manages the full lifecycle of secure communication: connection establishment, end-to-end encryption, queue rotation, file transfer, and push notifications. This is **Layer 3** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers; Layer 2 is the [client libraries](CLIENT.md) that speak the wire protocols. The Agent adds the connection semantics that applications need. -**Source**: [`Simplex.Messaging.Agent`](../src/Simplex/Messaging/Agent.hs) +**Source**: [`Simplex.Messaging.Agent`](../src/Simplex/Messaging/Agent.hs) — **Module spec**: [`spec/modules/Simplex/Messaging/Agent.md`](../spec/modules/Simplex/Messaging/Agent.md) ## Connections -The Agent turns unidirectional SMP queues into bidirectional connections: +The Agent turns simplex (unidirectional) SMP queues into duplex connections, implementing the [Agent protocol](../protocol/agent-protocol.md): -- **Duplex connections**: each connection uses a pair of SMP queues — one for each direction. The queues can be on different routers chosen independently by each party. +- **Duplex connections**: each connection uses a pair of SMP queues — one for each direction. The queues can be on different routers chosen independently by each party. See the [duplex connection procedure](../protocol/agent-protocol.md) for the full handshake. - **Connection establishment**: one party creates a connection and generates an invitation (containing router address, queue ID, and public keys). The invitation is passed out-of-band (QR code, link, etc.). The other party joins by creating a reverse queue and completing the handshake. - **Connection links**: the Agent supports connection links (long and short) for sharing connection invitations via URLs. Short links use a separate SMP queue to store the full invitation, allowing compact QR codes. - **Queue rotation**: the Agent periodically rotates the underlying SMP queues, limiting the window for metadata correlation. 
Rotation is transparent to the application — the connection identity is stable while the underlying queues change. @@ -18,53 +18,53 @@ The Agent turns unidirectional SMP queues into bidirectional connections: ## Encryption -The Agent provides end-to-end encryption with forward secrecy and break-in recovery: +The Agent provides end-to-end encryption with forward secrecy and break-in recovery, specified in the [Post-Quantum Double Ratchet protocol](../protocol/pqdr.md): -- **Double ratchet**: messages are encrypted using a double ratchet protocol derived from the Signal protocol. Each message uses a unique key; compromising one key does not reveal past or future messages. -- **Post-quantum extensions**: the ratchet supports hybrid key exchange using SNTRUP761 (a lattice-based KEM) combined with X25519 DH. This provides protection against future quantum computers that could break classical DH. +- **Double ratchet**: messages are encrypted using a double ratchet protocol derived from the Signal protocol. Each message uses a unique key; compromising one key does not reveal past or future messages. See the [PQDR specification](../protocol/pqdr.md) for the full ratchet state machine. +- **Post-quantum extensions**: the ratchet supports hybrid key exchange using SNTRUP761 (a lattice-based KEM) combined with X25519 DH. This provides protection against future quantum computers that could break classical DH. See the [SNTRUP761 module spec](../spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md) and [Ratchet module spec](../spec/modules/Simplex/Messaging/Crypto/Ratchet.md) for implementation details. - **Ratchet synchronization**: if the ratchet state becomes desynchronized (e.g., due to message loss or device restore), the Agent detects this and can negotiate resynchronization with the peer. 
-- **Per-queue encryption**: in addition to end-to-end encryption, each queue has a separate encryption layer between sender and router, preventing traffic correlation even if TLS is compromised. +- **Per-queue encryption**: in addition to end-to-end encryption, each queue has a separate encryption layer between sender and router, preventing traffic correlation even if TLS is compromised. See the [SMP protocol security model](../protocol/simplex-messaging.md). ## File Transfer -The Agent handles file transfer over [XFTP](../protocol/xftp.md) routers: +The Agent handles file transfer over [XFTP](../protocol/xftp.md) routers. File transfer orchestration is implemented in the [XFTP Agent module](../spec/modules/Simplex/FileTransfer/Agent.md): -- **Chunking**: files are split into chunks, each stored as a data packet on an XFTP router. Chunk sizes are fixed powers of 2 (64KB to 4MB), hiding the actual file size. -- **Client-side encryption**: files are encrypted and padded before upload. The recipient decrypts after downloading all chunks. The encryption key and file metadata are sent through the SMP connection, not through XFTP. -- **Multi-router distribution**: chunks can be uploaded to different XFTP routers, and each chunk can have multiple replicas on different routers for redundancy. -- **Redirect chains**: for metadata privacy, file descriptors can be stored as XFTP data packets themselves, creating an indirection layer between the SMP message and the actual file location. +- **Chunking**: files are split into chunks, each sent as a data packet to an XFTP router. Chunk sizes are fixed powers of 2 (64KB to 4MB), hiding the actual file size. See the [file description module spec](../spec/modules/Simplex/FileTransfer/Description.md) for chunk size selection and file descriptor format. +- **Client-side encryption**: files are encrypted and padded before being sent to XFTP routers. The recipient decrypts after receiving all chunks. 
The encryption key and file metadata are sent through the SMP connection, not through XFTP. See the [file crypto module spec](../spec/modules/Simplex/FileTransfer/Crypto.md). +- **Multi-router distribution**: chunks can be sent to different XFTP routers, and each chunk can have multiple replicas on different routers for redundancy. +- **Redirect chains**: for metadata privacy, file descriptors can be sent as XFTP data packets themselves, creating an indirection layer between the SMP message and the actual file location. ## Notifications -The Agent manages push notification subscriptions for mobile devices: +The Agent manages push notification subscriptions for mobile devices, using the [Push Notifications protocol](../protocol/push-notifications.md). Notification supervision is handled by the [NtfSubSupervisor](../spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md): -- **Token registration**: registers device push tokens with NTF (notification) routers, which bridge to platform push services (APNS). +- **Token registration**: registers device push tokens with NTF (notification) routers, which bridge to platform push services (APNS). See the [NTF client module spec](../spec/modules/Simplex/Messaging/Notifications/Client.md). - **Notification subscriptions**: creates NTF subscriptions for SMP queues so that incoming messages trigger push notifications without requiring persistent connections. -- **Privacy preservation**: push notifications contain only a notification ID, not message content. The device wakes, connects to the SMP router, and retrieves the actual message. +- **Privacy preservation**: push notifications contain only a notification ID, not message content. The device wakes, connects to the SMP router, and retrieves the actual message. See the [Push Notifications protocol](../protocol/push-notifications.md) for the full flow.
## Integration The Agent is designed to be embedded as a Haskell library: -- **STM queues**: the application communicates with the Agent via STM queues. Commands go in (`ACommand`), events come out (`AEvent`). No serialization or parsing — direct Haskell values. -- **Async operation**: all network operations are asynchronous. The Agent manages internal worker threads for each router connection, message processing, and background tasks (cleanup, statistics, notification supervision). +- **STM queues**: the application communicates with the Agent via STM queues. Commands go in (`ACommand`), events come out (`AEvent`). No serialization or parsing — direct Haskell values. The command/event types are defined in the [Agent Protocol module](../spec/modules/Simplex/Messaging/Agent/Protocol.md). +- **Async operation**: all network operations are asynchronous. The Agent manages internal worker threads for each router connection, message processing, and background tasks (cleanup, statistics, notification supervision). See the [Agent Client module spec](../spec/modules/Simplex/Messaging/Agent/Client.md) for worker architecture. - **Background mode**: on mobile platforms, the Agent can run in a reduced mode with only the message receiver active, minimizing resource usage when the app is backgrounded. -- **Dual database backends**: the Agent supports both SQLite (for mobile/desktop) and PostgreSQL (for server deployments) as persistence backends, selected at compile time. +- **Dual database backends**: the Agent supports both SQLite (for mobile/desktop) and PostgreSQL (for server deployments) as persistence backends, selected at compile time. See [Agent Store Interface](../spec/modules/Simplex/Messaging/Agent/Store/Interface.md) and [Agent Store Postgres](../spec/modules/Simplex/Messaging/Agent/Store/Postgres.md). 
## Use cases - **Chat applications**: [SimpleX Chat](https://github.com/simplex-chat/simplex-chat) is the reference application, using the full Agent API for messaging, file sharing, groups, and calls. -- **Bots and automated services**: services that need bidirectional encrypted communication with SimpleX Chat users or other Agent-based applications. -- **Any application needing secure bidirectional communication** over the SimpleX Network without implementing the connection management, encryption, and queue rotation logic directly. +- **Bots and automated services**: services that need duplex encrypted communication with SimpleX Chat users or other Agent-based applications. +- **Any application needing secure duplex communication** over the SimpleX Network without implementing the connection management, encryption, and queue rotation logic directly. ## What this layer adds over client libraries | Capability | Client (Layer 2) | Agent (Layer 3) | |---|---|---| | Queue operations | Direct | Managed transparently | -| Connection model | Unidirectional queues | Bidirectional connections | +| Connection model | Simplex (unidirectional) queues | Duplex connections | | Encryption | Application's responsibility | Double ratchet with PQ extensions | -| File transfer | Raw data packet upload/download | Chunking, encryption, reassembly | +| File transfer | Raw data packet send/receive | Chunking, encryption, reassembly | | Identity | Per-queue keys | Per-connection, rotatable | | Notifications | Not available | NTF router integration | @@ -73,3 +73,19 @@ The Agent is designed to be embedded as a Haskell library: - [Agent Protocol](../protocol/agent-protocol.md) — duplex connection procedure, message format - [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model - [PQDR](../protocol/pqdr.md) — post-quantum double ratchet specification +- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP queue operations used by the Agent +- 
[XFTP Protocol](../protocol/xftp.md) — data packet operations for file transfer +- [Push Notifications Protocol](../protocol/push-notifications.md) — NTF token and subscription management +- [XRCP Protocol](../protocol/xrcp.md) — remote control protocol for cross-device Agent access + +## Module specs + +- [Agent](../spec/modules/Simplex/Messaging/Agent.md) — main Agent module, connection lifecycle, message processing +- [Agent Client](../spec/modules/Simplex/Messaging/Agent/Client.md) — worker threads, router connections, subscription management +- [Agent Protocol](../spec/modules/Simplex/Messaging/Agent/Protocol.md) — ACommand/AEvent types, connection invitations +- [Agent Store Interface](../spec/modules/Simplex/Messaging/Agent/Store/Interface.md) — database abstraction for SQLite/Postgres +- [Agent Store (AgentStore)](../spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md) — connection, queue, and message persistence +- [NtfSubSupervisor](../spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) — notification subscription management +- [XFTP Agent](../spec/modules/Simplex/FileTransfer/Agent.md) — file transfer orchestration +- [Ratchet](../spec/modules/Simplex/Messaging/Crypto/Ratchet.md) — double ratchet implementation +- [SNTRUP761](../spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md) — post-quantum KEM diff --git a/docs/CLIENT.md b/docs/CLIENT.md index 6e2ca0c537..8534e4f1af 100644 --- a/docs/CLIENT.md +++ b/docs/CLIENT.md @@ -2,21 +2,21 @@ SimpleX client libraries provide low-level protocol access to SimpleX routers. They implement the wire protocols ([SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md)) and handle connection lifecycle, but leave encryption, identity management, and connection orchestration to the application. -This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-tjr.md). 
Layer 1 is the routers themselves; Layer 3 is the [Agent](AGENT.md), which builds bidirectional encrypted connections on top of these libraries. +This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers themselves; Layer 3 is the [Agent](AGENT.md), which builds duplex encrypted connections on top of these libraries. ## SMP Client -**Source**: [`Simplex.Messaging.Client`](../src/Simplex/Messaging/Client.hs) +**Source**: [`Simplex.Messaging.Client`](../src/Simplex/Messaging/Client.hs) — **Module spec**: [`spec/modules/Simplex/Messaging/Client.md`](../spec/modules/Simplex/Messaging/Client.md) -The SMP client connects to SMP routers and manages messaging queues — the fundamental addressing primitive of the SimpleX Network. Each queue is a unidirectional, ordered sequence of fixed-size packets (16,384 bytes) with separate cryptographic credentials for sending and receiving. +The SMP client connects to SMP routers and manages simplex messaging queues — the fundamental addressing primitive of the SimpleX Network. Each simplex queue is a unidirectional, ordered sequence of fixed-size packets (16,384 bytes) with separate cryptographic credentials for sending and receiving. The queue model and command set are defined in the [SMP protocol](../protocol/simplex-messaging.md). ### Capabilities -- **Queue management**: create, secure, subscribe to, and delete queues on any SMP router +- **Queue management**: create, secure, subscribe to, and delete queues on any SMP router. Queue operations use the [SMP command set](../protocol/simplex-messaging.md) (NEW, KEY, SUB, DEL, etc.). 
- **Message sending and receiving**: send messages to a queue's sender address; receive messages from a queue's recipient address -- **Command authentication**: each queue operation is authenticated with per-queue cryptographic keys (Ed25519, Ed448, or X25519) +- **Command authentication**: each queue operation is authenticated with per-queue cryptographic keys (Ed25519, Ed448, or X25519). See the [SMP protocol security model](../protocol/simplex-messaging.md) for key roles. - **Keep-alive**: automatic ping loop detects and recovers from half-open connections -- **Proxy forwarding**: send messages through a proxy router via 2-hop onion routing (PRXY/PFWD/RFWD commands), protecting the sender's IP address from the destination router +- **Proxy forwarding**: send messages through a proxy router via 2-hop onion routing (PRXY/PFWD/RFWD commands), protecting the sender's IP address from the destination router. See [proxy forwarding details](../spec/modules/Simplex/Messaging/Client.md) in the module spec. - **Batched commands**: multiple commands can be sent in a single transmission for efficiency ### API model @@ -33,37 +33,33 @@ Routers are identified by the SHA-256 hash of their CA certificate fingerprint, ## XFTP Client -**Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs) +**Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs) — **Module spec**: [`spec/modules/Simplex/FileTransfer/Client.md`](../spec/modules/Simplex/FileTransfer/Client.md) -The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. +The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. 
The data packet lifecycle and command set are defined in the [XFTP protocol](../protocol/xftp.md). ### Capabilities -- **Data packet creation**: create data packets on routers with sender, recipient, and optional additional recipient credentials -- **Upload**: send encrypted data in a single HTTP/2 streaming request (command + body) -- **Download**: retrieve data packets with per-download ephemeral Diffie-Hellman key exchange, providing forward secrecy — compromising one download key does not reveal other downloads +- **Data packet creation**: create data packets on routers with sender, recipient, and optional additional recipient credentials. See the [XFTP protocol](../protocol/xftp.md) for credential roles and packet lifecycle. +- **Send** (FPUT): send encrypted data to the router in a single HTTP/2 streaming request (command + body) +- **Receive** (FGET): receive data packets with per-request ephemeral Diffie-Hellman key exchange, providing forward secrecy — compromising one DH key does not reveal other received data packets - **Acknowledgment and deletion**: recipients acknowledge receipt; senders delete data packets after delivery -### Size selection - -`prepareChunkSizes` selects data packet sizes using a threshold algorithm: if the remaining payload exceeds 75% of the next larger size, it uses the larger size. This balances storage efficiency against the number of round trips. Single-chunk payloads (e.g., redirect descriptors) can use `singleChunkSize` to verify they fit in one data packet. - ## Use cases These libraries are appropriate when the application manages its own encryption and connection logic: - **IoT sensor data collection**: a sensor creates an SMP queue and sends readings; a collector subscribes and receives them. The queue address (router + queue ID + keys) is provisioned once, out-of-band. - **Device control**: a controller sends commands to an actuator's queue. Separate queues for commands and telemetry provide unidirectional isolation. 
-- **Bulk data delivery**: an application encrypts and chunks a file, uploads data packets to XFTP routers, and shares the packet addresses with the recipient out-of-band. +- **Bulk data delivery**: an application encrypts and chunks a file, sends data packets to XFTP routers, and shares the packet addresses with the recipient out-of-band. - **Custom protocols**: any application that needs unidirectional, router-mediated packet delivery without the overhead of the Agent's connection management. ## What this layer does NOT provide The following capabilities require the [Agent](AGENT.md) (Layer 3): -- **Bidirectional connections** — the Agent pairs two unidirectional queues into a duplex connection +- **Duplex connections** — the Agent pairs two simplex queues into a duplex connection - **End-to-end encryption** — the Agent manages double ratchet with post-quantum extensions -- **File transfer** — the Agent handles chunking, encryption, padding, multi-router upload, and reassembly +- **File transfer** — the Agent handles chunking, encryption, padding, multi-router distribution, and reassembly - **Queue rotation** — the Agent transparently rotates queues to limit metadata correlation - **Connection discovery** — connection links, short links, and contact addresses are Agent-level abstractions - **Push notifications** — notification token management and subscription is Agent-level @@ -73,3 +69,13 @@ The following capabilities require the [Agent](AGENT.md) (Layer 3): - [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP wire format, commands, and security properties - [XFTP Protocol](../protocol/xftp.md) — XFTP wire format, data packet lifecycle - [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model, and design rationale + +## Module specs + +- [SMP Client](../spec/modules/Simplex/Messaging/Client.md) — proxy forwarding, batching, connection lifecycle, keepalive +- [XFTP 
Client](../spec/modules/Simplex/FileTransfer/Client.md) — handshake, data packet operations, forward secrecy +- [SMP Protocol types](../spec/modules/Simplex/Messaging/Protocol.md) — command types, queue addresses, message encoding +- [XFTP Protocol types](../spec/modules/Simplex/FileTransfer/Protocol.md) — data packet types, XFTP commands +- [Transport](../spec/modules/Simplex/Messaging/Transport.md) — TLS transport, session handshake +- [HTTP/2 Client](../spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md) — HTTP/2 transport layer +- [Crypto](../spec/modules/Simplex/Messaging/Crypto.md) — cryptographic primitives used by clients diff --git a/docs/ROUTERS.md b/docs/ROUTERS.md index 3169d480eb..77337a814d 100644 --- a/docs/ROUTERS.md +++ b/docs/ROUTERS.md @@ -6,7 +6,7 @@ This document covers deployment and advanced configuration. For an overview of t ## SMP Router -The SMP router provides messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). +The SMP router provides messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). **Module spec**: [`spec/modules/Simplex/Messaging/Server.md`](../spec/modules/Simplex/Messaging/Server.md). ### Advanced configuration @@ -35,13 +35,13 @@ echo 'PATH="/opt/homebrew/opt/openssl@3/bin:$PATH"' >> ~/.zprofile ## XFTP Router -The XFTP router provides data packet storage — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. +The XFTP router accepts and delivers data packets — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). 
Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. **Module spec**: [`spec/modules/Simplex/FileTransfer/Server.md`](../spec/modules/Simplex/FileTransfer/Server.md). Initialize with `xftp-server init` and configure storage quota in `xftp-server.ini`. ## NTF Router -The NTF router bridges SimpleX Network to platform push notification services (APNS). It implements the [Push Notifications protocol](../protocol/push-notifications.md). Mobile clients register push tokens with the NTF router, which subscribes to their SMP queues and sends push notifications when messages arrive. The push notification contains only a notification ID, not message content. +The NTF router bridges the SimpleX Network to platform push notification services (APNS). It implements the [Push Notifications protocol](../protocol/push-notifications.md). Mobile clients register push tokens with the NTF router, which subscribes to their SMP queues and sends push notifications when messages arrive. The push notification contains only a notification ID, not message content. **Module spec**: [`spec/modules/Simplex/Messaging/Notifications/Server.md`](../spec/modules/Simplex/Messaging/Notifications/Server.md). Initialize with `ntf-server init` and configure APNS credentials in `ntf-server.ini`. @@ -184,7 +184,7 @@ smp-server init [-l] -n ## Monitoring -SMP and XFTP routers expose Prometheus metrics via a control port. The control port also supports commands for runtime inspection (queue counts, client counts, statistics). See module specs for details on available metrics and control commands. +SMP and XFTP routers expose Prometheus metrics via a control port. The control port also supports commands for runtime inspection (queue counts, client counts, statistics).
See [SMP Server Prometheus](../spec/modules/Simplex/Messaging/Server/Prometheus.md), [SMP Server Control](../spec/modules/Simplex/Messaging/Server/Control.md), and [NTF Server Control](../spec/modules/Simplex/Messaging/Notifications/Server/Control.md) module specs for available metrics and control commands. ## Protocol references @@ -192,3 +192,30 @@ SMP and XFTP routers expose Prometheus metrics via a control port. The control p - [XFTP Protocol](../protocol/xftp.md) — data packet protocol - [Push Notifications Protocol](../protocol/push-notifications.md) — NTF protocol - [SimpleX Network overview](../protocol/overview-tjr.md) — architecture and trust model + +## Module specs + +### SMP Router +- [Server](../spec/modules/Simplex/Messaging/Server.md) — main server module, client handling, message routing +- [Server Main](../spec/modules/Simplex/Messaging/Server/Main.md) — server startup, initialization +- [QueueStore](../spec/modules/Simplex/Messaging/Server/QueueStore.md) — queue persistence abstraction +- [QueueStore Postgres](../spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md) — PostgreSQL queue store +- [MsgStore](../spec/modules/Simplex/Messaging/Server/MsgStore.md) — message storage abstraction +- [StoreLog](../spec/modules/Simplex/Messaging/Server/StoreLog.md) — append-only store log for queue persistence +- [Server Control](../spec/modules/Simplex/Messaging/Server/Control.md) — control port commands +- [Server Prometheus](../spec/modules/Simplex/Messaging/Server/Prometheus.md) — metrics export +- [Server Stats](../spec/modules/Simplex/Messaging/Server/Stats.md) — statistics collection + +### XFTP Router +- [Server](../spec/modules/Simplex/FileTransfer/Server.md) — main server module, data packet handling +- [Server Main](../spec/modules/Simplex/FileTransfer/Server/Main.md) — server startup +- [Server Store](../spec/modules/Simplex/FileTransfer/Server/Store.md) — data packet storage +- [Server 
StoreLog](../spec/modules/Simplex/FileTransfer/Server/StoreLog.md) — store log for packet persistence +- [Server Stats](../spec/modules/Simplex/FileTransfer/Server/Stats.md) — statistics + +### NTF Router +- [Server](../spec/modules/Simplex/Messaging/Notifications/Server.md) — main server module +- [Server Main](../spec/modules/Simplex/Messaging/Notifications/Server/Main.md) — server startup +- [Server Store Postgres](../spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) — PostgreSQL store for tokens and subscriptions +- [APNS Push](../spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) — Apple push notification delivery +- [Server Control](../spec/modules/Simplex/Messaging/Notifications/Server/Control.md) — control port commands From cbf32a33399a0a92f5908ebf135ca1b595aeb7b1 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 22:59:28 +0000 Subject: [PATCH 50/91] ntf --- README.md | 6 +++--- docs/CLIENT.md | 18 ++++++++++++++++-- docs/ROUTERS.md | 2 +- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4e3327c186..eada2d49ff 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ See the [SimpleX Network overview](./protocol/overview-tjr.md) for the full prot Routers are the network infrastructure — they accept, buffer, and deliver packets. Three router types serve different purposes: - **SMP routers** provide messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes). Protocol: [SMP](./protocol/simplex-messaging.md). Module spec: [`Simplex.Messaging.Server`](./spec/modules/Simplex/Messaging/Server.md). -- **XFTP routers** accept and deliver data packets — individually addressed blocks in fixed sizes (64KB–4MB) for larger payloads. Protocol: [XFTP](./protocol/xftp.md). Module spec: [`Simplex.FileTransfer.Server`](./spec/modules/Simplex/FileTransfer/Server.md). 
+- **XFTP routers** accept and deliver data packets over HTTP/2 — individually addressed blocks in fixed sizes (64KB–4MB) for larger payloads. Protocol: [XFTP](./protocol/xftp.md). Module spec: [`Simplex.FileTransfer.Server`](./spec/modules/Simplex/FileTransfer/Server.md). - **NTF routers** bridge to platform push services (APNS) for mobile notification delivery. Protocol: [Push Notifications](./protocol/push-notifications.md). Module spec: [`Simplex.Messaging.Notifications.Server`](./spec/modules/Simplex/Messaging/Notifications/Server.md). #### Running an SMP router @@ -50,9 +50,9 @@ See [docs/ROUTERS.md](./docs/ROUTERS.md) for XFTP/NTF router setup, advanced con ### SimpleX Client Libraries -[Client libraries](./docs/CLIENT.md) provide low-level protocol access to SimpleX routers. They implement the wire protocols (SMP, XFTP) and handle connection lifecycle, command authentication, and keep-alive. +[Client libraries](./docs/CLIENT.md) provide low-level protocol access to SimpleX routers. They implement the wire protocols (SMP, XFTP, NTF) and handle connection lifecycle, command authentication, and keep-alive. -The [SMP client](./src/Simplex/Messaging/Client.hs) ([module spec](./spec/modules/Simplex/Messaging/Client.md)) offers a functional Haskell API with STM queues for asynchronous event delivery. The [XFTP client](./src/Simplex/FileTransfer/Client.hs) ([module spec](./spec/modules/Simplex/FileTransfer/Client.md)) sends and receives data packets with per-request forward secrecy. +The [SMP client](./src/Simplex/Messaging/Client.hs) ([module spec](./spec/modules/Simplex/Messaging/Client.md)) offers a functional Haskell API with STM queues for asynchronous event delivery. The [XFTP client](./src/Simplex/FileTransfer/Client.hs) ([module spec](./spec/modules/Simplex/FileTransfer/Client.md)) sends and receives data packets over HTTP/2 with per-request forward secrecy. 
The [NTF client](./src/Simplex/Messaging/Notifications/Client.hs) ([module spec](./spec/modules/Simplex/Messaging/Notifications/Client.md)) manages push notification tokens and subscriptions. Applications that manage their own encryption and connection logic — IoT devices, sensors, simple data pipelines — can use this layer directly. See [docs/CLIENT.md](./docs/CLIENT.md). diff --git a/docs/CLIENT.md b/docs/CLIENT.md index 8534e4f1af..0f3f975283 100644 --- a/docs/CLIENT.md +++ b/docs/CLIENT.md @@ -1,6 +1,6 @@ # SimpleX Client Libraries -SimpleX client libraries provide low-level protocol access to SimpleX routers. They implement the wire protocols ([SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md)) and handle connection lifecycle, but leave encryption, identity management, and connection orchestration to the application. +SimpleX client libraries provide low-level protocol access to SimpleX routers. They implement the wire protocols ([SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md), [NTF](../protocol/push-notifications.md)) and handle connection lifecycle, but leave encryption, identity management, and connection orchestration to the application. This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers themselves; Layer 3 is the [Agent](AGENT.md), which builds duplex encrypted connections on top of these libraries. @@ -35,7 +35,7 @@ Routers are identified by the SHA-256 hash of their CA certificate fingerprint, **Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs) — **Module spec**: [`spec/modules/Simplex/FileTransfer/Client.md`](../spec/modules/Simplex/FileTransfer/Client.md) -The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. 
The data packet lifecycle and command set are defined in the [XFTP protocol](../protocol/xftp.md). +The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. The XFTP protocol runs over HTTP/2, simplifying browser integration. The data packet lifecycle and command set are defined in the [XFTP protocol](../protocol/xftp.md). ### Capabilities @@ -44,6 +44,18 @@ The XFTP client connects to XFTP routers and manages data packets — individual - **Receive** (FGET): receive data packets with per-request ephemeral Diffie-Hellman key exchange, providing forward secrecy — compromising one DH key does not reveal other received data packets - **Acknowledgment and deletion**: recipients acknowledge receipt; senders delete data packets after delivery +## NTF Client + +**Source**: [`Simplex.Messaging.Notifications.Client`](../src/Simplex/Messaging/Notifications/Client.hs) — **Module spec**: [`spec/modules/Simplex/Messaging/Notifications/Client.md`](../spec/modules/Simplex/Messaging/Notifications/Client.md) + +The NTF client connects to NTF (notification) routers and manages push notification tokens and subscriptions. It implements the [Push Notifications protocol](../protocol/push-notifications.md). 
+ +### Capabilities + +- **Token management**: register, verify, replace, and delete push notification tokens on NTF routers +- **Subscription management**: create, check, and delete notification subscriptions that link SMP queues to push tokens +- **Batch operations**: create or check multiple subscriptions in a single request, with per-item error handling for partial success + ## Use cases These libraries are appropriate when the application manages its own encryption and connection logic: @@ -74,8 +86,10 @@ The following capabilities require the [Agent](AGENT.md) (Layer 3): - [SMP Client](../spec/modules/Simplex/Messaging/Client.md) — proxy forwarding, batching, connection lifecycle, keepalive - [XFTP Client](../spec/modules/Simplex/FileTransfer/Client.md) — handshake, data packet operations, forward secrecy +- [NTF Client](../spec/modules/Simplex/Messaging/Notifications/Client.md) — token and subscription operations, batch commands - [SMP Protocol types](../spec/modules/Simplex/Messaging/Protocol.md) — command types, queue addresses, message encoding - [XFTP Protocol types](../spec/modules/Simplex/FileTransfer/Protocol.md) — data packet types, XFTP commands +- [NTF Protocol types](../spec/modules/Simplex/Messaging/Notifications/Protocol.md) — notification commands, token/subscription types - [Transport](../spec/modules/Simplex/Messaging/Transport.md) — TLS transport, session handshake - [HTTP/2 Client](../spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md) — HTTP/2 transport layer - [Crypto](../spec/modules/Simplex/Messaging/Crypto.md) — cryptographic primitives used by clients diff --git a/docs/ROUTERS.md b/docs/ROUTERS.md index 77337a814d..7ebc0f9eef 100644 --- a/docs/ROUTERS.md +++ b/docs/ROUTERS.md @@ -35,7 +35,7 @@ echo 'PATH="/opt/homebrew/opt/openssl@3/bin:$PATH"' >> ~/.zprofile ## XFTP Router -The XFTP router accepts and delivers data packets — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). 
It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. **Module spec**: [`spec/modules/Simplex/FileTransfer/Server.md`](../spec/modules/Simplex/FileTransfer/Server.md). +The XFTP router accepts and delivers data packets over HTTP/2 — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. The use of HTTP/2 simplifies browser integration. **Module spec**: [`spec/modules/Simplex/FileTransfer/Server.md`](../spec/modules/Simplex/FileTransfer/Server.md). Initialize with `xftp-server init` and configure storage quota in `xftp-server.ini`. From ca847b101a89cc08db8bc89660cd1e6ceda09e89 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Fri, 13 Mar 2026 23:04:00 +0000 Subject: [PATCH 51/91] update --- docs/AGENT.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/AGENT.md b/docs/AGENT.md index 8079736e86..e6fe32b08f 100644 --- a/docs/AGENT.md +++ b/docs/AGENT.md @@ -20,10 +20,10 @@ The Agent turns simplex (unidirectional) SMP queues into duplex connections, imp The Agent provides end-to-end encryption with forward secrecy and break-in recovery, specified in the [Post-Quantum Double Ratchet protocol](../protocol/pqdr.md): -- **Double ratchet**: messages are encrypted using a double ratchet protocol derived from the Signal protocol. Each message uses a unique key; compromising one key does not reveal past or future messages. See the [PQDR specification](../protocol/pqdr.md) for the full ratchet state machine. +- **Double ratchet**: messages are encrypted using a double ratchet protocol. Each message uses a unique key; compromising one key does not reveal past or future messages. 
See the [PQDR specification](../protocol/pqdr.md) for the full ratchet state machine. - **Post-quantum extensions**: the ratchet supports hybrid key exchange using SNTRUP761 (a lattice-based KEM) combined with X25519 DH. This provides protection against future quantum computers that could break classical DH. See the [SNTRUP761 module spec](../spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md) and [Ratchet module spec](../spec/modules/Simplex/Messaging/Crypto/Ratchet.md) for implementation details. - **Ratchet synchronization**: if the ratchet state becomes desynchronized (e.g., due to message loss or device restore), the Agent detects this and can negotiate resynchronization with the peer. -- **Per-queue encryption**: in addition to end-to-end encryption, each queue has a separate encryption layer between sender and router, preventing traffic correlation even if TLS is compromised. See the [SMP protocol security model](../protocol/simplex-messaging.md). +- **Per-queue encryption**: in addition to end-to-end encryption, the [SMP protocol](../protocol/simplex-messaging.md) provides a separate encryption layer on each queue between sender and router, preventing traffic correlation even if TLS is compromised. 
## File Transfer @@ -66,7 +66,7 @@ The Agent is designed to be embedded as a Haskell library: | Encryption | Application's responsibility | Double ratchet with PQ extensions | | File transfer | Raw data packet send/receive | Chunking, encryption, reassembly | | Identity | Per-queue keys | Per-connection, rotatable | -| Notifications | Not available | NTF router integration | +| Notifications | Direct NTF protocol operations | Automated subscription supervision | ## Protocol references @@ -76,7 +76,9 @@ The Agent is designed to be embedded as a Haskell library: - [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP queue operations used by the Agent - [XFTP Protocol](../protocol/xftp.md) — data packet operations for file transfer - [Push Notifications Protocol](../protocol/push-notifications.md) — NTF token and subscription management -- [XRCP Protocol](../protocol/xrcp.md) — remote control protocol for cross-device Agent access +## Peer library: Remote Control + +The Agent exposes the [XRCP protocol](../protocol/xrcp.md) API for cross-device remote control (e.g., controlling a mobile app from a desktop). The actual logic is in the standalone [`Simplex.RemoteControl.Client`](../src/Simplex/RemoteControl/Client.hs) library — the Agent provides thin wrappers that pass through its random and multicast state. XRCP is not a managed Agent capability (no workers, persistence, or background supervision). See the [RemoteControl module specs](../spec/modules/Simplex/RemoteControl/Types.md). 
## Module specs From a7c6dde39f9e145debe69b121f811c4003857ec6 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 09:07:26 +0000 Subject: [PATCH 52/91] router diagrams --- docs/ROUTERS.md | 37 +--- spec/diagrams/ntf-router.svg | 208 ++++++++++++++++++ spec/diagrams/smp-router.svg | 191 ++++++++++++++++ spec/diagrams/xftp-router.svg | 133 +++++++++++ spec/modules/Simplex/FileTransfer/Server.md | 2 + .../Simplex/Messaging/Notifications/Server.md | 2 + spec/modules/Simplex/Messaging/Server.md | 2 + spec/routers.md | 164 ++++++++++++++ 8 files changed, 707 insertions(+), 32 deletions(-) create mode 100644 spec/diagrams/ntf-router.svg create mode 100644 spec/diagrams/smp-router.svg create mode 100644 spec/diagrams/xftp-router.svg create mode 100644 spec/routers.md diff --git a/docs/ROUTERS.md b/docs/ROUTERS.md index 7ebc0f9eef..29d518d650 100644 --- a/docs/ROUTERS.md +++ b/docs/ROUTERS.md @@ -2,11 +2,11 @@ SimpleX routers are the network infrastructure of the [SimpleX Network](../protocol/overview-tjr.md). They accept, buffer, and deliver data packets between endpoints. Each router operates independently and can be run by any party on standard computing hardware. -This document covers deployment and advanced configuration. For an overview of the router architecture and trust model, see the [SimpleX Network overview](../protocol/overview-tjr.md). +This document covers deployment and advanced configuration. For an overview of the router architecture and trust model, see the [SimpleX Network overview](../protocol/overview-tjr.md). For internal architecture diagrams (thread topology, command processing flows), see [`spec/routers.md`](../spec/routers.md). ## SMP Router -The SMP router provides messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). 
**Module spec**: [`spec/modules/Simplex/Messaging/Server.md`](../spec/modules/Simplex/Messaging/Server.md). +The SMP router provides messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). For architecture and module specs, see [SMP Router](../spec/routers.md#smp-router). ### Advanced configuration @@ -35,13 +35,13 @@ echo 'PATH="/opt/homebrew/opt/openssl@3/bin:$PATH"' >> ~/.zprofile ## XFTP Router -The XFTP router accepts and delivers data packets over HTTP/2 — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. The use of HTTP/2 simplifies browser integration. **Module spec**: [`spec/modules/Simplex/FileTransfer/Server.md`](../spec/modules/Simplex/FileTransfer/Server.md). +The XFTP router accepts and delivers data packets over HTTP/2 — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. The use of HTTP/2 simplifies browser integration. For architecture and module specs, see [XFTP Router](../spec/routers.md#xftp-router). Initialize with `xftp-server init` and configure storage quota in `xftp-server.ini`. ## NTF Router -The NTF router bridges SimpleX Network to platform push notification services (APNS). It implements the [Push Notifications protocol](../protocol/push-notifications.md). Mobile clients register push tokens with the NTF router, which subscribes to their SMP queues and sends push notifications when messages arrive. The push notification contains only a notification ID, not message content. 
**Module spec**: [`spec/modules/Simplex/Messaging/Notifications/Server.md`](../spec/modules/Simplex/Messaging/Notifications/Server.md). +The NTF router bridges SimpleX Network to platform push notification services (APNS). It implements the [Push Notifications protocol](../protocol/push-notifications.md). Mobile clients register push tokens with the NTF router, which subscribes to their SMP queues and sends push notifications when messages arrive. The push notification contains only a notification ID, not message content. For architecture and module specs, see [NTF Router](../spec/routers.md#ntf-router). Initialize with `ntf-server init` and configure APNS credentials in `ntf-server.ini`. @@ -184,7 +184,7 @@ smp-server init [-l] -n ## Monitoring -SMP and XFTP routers expose Prometheus metrics via a control port. The control port also supports commands for runtime inspection (queue counts, client counts, statistics). See [SMP Server Prometheus](../spec/modules/Simplex/Messaging/Server/Prometheus.md), [SMP Server Control](../spec/modules/Simplex/Messaging/Server/Control.md), and [NTF Server Control](../spec/modules/Simplex/Messaging/Notifications/Server/Control.md) module specs for available metrics and control commands. +SMP and XFTP routers expose Prometheus metrics via a control port. The control port also supports commands for runtime inspection (queue counts, client counts, statistics). See module specs linked from each router section in [`spec/routers.md`](../spec/routers.md) (Control, Prometheus, Stats). ## Protocol references @@ -192,30 +192,3 @@ SMP and XFTP routers expose Prometheus metrics via a control port. 
The control p - [XFTP Protocol](../protocol/xftp.md) — data packet protocol - [Push Notifications Protocol](../protocol/push-notifications.md) — NTF protocol - [SimpleX Network overview](../protocol/overview-tjr.md) — architecture and trust model - -## Module specs - -### SMP Router -- [Server](../spec/modules/Simplex/Messaging/Server.md) — main server module, client handling, message routing -- [Server Main](../spec/modules/Simplex/Messaging/Server/Main.md) — server startup, initialization -- [QueueStore](../spec/modules/Simplex/Messaging/Server/QueueStore.md) — queue persistence abstraction -- [QueueStore Postgres](../spec/modules/Simplex/Messaging/Server/QueueStore/Postgres.md) — PostgreSQL queue store -- [MsgStore](../spec/modules/Simplex/Messaging/Server/MsgStore.md) — message storage abstraction -- [StoreLog](../spec/modules/Simplex/Messaging/Server/StoreLog.md) — append-only store log for queue persistence -- [Server Control](../spec/modules/Simplex/Messaging/Server/Control.md) — control port commands -- [Server Prometheus](../spec/modules/Simplex/Messaging/Server/Prometheus.md) — metrics export -- [Server Stats](../spec/modules/Simplex/Messaging/Server/Stats.md) — statistics collection - -### XFTP Router -- [Server](../spec/modules/Simplex/FileTransfer/Server.md) — main server module, data packet handling -- [Server Main](../spec/modules/Simplex/FileTransfer/Server/Main.md) — server startup -- [Server Store](../spec/modules/Simplex/FileTransfer/Server/Store.md) — data packet storage -- [Server StoreLog](../spec/modules/Simplex/FileTransfer/Server/StoreLog.md) — store log for packet persistence -- [Server Stats](../spec/modules/Simplex/FileTransfer/Server/Stats.md) — statistics - -### NTF Router -- [Server](../spec/modules/Simplex/Messaging/Notifications/Server.md) — main server module -- [Server Main](../spec/modules/Simplex/Messaging/Notifications/Server/Main.md) — server startup -- [Server Store 
Postgres](../spec/modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) — PostgreSQL store for tokens and subscriptions -- [APNS Push](../spec/modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) — Apple push notification delivery -- [Server Control](../spec/modules/Simplex/Messaging/Notifications/Server/Control.md) — control port commands diff --git a/spec/diagrams/ntf-router.svg b/spec/diagrams/ntf-router.svg new file mode 100644 index 0000000000..fb35fe804c --- /dev/null +++ b/spec/diagrams/ntf-router.svg @@ -0,0 +1,208 @@ + + + + + + + + + + + + NTF Router -- Component Topology + + + + per client (raceAny_) + + + net + + + + + receive + + + + rcvQ + + + + client + + + + sndQ + + + + send + + + net + + + TNEW, TVFY, TRPL, TDEL + SNEW, SCHK, SDEL + + + + store + + + + SMP Client Agent (connects to SMP routers) + + + SMP routers + + + + + SMPClientAgent + + + + msgQ + + + + agentQ + + + + ntfSubscriber/receiveSMP + + + + receiveAgent + + + race_ + + + + pushQ + + + + store + + + + runSMPSubscriber + (one per SMP router) + + + subscriberSubQ + + + + tokens / subscriptions / tokenLastNtfs + (in-memory TMap + PostgreSQL) + + + + push delivery pipeline + + + + pushQ + + + + ntfPush + + + + + APNS provider + + + + periodicNtfsThread + + + + pushQ + + + + reads + + + + optional + + + logServerStats + + + prometheus + + + controlPort + + + resubscribe + + + + + + per-client thread + + + singleton thread + + + storage + + + external connection + + + Solid arrows: TBQueue connections. Dashed: store access. 
+ + + diff --git a/spec/diagrams/smp-router.svg b/spec/diagrams/smp-router.svg new file mode 100644 index 0000000000..796bbf80fe --- /dev/null +++ b/spec/diagrams/smp-router.svg @@ -0,0 +1,191 @@ + + + + + + + + + + + + SMP Router -- Component Topology + + + + per client connection (raceAny_ -- any thread exit tears down connection) + + + + receive + + + + rcvQ + + + + client + + + + sndQ + + + + send + + + + msgQ + + + + sendMsg + + + net + + + + + + net + + + + + + + + + + QueueStore + (STM or Postgres) + + + + MsgStore + (STM or Postgres) + + + + StoreLog (optional) + + + + + subQ + + + + singleton threads (one instance each, all in raceAny_) + + + + serverThread + (SMP subscriptions) + + + + serverThread + (NTF subscriptions) + + + + pendingEvents + + + + deliverNtfs + + + + sendPendingEvts + + + + expireMessages + + + + expireNtfs + + + + proxyAgent + + + + optional + + + + logServerStats + + + + prometheus + + + + controlPort + + + + + + per-client thread + + + singleton thread + + + storage + + + optional + + + Solid arrows: TBQueue connections. Dashed blue: subQ linking per-client to singleton threads. 
+ + + diff --git a/spec/diagrams/xftp-router.svg b/spec/diagrams/xftp-router.svg new file mode 100644 index 0000000000..bf60f000dd --- /dev/null +++ b/spec/diagrams/xftp-router.svg @@ -0,0 +1,133 @@ + + + + + + + + + + + + XFTP Router -- Component Topology + + + + per request (inline HTTP/2 callback, no spawned threads) + + + net + + + + + HTTP/2 handler + + + + Handshake State (per session) + None -> Sent -> Accepted + + + + sessions + + + + + + + Command Processing (FNEW, FADD, FPUT, FGET, FACK, FDEL) + + + + + + + + + + FileStore + (TMap in STM) + + + + Disk Storage + filesPath / senderId / data + + + quota-managed via usedStorage TVar + + + + StoreLog (append-only) + + + + + + + net + + + + background threads (singleton, in raceAny_) + + + + expireFiles + + + + logServerStats + + + + prometheus + + + + controlPort + + + + + + request handler (no threads) + + + storage + + + per-session state + + + background thread + + diff --git a/spec/modules/Simplex/FileTransfer/Server.md b/spec/modules/Simplex/FileTransfer/Server.md index cb64adad22..b695fe9080 100644 --- a/spec/modules/Simplex/FileTransfer/Server.md +++ b/spec/modules/Simplex/FileTransfer/Server.md @@ -16,6 +16,8 @@ The XFTP router runs several concurrent threads via `raceAny_`: | `savePrometheusMetrics` | Periodic Prometheus metrics dump | | `runCPServer` | Control port for admin commands | +See [spec/routers.md](../../../routers.md) for component and sequence diagrams. + ## Non-obvious behavior ### 1. Three-state handshake with session caching diff --git a/spec/modules/Simplex/Messaging/Notifications/Server.md b/spec/modules/Simplex/Messaging/Notifications/Server.md index b87f64ce87..0f7ebc67d6 100644 --- a/spec/modules/Simplex/Messaging/Notifications/Server.md +++ b/spec/modules/Simplex/Messaging/Notifications/Server.md @@ -18,6 +18,8 @@ The NTF router runs several concurrent threads via `raceAny_`: Each client connection spawns `receive`, `send`, and `client` threads via `raceAny_`.
+See [spec/routers.md](../../../../routers.md) for component and sequence diagrams. + ## Non-obvious behavior ### 1. Timing attack mitigation on entity lookup diff --git a/spec/modules/Simplex/Messaging/Server.md b/spec/modules/Simplex/Messaging/Server.md index 5cfdfa24a3..7d991fbb71 100644 --- a/spec/modules/Simplex/Messaging/Server.md +++ b/spec/modules/Simplex/Messaging/Server.md @@ -10,6 +10,8 @@ The router runs as `raceAny_` over many threads — any thread exit stops the entire router process. The thread set includes: one `serverThread` per subscription type (SMP, NTF), a notification delivery thread, a pending events thread, a proxy agent receiver, a SIGINT handler, plus per-transport listener threads and optional expiration/stats/prometheus/control-port threads. `E.finally` ensures `stopServer` runs on any exit. +See [spec/routers.md](../../../routers.md) for component and sequence diagrams. + ## serverThread — subscription lifecycle with split STM See comment on `serverThread`. It reads the subscription request from `subQ`, then looks up the client **outside** STM (via `getServerClient`), then enters an STM transaction (`updateSubscribers`) to compute which old subscriptions to end, then runs `endPreviousSubscriptions` in IO. If the client disconnects between lookup and transaction, `updateSubscribers` handles `Nothing` by still sending END/DELD to other subscribed clients. diff --git a/spec/routers.md b/spec/routers.md new file mode 100644 index 0000000000..f146ca8af7 --- /dev/null +++ b/spec/routers.md @@ -0,0 +1,164 @@ +# Router Architecture + +SimpleX routers are the Layer 1 network infrastructure. This document shows their internal architecture: component topology and command processing flows. + +For deployment and configuration, see [docs/ROUTERS.md](../docs/ROUTERS.md). For protocol specifications, see [SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md), [Push Notifications](../protocol/push-notifications.md).
+ +--- + +## SMP Router + +**Module specs**: [Server](modules/Simplex/Messaging/Server.md) · [Main](modules/Simplex/Messaging/Server/Main.md) · [QueueStore](modules/Simplex/Messaging/Server/QueueStore.md) · [QueueStore Postgres](modules/Simplex/Messaging/Server/QueueStore/Postgres.md) · [MsgStore](modules/Simplex/Messaging/Server/MsgStore.md) · [StoreLog](modules/Simplex/Messaging/Server/StoreLog.md) · [Control](modules/Simplex/Messaging/Server/Control.md) · [Prometheus](modules/Simplex/Messaging/Server/Prometheus.md) · [Stats](modules/Simplex/Messaging/Server/Stats.md) + +### Component topology + +![SMP Router — Component Topology](diagrams/smp-router.svg) + +### Packet delivery flow + +```mermaid +sequenceDiagram + participant S as Sender + + box SMP Router + participant auth as Command<br/>Authorization + participant QS as QueueStore + participant MS as MsgStore + participant del as Packet<br/>Delivery + end + + participant R as Recipient + + S->>auth: SEND (queue ID + packet) + auth->>QS: verify sender key (constant-time) + auth->>MS: store packet + auth->>S: OK (via sndQ) + + auth->>del: tryDeliverMessage + + alt recipient has active SUB + del->>R: MSG (via recipient's sndQ) + R->>auth: ACK + auth->>MS: delete packet + else no active subscriber + Note over MS: packet waits in MsgStore + R->>auth: SUB (subscribe to queue) + auth->>MS: fetch pending packets + del->>R: MSG + end +``` + +### Proxy forwarding flow + +```mermaid +sequenceDiagram + participant C as Client + participant P as Proxy Router + participant D as Destination Router + + C->>P: PRXY (destination address) + P->>D: connect (if not already connected) + P->>C: PKEY (proxy session key) + + C->>P: PFWD (encrypted command for destination) + P->>D: RFWD (relay forwarded command) + D->>P: command result + P->>C: command result +``` + +--- + +## XFTP Router + +**Module specs**: [Server](modules/Simplex/FileTransfer/Server.md) · [Main](modules/Simplex/FileTransfer/Server/Main.md) · [Store](modules/Simplex/FileTransfer/Server/Store.md) · [StoreLog](modules/Simplex/FileTransfer/Server/StoreLog.md) · [Stats](modules/Simplex/FileTransfer/Server/Stats.md) · [Transport](modules/Simplex/FileTransfer/Transport.md) + +### Component topology + +![XFTP Router — Component Topology](diagrams/xftp-router.svg) + +### Data packet delivery flow + +```mermaid +sequenceDiagram + participant S as Sender + + box XFTP Router + participant HS as Handshake + participant CP as Command<br/>Processing + participant FS as FileStore + participant D as Disk + end + + participant R as Recipient + + S->>HS: HELLO + HS->>S: server DH key + version + + S->>CP: FNEW (create data packet) + CP->>FS: create FileRec, reserve quota + CP->>S: sender ID + recipient IDs + + S->>CP: FPUT (send encrypted data) + CP->>D: write to disk + CP->>FS: commit filePath + CP->>S: OK + + R->>HS: HELLO + HS->>R: server DH key + version + + R->>CP: FGET (recipient DH key) + CP->>CP: DH key agreement + CP->>D: read file + CP->>R: encrypted data stream + + R->>CP: FACK + CP->>FS: delete recipient entry +``` + +--- + +## NTF Router + +**Module specs**: [Server](modules/Simplex/Messaging/Notifications/Server.md) · [Main](modules/Simplex/Messaging/Notifications/Server/Main.md) · [Store Postgres](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) · [APNS](modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) · [Control](modules/Simplex/Messaging/Notifications/Server/Control.md) · [Client](modules/Simplex/Messaging/Notifications/Client.md) · [Protocol](modules/Simplex/Messaging/Notifications/Protocol.md) + +### Component topology + +![NTF Router — Component Topology](diagrams/ntf-router.svg) + +### Token registration and notification delivery + +```mermaid +sequenceDiagram + participant App + + box NTF Router + participant cl as client thread + participant Store + participant sub as ntfSubscriber + participant push as ntfPush + end + + participant SMP as SMP Router + participant APNS + + App->>cl: TNEW (push token + DH key) + cl->>Store: create token (NTRegistered) + cl->>push: PNVerification (via pushQ) + push->>APNS: verification push + APNS-->>App: verification code (encrypted) + App->>cl: TVFY (code) + cl->>Store: token -> NTActive + + App->>cl: SNEW (subscribe to SMP queue) + cl->>Store: create subscription + cl->>SMP: NKEY (subscribe for notifications) + SMP->>cl: OK (notifier ID) + + Note over SMP: message arrives on queue + SMP->>sub: NMSG (via msgQ) +
sub->>Store: update tokenLastNtfs + sub->>push: PNMessage (via pushQ) + push->>APNS: push notification + APNS-->>App: notification (ID only) + App->>SMP: connect and retrieve message +``` From abcc6da9a09f85527211749909ce52dce56528f7 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:35:36 +0000 Subject: [PATCH 53/91] fixes --- spec/diagrams/ntf-router.svg | 21 +++++++++++++-------- spec/diagrams/smp-router.svg | 5 +++++ spec/diagrams/xftp-router.svg | 4 ++-- spec/routers.md | 3 ++- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/spec/diagrams/ntf-router.svg b/spec/diagrams/ntf-router.svg index fb35fe804c..b42429459c 100644 --- a/spec/diagrams/ntf-router.svg +++ b/spec/diagrams/ntf-router.svg @@ -52,14 +52,19 @@ net - TNEW, TVFY, TRPL, TDEL - SNEW, SCHK, SDEL + TNEW, TVFY, TCHK, TRPL, TDEL, TCRN + SNEW, SCHK, SDEL, PING - + store + + + pushQ + @@ -104,9 +109,9 @@ pushQ - - store + store subscriberSubQ - tokens / subscriptions / tokenLastNtfs - (in-memory TMap + PostgreSQL) + (PostgreSQL) StoreLog (optional) + + + NtfStore (STM TMap) + - Command Processing (FNEW, FADD, FPUT, FGET, FACK, FDEL) + Command Processing (FNEW, FADD, FPUT, FGET, FACK, FDEL, PING) Disk Storage - filesPath / senderId / data + filesPath / base64(senderId) quota-managed via usedStorage TVar diff --git a/spec/routers.md b/spec/routers.md index f146ca8af7..b66c52ce45 100644 --- a/spec/routers.md +++ b/spec/routers.md @@ -95,10 +95,11 @@ sequenceDiagram HS->>S: server DH key + version S->>CP: FNEW (create data packet) - CP->>FS: create FileRec, reserve quota + CP->>FS: create FileRec CP->>S: sender ID + recipient IDs S->>CP: FPUT (send encrypted data) + CP->>FS: reserve quota CP->>D: write to disk CP->>FS: commit filePath CP->>S: OK From 4df501efe4f076b00c6890a6f2cdcf32371327bb Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 
2026 11:24:12 +0000 Subject: [PATCH 54/91] client diagrams --- docs/CLIENT.md | 20 +--- spec/clients.md | 165 +++++++++++++++++++++++++++++ spec/diagrams/smp-client-agent.svg | 146 +++++++++++++++++++++++++ spec/diagrams/smp-client.svg | 149 ++++++++++++++++++++++++++ spec/diagrams/xftp-client.svg | 83 +++++++++++++++ 5 files changed, 547 insertions(+), 16 deletions(-) create mode 100644 spec/clients.md create mode 100644 spec/diagrams/smp-client-agent.svg create mode 100644 spec/diagrams/smp-client.svg create mode 100644 spec/diagrams/xftp-client.svg diff --git a/docs/CLIENT.md b/docs/CLIENT.md index 0f3f975283..6cd4f2321b 100644 --- a/docs/CLIENT.md +++ b/docs/CLIENT.md @@ -2,11 +2,11 @@ SimpleX client libraries provide low-level protocol access to SimpleX routers. They implement the wire protocols ([SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md), [NTF](../protocol/push-notifications.md)) and handle connection lifecycle, but leave encryption, identity management, and connection orchestration to the application. -This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers themselves; Layer 3 is the [Agent](AGENT.md), which builds duplex encrypted connections on top of these libraries. +This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers themselves; Layer 3 is the [Agent](AGENT.md), which builds duplex encrypted connections on top of these libraries. For internal architecture diagrams (thread topology, command processing flows), see [`spec/clients.md`](../spec/clients.md). ## SMP Client -**Source**: [`Simplex.Messaging.Client`](../src/Simplex/Messaging/Client.hs) — **Module spec**: [`spec/modules/Simplex/Messaging/Client.md`](../spec/modules/Simplex/Messaging/Client.md) +**Source**: [`Simplex.Messaging.Client`](../src/Simplex/Messaging/Client.hs). 
For architecture and module specs, see [SMP Client](../spec/clients.md#smp-client-protocolclient). The SMP client connects to SMP routers and manages simplex messaging queues — the fundamental addressing primitive of the SimpleX Network. Each simplex queue is a unidirectional, ordered sequence of fixed-size packets (16,384 bytes) with separate cryptographic credentials for sending and receiving. The queue model and command set are defined in the [SMP protocol](../protocol/simplex-messaging.md). @@ -33,7 +33,7 @@ Routers are identified by the SHA-256 hash of their CA certificate fingerprint, ## XFTP Client -**Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs) — **Module spec**: [`spec/modules/Simplex/FileTransfer/Client.md`](../spec/modules/Simplex/FileTransfer/Client.md) +**Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs). For architecture and module specs, see [XFTP Client](../spec/clients.md#xftp-client). The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. The XFTP protocol runs over HTTP/2, simplifying browser integration. The data packet lifecycle and command set are defined in the [XFTP protocol](../protocol/xftp.md). @@ -46,7 +46,7 @@ The XFTP client connects to XFTP routers and manages data packets — individual ## NTF Client -**Source**: [`Simplex.Messaging.Notifications.Client`](../src/Simplex/Messaging/Notifications/Client.hs) — **Module spec**: [`spec/modules/Simplex/Messaging/Notifications/Client.md`](../spec/modules/Simplex/Messaging/Notifications/Client.md) +**Source**: [`Simplex.Messaging.Notifications.Client`](../src/Simplex/Messaging/Notifications/Client.hs). For architecture and module specs, see [NTF Client](../spec/clients.md#ntf-client). 
The NTF client connects to NTF (notification) routers and manages push notification tokens and subscriptions. It implements the [Push Notifications protocol](../protocol/push-notifications.md). @@ -81,15 +81,3 @@ The following capabilities require the [Agent](AGENT.md) (Layer 3): - [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP wire format, commands, and security properties - [XFTP Protocol](../protocol/xftp.md) — XFTP wire format, data packet lifecycle - [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model, and design rationale - -## Module specs - -- [SMP Client](../spec/modules/Simplex/Messaging/Client.md) — proxy forwarding, batching, connection lifecycle, keepalive -- [XFTP Client](../spec/modules/Simplex/FileTransfer/Client.md) — handshake, data packet operations, forward secrecy -- [NTF Client](../spec/modules/Simplex/Messaging/Notifications/Client.md) — token and subscription operations, batch commands -- [SMP Protocol types](../spec/modules/Simplex/Messaging/Protocol.md) — command types, queue addresses, message encoding -- [XFTP Protocol types](../spec/modules/Simplex/FileTransfer/Protocol.md) — data packet types, XFTP commands -- [NTF Protocol types](../spec/modules/Simplex/Messaging/Notifications/Protocol.md) — notification commands, token/subscription types -- [Transport](../spec/modules/Simplex/Messaging/Transport.md) — TLS transport, session handshake -- [HTTP/2 Client](../spec/modules/Simplex/Messaging/Transport/HTTP2/Client.md) — HTTP/2 transport layer -- [Crypto](../spec/modules/Simplex/Messaging/Crypto.md) — cryptographic primitives used by clients diff --git a/spec/clients.md b/spec/clients.md new file mode 100644 index 0000000000..871d1b4fd5 --- /dev/null +++ b/spec/clients.md @@ -0,0 +1,165 @@ +# Client Architecture + +SimpleX clients are the Layer 2 libraries that connect to routers. This document shows their internal architecture: component topology and command processing flows. 
+ +For deployment and usage, see [docs/CLIENT.md](../docs/CLIENT.md). For protocol specifications, see [SMP](../protocol/simplex-messaging.md), [XFTP](../protocol/xftp.md), [Push Notifications](../protocol/push-notifications.md). + +--- + +## SMP Client (ProtocolClient) + +**Module specs**: [Client](modules/Simplex/Messaging/Client.md) · [Protocol](modules/Simplex/Messaging/Protocol.md) · [Transport](modules/Simplex/Messaging/Transport.md) · [Crypto](modules/Simplex/Messaging/Crypto.md) + +Generic protocol client used for both SMP and NTF connections. Manages a single TLS connection with multiplexed command/response matching via correlation IDs. + +### Component topology + +![SMP Client — Component Topology](diagrams/smp-client.svg) + +### Command/response flow + +```mermaid +sequenceDiagram + participant C as Caller
(Agent / router) + + box ProtocolClient + participant SC as sentCommands
(TMap CorrId Request) + participant SQ as sndQ + participant S as send thread + participant R as receive thread + participant RQ as rcvQ + participant P as process thread + end + + participant Router as SMP Router + + C->>SC: mkTransmission (generate CorrId, create Request with empty responseVar) + C->>SQ: write (Request, encoded command) + S->>SQ: read + S-->>S: check pending flag (drop if timed out) + S->>Router: tPutLog (transmit bytes) + + Router->>R: tGetClient (receive batch) + R->>RQ: write transmissions + + P->>RQ: read + P->>SC: lookup CorrId + alt command response (CorrId matches, pending) + P->>SC: remove CorrId + fill responseVar (TMVar) + else expired response (CorrId matches, already timed out) + P->>C: write to msgQ (STResponse) + else server event (empty CorrId) + P->>C: write to msgQ (STEvent) + end + + Note over C: getResponse: takeTMVar with timeout +``` + +--- + +## SMPClientAgent + +**Module specs**: [Client Agent](modules/Simplex/Messaging/Client/Agent.md) + +Connection manager that multiplexes multiple ProtocolClient connections. Tracks subscriptions, handles reconnection with backoff, and forwards server messages and connection events upward. Used by SMP router (proxying) and NTF router (subscriptions). + +### Component topology + +![SMPClientAgent — Component Topology](diagrams/smp-client-agent.svg) + +### Connection lifecycle + +```mermaid +sequenceDiagram + participant C as Consumer
(router / app) + participant A as SMPClientAgent + participant PC as ProtocolClient + participant Router as SMP Router + + C->>A: getSMPServerClient'' (server) + alt client exists in smpClients + A->>C: return existing client + else no client + A->>PC: connectClient (create new ProtocolClient) + PC->>Router: TLS handshake + A->>A: register disconnect handler + A->>C: return new client + end + + C->>A: subscribeQueuesNtfs (queueIds) + A->>A: add to pendingQueueSubs + A->>PC: sendProtocolCommands (SUB batch) + PC->>Router: SUB commands + Router->>PC: OK responses + A->>A: move pending → activeQueueSubs + A->>C: CASubscribed (via agentQ) + + Note over Router: connection drops + + PC->>A: disconnect handler fires + A->>A: filter by SessionId (only remove subs matching disconnected session) + A->>A: move active → pending (queue subs + service subs) + A->>C: CAServiceDisconnected (via agentQ, if service sub existed) + A->>C: CADisconnected (via agentQ, if queue subs existed) + A->>A: spawn smpSubWorker (retry with backoff) + A->>PC: reconnect + resubscribe pending subs + A->>C: CAConnected + CASubscribed (via agentQ) +``` + +--- + +## XFTP Client + +**Module specs**: [Client](modules/Simplex/FileTransfer/Client.md) · [Protocol](modules/Simplex/FileTransfer/Protocol.md) · [HTTP/2 Client](modules/Simplex/Messaging/Transport/HTTP2/Client.md) + +Stateless wrapper around HTTP2Client. XFTPClient adds no threads of its own — each operation is a synchronous HTTP/2 request/response. Serialization and multiplexing happen inside HTTP2Client's internal request queue and process thread. + +### Component topology + +![XFTP Client — Component Topology](diagrams/xftp-client.svg) + +### Upload/download flow + +```mermaid +sequenceDiagram + participant C as Caller
(Agent / app) + participant X as XFTPClient + participant H as HTTP2Client + participant Router as XFTP Router + + C->>X: createXFTPChunk (FNEW) + X->>H: HTTP/2 POST (encoded command) + H->>Router: request + Router->>H: response (sender ID + recipient IDs) + H->>X: decode response + X->>C: return IDs + + C->>X: uploadXFTPChunk (FPUT + file data) + X->>H: HTTP/2 POST (streaming body) + H->>Router: request with file stream + Router->>H: OK + H->>X: OK + X->>C: return OK + + C->>X: downloadXFTPChunk (FGET + ephemeral DH key) + X->>H: HTTP/2 POST (command) + H->>Router: request + Router->>H: streaming response (server DH key + nonce + encrypted data) + H->>X: streaming body + X->>X: compute DH secret, decrypt + save to file + X->>C: return () +``` + +--- + +## NTF Client + +**Module specs**: [Client](modules/Simplex/Messaging/Notifications/Client.md) · [Protocol](modules/Simplex/Messaging/Notifications/Protocol.md) + +Type alias for ProtocolClient — same architecture as SMP Client: + +```haskell +type NtfClient = ProtocolClient NTFVersion ErrorType NtfResponse +``` + +Same threads (send, receive, process, monitor), same queues (sndQ, rcvQ, sentCommands, msgQ), same command/response flow. Different command types: TNEW, TVFY, TCHK, TRPL, TDEL, TCRN, SNEW, SCHK, SDEL, PING. 
diff --git a/spec/diagrams/smp-client-agent.svg b/spec/diagrams/smp-client-agent.svg new file mode 100644 index 0000000000..4257726ec9 --- /dev/null +++ b/spec/diagrams/smp-client-agent.svg @@ -0,0 +1,146 @@ + + + + + + + + + + + + SMPClientAgent -- Component Topology + + + consumer + (NTF router / + SMP proxy / + application) + + + + msgQ + (TBQueue, server messages) + + + + + + + agentQ + (TBQueue SMPClientAgentEvent) + + + + + + CAConnected + CADisconnected + CASubscribed / CASubError + CAServiceDisconnected + CAServiceSubscribed / SubError + + + + SMPClientAgent (connection manager) + + + + smpClients + (TMap SMPServer SMPClientVar) + + + + activeQueueSubs / pendingQueueSubs + (TMap SMPServer (TMap QueueId ...)) + activeServiceSubs / pendingServiceSubs + (TMap SMPServer (TVar (Maybe ...))) + + + + + + + + + + smpSubWorkers + (one per server) + + + + reconnect + resubscribe + + + getSMPServerClient'': get or create client + connectClient: create ProtocolClient, register disconnect handler + on disconnect: filter by SessionId, move active → pending, notify agentQ, spawn worker + worker: retry connect with backoff, resubscribe pending subs + subscribeQueuesNtfs / subscribeServiceNtfs: subscribe + track state + + + + ProtocolClient connections (one per SMP Router) + + + + ProtocolClient + → SMP Router A + + + + ProtocolClient + → SMP Router B + + + + ProtocolClient + → SMP Router N + + + ... + + + + + + + + + + ProtocolClient + + + state / queue + + + background worker + + + Solid arrows: TBQueue flow. Dashed: reconnection / resubscription. 
+ + + diff --git a/spec/diagrams/smp-client.svg b/spec/diagrams/smp-client.svg new file mode 100644 index 0000000000..d537ec8d04 --- /dev/null +++ b/spec/diagrams/smp-client.svg @@ -0,0 +1,149 @@ + + + + + + + + + + + + SMP Client (ProtocolClient) -- Component Topology + + + + per connection (raceAny_ -- any thread exit tears down connection) + + + caller + (Agent/ + router) + + + + commands + + + + sndQ + (TBQueue, 64) + + + + + + + send + + + + + + + receive + + + + + + SMP + Router + (TLS) + + + + + + + rcvQ + (TBQueue, 64) + + + + + + + process + + + + sentCommands + (TMap CorrId Request) + + + + match + + + + responseVar + (TMVar) + + + + monitor + + + + PING + + + optional + + + + msgQ (optional) + (TBQueue, server events) + + + + events (empty CorrId) + + + + to Agent / SMPClientAgent + + + + + + thread + + + queue / state + + + optional + + + Solid arrows: TBQueue flow. Dashed: STM lookups / TMVar responses. + + + diff --git a/spec/diagrams/xftp-client.svg b/spec/diagrams/xftp-client.svg new file mode 100644 index 0000000000..847922ed9f --- /dev/null +++ b/spec/diagrams/xftp-client.svg @@ -0,0 +1,83 @@ + + + + + + + + + + + + XFTP Client -- Component Topology + + + + per connection (XFTPClient adds no threads; serialization in HTTP2Client) + + + caller + (Agent/ + router) + + + + + + + XFTPClient + sendXFTPCommand + + + + + + + HTTP2Client + (TLS + HTTP/2 streams) + + + + XFTP + Router + (HTTP/2) + + + + thParams (negotiated) + + + uploads: streaming request body + downloads: ephemeral DH + streaming + response body (per-chunk forward secrecy) + + + + + + client wrapper + + + external connection + + + state + + + XFTPClient adds no threads. HTTP2Client has internal reqQ + process thread. 
+ + + From 1db93b936d616ad49bd7978ebd7f4f6da8af4df2 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 13:54:41 +0000 Subject: [PATCH 55/91] corrections --- docs/AGENT.md | 40 +++--- docs/CLIENT.md | 37 ++++-- docs/ROUTERS.md | 22 ++-- spec/clients.md | 22 ++-- spec/diagrams/ntf-router.svg | 3 - spec/diagrams/smp-client-agent.svg | 161 ++++++++++++------------ spec/diagrams/smp-client.svg | 191 ++++++++++++++++------------- spec/diagrams/smp-router.svg | 5 +- spec/diagrams/xftp-client.svg | 3 - spec/diagrams/xftp-router.svg | 3 - spec/routers.md | 6 +- 11 files changed, 258 insertions(+), 235 deletions(-) diff --git a/docs/AGENT.md b/docs/AGENT.md index e6fe32b08f..83050013f4 100644 --- a/docs/AGENT.md +++ b/docs/AGENT.md @@ -4,16 +4,16 @@ The SimpleX Agent builds duplex encrypted connections on top of [SimpleX client This is **Layer 3** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers; Layer 2 is the [client libraries](CLIENT.md) that speak the wire protocols. The Agent adds the connection semantics that applications need. -**Source**: [`Simplex.Messaging.Agent`](../src/Simplex/Messaging/Agent.hs) — **Module spec**: [`spec/modules/Simplex/Messaging/Agent.md`](../spec/modules/Simplex/Messaging/Agent.md) +**Source**: [`Simplex.Messaging.Agent`](../src/Simplex/Messaging/Agent.hs). **Module spec**: [`spec/modules/Simplex/Messaging/Agent.md`](../spec/modules/Simplex/Messaging/Agent.md) ## Connections The Agent turns simplex (unidirectional) SMP queues into duplex connections, implementing the [Agent protocol](../protocol/agent-protocol.md): -- **Duplex connections**: each connection uses a pair of SMP queues — one for each direction. The queues can be on different routers chosen independently by each party. See the [duplex connection procedure](../protocol/agent-protocol.md) for the full handshake. 
+- **Duplex connections**: each connection uses a pair of SMP queues - one for each direction. The queues can be on different routers chosen independently by each party. See the [duplex connection procedure](../protocol/agent-protocol.md) for the full handshake. - **Connection establishment**: one party creates a connection and generates an invitation (containing router address, queue ID, and public keys). The invitation is passed out-of-band (QR code, link, etc.). The other party joins by creating a reverse queue and completing the handshake. - **Connection links**: the Agent supports connection links (long and short) for sharing connection invitations via URLs. Short links use a separate SMP queue to store the full invitation, allowing compact QR codes. -- **Queue rotation**: the Agent periodically rotates the underlying SMP queues, limiting the window for metadata correlation. Rotation is transparent to the application — the connection identity is stable while the underlying queues change. +- **Queue rotation**: the Agent periodically rotates the underlying SMP queues, limiting the window for metadata correlation. Rotation is transparent to the application - the connection identity is stable while the underlying queues change. - **Redundant queues**: connections can use multiple queues for reliability. If one router becomes unreachable, messages flow through the remaining queues. ## Encryption @@ -46,7 +46,7 @@ The Agent manages push notification subscriptions for mobile devices, using the The Agent is designed to be embedded as a Haskell library: -- **STM queues**: the application communicates with the Agent via STM queues. Commands go in (`ACommand`), events come out (`AEvent`). No serialization or parsing — direct Haskell values. The command/event types are defined in the [Agent Protocol module](../spec/modules/Simplex/Messaging/Agent/Protocol.md). +- **STM queues**: the application communicates with the Agent via STM queues. 
Commands go in (`ACommand`), events come out (`AEvent`). No serialization or parsing - direct Haskell values. The command/event types are defined in the [Agent Protocol module](../spec/modules/Simplex/Messaging/Agent/Protocol.md). - **Async operation**: all network operations are asynchronous. The Agent manages internal worker threads for each router connection, message processing, and background tasks (cleanup, statistics, notification supervision). See the [Agent Client module spec](../spec/modules/Simplex/Messaging/Agent/Client.md) for worker architecture. - **Background mode**: on mobile platforms, the Agent can run in a reduced mode with only the message receiver active, minimizing resource usage when the app is backgrounded. - **Dual database backends**: the Agent supports both SQLite (for mobile/desktop) and PostgreSQL (for server deployments) as persistence backends, selected at compile time. See [Agent Store Interface](../spec/modules/Simplex/Messaging/Agent/Store/Interface.md) and [Agent Store Postgres](../spec/modules/Simplex/Messaging/Agent/Store/Postgres.md). 
@@ -70,24 +70,24 @@ The Agent is designed to be embedded as a Haskell library: ## Protocol references -- [Agent Protocol](../protocol/agent-protocol.md) — duplex connection procedure, message format -- [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model -- [PQDR](../protocol/pqdr.md) — post-quantum double ratchet specification -- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP queue operations used by the Agent -- [XFTP Protocol](../protocol/xftp.md) — data packet operations for file transfer -- [Push Notifications Protocol](../protocol/push-notifications.md) — NTF token and subscription management +- [Agent Protocol](../protocol/agent-protocol.md) - duplex connection procedure, message format +- [SimpleX Network overview](../protocol/overview-tjr.md) - architecture, trust model +- [PQDR](../protocol/pqdr.md) - post-quantum double ratchet specification +- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) - SMP queue operations used by the Agent +- [XFTP Protocol](../protocol/xftp.md) - data packet operations for file transfer +- [Push Notifications Protocol](../protocol/push-notifications.md) - NTF token and subscription management ## Peer library: Remote Control -The Agent exposes the [XRCP protocol](../protocol/xrcp.md) API for cross-device remote control (e.g., controlling a mobile app from a desktop). The actual logic is in the standalone [`Simplex.RemoteControl.Client`](../src/Simplex/RemoteControl/Client.hs) library — the Agent provides thin wrappers that pass through its random and multicast state. XRCP is not a managed Agent capability (no workers, persistence, or background supervision). See the [RemoteControl module specs](../spec/modules/Simplex/RemoteControl/Types.md). +The Agent exposes the [XRCP protocol](../protocol/xrcp.md) API for cross-device remote control (e.g., controlling a mobile app from a desktop). 
The actual logic is in the standalone [`Simplex.RemoteControl.Client`](../src/Simplex/RemoteControl/Client.hs) library - the Agent provides thin wrappers that pass through its random and multicast state. XRCP is not a managed Agent capability (no workers, persistence, or background supervision). See the [RemoteControl module specs](../spec/modules/Simplex/RemoteControl/Types.md). ## Module specs -- [Agent](../spec/modules/Simplex/Messaging/Agent.md) — main Agent module, connection lifecycle, message processing -- [Agent Client](../spec/modules/Simplex/Messaging/Agent/Client.md) — worker threads, router connections, subscription management -- [Agent Protocol](../spec/modules/Simplex/Messaging/Agent/Protocol.md) — ACommand/AEvent types, connection invitations -- [Agent Store Interface](../spec/modules/Simplex/Messaging/Agent/Store/Interface.md) — database abstraction for SQLite/Postgres -- [Agent Store (AgentStore)](../spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md) — connection, queue, and message persistence -- [NtfSubSupervisor](../spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) — notification subscription management -- [XFTP Agent](../spec/modules/Simplex/FileTransfer/Agent.md) — file transfer orchestration -- [Ratchet](../spec/modules/Simplex/Messaging/Crypto/Ratchet.md) — double ratchet implementation -- [SNTRUP761](../spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md) — post-quantum KEM +- [Agent](../spec/modules/Simplex/Messaging/Agent.md) - main Agent module, connection lifecycle, message processing +- [Agent Client](../spec/modules/Simplex/Messaging/Agent/Client.md) - worker threads, router connections, subscription management +- [Agent Protocol](../spec/modules/Simplex/Messaging/Agent/Protocol.md) - ACommand/AEvent types, connection invitations +- [Agent Store Interface](../spec/modules/Simplex/Messaging/Agent/Store/Interface.md) - database abstraction for SQLite/Postgres +- [Agent Store 
(AgentStore)](../spec/modules/Simplex/Messaging/Agent/Store/AgentStore.md) - connection, queue, and message persistence +- [NtfSubSupervisor](../spec/modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) - notification subscription management +- [XFTP Agent](../spec/modules/Simplex/FileTransfer/Agent.md) - file transfer orchestration +- [Ratchet](../spec/modules/Simplex/Messaging/Crypto/Ratchet.md) - double ratchet implementation +- [SNTRUP761](../spec/modules/Simplex/Messaging/Crypto/SNTRUP761.md) - post-quantum KEM diff --git a/docs/CLIENT.md b/docs/CLIENT.md index 6cd4f2321b..4f71dab897 100644 --- a/docs/CLIENT.md +++ b/docs/CLIENT.md @@ -8,7 +8,7 @@ This is **Layer 2** of the [SimpleX Network architecture](../protocol/overview-t **Source**: [`Simplex.Messaging.Client`](../src/Simplex/Messaging/Client.hs). For architecture and module specs, see [SMP Client](../spec/clients.md#smp-client-protocolclient). -The SMP client connects to SMP routers and manages simplex messaging queues — the fundamental addressing primitive of the SimpleX Network. Each simplex queue is a unidirectional, ordered sequence of fixed-size packets (16,384 bytes) with separate cryptographic credentials for sending and receiving. The queue model and command set are defined in the [SMP protocol](../protocol/simplex-messaging.md). +The SMP client connects to SMP routers and manages simplex messaging queues, the fundamental addressing primitive of the SimpleX Network. Each simplex queue is a unidirectional, ordered sequence of fixed-size packets (16,384 bytes) with separate cryptographic credentials for sending and receiving. The queue model and command set are defined in the [SMP protocol](../protocol/simplex-messaging.md). ### Capabilities @@ -31,17 +31,30 @@ The client uses a functional Haskell API with STM queues for asynchronous event Routers are identified by the SHA-256 hash of their CA certificate fingerprint, not by hostname. 
The client validates the full X.509 certificate chain on every TLS connection and compares the CA fingerprint against the expected hash from the queue address. This means a DNS or IP-level attacker who cannot produce the correct certificate is detected at connection time. +## SMPClientAgent + +**Source**: [`Simplex.Messaging.Client.Agent`](../src/Simplex/Messaging/Client/Agent.hs). For architecture and module specs, see [SMPClientAgent](../spec/clients.md#smpclientagent). + +Connection manager that multiplexes multiple SMP client connections. Maintains one ProtocolClient per SMP router, tracks queue and service subscriptions, and handles reconnection with exponential backoff. Used by the SMP router (for proxy forwarding) and the NTF router (for message subscriptions). + +### Capabilities + +- **Connection pooling**: maintains a pool of ProtocolClient connections keyed by SMP router, creating connections on demand and reusing existing ones +- **Subscription tracking**: tracks active and pending subscriptions (both queue-based and service-based) with automatic state transitions on connect/disconnect +- **Automatic reconnection**: on connection loss, moves subscriptions from active to pending, then spawns a background worker that retries with backoff and resubscribes +- **Session-scoped disconnect handling**: uses session IDs to ensure only subscriptions belonging to the disconnected session are affected, preventing races with newly established connections + ## XFTP Client **Source**: [`Simplex.FileTransfer.Client`](../src/Simplex/FileTransfer/Client.hs). For architecture and module specs, see [XFTP Client](../spec/clients.md#xftp-client). -The XFTP client connects to XFTP routers and manages data packets — individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. The XFTP protocol runs over HTTP/2, simplifying browser integration. 
The data packet lifecycle and command set are defined in the [XFTP protocol](../protocol/xftp.md). +The XFTP client connects to XFTP routers and manages data packets, individually addressed blocks used for larger payload delivery. Data packets come in fixed sizes (64KB, 256KB, 1MB, 4MB), hiding the actual payload size. The XFTP protocol runs over HTTP/2, simplifying browser integration. The data packet lifecycle and command set are defined in the [XFTP protocol](../protocol/xftp.md). ### Capabilities - **Data packet creation**: create data packets on routers with sender, recipient, and optional additional recipient credentials. See the [XFTP protocol](../protocol/xftp.md) for credential roles and packet lifecycle. - **Send** (FPUT): send encrypted data to the router in a single HTTP/2 streaming request (command + body) -- **Receive** (FGET): receive data packets with per-request ephemeral Diffie-Hellman key exchange, providing forward secrecy — compromising one DH key does not reveal other received data packets +- **Receive** (FGET): receive data packets with per-request ephemeral Diffie-Hellman key exchange, providing forward secrecy: compromising one DH key does not reveal other received data packets - **Acknowledgment and deletion**: recipients acknowledge receipt; senders delete data packets after delivery ## NTF Client @@ -69,15 +82,15 @@ These libraries are appropriate when the application manages its own encryption The following capabilities require the [Agent](AGENT.md) (Layer 3): -- **Duplex connections** — the Agent pairs two simplex queues into a duplex connection -- **End-to-end encryption** — the Agent manages double ratchet with post-quantum extensions -- **File transfer** — the Agent handles chunking, encryption, padding, multi-router distribution, and reassembly -- **Queue rotation** — the Agent transparently rotates queues to limit metadata correlation -- **Connection discovery** — connection links, short links, and contact addresses are 
Agent-level abstractions -- **Push notifications** — notification token management and subscription is Agent-level +- **Duplex connections** - the Agent pairs two simplex queues into a duplex connection +- **End-to-end encryption** - the Agent manages double ratchet with post-quantum extensions +- **File transfer** - the Agent handles chunking, encryption, padding, multi-router distribution, and reassembly +- **Queue rotation** - the Agent transparently rotates queues to limit metadata correlation +- **Connection discovery** - connection links, short links, and contact addresses are Agent-level abstractions +- **Push notifications** - notification token management and subscription is Agent-level ## Protocol references -- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP wire format, commands, and security properties -- [XFTP Protocol](../protocol/xftp.md) — XFTP wire format, data packet lifecycle -- [SimpleX Network overview](../protocol/overview-tjr.md) — architecture, trust model, and design rationale +- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) - SMP wire format, commands, and security properties +- [XFTP Protocol](../protocol/xftp.md) - XFTP wire format, data packet lifecycle +- [SimpleX Network overview](../protocol/overview-tjr.md) - architecture, trust model, and design rationale diff --git a/docs/ROUTERS.md b/docs/ROUTERS.md index 29d518d650..d938dc40f7 100644 --- a/docs/ROUTERS.md +++ b/docs/ROUTERS.md @@ -1,4 +1,4 @@ -# SimpleX Routers — Deployment and Configuration +# SimpleX Routers: Deployment and Configuration SimpleX routers are the network infrastructure of the [SimpleX Network](../protocol/overview-tjr.md). They accept, buffer, and deliver data packets between endpoints. Each router operates independently and can be run by any party on standard computing hardware. @@ -6,7 +6,7 @@ This document covers deployment and advanced configuration. 
For an overview of t ## SMP Router -The SMP router provides messaging queues — unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). For architecture and module specs, see [SMP Router](../spec/routers.md#smp-router). +The SMP router provides messaging queues - unidirectional, ordered sequences of fixed-size packets (16,384 bytes each). It implements the [SimpleX Messaging Protocol](../protocol/simplex-messaging.md). For architecture and module specs, see [SMP Router](../spec/routers.md#smp-router). ### Advanced configuration @@ -35,7 +35,7 @@ echo 'PATH="/opt/homebrew/opt/openssl@3/bin:$PATH"' >> ~/.zprofile ## XFTP Router -The XFTP router accepts and delivers data packets over HTTP/2 — individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. The use of HTTP/2 simplifies browser integration. For architecture and module specs, see [XFTP Router](../spec/routers.md#xftp-router). +The XFTP router accepts and delivers data packets over HTTP/2 - individually addressed blocks in fixed sizes (64KB, 256KB, 1MB, 4MB). It implements the [XFTP protocol](../protocol/xftp.md). Data packets are used for larger payload delivery (files, media) where SMP queue packet sizes would be inefficient. The use of HTTP/2 simplifies browser integration. For architecture and module specs, see [XFTP Router](../spec/routers.md#xftp-router). Initialize with `xftp-server init` and configure storage quota in `xftp-server.ini`. @@ -66,7 +66,7 @@ Prebuilt images are available from [Docker Hub](https://hub.docker.com/r/simplex 2. 
Run: - **SMP router** — change `your_ip_or_domain`; `-e "PASS=password"` is optional: + **SMP router** - change `your_ip_or_domain`; `-e "PASS=password"` is optional: ```sh docker run -d \ -e "ADDR=your_ip_or_domain" \ @@ -77,7 +77,7 @@ Prebuilt images are available from [Docker Hub](https://hub.docker.com/r/simplex simplexchat/smp-server:latest ``` - **XFTP router** — change `your_ip_or_domain` and `maximum_storage`: + **XFTP router** - change `your_ip_or_domain` and `maximum_storage`: ```sh docker run -d \ -e "ADDR=your_ip_or_domain" \ @@ -160,12 +160,12 @@ Then run with the same Docker commands as above, replacing `simplexchat/smp-serv ### Linode StackScript -[Deploy via Linode StackScript](https://cloud.linode.com/stackscripts/748014) — Shared CPU Nanode with 1GB is sufficient. +[Deploy via Linode StackScript](https://cloud.linode.com/stackscripts/748014). Shared CPU Nanode with 1GB is sufficient. Configuration options: - SMP Server store log flag for queue persistence (recommended) - [Linode API token](https://www.linode.com/docs/guides/getting-started-with-the-linode-api#get-an-access-token) for automatic DNS and tagging (scopes: read/write for "linodes" and "domains") -- Domain name (e.g., `smp1.example.com`) — the [domain must exist](https://cloud.linode.com/domains/create) in your Linode account +- Domain name (e.g., `smp1.example.com`) - the [domain must exist](https://cloud.linode.com/domains/create) in your Linode account After deployment (up to 5 minutes), get the server address from Linode tags or SSH: `smp://@`. @@ -188,7 +188,7 @@ SMP and XFTP routers expose Prometheus metrics via a control port. 
The control p ## Protocol references -- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) — SMP wire format and security properties -- [XFTP Protocol](../protocol/xftp.md) — data packet protocol -- [Push Notifications Protocol](../protocol/push-notifications.md) — NTF protocol -- [SimpleX Network overview](../protocol/overview-tjr.md) — architecture and trust model +- [SimpleX Messaging Protocol](../protocol/simplex-messaging.md) - SMP wire format and security properties +- [XFTP Protocol](../protocol/xftp.md) - data packet protocol +- [Push Notifications Protocol](../protocol/push-notifications.md) - NTF protocol +- [SimpleX Network overview](../protocol/overview-tjr.md) - architecture and trust model diff --git a/spec/clients.md b/spec/clients.md index 871d1b4fd5..a99686ab06 100644 --- a/spec/clients.md +++ b/spec/clients.md @@ -14,9 +14,9 @@ Generic protocol client used for both SMP and NTF connections. Manages a single ### Component topology -![SMP Client — Component Topology](diagrams/smp-client.svg) +![SMP Client - Component Topology](diagrams/smp-client.svg) -### Command/response flow +### Command/result flow ```mermaid sequenceDiagram @@ -33,7 +33,7 @@ sequenceDiagram participant Router as SMP Router - C->>SC: mkTransmission (generate CorrId, create Request with empty responseVar) + C->>SC: mkTransmission
(generate CorrId, create Request
with empty responseVar) C->>SQ: write (Request, encoded command) S->>SQ: read S-->>S: check pending flag (drop if timed out) @@ -65,15 +65,19 @@ Connection manager that multiplexes multiple ProtocolClient connections. Tracks ### Component topology -![SMPClientAgent — Component Topology](diagrams/smp-client-agent.svg) +![SMPClientAgent - Component Topology](diagrams/smp-client-agent.svg) ### Connection lifecycle ```mermaid sequenceDiagram participant C as Consumer
(router / app) - participant A as SMPClientAgent - participant PC as ProtocolClient + + box + participant A as SMPClientAgent + participant PC as ProtocolClient + end + participant Router as SMP Router C->>A: getSMPServerClient'' (server) @@ -112,11 +116,11 @@ sequenceDiagram **Module specs**: [Client](modules/Simplex/FileTransfer/Client.md) · [Protocol](modules/Simplex/FileTransfer/Protocol.md) · [HTTP/2 Client](modules/Simplex/Messaging/Transport/HTTP2/Client.md) -Stateless wrapper around HTTP2Client. XFTPClient adds no threads of its own — each operation is a synchronous HTTP/2 request/response. Serialization and multiplexing happen inside HTTP2Client's internal request queue and process thread. +Stateless wrapper around HTTP2Client. XFTPClient adds no threads of its own; each operation is a synchronous HTTP/2 request/response. Serialization and multiplexing happen inside HTTP2Client's internal request queue and process thread. ### Component topology -![XFTP Client — Component Topology](diagrams/xftp-client.svg) +![XFTP Client - Component Topology](diagrams/xftp-client.svg) ### Upload/download flow @@ -156,7 +160,7 @@ sequenceDiagram **Module specs**: [Client](modules/Simplex/Messaging/Notifications/Client.md) · [Protocol](modules/Simplex/Messaging/Notifications/Protocol.md) -Type alias for ProtocolClient — same architecture as SMP Client: +Type alias for ProtocolClient - same architecture as SMP Client: ```haskell type NtfClient = ProtocolClient NTFVersion ErrorType NtfResponse diff --git a/spec/diagrams/ntf-router.svg b/spec/diagrams/ntf-router.svg index b42429459c..c6c9c24316 100644 --- a/spec/diagrams/ntf-router.svg +++ b/spec/diagrams/ntf-router.svg @@ -10,9 +10,6 @@ - - NTF Router -- Component Topology - diff --git a/spec/diagrams/smp-client-agent.svg b/spec/diagrams/smp-client-agent.svg index 4257726ec9..be76d22339 100644 --- a/spec/diagrams/smp-client-agent.svg +++ b/spec/diagrams/smp-client-agent.svg @@ -1,4 +1,4 @@ - + @@ -10,137 +10,134 @@ - - 
SMPClientAgent -- Component Topology + + consumer + (NTF router / + SMP proxy / + application) - - consumer - (NTF router / - SMP proxy / - application) - - - + - msgQ - (TBQueue, server messages) + agentQ + (TBQueue SMPClientAgentEvent) - - + - - + CAConnected / Disconnected + CASubscribed / SubError + CAServiceDisconnected + CAServiceSubscribed / SubError + CAServiceUnavailable + + + - agentQ - (TBQueue SMPClientAgentEvent) + msgQ + (TBQueue, server messages) - - + - - CAConnected - CADisconnected - CASubscribed / CASubError - CAServiceDisconnected - CAServiceSubscribed / SubError - - - SMPClientAgent (connection manager) + SMPClientAgent (connection manager) - - smpClients - (TMap SMPServer SMPClientVar) - - - - activeQueueSubs / pendingQueueSubs - (TMap SMPServer (TMap QueueId ...)) - activeServiceSubs / pendingServiceSubs - (TMap SMPServer (TVar (Maybe ...))) + smpClients + (TMap SMPServer SMPClientVar) - - - - - - + - smpSubWorkers - (one per server) + smpSubWorkers + (one per server) - - smpClients (reconnect, dashed) --> + - reconnect + resubscribe + reconnect + + + + activeQueueSubs / pendingQueueSubs + (TMap SMPServer (TMap QueueId ...)) + activeServiceSubs / pendingServiceSubs + (TMap SMPServer (TVar (Maybe ...))) - - getSMPServerClient'': get or create client - connectClient: create ProtocolClient, register disconnect handler - on disconnect: filter by SessionId, move active → pending, notify agentQ, spawn worker - worker: retry connect with backoff, resubscribe pending subs - subscribeQueuesNtfs / subscribeServiceNtfs: subscribe + track state + + - - ProtocolClient connections (one per SMP Router) + ProtocolClient connections (one per SMP Router) - - ProtocolClient - → SMP Router A + ProtocolClient + → SMP Router A - - ProtocolClient - → SMP Router B + ProtocolClient + → SMP Router B - - ProtocolClient - → SMP Router N + ProtocolClient + → SMP Router N - ... + ... 
- - + + getSMPServerClient'': get or create client + connectClient: create ProtocolClient, register disconnect handler + on disconnect: filter by SessionId, move active to pending, notify agentQ, spawn worker + worker: retry connect with backoff, resubscribe pending subs + subscribeQueuesNtfs / subscribeServiceNtfs: subscribe + track state + - - - ProtocolClient + ProtocolClient - - state / queue + state / queue - - background worker + background worker - - Solid arrows: TBQueue flow. Dashed: reconnection / resubscription. + + Solid arrows: TBQueue flow. Dashed: reconnection. diff --git a/spec/diagrams/smp-client.svg b/spec/diagrams/smp-client.svg index d537ec8d04..754c41c89c 100644 --- a/spec/diagrams/smp-client.svg +++ b/spec/diagrams/smp-client.svg @@ -1,4 +1,4 @@ - + @@ -10,139 +10,160 @@ - - SMP Client (ProtocolClient) -- Component Topology - - - per connection (raceAny_ -- any thread exit tears down connection) + per connection (raceAny_: any exit tears down all threads) + + + + monitor + optional + + + + PING - - caller - (Agent/ - router) + - - + caller + (Agent / + router) + + + - commands + + + + sendProtocolCommand + mkTransmission + + + - - sndQ - (TBQueue, 64) + sndQ + (TBQueue, 64) - - - send + send - - Router (exits per-connection box) --> + - - - receive + + SMP + Router + (TLS) + + + + + + insert + + + + sentCommands + (TMap CorrId Request) + + + + responseVar + (TMVar) - - receive (enters per-connection box) --> + - - SMP - Router - (TLS) + + + receive - - - rcvQ - (TBQueue, 64) + rcvQ + (TBQueue, 64) + + - - - - process - - - - sentCommands - (TMap CorrId Request) - - - sentCommands (match CorrId, dashed upward) --> + - match + match - - - responseVar - (TMVar) - - - - monitor + + + process - - - PING + - - optional + + + events - - + - msgQ (optional) - (TBQueue, server events) - - - - events (empty CorrId) + msgQ (optional) + (TBQueue, server events) - - to Agent / SMPClientAgent + to Agent / SMPClientAgent - - - thread + thread - - queue / 
state + queue / state + + + API entry point - - optional + optional - + Solid arrows: TBQueue flow. Dashed: STM lookups / TMVar responses. diff --git a/spec/diagrams/smp-router.svg b/spec/diagrams/smp-router.svg index 09a198bc9e..819da8a363 100644 --- a/spec/diagrams/smp-router.svg +++ b/spec/diagrams/smp-router.svg @@ -10,13 +10,10 @@ - - SMP Router -- Component Topology - - per client connection (raceAny_ -- any thread exit tears down connection) + per client connection (raceAny_: any exit tears down connection) - - XFTP Client -- Component Topology - diff --git a/spec/diagrams/xftp-router.svg b/spec/diagrams/xftp-router.svg index 6f13221f9a..5a51462755 100644 --- a/spec/diagrams/xftp-router.svg +++ b/spec/diagrams/xftp-router.svg @@ -10,9 +10,6 @@ - - XFTP Router -- Component Topology - diff --git a/spec/routers.md b/spec/routers.md index b66c52ce45..6e8be5e8b5 100644 --- a/spec/routers.md +++ b/spec/routers.md @@ -12,7 +12,7 @@ For deployment and configuration, see [docs/ROUTERS.md](../docs/ROUTERS.md). 
For ### Component topology -![SMP Router — Component Topology](diagrams/smp-router.svg) +![SMP Router - Component Topology](diagrams/smp-router.svg) ### Packet delivery flow @@ -74,7 +74,7 @@ sequenceDiagram ### Component topology -![XFTP Router — Component Topology](diagrams/xftp-router.svg) +![XFTP Router - Component Topology](diagrams/xftp-router.svg) ### Data packet delivery flow @@ -124,7 +124,7 @@ sequenceDiagram ### Component topology -![NTF Router — Component Topology](diagrams/ntf-router.svg) +![NTF Router - Component Topology](diagrams/ntf-router.svg) ### Token registration and notification delivery From 8e294cb72dcad242df118473346e85dadf980ce6 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 14:47:41 +0000 Subject: [PATCH 56/91] agent diagrams --- docs/AGENT.md | 2 +- spec/agent.md | 134 ++++++++++++++++++++++++-- spec/clients.md | 2 +- spec/diagrams/agent.svg | 202 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 331 insertions(+), 9 deletions(-) create mode 100644 spec/diagrams/agent.svg diff --git a/docs/AGENT.md b/docs/AGENT.md index 83050013f4..415ba08f5a 100644 --- a/docs/AGENT.md +++ b/docs/AGENT.md @@ -2,7 +2,7 @@ The SimpleX Agent builds duplex encrypted connections on top of [SimpleX client libraries](CLIENT.md). It manages the full lifecycle of secure communication: connection establishment, end-to-end encryption, queue rotation, file transfer, and push notifications. -This is **Layer 3** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers; Layer 2 is the [client libraries](CLIENT.md) that speak the wire protocols. The Agent adds the connection semantics that applications need. +This is **Layer 3** of the [SimpleX Network architecture](../protocol/overview-tjr.md). Layer 1 is the routers; Layer 2 is the [client libraries](CLIENT.md) that speak the wire protocols. 
The Agent adds the connection semantics that applications need. For internal architecture diagrams (thread topology, message processing flows), see [`spec/agent.md`](../spec/agent.md). **Source**: [`Simplex.Messaging.Agent`](../src/Simplex/Messaging/Agent.hs). **Module spec**: [`spec/modules/Simplex/Messaging/Agent.md`](../spec/modules/Simplex/Messaging/Agent.md) diff --git a/spec/agent.md b/spec/agent.md index 250bf22534..f77ccd4a4d 100644 --- a/spec/agent.md +++ b/spec/agent.md @@ -1,13 +1,133 @@ -# SMP Agent +# Agent Architecture -> SMP agent implementation: duplex connections, queue rotation, ratchet sync, and notification subscriptions. +The SimpleX Agent is the Layer 3 connection manager. It builds duplex encrypted connections on top of Layer 2 client libraries. This document shows its internal architecture: component topology and message processing flows. -## Duplex Connections +For usage and API overview, see [docs/AGENT.md](../docs/AGENT.md). For protocol specifications, see [Agent Protocol](../protocol/agent-protocol.md), [PQDR](../protocol/pqdr.md). -## Queue Rotation +--- -## Ratchet Sync +**Module specs**: [Agent](modules/Simplex/Messaging/Agent.md) · [Agent Client](modules/Simplex/Messaging/Agent/Client.md) · [Agent Protocol](modules/Simplex/Messaging/Agent/Protocol.md) · [Store Interface](modules/Simplex/Messaging/Agent/Store/Interface.md) · [NtfSubSupervisor](modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) · [XFTP Agent](modules/Simplex/FileTransfer/Agent.md) · [Ratchet](modules/Simplex/Messaging/Crypto/Ratchet.md) -## Notification Subscriptions +### Component topology -## Functions +![Agent - Component Topology](diagrams/agent.svg) + +### Message receive flow + +```mermaid +sequenceDiagram + participant R as SMP Router + + box Agent + participant SC as smpClients
(ProtocolClient pool) + participant MQ as msgQ
(TBQueue) + participant S as subscriber + participant St as Store + participant SQ as subQ
(TBQueue) + end + + participant App as Application + + R->>SC: MSG (encrypted packet) + SC->>MQ: write batch + + S->>MQ: read batch + S->>S: withConnLock
(serialize per connection) + S->>St: load ratchet state
(lockConnForUpdate) + S->>S: agentRatchetDecrypt
(double ratchet) + S->>S: checkMsgIntegrity
(sequence + hash chain) + S->>St: store received message,
update ratchet + S->>SQ: write AEvt (MSG + metadata) + + App->>SQ: read event + + Note over App: application processes message + + App->>S: ackMessage (agentMsgId) + Note over S,R: ACK is async
(enqueued as internal command) + S->>SC: ACK + SC->>R: ACK +``` + +### Message send flow + +```mermaid +sequenceDiagram + participant App as Application + + box Agent + participant API as sendMessage + participant St as Store + participant DW as deliveryWorker
(per send queue) + participant SC as smpClients
(ProtocolClient pool) + end + + participant R as SMP Router + + App->>API: sendMessage(connId, body) + API->>St: agentRatchetEncryptHeader
(advance ratchet, store
encryption key + pending message) + API->>DW: signal doWork (TMVar) + API->>App: return msgId + + DW->>St: getPendingQueueMsg + DW->>DW: rcEncryptMsg
(encrypt body with stored key) + DW->>DW: encode AgentMsgEnvelope + DW->>SC: sendAgentMessage
(per-queue encrypt + SEND) + SC->>R: SEND (encrypted packet) + R->>SC: OK + + DW->>St: delete pending message + DW->>App: SENT msgId (via subQ) +``` + +### Connection establishment flow + +```mermaid +sequenceDiagram + participant A as Alice (initiator) + + box Agent A + participant AA as Agent + end + + participant SMP as SMP Router + + box Agent B + participant AB as Agent + end + + participant B as Bob (joiner) + + A->>AA: createConnection + AA->>SMP: NEW (Alice's receive queue) + SMP->>AA: queue ID + keys + AA->>A: invitation URI
(queue address + DH keys) + + Note over A,B: invitation passed out-of-band
(QR code, link) + + B->>AB: joinConnection(invitation) + AB->>AB: initSndRatchet
(PQ X3DH key agreement) + AB->>SMP: NEW (Bob's receive queue) + SMP->>AB: queue ID + AB->>SMP: KEY (secure Alice's queue) + AB->>SMP: SEND confirmation to
Alice's queue (Bob's queue
address + ratchet keys) + + SMP->>AA: MSG (confirmation) + AA->>AA: initRcvRatchet
(PQ X3DH key agreement),
decrypt confirmation + AA->>A: CONF (request approval) + A->>AA: allowConnection(confId) + AA->>SMP: SKEY (secure Alice's rcv queue) + AA->>SMP: NEW (Alice's send queue) + AA->>SMP: SEND reply to Bob's queue
(Alice's connection info) + + SMP->>AB: MSG (reply) + AB->>SMP: SKEY (secure Bob's rcv queue) + AB->>SMP: SEND HELLO to Alice + + SMP->>AA: MSG (HELLO) + AA->>SMP: SEND HELLO to Bob + AA->>A: CON (connected) + + SMP->>AB: MSG (HELLO) + AB->>B: CON (connected) +``` diff --git a/spec/clients.md b/spec/clients.md index a99686ab06..10634d0de4 100644 --- a/spec/clients.md +++ b/spec/clients.md @@ -122,7 +122,7 @@ Stateless wrapper around HTTP2Client. XFTPClient adds no threads of its own; eac ![XFTP Client - Component Topology](diagrams/xftp-client.svg) -### Upload/download flow +### Packet delivery flow ```mermaid sequenceDiagram diff --git a/spec/diagrams/agent.svg b/spec/diagrams/agent.svg new file mode 100644 index 0000000000..4b7bf802c2 --- /dev/null +++ b/spec/diagrams/agent.svg @@ -0,0 +1,202 @@ + + + + + + + + + + + + + Application + + + + subQ + + + + main threads (raceAny_: any exit tears down all) + + + + subscriber + (reads msgQ) + + + ntfSubQ + + + + + ntfSupervisor + (reads ntfSubQ) + + + + cleanupManager + (periodic cleanup) + + + + logServersStats + (periodic stats) + + + + worker pools (on-demand, one per queue/connection/server) + + + + delivery + (per send queue) + + + asyncCmd + (per connection) + + + smpSub + (per session) + + + + xftpRcv + (per server) + + + xftpSnd + (per server) + + + xftpDel + (per server) + + + + ntfSMP + (per SMP server) + + + ntfWorkers + (per NTF server) + + + ntfTknDel + (per NTF server) + + + + ntf workers (dispatched by ntfSupervisor) + + + + + + + + store + + + + Store + (SQLite / Postgres) + + + currentSubs + (TSessionSubs) + + + Operation State + (5-op suspension cascade) + + + + protocol client pools (lazy singleton per router) + + + smpClients + (TMap SMPTransportSession) + + + xftpClients + (TMap XFTPTransportSession) + + + ntfClients + (TMap NtfTransportSession) + + + SMP Routers + + + XFTP Routers + + + NTF Routers + + + + + msgQ + + + + + + on-demand worker + + + singleton thread + + + storage / state + + + 
external connection + + + Solid arrows: TBQueue connections. Dashed: store access / dispatch. Workers connect to protocol clients in their column. + + + From 7410cebac5532dd60289dd59abcac3c30564381e Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 15:35:53 +0000 Subject: [PATCH 57/91] update agent diagram --- spec/agent.md | 52 +++++++ spec/diagrams/agent.svg | 292 +++++++++++++++++++++++----------------- 2 files changed, 220 insertions(+), 124 deletions(-) diff --git a/spec/agent.md b/spec/agent.md index f77ccd4a4d..a6f9442603 100644 --- a/spec/agent.md +++ b/spec/agent.md @@ -131,3 +131,55 @@ sequenceDiagram SMP->>AB: MSG (HELLO) AB->>B: CON (connected) ``` + +### File delivery flow (XFTP) + +```mermaid +sequenceDiagram + participant SA as Sender App + + box Sender Agent + participant S as xftpSnd workers + participant SS as Store + end + + participant XFTP as XFTP Routers + participant SMP as SMP Router + + box Receiver Agent + participant RS as Store + participant R as xftpRcv workers + end + + participant RA as Receiver App + + SA->>S: xftpSendFile(file) + S->>S: encrypt file
(XSalsa20-Poly1305, random key + nonce) + S->>S: split into chunks
(fixed sizes: 64KB - 4MB) + S->>SS: store SndFile + chunks + + loop each chunk + S->>XFTP: FNEW (create data packet) + XFTP->>S: sender ID + recipient IDs + S->>XFTP: FPUT (upload encrypted chunk) + end + + S->>S: assemble FileDescription
(chunk locations, replicas,
encryption key + nonce) + S->>SA: SFDONE
(sender + recipient descriptions) + + Note over SA,RA: recipient description sent as
SMP message (encrypted, via double ratchet) + + SA->>SMP: description in A_MSG + SMP->>RA: description in MSG + + RA->>R: xftpReceiveFile(description) + R->>RS: store RcvFile + chunks + + loop each chunk (parallel per server) + R->>XFTP: FGET (per-recipient auth key) + XFTP->>R: encrypted chunk stream + end + + R->>R: stream chunks through
stateful decrypt (key + nonce),
verify auth tag at end + R->>RA: RFDONE (decrypted file path) +``` diff --git a/spec/diagrams/agent.svg b/spec/diagrams/agent.svg index 4b7bf802c2..9ec32bf317 100644 --- a/spec/diagrams/agent.svg +++ b/spec/diagrams/agent.svg @@ -1,4 +1,4 @@ - + @@ -8,195 +8,239 @@ markerWidth="6" markerHeight="6" orient="auto-start-reverse"> + + + - - + - Application + Application - - API arrows (down) ===== --> + + + + + + - subQ + subQ - - - main threads (raceAny_: any exit tears down all) + - - - subscriber - (reads msgQ) + + + sendMessage, createConnection + joinConnection, subscribe... - - ntfSubQ - + + + xftpSendFile + xftpReceiveFile... - - - ntfSupervisor - (reads ntfSubQ) - - - - cleanupManager - (periodic cleanup) + + + registerNtfToken + toggleConnectionNtfs... - - - logServersStats - (periodic stats) + - - + - worker pools (on-demand, one per queue/connection/server) + SMP - - + + subscriber + (reads msgQ) + + + - delivery - (per send queue) + delivery + (per send queue) - + - asyncCmd - (per connection) + asyncCmd + (per connection) - + - smpSub - (per session) + smpSub + (per session) + + + + XFTP - - + - xftpRcv - (per server) + xftpRcv + (per server + local) - + - xftpSnd - (per server) + xftpSnd + (per server + local) - + - xftpDel - (per server) + xftpDel + (per server) - - + + NTF + + + + ntfSupervisor + (reads ntfSubQ) + + + - ntfSMP - (per SMP server) + ntfWorkers + (per NTF server) - + - ntfWorkers - (per NTF server) + ntfSMP + (per SMP server) - + - ntfTknDel - (per NTF server) + ntfTknDel + (per NTF server) - - - ntf workers (dispatched by ntfSupervisor) - + + + ntfSubQ - - + + + ntfSubQ (queue rotation) - - - store + + + ntfSMP uses smpClients - - - Store - (SQLite / Postgres) + + + cleanupManager + + + logServersStats + + shared singletons (all green run in raceAny_) - + - currentSubs - (TSessionSubs) + Store + (SQLite / Postgres) - - Operation State - (5-op suspension cascade) + Operation State + (5-op suspension cascade) - - - protocol client pools 
(lazy singleton per router) + + + store - + - smpClients - (TMap SMPTransportSession) + smpClients + (TMap SMPTransportSession) - - xftpClients - (TMap XFTPTransportSession) + xftpClients + (TMap XFTPTransportSession) - - ntfClients - (TMap NtfTransportSession) + ntfClients + (TMap NtfTransportSession) - - SMP Routers - + SMP Routers + - XFTP Routers - XFTP Routers + - NTF Routers - NTF Routers + - - msgQ + msgQ - - - on-demand worker + on-demand worker - - singleton thread + singleton thread - - storage / state + storage / state - - external connection + external connection + + + API entry point + + + cross-protocol - - Solid arrows: TBQueue connections. Dashed: store access / dispatch. Workers connect to protocol clients in their column. + + Solid arrows: TBQueue flow. Dashed grey: store access. Dashed red: cross-protocol link. Workers use clients in their column. From 1354918ed56163c94ee599d7339931a2a423180a Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:08:53 +0000 Subject: [PATCH 58/91] improve diagram --- spec/agent.md | 8 +- spec/diagrams/agent.svg | 270 +++++++++++++++++++++++----------------- 2 files changed, 158 insertions(+), 120 deletions(-) diff --git a/spec/agent.md b/spec/agent.md index a6f9442603..9074006e23 100644 --- a/spec/agent.md +++ b/spec/agent.md @@ -107,21 +107,21 @@ sequenceDiagram B->>AB: joinConnection(invitation) AB->>AB: initSndRatchet
(PQ X3DH key agreement) + AB->>SMP: SKEY (sender auth on
Alice's queue) AB->>SMP: NEW (Bob's receive queue) SMP->>AB: queue ID - AB->>SMP: KEY (secure Alice's queue) AB->>SMP: SEND confirmation to
Alice's queue (Bob's queue
address + ratchet keys) SMP->>AA: MSG (confirmation) AA->>AA: initRcvRatchet
(PQ X3DH key agreement),
decrypt confirmation AA->>A: CONF (request approval) A->>AA: allowConnection(confId) - AA->>SMP: SKEY (secure Alice's rcv queue) - AA->>SMP: NEW (Alice's send queue) + AA->>SMP: KEY (register sender key
on Alice's rcv queue) + AA->>SMP: SKEY (sender auth on
Bob's queue) AA->>SMP: SEND reply to Bob's queue
(Alice's connection info) SMP->>AB: MSG (reply) - AB->>SMP: SKEY (secure Bob's rcv queue) + AB->>SMP: KEY (register sender key
on Bob's rcv queue) AB->>SMP: SEND HELLO to Alice SMP->>AA: MSG (HELLO) diff --git a/spec/diagrams/agent.svg b/spec/diagrams/agent.svg index 9ec32bf317..d53e3ec130 100644 --- a/spec/diagrams/agent.svg +++ b/spec/diagrams/agent.svg @@ -1,4 +1,4 @@ - + @@ -15,231 +15,269 @@ - - Application + Application + + + + subQ + (TBQueue) + + + + + + + all threads + write subQ - - - - - - subQ - - - sendMessage, createConnection - joinConnection, subscribe... + sendMessage, createConnection + joinConnection, subscribe... - - xftpSendFile - xftpReceiveFile... + xftpSendFile + xftpReceiveFile... - - registerNtfToken - toggleConnectionNtfs... + registerNtfToken + toggleConnectionNtfs... - - + - SMP + SMP + + + + msgQ + (TBQueue) - - subscriber - (reads msgQ) + subscriber + (reads msgQ) + + + + + + worker pools (on-demand, one per queue / connection / session) - - delivery - (per send queue) + delivery + (per send queue) - - asyncCmd - (per connection) + asyncCmd + (per conn + server) - - smpSub - (per session) + smpSub + (per session) - - + - XFTP + XFTP + + + worker pools (on-demand, one per server) - - xftpRcv - (per server + local) + xftpRcv + (per server + local) - - xftpSnd - (per server + local) + xftpSnd + (per server + local) - - xftpDel - (per server) + xftpDel + (per server) - - + - NTF + NTF + + + + ntfSubQ + (TBQueue) - - ntfSupervisor - (reads ntfSubQ) + ntfSupervisor + (reads ntfSubQ) + + + + + + + + + worker pools (on-demand, one per server) - - ntfWorkers - (per NTF server) + ntfWorkers + (per NTF server) - - ntfSMP - (per SMP server) + ntfSMP + (per SMP server) - - ntfTknDel - (per NTF server) - - - - ntfSubQ + ntfTknDel + (per NTF server) - - ntfSubQ (queue rotation) + ntfSubQ (queue rotation) - - ntfSMP uses smpClients + ntfSMP uses smpClients - shared singletons (all green run in raceAny_) + - cleanupManager + cleanupManager - - logServersStats - - shared singletons (all green run in raceAny_) + logServersStats - - Store - (SQLite / Postgres) + Store + (SQLite 
/ Postgres) - - Operation State - (5-op suspension cascade) + Operation State + (5-op suspension cascade) - - store + store - - smpClients - (TMap SMPTransportSession) + smpClients + (TMap SMPTransportSession) - - xftpClients - (TMap XFTPTransportSession) + xftpClients + (TMap XFTPTransportSession) - - ntfClients - (TMap NtfTransportSession) + ntfClients + (TMap NtfTransportSession) - SMP Routers - SMP Routers + - XFTP Routers - XFTP Routers + - NTF Routers - NTF Routers + - - msgQ + msgQ - - - on-demand worker + on-demand worker - - singleton thread + singleton thread - - storage / state + queue / state - - external connection + external connection - - API entry point + API entry point - - cross-protocol + cross-protocol - + Solid arrows: TBQueue flow. Dashed grey: store access. Dashed red: cross-protocol link. Workers use clients in their column. From 021d929e66acedde6d014eac1c1bf49afe0f3602 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:15:13 +0000 Subject: [PATCH 59/91] titles --- spec/agent.md | 4 ++-- spec/clients.md | 12 ++++++------ spec/routers.md | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/spec/agent.md b/spec/agent.md index 9074006e23..6ec9b1b729 100644 --- a/spec/agent.md +++ b/spec/agent.md @@ -8,9 +8,9 @@ For usage and API overview, see [docs/AGENT.md](../docs/AGENT.md). 
For protocol **Module specs**: [Agent](modules/Simplex/Messaging/Agent.md) · [Agent Client](modules/Simplex/Messaging/Agent/Client.md) · [Agent Protocol](modules/Simplex/Messaging/Agent/Protocol.md) · [Store Interface](modules/Simplex/Messaging/Agent/Store/Interface.md) · [NtfSubSupervisor](modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) · [XFTP Agent](modules/Simplex/FileTransfer/Agent.md) · [Ratchet](modules/Simplex/Messaging/Crypto/Ratchet.md) -### Component topology +### Agent components -![Agent - Component Topology](diagrams/agent.svg) +![Agent components](diagrams/agent.svg) ### Message receive flow diff --git a/spec/clients.md b/spec/clients.md index 10634d0de4..3ab9d58682 100644 --- a/spec/clients.md +++ b/spec/clients.md @@ -12,9 +12,9 @@ For deployment and usage, see [docs/CLIENT.md](../docs/CLIENT.md). For protocol Generic protocol client used for both SMP and NTF connections. Manages a single TLS connection with multiplexed command/response matching via correlation IDs. -### Component topology +### SMP Client components -![SMP Client - Component Topology](diagrams/smp-client.svg) +![SMP Client components](diagrams/smp-client.svg) ### Command/result flow @@ -63,9 +63,9 @@ sequenceDiagram Connection manager that multiplexes multiple ProtocolClient connections. Tracks subscriptions, handles reconnection with backoff, and forwards server messages and connection events upward. Used by SMP router (proxying) and NTF router (subscriptions). -### Component topology +### SMPClientAgent components -![SMPClientAgent - Component Topology](diagrams/smp-client-agent.svg) +![SMPClientAgent components](diagrams/smp-client-agent.svg) ### Connection lifecycle @@ -118,9 +118,9 @@ sequenceDiagram Stateless wrapper around HTTP2Client. XFTPClient adds no threads of its own; each operation is a synchronous HTTP/2 request/response. Serialization and multiplexing happen inside HTTP2Client's internal request queue and process thread. 
-### Component topology +### XFTP Client components -![XFTP Client - Component Topology](diagrams/xftp-client.svg) +![XFTP Client components](diagrams/xftp-client.svg) ### Packet delivery flow diff --git a/spec/routers.md b/spec/routers.md index 6e8be5e8b5..b7b8761ef0 100644 --- a/spec/routers.md +++ b/spec/routers.md @@ -10,9 +10,9 @@ For deployment and configuration, see [docs/ROUTERS.md](../docs/ROUTERS.md). For **Module specs**: [Server](modules/Simplex/Messaging/Server.md) · [Main](modules/Simplex/Messaging/Server/Main.md) · [QueueStore](modules/Simplex/Messaging/Server/QueueStore.md) · [QueueStore Postgres](modules/Simplex/Messaging/Server/QueueStore/Postgres.md) · [MsgStore](modules/Simplex/Messaging/Server/MsgStore.md) · [StoreLog](modules/Simplex/Messaging/Server/StoreLog.md) · [Control](modules/Simplex/Messaging/Server/Control.md) · [Prometheus](modules/Simplex/Messaging/Server/Prometheus.md) · [Stats](modules/Simplex/Messaging/Server/Stats.md) -### Component topology +### SMP Router components -![SMP Router - Component Topology](diagrams/smp-router.svg) +![SMP Router components](diagrams/smp-router.svg) ### Packet delivery flow @@ -72,9 +72,9 @@ sequenceDiagram **Module specs**: [Server](modules/Simplex/FileTransfer/Server.md) · [Main](modules/Simplex/FileTransfer/Server/Main.md) · [Store](modules/Simplex/FileTransfer/Server/Store.md) · [StoreLog](modules/Simplex/FileTransfer/Server/StoreLog.md) · [Stats](modules/Simplex/FileTransfer/Server/Stats.md) · [Transport](modules/Simplex/FileTransfer/Transport.md) -### Component topology +### XFTP Router components -![XFTP Router - Component Topology](diagrams/xftp-router.svg) +![XFTP Router components](diagrams/xftp-router.svg) ### Data packet delivery flow @@ -122,9 +122,9 @@ sequenceDiagram **Module specs**: [Server](modules/Simplex/Messaging/Notifications/Server.md) · [Main](modules/Simplex/Messaging/Notifications/Server/Main.md) · [Store 
Postgres](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) · [APNS](modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) · [Control](modules/Simplex/Messaging/Notifications/Server/Control.md) · [Client](modules/Simplex/Messaging/Notifications/Client.md) · [Protocol](modules/Simplex/Messaging/Notifications/Protocol.md) -### Component topology +### NTF Router components -![NTF Router - Component Topology](diagrams/ntf-router.svg) +![NTF Router components](diagrams/ntf-router.svg) ### Token registration and notification delivery From 958f030899ba5c06a28865881a8248ef1291bdcc Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:21:49 +0000 Subject: [PATCH 60/91] improve diagram --- spec/diagrams/agent.svg | 248 +++++++++++++++++++++------------------- 1 file changed, 128 insertions(+), 120 deletions(-) diff --git a/spec/diagrams/agent.svg b/spec/diagrams/agent.svg index d53e3ec130..d885d4b259 100644 --- a/spec/diagrams/agent.svg +++ b/spec/diagrams/agent.svg @@ -1,4 +1,4 @@ - + @@ -29,256 +29,264 @@ - - + + + + + + + + + - all threads - write subQ - - - - - sendMessage, createConnection - joinConnection, subscribe... + sendMessage, createConnection + joinConnection, subscribe... - - xftpSendFile - xftpReceiveFile... + xftpSendFile + xftpReceiveFile... - - registerNtfToken - toggleConnectionNtfs... + registerNtfToken + toggleConnectionNtfs... 
- - SMP + SMP - - msgQ - (TBQueue) + msgQ + (TBQueue) - - subscriber - (reads msgQ) + subscriber + (reads msgQ) - - worker pools (on-demand, one per queue / connection / session) + worker pools (on-demand, one per queue / conn+server / session) - - delivery - (per send queue) + delivery + (per send queue) - - asyncCmd - (per conn + server) + asyncCmd + (per conn + server) - - smpSub - (per session) + smpSub + (per session) - - XFTP + XFTP - worker pools (on-demand, one per server) + worker pools (on-demand, one per server) - - xftpRcv - (per server + local) + xftpRcv + (per server + local) - - xftpSnd - (per server + local) + xftpSnd + (per server + local) - - xftpDel - (per server) + xftpDel + (per server) - - NTF + NTF - - ntfSubQ - (TBQueue) + ntfSubQ + (TBQueue) - - ntfSupervisor - (reads ntfSubQ) + ntfSupervisor + (reads ntfSubQ) - - - worker pools (on-demand, one per server) + worker pools (on-demand, one per server) - - ntfWorkers - (per NTF server) + ntfWorkers + (per NTF server) - - ntfSMP - (per SMP server) + ntfSMP + (per SMP server) - - ntfTknDel - (per NTF server) + ntfTknDel + (per NTF server) - - ntfSubQ (queue rotation) - - - - ntfSMP uses smpClients + ntfSubQ (queue rotation) - shared singletons (all green run in raceAny_) - shared singletons (all green run in raceAny_) + - cleanupManager + cleanupManager - - logServersStats + logServersStats + + + + ntfSMP uses smpClients - - Store - (SQLite / Postgres) + Store + (SQLite / Postgres) - - Operation State - (5-op suspension cascade) + Operation State + (5-op suspension cascade) - - store + store - - smpClients - (TMap SMPTransportSession) + smpClients + (TMap SMPTransportSession) - - xftpClients - (TMap XFTPTransportSession) + xftpClients + (TMap XFTPTransportSession) - - ntfClients - (TMap NtfTransportSession) + ntfClients + (TMap NtfTransportSession) - SMP Routers - SMP Routers + - XFTP Routers - XFTP Routers + - NTF Routers - NTF Routers + - - msgQ + msgQ - - - on-demand worker + on-demand worker 
- - singleton thread + singleton thread - - queue / state + queue / state - - external connection + external connection - - API entry point + API entry point - - cross-protocol + cross-protocol - - Solid arrows: TBQueue flow. Dashed grey: store access. Dashed red: cross-protocol link. Workers use clients in their column. + + Solid arrows: TBQueue flow. Dashed grey: store access. Dashed red: cross-protocol link. All threads and pools write events to subQ. From 1ed344405700d26695d244670f99d2d0ce5bbb30 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:36:54 +0000 Subject: [PATCH 61/91] improve diagram 2 --- spec/diagrams/agent.svg | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/spec/diagrams/agent.svg b/spec/diagrams/agent.svg index d885d4b259..2c4569f78f 100644 --- a/spec/diagrams/agent.svg +++ b/spec/diagrams/agent.svg @@ -29,18 +29,15 @@ - + - - - - - + + + + - - + From 05824293e548452c0a9ea9cf9d569c5b3ab079b1 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:55:34 +0000 Subject: [PATCH 62/91] more diagram --- spec/diagrams/agent.svg | 47 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/spec/diagrams/agent.svg b/spec/diagrams/agent.svg index 2c4569f78f..9d7226e524 100644 --- a/spec/diagrams/agent.svg +++ b/spec/diagrams/agent.svg @@ -75,20 +75,20 @@ fill="none" stroke="#888" stroke-dasharray="6,3" /> SMP - - + - msgQ - (TBQueue) + msgQ + (TBQueue) - - + - subscriber - (reads msgQ) + subscriber + (reads msgQ) - @@ -143,24 +143,24 @@ fill="none" stroke="#888" stroke-dasharray="6,3" /> NTF - - + - ntfSubQ - (TBQueue) + ntfSubQ + (TBQueue) - - + - ntfSupervisor - (reads ntfSubQ) + ntfSupervisor + (reads ntfSubQ) - - @@ -185,9 +185,9 @@ (per NTF server) - - ntfSubQ (queue rotation) + ntfSubQ (queue rotation) shared 
singletons (all green run in raceAny_) @@ -249,10 +249,9 @@ - - msgQ (left margin) ===== --> + - msgQ Date: Sat, 14 Mar 2026 17:03:53 +0000 Subject: [PATCH 63/91] sequence diagrams layout --- spec/agent.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spec/agent.md b/spec/agent.md index 6ec9b1b729..02f93314b6 100644 --- a/spec/agent.md +++ b/spec/agent.md @@ -64,7 +64,7 @@ sequenceDiagram participant R as SMP Router - App->>API: sendMessage(connId, body) + App->>API: sendMessage
(connId, body) API->>St: agentRatchetEncryptHeader<br/>(advance ratchet, store<br/>encrypt key + pending message) API->>DW: signal doWork (TMVar) API->>App: return msgId @@ -99,16 +99,16 @@ sequenceDiagram participant B as Bob (joiner) A->>AA: createConnection - AA->>SMP: NEW (Alice's receive queue) + AA->>SMP: NEW<br/>(Alice's receive queue) SMP->>AA: queue ID + keys AA->>A: invitation URI<br/>(queue address + DH keys) Note over A,B: invitation passed out-of-band<br/>(QR code, link) - B->>AB: joinConnection(invitation) + B->>AB: joinConnection<br/>(invitation) AB->>AB: initSndRatchet<br/>(PQ X3DH key agreement) AB->>SMP: SKEY (sender auth on<br/>Alice's queue) - AB->>SMP: NEW (Bob's receive queue) + AB->>SMP: NEW<br/>(Bob's receive queue) SMP->>AB: queue ID AB->>SMP: SEND confirmation to<br/>Alice's queue (Bob's queue<br/>address + ratchet keys) @@ -172,7 +172,7 @@ sequenceDiagram SA->>SMP: description in A_MSG SMP->>RA: description in MSG - RA->>R: xftpReceiveFile(description) + RA->>R: xftpReceiveFile<br/>(description) R->>RS: store RcvFile + chunks loop each chunk (parallel per server) @@ -181,5 +181,5 @@ sequenceDiagram end R->>R: stream chunks through<br/>stateful decrypt (key + nonce),<br/>verify auth tag at end - R->>RA: RFDONE (decrypted file path) + R->>RA: RFDONE
(decrypted file path) ``` From b62a22472e222a9cf6842a8c3b911dfc8c7e7c63 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 19:18:14 +0000 Subject: [PATCH 64/91] agent topics --- spec/agent/infrastructure.md | 197 +++++++++++++++++++++++++++++++++++ spec/agent/xrcp.md | 101 ++++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 spec/agent/infrastructure.md create mode 100644 spec/agent/xrcp.md diff --git a/spec/agent/infrastructure.md b/spec/agent/infrastructure.md new file mode 100644 index 0000000000..ad84fd6e7b --- /dev/null +++ b/spec/agent/infrastructure.md @@ -0,0 +1,197 @@ +# Agent Infrastructure + +The Agent's internal machinery: worker lifecycle, command dispatch, message delivery, subscription tracking, operation suspension, protocol client management, and dual-backend store. These cross-module patterns are not visible from any single module spec. + +This document covers the "big agent" (`Agent.hs` + `Agent/Client.hs`) used in client applications. The "small agent" (`SMPClientAgent`) used in routers is documented in [clients.md](../clients.md). + +For per-module details: [Agent](../modules/Simplex/Messaging/Agent.md) · [Agent Client](../modules/Simplex/Messaging/Agent/Client.md) · [Store Interface](../modules/Simplex/Messaging/Agent/Store/Interface.md) · [NtfSubSupervisor](../modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) · [XFTP Agent](../modules/Simplex/FileTransfer/Agent.md). For the component diagram, see [agent.md](../agent.md). 
+ +- [Worker framework](#worker-framework) +- [Async command processing](#async-command-processing) +- [Message delivery](#message-delivery) +- [Subscription tracking](#subscription-tracking) +- [Operation suspension cascade](#operation-suspension-cascade) +- [SessionVar lifecycle](#sessionvar-lifecycle) +- [Dual-backend store](#dual-backend-store) + +--- + +## Worker framework + +**Source**: [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs), [Agent/Env/SQLite.hs](../../src/Simplex/Messaging/Agent/Env/SQLite.hs) (Worker type) + +All agent background processing - async commands, message delivery, notification workers, XFTP workers - uses a shared worker infrastructure defined in `Agent/Client.hs`. + +**Create-or-reuse**: `getAgentWorker` atomically checks a `TMap` for an existing worker keyed by the work item (connection+server, send queue address, etc.). If absent, creates a new `Worker` with a unique monotonic `workerId` from `workerSeq` and inserts it. If present and `hasWork=True`, signals the existing worker via `tryPutTMVar doWork ()`. + +**Fork and run**: `runWorkerAsync` uses bracket on the worker's `action` TMVar. If the taken value is `Nothing`, the worker is idle - start it. If `Just _`, it's already running - put it back and return. The `action` TMVar holds `Just (Weak ThreadId)` to avoid preventing GC of the worker thread. + +**Task retrieval race prevention**: `withWork` clears the `doWork` flag *before* calling `getWork` (not after). This prevents a race: query finds nothing → another thread adds work + signals → worker clears flag (losing the signal). By clearing first, any signal that arrives during the query is preserved. + +**Error classification**: `withWork` distinguishes two failure modes: +- *Work-item error* (`isWorkItemError`): the task itself is broken (likely recurring). Worker stops and sends `CRITICAL False`. +- *Store error*: transient database issue. Worker re-signals `doWork` and reports `INTERNAL` (retry may succeed). 
+ +**Restart rate limiting**: On worker exit, `restartOrDelete` checks the `restarts` counter against `maxWorkerRestartsPerMin`. Under the limit: reset action, re-signal, restart. Over the limit: delete the worker from the map and send `CRITICAL True` (escalation to the application). A restart only proceeds if the `workerId` in the map still matches the current worker - a stale restart from a replaced worker is a no-op. + +**Consumers**: Four families use this framework: +- Async command workers - keyed by `(ConnId, Maybe SMPServer)`, in `asyncCmdWorkers` TMap +- Delivery workers - keyed by `SndQAddr`, in `smpDeliveryWorkers` TMap, paired with a `TMVar ()` retry lock +- NTF workers - three pools (`ntfWorkers` per NTF server, `ntfSMPWorkers` per SMP server, `ntfTknDelWorkers` for token deletion) in `NtfSubSupervisor` +- XFTP workers - three worker types (rcv, snd, del) with TMVar-based connection sharing + +--- + +## Async command processing + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Protocol.hs](../../src/Simplex/Messaging/Agent/Protocol.hs) (command types), [Agent/Store.hs](../../src/Simplex/Messaging/Agent/Store.hs) (internal command types) + +Async commands handle state transitions that require network calls but shouldn't block the API thread: securing queues, deleting old queues during rotation, acknowledging messages. The dispatch loop `runCommandProcessing` runs one worker per `(ConnId, Maybe SMPServer)` key. + +**Enqueueing**: API functions call `enqueueCommand`, which persists the command to the `commands` table (crash-safe) and spawns/wakes the worker via `getAsyncCmdWorker`. On agent startup, `resumeAllCommands` fetches all pending commands grouped by connection+server and signals their workers. + +**Command types**: Two categories share the same dispatch loop: +- *Client commands* (`AClientCommand`): `NEW`, `JOIN`, `LET` (allow connection), `ACK`, `LSET`/`LGET` (set/get connection link data), `SWCH` (switch queue), `DEL`. 
Triggered by application API calls. +- *Internal commands* (`AInternalCommand`): `ICAck` (ack to router), `ICAckDel` (ack + delete local message), `ICAllowSecure`/`ICDuplexSecure` (secure after confirmation), `ICQSecure` (secure queue during switch), `ICQDelete` (delete old queue after switch), `ICDeleteConn` (delete connection), `ICDeleteRcvQueue` (delete specific receive queue). Generated *during* message processing to handle state transitions asynchronously. + +**Retry and movement**: `tryMoveableCommand` wraps execution with `withRetryInterval`. On `temporaryOrHostError`, it retries with backoff. On cross-server errors (e.g., queue moved to different router), it updates the command's server field in the store (`CCMoved`) and retries against the new server. + +**Locking**: State-sensitive commands use `tryWithLock` / `tryMoveableWithLock`, which acquire `withConnLock` before execution. This serializes operations on the same connection, preventing races between concurrent command processing and message receipt. + +**Event overflow**: Events are written directly to `subQ` if there is room. When `subQ` is full, events overflow into a local `pendingCmds` list and are flushed to `subQ` after the command completes, providing backpressure handling. + +--- + +## Message delivery + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/RetryInterval.hs](../../src/Simplex/Messaging/Agent/RetryInterval.hs) + +Message delivery uses a split-phase encryption design: the ratchet advances in the API thread (serialized), while the actual body encryption happens in the per-queue delivery worker (parallel). This avoids ratchet lock contention across queues. + +**Phase 1 - API thread** (`enqueueMessageB`): +1. Encode the agent message with `internalSndId` + `prevMsgHash` (for the receiver's integrity chain) +2. Call `agentRatchetEncryptHeader` - advances the double ratchet, produces a message encryption key (MEK), padded length, and PQ encryption status +3. 
Store `SndMsg` with `SndMsgPrepData` (MEK, paddedLen, sndMsgBodyId) in the database +4. Create `SndMsgDelivery` record for each send queue +5. Increment `msgDeliveryOp.opsInProgress` (for suspension tracking) +6. Signal delivery workers via `getDeliveryWorker` + +**Phase 2 - delivery worker** (`runSmpQueueMsgDelivery`): +1. `throwWhenNoDelivery` - kills the worker thread if the queue's address has been removed from `smpDeliveryWorkers` (prevents delivery to queues replaced during switch) +2. `getPendingQueueMsg` - fetches the next pending message from the store, resolving the `sndMsgBodyId` reference into the actual message body and constructing `PendingMsgPrepData` +3. Re-encode the message with `internalSndId`/`prevMsgHash`, then `rcEncryptMsg` to encrypt with the stored MEK (no ratchet access needed) +4. `sendAgentMessage` - per-queue encrypt + SEND to the router + +**Connection info messages** (`AM_CONN_INFO`, `AM_CONN_INFO_REPLY`) skip split-phase encryption entirely - they are sent as plaintext confirmation bodies via `sendConfirmation`. + +**Retry with dual intervals**: Delivery uses `withRetryLock2`, which maintains two independent retry clocks (slow and fast). A background thread sleeps for the current interval, then signals the delivery worker via `tryPutTMVar`. When the router sends `QCONT` (queue buffer cleared), the agent calls `tryPutTMVar retryLock ()` to wake the delivery thread immediately, avoiding unnecessary delay. 
+ +**Error handling**: +- `SMP QUOTA` - switch to slow retry, don't penalize (backpressure from router) +- `SMP AUTH` - permanent failure: for data messages, notify and delete; for handshake messages, report connection error; for queue-switch messages, report queue error +- `temporaryOrHostError` - retry with backoff +- Other errors - report to application, delete command + +--- + +## Subscription tracking + +**Source**: [Agent/TSessionSubs.hs](../../src/Simplex/Messaging/Agent/TSessionSubs.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +The agent tracks per-queue subscription state in `TSessionSubs` (defined in `Agent/TSessionSubs.hs`), keyed by `SMPTransportSession = (UserId, SMPServer, Maybe ByteString)` where the `ByteString` carries the entity ID in entity-session mode or `Nothing` in shared mode. Each transport session holds: + +``` +SessSubs +├── subsSessId :: TVar (Maybe SessionId) -- TLS session ID +├── activeSubs :: TMap RecipientId RcvQueueSub +├── pendingSubs :: TMap RecipientId RcvQueueSub +├── activeServiceSub :: TVar (Maybe ServiceSub) +└── pendingServiceSub :: TVar (Maybe ServiceSub) +``` + +**State machine**: Subscriptions move between three states: + +- **Pending → Active**: After subscription RPC succeeds, `addActiveSub'` promotes the queue - but only if the returned session ID matches the stored TLS session ID (`Just sessId == sessId'`). On mismatch (TLS reconnected between RPC send and response), the subscription is silently added to pending instead. No exception - concurrent resubscription paths handle this naturally. + +- **Active → Pending**: When `setSessionId` is called with a *different* session ID (TLS reconnect), all active subscriptions are atomically demoted to pending. Session ID is updated to the new value. + +- **Pending → Removed**: `failSubscriptions` moves permanently-failed queues (non-temporary SMP errors) to `removedSubs`. The removal is tracked for diagnostic reporting via `getSubscriptions`. 
+ +**Service-associated queues**: Queues with `serviceAssoc=True` are *not* added to `activeSubs` individually. Instead, the service subscription's count is incremented and its `idsHash` XOR-accumulates the queue's hash. The router tracks individual queues via the service subscription; the agent only tracks the aggregate. Consequence: `hasActiveSub(rId)` returns `False` for service-associated queues - callers must check the service subscription separately. + +**Disconnect cleanup** (`smpClientDisconnected`): +1. `removeSessVar` with CAS check (monotonic `sessionVarId` prevents stale callbacks from removing newer clients) +2. `setSubsPending` - demote active→pending, filtered by matching `SessionId` only +3. Delete proxied relay sessions created by this client +4. Fire `DISCONNECT`, `DOWN` (affected connections), `SERVICE_DOWN` (if service sub existed) +5. Release GET locks for affected queues +6. Resubscribe: either spawn `resubscribeSMPSession` worker (entity-session mode) or directly resubscribe queues and services (other modes) + +**Resubscription worker**: Per-transport-session worker with exponential backoff. Loops until `pendingSubs` and `pendingServiceSub` are both empty. Uses `waitForUserNetwork` with bounded wait - proceeds even without network (prevents indefinite blocking). Worker self-cleans via `removeSessVar` on exit. + +**UP event deduplication**: After a batch subscription RPC, `UP` events are emitted only for connections that were *not* already in `activeSubs` before the batch. This prevents duplicate notifications for already-subscribed connections. + +--- + +## Operation suspension cascade + +**Source**: [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +Five `AgentOpState` TVars track in-flight operations for graceful shutdown. Each holds `{opSuspended :: Bool, opsInProgress :: Int}`. 
+ +**Cascade ordering**: +``` +AONtfNetwork (independent - no cascading) + +AORcvNetwork → AOMsgDelivery → AOSndNetwork → AODatabase +``` + +**Mechanics**: `endAgentOperation` decrements `opsInProgress`. If the count reaches zero and the operation is suspended, it calls the cascade action: `AORcvNetwork` suspends `AOMsgDelivery`, which suspends `AOSndNetwork`, which suspends `AODatabase`. At the leaf (`AODatabase`), `notifySuspended` writes `SUSPENDED` to `subQ` and sets `agentState = ASSuspended`. + +**Blocking**: `beginAgentOperation` blocks (STM `retry`) while `opSuspended == True`. This means new operations of a suspended type cannot start - they wait until the operation is resumed. `agentOperationBracket` provides structured bracketing (begin on entry, end on exit). + +**Two wait modes**: +- `waitWhileSuspended` - blocks only during `ASSuspended`, proceeds during `ASSuspending` (allows in-flight operations to complete) +- `waitUntilForeground` - blocks during both `ASSuspending` and `ASSuspended` (stricter, for operations that need full foreground) + +**Usage**: `withStore` brackets all database access with `AODatabase`. Message delivery uses `AOSndNetwork` + `AOMsgDelivery`. Receive processing uses `AORcvNetwork`. This ensures that suspending receive processing cascades through delivery to database, and nothing touches the database after all operations drain. + +--- + +## SessionVar lifecycle + +**Source**: [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +Protocol client connections (SMP, XFTP, NTF) use a lazy singleton pattern via `SessionVar` - a `TMVar` in a `TMap` keyed by transport session. + +**Connection**: `getSessVar` atomically checks the TMap. Returns `Left newVar` (absent - caller must connect) or `Right existingVar` (present - wait for result). 
`newProtocolClient` wraps the connection attempt: on success, fills the TMVar with `Right client` and writes `CONNECT` event; on failure, fills with `Left (error, maybeRetryTime)` and re-throws. + +**Error caching**: Failed connections cache the error with an expiry timestamp based on `persistErrorInterval`. Future attempts during the interval immediately receive the cached error without reconnecting - this prevents connection storms when a router is down. When `persistErrorInterval == 0`, the SessionVar is removed immediately on failure (fresh connection on next attempt). + +**Compare-and-swap**: Each SessionVar has a monotonic `sessionVarId` from `workerSeq`. `removeSessVar` only removes if the `sessionVarId` matches the current map entry. This prevents a stale disconnect callback (from an old client) from removing a newer client that connected after the old one disconnected. + +**Service credential synchronization** (`updateClientService`): On SMP reconnect, the agent reconciles service credentials between client and router state - updating, creating, or removing service associations as needed. Router version downgrade (router loses service support) triggers client-side service deletion. + +**XFTP special case**: `getProtocolServerClient` ignores the caller's `NetworkRequestMode` parameter for XFTP, always using `NRMBackground` timing. XFTP connections always use background retry timing regardless of the caller's request. + +--- + +## Dual-backend store + +**Source**: [Agent/Store/SQLite.hs](../../src/Simplex/Messaging/Agent/Store/SQLite.hs), [Agent/Store/Postgres.hs](../../src/Simplex/Messaging/Agent/Store/Postgres.hs), [Agent/Store/AgentStore.hs](../../src/Simplex/Messaging/Agent/Store/AgentStore.hs) + +The agent supports SQLite and PostgreSQL via CPP compilation flags (`#if defined(dbPostgres)`). Three wrapper modules (`Interface.hs`, `Common.hs`, `DB.hs`) re-export the appropriate backend. A single binary compiles with one active backend. 
+ +**Key behavioral differences**: + +| Aspect | SQLite | PostgreSQL | +|--------|--------|------------| +| Row locking | Single-writer model (no locking needed) | `FOR UPDATE` on reads preceding writes | +| Batch queries | Per-row `forM` loops | `IN ?` with `In` wrapper | +| Constraint violations | `SQL.ErrorConstraint` pattern match | `constraintViolation` function | +| Transaction savepoints | Not needed | Used in `createWithRandomId'` (failed statement aborts entire transaction without them) | +| Busy/locked errors | `ErrorBusy`/`ErrorLocked` → `SEDatabaseBusy` → `CRITICAL True` | All SQL errors → `SEInternal` | + +**Store access bracketing**: `withStore` wraps all database operations with `agentOperationBracket AODatabase`, connecting the store to the suspension cascade. `withStoreBatch` / `withStoreBatch'` run multiple operations in a single transaction with per-operation error catching. + +**Known bug**: `checkConfirmedSndQueueExists_` uses `#if defined(dpPostgres)` (typo - should be `dbPostgres`), so the `FOR UPDATE` clause is never included on either backend. diff --git a/spec/agent/xrcp.md b/spec/agent/xrcp.md new file mode 100644 index 0000000000..864b4d8027 --- /dev/null +++ b/spec/agent/xrcp.md @@ -0,0 +1,101 @@ +# XRCP - Cross-Device Remote Control + +XRCP enables a desktop application to control a mobile device over the local network. The protocol establishes an encrypted session between two devices using TLS, post-quantum hybrid key exchange, and optional multicast discovery. + +This document covers the cross-module flows that are not visible from individual module specs. For message formats and cryptographic operations, see [protocol/xrcp.md](../../protocol/xrcp.md). For per-module details: [Client](../modules/Simplex/RemoteControl/Client.md) · [Invitation](../modules/Simplex/RemoteControl/Invitation.md) · [Discovery](../modules/Simplex/RemoteControl/Discovery.md) · [Types](../modules/Simplex/RemoteControl/Types.md). 
+ +**Terminology note**: in the code, "host" is the mobile device (being controlled) and "ctrl" is the desktop (controlling). The protocol spec uses the reverse convention - "host" serves, "controller" connects. This document uses the code convention. + +- [Session handshake flow](#session-handshake-flow) +- [KEM hybrid key exchange](#kem-hybrid-key-exchange) +- [Multicast discovery](#multicast-discovery) +- [Block framing and padding](#block-framing-and-padding) + +--- + +## Session handshake flow + +**Source**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs), [RemoteControl/Discovery.hs](../../src/Simplex/RemoteControl/Discovery.hs) + +The handshake spans `Client.connectRCHost` (controller side, despite the name), `Client.connectRCCtrl` (host side), `Invitation.mkInvitation`, and `Discovery.startTLSServer`. The full sequence: + +1. **Controller starts TLS server**: generates ephemeral session keys + DH keys, creates a signed invitation containing the CA fingerprint and identity key, starts a TLS server on an ephemeral port. The TLS hook `onNewHandshake` enforces single-session - a second connection attempt is rejected by checking whether the session TMVar is already filled. + +2. **Invitation delivery**: the invitation reaches the host either out-of-band (QR code scan for first pairing) or via encrypted multicast announcement (subsequent sessions - see [Multicast discovery](#multicast-discovery)). + +3. **Host connects via TLS**: `connectRCCtrl` establishes a TLS connection. Both sides validate 2-certificate chains (leaf + CA root). On reconnection, the host validates the controller's CA fingerprint against `KnownHostPairing`; on first pairing, it stores the fingerprint. + +4. **User confirmation barrier**: after TLS connects, the controller extracts the TLS channel binding (`tlsUniq`) as a session code. The application displays this code; the user verifies it on the host. 
`confirmCtrlSession` uses a double `putTMVar` - the first put signals the decision (accept/reject), the second blocks until the session thread consumes it, creating a synchronization point that prevents the session from proceeding before confirmation completes. + +5. **Hello exchange** (asymmetric encryption): + - Controller sends `RCHostEncHello`: DH public key in plaintext + encrypted body containing the KEM encapsulation key, CA fingerprint, and app info. Encrypted with `cbEncrypt` (classical DH secret). + - Host decrypts the hello, performs KEM encapsulation (see [KEM hybrid key exchange](#kem-hybrid-key-exchange)), derives the hybrid session key, and sends `RCCtrlEncHello` encrypted with `sbEncrypt` (post-quantum hybrid key). + - The asymmetry is deliberate: at the time the controller sends its hello, KEM hasn't completed yet, so only classical DH encryption is available. After the host encapsulates, both sides have the hybrid key. + +6. **Chain key initialization**: both sides call `sbcInit` with the hybrid key to derive send/receive chain keys. The controller explicitly **swaps** the key pair (`swap` call in `prepareCtrlSession`) - both sides derive keys in the same order from `sbcInit`, but have opposite send/receive roles, so the controller must reverse them. The host does not swap. + +7. **Error path**: if KEM encapsulation fails, the host sends `RCCtrlEncError` encrypted with the DH key (not the hybrid key, which doesn't exist yet). The controller can decrypt the error because it has the DH secret from step 5. + +--- + +## KEM hybrid key exchange + +**Source**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs) + +The session key combines classical Diffie-Hellman with SNTRUP761 (lattice-based KEM) via `SHA3_256(dhSecret || kemSharedKey)` (`kemHybridSecret` in Client.hs). This provides protection against quantum computers while maintaining classical security as a fallback. 
+ +**First session** - KEM public key is too large for a QR code invitation, so it travels in the encrypted hello body: + +1. Controller generates DH + KEM key pairs, puts KEM encapsulation key in the hello body +2. Host decrypts hello with DH secret, extracts KEM encapsulation key +3. Host encapsulates: produces `(kemCiphertext, kemSharedKey)` +4. Host derives hybrid key: `SHA3_256(dhSecret || kemSharedKey)` +5. Host sends `kemCiphertext` in the controller hello body +6. Controller decapsulates `kemCiphertext` to recover `kemSharedKey`, derives the same hybrid key + +**Subsequent sessions** (via multicast) - the previous session's KEM secret is cached in the pairing: + +- Both sides already know each other's KEM capabilities from the previous session +- Fresh DH keys are generated per session for forward secrecy +- The hybrid key derivation uses the new DH secret + the cached KEM secret +- `updateKnownHost` (called in `prepareHostSession`) updates the stored DH public key for the next session + +**Key rotation and `prevDhPrivKey`**: when the host updates its DH key pair for a new session, it retains the previous private key in `RCCtrlPairing.prevDhPrivKey`. This is critical for multicast - during the transition window, the controller may send announcements encrypted with the old public key. `findRCCtrlPairing` tries decryption with both the current and previous DH keys. Without this fallback, key rotation would break multicast discovery. + +--- + +## Multicast discovery + +**Source**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs), [RemoteControl/Invitation.hs](../../src/Simplex/RemoteControl/Invitation.hs), [RemoteControl/Discovery.hs](../../src/Simplex/RemoteControl/Discovery.hs) + +For subsequent sessions (after initial QR pairing), the controller announces its presence via UDP multicast so the host can connect without scanning a new QR code. 
The flow spans `Client.announceRC`, `Client.discoverRCCtrl`, `Client.findRCCtrlPairing`, `Invitation.signInvitation`/`verifySignedInvitation`, and `Discovery.joinMulticast`/`withSender`. + +**Announcement creation** (`announceRC`): + +1. The invitation is signed with a dual-signature chain: the session key signs the invitation URI, then the identity key signs the URI + session signature concatenated. This chain means a compromised session key alone cannot forge a valid identity-signed announcement - the identity key must also be compromised. +2. The signed invitation is encrypted with a DH shared secret between the host's known DH public key and the controller's ephemeral DH private key. +3. The encrypted packet is padded to 900 bytes (privacy: all announcements are indistinguishable by size). +4. Sent 60 times at 1-second intervals to multicast group `224.0.0.251:5227`. +5. Runs as a cancellable async task - cancelled in `prepareHostSession` once the session is established. + +**Listener and discovery** (`discoverRCCtrl`): + +1. Host calls `joinMulticast` to subscribe to the multicast group. A shared `TMVar Int` counter tracks active listeners - OS-level `IP_ADD_MEMBERSHIP` is only issued on 0→1 transition, `IP_DROP_MEMBERSHIP` on 1→0. This prevents duplicate syscalls when multiple listeners are active. +2. For each received packet, `findRCCtrlPairing` iterates over known pairings and tries decryption with the current DH key, falling back to `prevDhPrivKey` if present. +3. After successful decryption, the invitation's `dh` field is verified against the announcement's `dhPubKey` to prevent relay attacks. +4. Dual signatures are verified: session signature first, then identity signature. +5. 30-second timeout on the entire discovery process (`RCENotDiscovered` on expiry). 
+ +--- + +## Block framing and padding + +**Source**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs), [RemoteControl/Types.hs](../../src/Simplex/RemoteControl/Types.hs) + +XRCP uses three padding sizes at different protocol layers: + +- **16,384 bytes** - XRCP block size for all session messages (hello, commands, responses). Matches SMP's block size. Hides message content size variation within the TLS session. +- **12,288 bytes** - hello body padding within the 16,384-byte block, after encryption overhead. +- **900 bytes** - multicast announcement padding. Constrained by typical UDP MTU to avoid fragmentation. + +All padding uses the standard `pad`/`unPad` format (2-byte length prefix + `#` fill). The fixed sizes ensure that an observer monitoring network traffic cannot distinguish different XRCP operations by packet size. From 7aefcbf91d8d18823dc1c9ca4bdbf38281391ef3 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:21:26 +0000 Subject: [PATCH 65/91] agent connection topic --- spec/agent/connections.md | 232 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 spec/agent/connections.md diff --git a/spec/agent/connections.md b/spec/agent/connections.md new file mode 100644 index 0000000000..2ac83b2948 --- /dev/null +++ b/spec/agent/connections.md @@ -0,0 +1,232 @@ +# Agent Connections + +Duplex connection lifecycle: establishment, queue rotation, ratchet synchronization, and message integrity. These cross-module flows span the Agent, protocol client, and store layers. + +For per-module details: [Agent](../modules/Simplex/Messaging/Agent.md) · [Agent Protocol](../modules/Simplex/Messaging/Agent/Protocol.md) · [Ratchet](../modules/Simplex/Messaging/Crypto/Ratchet.md) · [Store Interface](../modules/Simplex/Messaging/Agent/Store/Interface.md). For the component diagram, see [agent.md](../agent.md). 
For protocol specification, see [Agent Protocol](../../protocol/agent-protocol.md) and [PQDR](../../protocol/pqdr.md). + +- [Design constraints](#design-constraints) +- [Connection establishment](#connection-establishment) +- [Queue rotation](#queue-rotation) +- [Ratchet synchronization](#ratchet-synchronization) +- [Message envelope hierarchy](#message-envelope-hierarchy) +- [Integrity chain](#integrity-chain) + +--- + +## Design constraints + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +Two properties of the protocol drive much of the agent's complexity: + +**TOFU retry safety**: Queues and links are secured via trust-on-first-use - the router accepts the first key presented (SKEY, KEY) and rejects any subsequent different key. If a network call succeeds but the response is lost, the client must retry with the *same* key, or the router will reject it. This means all cryptographic keys must be generated and persisted *before* the network call that uses them. The agent's pervasive store-then-execute pattern (`enqueueCommand` persists to DB, then worker executes with stored keys) exists primarily to satisfy this constraint. + +**Network asymmetry**: After a client sends a message to a queue, the peer's response can arrive at the agent before the originating API call returns to the application. The application must already know the connection exists when it receives the event, otherwise it gets handshake events for an unknown connection. This drives split-phase APIs where the connection is registered locally before any network call. + +Together, these constraints explain why the agent separates key generation from network operations, why commands are persisted before execution, and why connection creation is split into prepare + create phases. 
+ +--- + +## Connection establishment + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Protocol.hs](../../src/Simplex/Messaging/Agent/Protocol.hs) + +### Split-phase connection creation + +Connection creation is split into two phases to satisfy both design constraints: + +**`prepareConnectionLink`** (no network, no database): generates root Ed25519 signing key pair, queue-level X25519 DH keys, and short link key. Returns the connection link URI and `PreparedLinkParams` in memory. The application can now embed the link in link data (e.g., for short link resolution) before the queue exists. + +**`createConnectionForLink`** (single network call): uses the prepared parameters to create the queue on the router with SKEY (root signature). The sender ID is deterministically derived from the correlation nonce (`SMP.EntityId $ B.take 24 $ C.sha3_384 corrId`), so a lost response can be retried - the router validates the same sender ID. + +Without split-phase, the application would need to create the queue first, get the link, then update the queue with link data containing the link - requiring an extra round-trip. + +### Standard handshake + +The connection establishment flow is shown in [agent.md](../agent.md#connection-establishment-flow). The key non-obvious details: + +**Ratchet initialization is asymmetric**: The initiator (Alice) generates X3DH key parameters during `newRcvConnSrv` and stores them via `createRatchetX3dhKeys`, but does not initialize any ratchet yet. The *receiving* ratchet is only initialized later in `smpConfirmation` via `initRcvRatchet` when the confirmation arrives with the responder's parameters. The responder (Bob) initializes a *sending* ratchet during `startJoinInvitation` via `initSndRatchet`. 
The names `RcvE2ERatchetParams`/`SndE2ERatchetParams` are historical - what matters is that the responder initializes first (sending direction), and the initiator initializes second (receiving direction) using the responder's parameters. + +**Confirmation decryption proves key agreement**: In `smpConfirmation`, the initiator creates a fresh receiving ratchet from the responder's parameters and immediately uses it to decrypt the confirmation body. If decryption fails, the entire confirmation is rejected - there is no state where a connection has mismatched ratchets. + +**HELLO exchange completes the handshake**: After `allowConnection`, both sides have duplex queues but haven't confirmed liveness. The initiator sends HELLO (with `notification = True` in MsgFlags). The responder receives it and sends its own HELLO back (also with `notification = True`). The initiator emits `CON` when it receives the responder's HELLO. The responder emits `CON` when its own HELLO is *successfully delivered* (in the delivery callback, not on receiving a reply). There are exactly two HELLO messages, not three. + +### Contact URI async path + +For contact URIs (`joinConnectionAsync` with `CRContactUri`), the join is enqueued as an async command. The connection record is created locally (NewConnection state) before the network call, satisfying the network asymmetry constraint. The background worker then creates the receive queue, sends the invitation, and processes the handshake. + +### PQ key agreement + +PQ support is negotiated via version numbers: `agentVersion >= pqdrSMPAgentVersion && e2eVersion >= pqRatchetE2EEncryptVersion`. When both sides support PQ, the KEM public key travels in the confirmation body (too large for invitation URI). The responder encapsulates, producing `(kemCiphertext, kemSharedKey)`, and the hybrid key is derived via `SHA3_256(dhSecret || kemSharedKey)`. 
+ +**PQ support is monotonic**: once enabled for a connection (`PQSupport PQSupportOn`), it cannot be downgraded. This affects header padding size (88 bytes without PQ vs 2310 bytes with PQ). + +### Connection type state machine + +``` +NewConnection + +-> RcvConnection (initiator, after newRcvConnSrv) + | +-> DuplexConnection (after allowConnection + connectReplyQueues) + | +-> ContactConnection (contact address case) + +-> SndConnection (responder, before reply queue created) + | +-> DuplexConnection (after reply queue created) + +-> ContactConnection (short link / contact address) +``` + +--- + +## Queue rotation + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Store.hs](../../src/Simplex/Messaging/Agent/Store.hs) + +Queue rotation replaces a receive queue with a new one on a different router, providing forward secrecy for the transport layer. The protocol uses a 4-message handshake. + +### Protocol sequence + +Rotation is initiated by `switchConnectionAsync` (client API) or by receiving QADD from the peer. Preconditions: connection must be duplex, no switch already in progress, ratchet must not be syncing. + +``` +Receiver (switching party) Sender (peer) + | | + |-- QADD (new queue address) ---------->| + | |-- creates SndQueue to new address + |<--------- QKEY (sender auth key) ----| + | | + |-- secures new queue (ICQSecure) ----->| + |-- QUSE (start using new queue) ----->| + | |-- switches delivery to new queue + |<--------- QTEST (on new queue) ------| + | | + |-- deletes old queue (ICQDelete) ---->| +``` + +### State machines + +**Receiver (RcvQueue switch)**: `RSSwitchStarted` -> `RSSendingQADD` -> `RSSendingQUSE` -> `RSReceivedMessage`. The switch becomes non-abortable at `RSSendingQUSE` - by this point the sender may have already deleted the old queue, so aborting would break the connection. `canAbortRcvSwitch` enforces this. + +**Sender (SndQueue switch)**: creates new SndQueue on QADD, sends QKEY, marks old as `SSSendingQKEY`. 
On QUSE: sends QTEST *only to the new queue*, marks as `SSSendingQTEST`. Completes when QTEST delivery succeeds. + +### Consecutive rotation handling + +`dbReplaceQId` tracks which old queue a new one replaces. Each new queue stores `dbReplaceQId = Just oldQueueId`. When QADD is processed, send queues whose `dbReplaceQId` points to the current queue's `dbQueueId` are found and deleted in bulk. This handles consecutive rotation requests - only the latest rotation survives. + +### Old queue deletion + +Three triggers delete a queue during rotation (the old queue in cases 1-2, the newly created queues in case 3): +1. **Sender-side**: QTEST delivery succeeds - old queue removed from `smpDeliveryWorkers` (worker thread stops) +2. **Receiver-side**: first message arrives on new queue - receiver marks old queue for deletion via `ICQDelete` +3. **Abort cleanup**: `abortConnectionSwitch` explicitly deletes new queues created during a failed switch attempt + +--- + +## Ratchet synchronization + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Protocol.hs](../../src/Simplex/Messaging/Agent/Protocol.hs) + +When the double ratchet state becomes desynchronized (e.g., one side restores from backup), the agent can re-establish the ratchet without breaking the connection. + +### State machine + +``` +RSOk (synchronized) + | + v (crypto error detected) +RSAllowed / RSRequired + | + v (synchronizeRatchet called) +RSStarted (waiting for peer) + | + v (peer responds with own keys) +RSAgreed (both exchanged keys) + | + v (ratchet recreated, EREADY sent/received) +RSOk +``` + +**Send prohibition**: `ratchetSyncSendProhibited` returns `True` for `RSRequired`, `RSStarted`, and `RSAgreed`. This blocks *all* messages including queue rotation messages - preventing state corruption while the ratchet is being re-established. + +### Key exchange protocol + +1. The initiator calls `synchronizeRatchet`, which generates new X3DH keys and sends them in an `AgentRatchetKey` envelope (discriminant `'R'`). State becomes `RSStarted`. +2.
The peer receives the ratchet key in `newRatchetKey`. If the peer hasn't started sync, it generates its own keys and sends a reply `AgentRatchetKey`. +3. Both sides now have each other's keys. State becomes `RSAgreed`. + +### Hash-ordered role assignment + +Both parties compute `rkHash = SHA256(pubKeyBytes k1 || pubKeyBytes k2)` for their own keys. The party with the *smaller* hash initializes the receiving ratchet (`pqX3dhRcv`); the party with the larger hash initializes the sending ratchet (`pqX3dhSnd`) and sends `EREADY`. This deterministic tie-breaking avoids a separate negotiation round. + +### EREADY completion + +`EREADY` carries `lastExternalSndId` - the ID of the last message sent with the old ratchet. The receiving party uses this to know when the old ratchet's messages are exhausted and the new ratchet is fully active. Until EREADY arrives, messages may arrive encrypted with either the old or new ratchet. + +### Error recovery + +- **Crypto error during decrypt**: `cryptoErrToSyncState` classifies the error and sets state to `RSAllowed` or `RSRequired`. Client is notified via `RSYNC`. +- **Successful decrypt during non-RSOk state**: if the state is anything other than `RSStarted` (`RSStarted` means sync is actively in progress), it is reset to `RSOk`. A successful message proves the ratchets are synchronized. +- **Duplicate handling**: `rkHash` of received keys is checked against stored hashes to prevent reprocessing the same ratchet key message. + +--- + +## Message envelope hierarchy + +**Source**: [Agent/Protocol.hs](../../src/Simplex/Messaging/Agent/Protocol.hs) + +Messages use three nesting levels, each adding a layer of structure: + +### Level 1: AgentMsgEnvelope (transport) + +Four variants with single-character discriminants: + +| Variant | Disc.
| Encryption | When | +|---------|-------|-----------|------| +| `AgentConfirmation` | `'C'` | Per-queue E2E only | Connection handshake | +| `AgentMsgEnvelope` | `'M'` | Double ratchet | Normal messages | +| `AgentInvitation` | `'I'` | Per-queue E2E only | Contact URI join | +| `AgentRatchetKey` | `'R'` | Per-queue E2E only | Ratchet sync | + +Only `AgentMsgEnvelope` is double-ratchet encrypted. The other three use only the per-queue E2E encryption (DH shared secret from queue creation). This is because during handshake and ratchet sync, the double ratchet is either not yet established or being replaced. + +### Level 2: AgentMessage (application) + +Inside the decrypted envelope: +- `AgentConnInfo` / `AgentConnInfoReply` - connection info during handshake (not double-ratchet encrypted) +- `AgentRatchetInfo` - ratchet sync payload (not double-ratchet encrypted) +- `AgentMessage APrivHeader AMessage` - user and control messages (double-ratchet encrypted) + +The private header (`APrivHeader`) carries `sndMsgId` and `prevMsgHash` for the integrity chain. + +### Level 3: AMessage (semantic) + +Message types with 1-2 character discriminants: +- User messages: `HELLO_`, `A_MSG_`, `A_RCVD_`, `A_QCONT_`, `EREADY_` +- Queue rotation: `QADD_`, `QKEY_`, `QUSE_`, `QTEST_` + +### ACK semantics + +- **User messages** (`A_MSG_`): NOT auto-ACKed. Agent returns `ACKPending`; application must call `ackMessage`. +- **Receipts** (`A_RCVD`): returns `ACKPending` when valid receipts are present (application must ACK after processing); auto-ACKed only when all receipts fail. +- **Other control messages** (HELLO, QADD, QKEY, QUSE, QTEST, EREADY): auto-ACKed by the agent. +- **Error during processing**: `handleNotifyAck` sends `ERR` to the application but still ACKs to the router, preventing re-delivery of a message that will fail again. 
+ +--- + +## Integrity chain + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Protocol.hs](../../src/Simplex/Messaging/Agent/Protocol.hs) + +Each message in a connection commits to the previous message via two mechanisms: + +1. **External sender ID** (`lastExternalSndId`): monotonically increasing counter per connection +2. **Previous message hash** (`prevMsgHash`): SHA256 of the previous message body + +`checkMsgIntegrity` produces one of five outcomes: + +| Outcome | Condition | +|---------|-----------| +| `MsgOk` | Sequential ID and matching hash | +| `MsgBadId` | ID from the past (less than previous) | +| `MsgDuplicate` | Same ID as previous | +| `MsgSkipped` | Gap in IDs (messages lost) | +| `MsgBadHash` | Sequential ID but hash mismatch | + +**Non-rejecting semantics**: the agent does NOT reject messages with integrity failures. The result is reported to the application via `MsgMeta.integrity`. The application decides the policy - warn, ignore, or terminate the connection. From eafc84fd02c76ef740856fc2d0416560ff32110f Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:43:26 +0000 Subject: [PATCH 66/91] fix doc --- spec/agent/connections.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/agent/connections.md b/spec/agent/connections.md index 2ac83b2948..1926504fc4 100644 --- a/spec/agent/connections.md +++ b/spec/agent/connections.md @@ -35,7 +35,7 @@ Together, these constraints explain why the agent separates key generation from Connection creation is split into two phases to satisfy both design constraints: -**`prepareConnectionLink`** (no network, no database): generates root Ed25519 signing key pair, queue-level X25519 DH keys, and short link key. Returns the connection link URI and `PreparedLinkParams` in memory. 
The application can now embed the link in link data (e.g., for short link resolution) before the queue exists. +**`prepareConnectionLink`** (no network, no database): generates root Ed25519 signing key pair and queue-level X25519 DH keys. Derives a short link key as `SHA3_256` of the encoded fixed link data. Returns the connection link URI and `PreparedLinkParams` in memory. The application can now embed the link in link data (e.g., for short link resolution) before the queue exists. **`createConnectionForLink`** (single network call): uses the prepared parameters to create the queue on the router with SKEY (root signature). The sender ID is deterministically derived from the correlation nonce (`SMP.EntityId $ B.take 24 $ C.sha3_384 corrId`), so a lost response can be retried - the router validates the same sender ID. @@ -49,7 +49,7 @@ The connection establishment flow is shown in [agent.md](../agent.md#connection- **Confirmation decryption proves key agreement**: In `smpConfirmation`, the initiator creates a fresh receiving ratchet from the responder's parameters and immediately uses it to decrypt the confirmation body. If decryption fails, the entire confirmation is rejected - there is no state where a connection has mismatched ratchets. -**HELLO exchange completes the handshake**: After `allowConnection`, both sides have duplex queues but haven't confirmed liveness. The initiator sends HELLO (with `notification = True` in MsgFlags). The responder receives it and sends its own HELLO back (also with `notification = True`). The initiator emits `CON` when it receives the responder's HELLO. The responder emits `CON` when its own HELLO is *successfully delivered* (in the delivery callback, not on receiving a reply). There are exactly two HELLO messages, not three. +**HELLO exchange completes the handshake**: After `allowConnection`, both sides have duplex queues but haven't confirmed liveness. 
The responder (Bob) sends the first HELLO (with `notification = True` in MsgFlags), triggered by `ICDuplexSecure`. The initiator (Alice) receives it and sends her own HELLO back (also with `notification = True`). The initiator emits `CON` in the *delivery callback* of her HELLO (her rcvQueue is already Active from receiving Bob's HELLO). The responder emits `CON` when he *receives* the initiator's reply HELLO (his sndQueue is already Active from his own HELLO delivery). There are exactly two HELLO messages. ### Contact URI async path @@ -57,7 +57,7 @@ For contact URIs (`joinConnectionAsync` with `CRContactUri`), the join is enqueu ### PQ key agreement -PQ support is negotiated via version numbers: `agentVersion >= pqdrSMPAgentVersion && e2eVersion >= pqRatchetE2EEncryptVersion`. When both sides support PQ, the KEM public key travels in the confirmation body (too large for invitation URI). The responder encapsulates, producing `(kemCiphertext, kemSharedKey)`, and the hybrid key is derived via `SHA3_256(dhSecret || kemSharedKey)`. +PQ support is negotiated via version numbers: `agentVersion >= pqdrSMPAgentVersion && e2eVersion >= pqRatchetE2EEncryptVersion`. When both sides support PQ, the KEM public key travels in the confirmation body (too large for invitation URI). The responder encapsulates, producing `(kemCiphertext, kemSharedKey)`, and the hybrid key is derived via HKDF-SHA512 over the concatenation of three X3DH shared secrets plus the KEM shared secret, with info string `"SimpleXX3DH"`. **PQ support is monotonic**: once enabled for a connection (`PQSupport PQSupportOn`), it cannot be downgraded. This affects header padding size (88 bytes without PQ vs 2310 bytes with PQ). @@ -108,7 +108,7 @@ Receiver (switching party) Sender (peer) ### Consecutive rotation handling -`dbReplaceQId` tracks which old queue a new one replaces. Each new queue stores `dbReplaceQId = Just oldQueueId`. 
When QADD is processed, send queues whose `dbReplaceQId` points to the current queue's `dbQueueId` are found and deleted in bulk. This handles consecutive rotation requests - only the latest rotation survives. +`dbReplaceQueueId` tracks which old queue a new one replaces. Each new queue stores `dbReplaceQueueId = Just oldQueueId`. When QADD is processed, send queues whose `dbReplaceQueueId` points to the current queue's `dbQueueId` are found and deleted in bulk. This handles consecutive rotation requests - only the latest rotation survives. ### Old queue deletion @@ -205,7 +205,7 @@ Message types with 1-2 character discriminants: - **User messages** (`A_MSG_`): NOT auto-ACKed. Agent returns `ACKPending`; application must call `ackMessage`. - **Receipts** (`A_RCVD`): returns `ACKPending` when valid receipts are present (application must ACK after processing); auto-ACKed only when all receipts fail. -- **Other control messages** (HELLO, QADD, QKEY, QUSE, QTEST, EREADY): auto-ACKed by the agent. +- **Other control messages** (HELLO, QADD, QKEY, QUSE, QTEST, EREADY, A_QCONT): auto-ACKed by the agent. - **Error during processing**: `handleNotifyAck` sends `ERR` to the application but still ACKs to the router, preventing re-delivery of a message that will fail again. 
--- From c8de00872a2396e6eca581f23b6054e595e15096 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 21:51:43 +0000 Subject: [PATCH 67/91] fixes --- spec/agent/connections.md | 12 ++++----- spec/agent/infrastructure.md | 13 +++++----- spec/agent/xrcp.md | 48 +++++++++++++++++------------------- 3 files changed, 35 insertions(+), 38 deletions(-) diff --git a/spec/agent/connections.md b/spec/agent/connections.md index 1926504fc4..2cda714a30 100644 --- a/spec/agent/connections.md +++ b/spec/agent/connections.md @@ -37,7 +37,7 @@ Connection creation is split into two phases to satisfy both design constraints: **`prepareConnectionLink`** (no network, no database): generates root Ed25519 signing key pair and queue-level X25519 DH keys. Derives a short link key as `SHA3_256` of the encoded fixed link data. Returns the connection link URI and `PreparedLinkParams` in memory. The application can now embed the link in link data (e.g., for short link resolution) before the queue exists. -**`createConnectionForLink`** (single network call): uses the prepared parameters to create the queue on the router with SKEY (root signature). The sender ID is deterministically derived from the correlation nonce (`SMP.EntityId $ B.take 24 $ C.sha3_384 corrId`), so a lost response can be retried - the router validates the same sender ID. +**`createConnectionForLink`** (single network call): uses the prepared parameters to create the queue on the router with NEW (root signing key as owner auth). The sender ID is deterministically derived from the correlation nonce (`SMP.EntityId $ B.take 24 $ C.sha3_384 corrId`), so a lost response can be retried - the router validates the same sender ID. Without split-phase, the application would need to create the queue first, get the link, then update the queue with link data containing the link - requiring an extra round-trip. 
@@ -83,7 +83,7 @@ Queue rotation replaces a receive queue with a new one on a different router, pr ### Protocol sequence -Rotation is initiated by `switchConnectionAsync` (client API) or by receiving QADD from the peer. Preconditions: connection must be duplex, no switch already in progress, ratchet must not be syncing. +Rotation is initiated by the switching party calling `switchConnectionAsync` (client API), which sends QADD. The peer responds to QADD by creating a new send queue and replying with QKEY. Preconditions: connection must be duplex, no switch already in progress, ratchet must not be syncing. ``` Receiver (switching party) Sender (peer) @@ -157,7 +157,7 @@ Both parties compute `rkHash = SHA256(pubKeyBytes k1 || pubKeyBytes k2)` for the ### EREADY completion -`EREADY` carries `lastExternalSndId` - the ID of the last message sent with the old ratchet. The receiving party uses this to know when the old ratchet's messages are exhausted and the new ratchet is fully active. Until EREADY arrives, messages may arrive encrypted with either the old or new ratchet. +`EREADY` carries `lastExternalSndId` - the ID of the last message the sender received from the peer before switching ratchets. The receiving party uses this to know when the old ratchet's messages are exhausted and the new ratchet is fully active. Until EREADY arrives, messages may arrive encrypted with either the old or new ratchet. ### Error recovery @@ -179,17 +179,17 @@ Four variants with single-character discriminants: | Variant | Disc. 
| Encryption | When | |---------|-------|-----------|------| -| `AgentConfirmation` | `'C'` | Per-queue E2E only | Connection handshake | +| `AgentConfirmation` | `'C'` | Per-queue E2E (outer) + double ratchet (inner `encConnInfo`) | Connection handshake | | `AgentMsgEnvelope` | `'M'` | Double ratchet | Normal messages | | `AgentInvitation` | `'I'` | Per-queue E2E only | Contact URI join | | `AgentRatchetKey` | `'R'` | Per-queue E2E only | Ratchet sync | -Only `AgentMsgEnvelope` is double-ratchet encrypted. The other three use only the per-queue E2E encryption (DH shared secret from queue creation). This is because during handshake and ratchet sync, the double ratchet is either not yet established or being replaced. +`AgentMsgEnvelope` is fully double-ratchet encrypted. `AgentConfirmation` uses per-queue E2E for the outer envelope but also contains `encConnInfo` which is double-ratchet encrypted (the ratchet is initialized during confirmation processing). `AgentInvitation` and `AgentRatchetKey` use only per-queue E2E - the double ratchet is either not yet established or being replaced. 
### Level 2: AgentMessage (application) Inside the decrypted envelope: -- `AgentConnInfo` / `AgentConnInfoReply` - connection info during handshake (not double-ratchet encrypted) +- `AgentConnInfo` / `AgentConnInfoReply` - connection info during handshake (double-ratchet encrypted inside `encConnInfo`) - `AgentRatchetInfo` - ratchet sync payload (not double-ratchet encrypted) - `AgentMessage APrivHeader AMessage` - user and control messages (double-ratchet encrypted) diff --git a/spec/agent/infrastructure.md b/spec/agent/infrastructure.md index ad84fd6e7b..5e3c828e6c 100644 --- a/spec/agent/infrastructure.md +++ b/spec/agent/infrastructure.md @@ -30,7 +30,7 @@ All agent background processing - async commands, message delivery, notification **Error classification**: `withWork` distinguishes two failure modes: - *Work-item error* (`isWorkItemError`): the task itself is broken (likely recurring). Worker stops and sends `CRITICAL False`. -- *Store error*: transient database issue. Worker re-signals `doWork` and reports `INTERNAL` (retry may succeed). +- *Other error*: any non-work-item error (e.g., transient database issue). Worker re-signals `doWork` and reports `INTERNAL` (retry may succeed). **Restart rate limiting**: On worker exit, `restartOrDelete` checks the `restarts` counter against `maxWorkerRestartsPerMin`. Under the limit: reset action, re-signal, restart. Over the limit: delete the worker from the map and send `CRITICAL True` (escalation to the application). A restart only proceeds if the `workerId` in the map still matches the current worker - a stale restart from a replaced worker is a no-op. @@ -54,7 +54,7 @@ Async commands handle state transitions that require network calls but shouldn't - *Client commands* (`AClientCommand`): `NEW`, `JOIN`, `LET` (allow connection), `ACK`, `LSET`/`LGET` (set/get connection link data), `SWCH` (switch queue), `DEL`. Triggered by application API calls. 
- *Internal commands* (`AInternalCommand`): `ICAck` (ack to router), `ICAckDel` (ack + delete local message), `ICAllowSecure`/`ICDuplexSecure` (secure after confirmation), `ICQSecure` (secure queue during switch), `ICQDelete` (delete old queue after switch), `ICDeleteConn` (delete connection), `ICDeleteRcvQueue` (delete specific receive queue). Generated *during* message processing to handle state transitions asynchronously. -**Retry and movement**: `tryMoveableCommand` wraps execution with `withRetryInterval`. On `temporaryOrHostError`, it retries with backoff. On cross-server errors (e.g., queue moved to different router), it updates the command's server field in the store (`CCMoved`) and retries against the new server. +**Retry and movement**: `tryMoveableCommand` wraps execution with `withRetryInterval`. On `temporaryOrHostError`, it retries with backoff. Individual command handlers can return `CCMoved` (e.g., when a queue has moved to a different router) after updating the command's server field in the store - `tryMoveableCommand` then exits cleanly, letting the moved command be picked up by the appropriate worker. **Locking**: State-sensitive commands use `tryWithLock` / `tryMoveableWithLock`, which acquire `withConnLock` before execution. This serializes operations on the same connection, preventing races between concurrent command processing and message receipt. @@ -73,8 +73,7 @@ Message delivery uses a split-phase encryption design: the ratchet advances in t 2. Call `agentRatchetEncryptHeader` - advances the double ratchet, produces a message encryption key (MEK), padded length, and PQ encryption status 3. Store `SndMsg` with `SndMsgPrepData` (MEK, paddedLen, sndMsgBodyId) in the database 4. Create `SndMsgDelivery` record for each send queue -5. Increment `msgDeliveryOp.opsInProgress` (for suspension tracking) -6. Signal delivery workers via `getDeliveryWorker` +5. 
`submitPendingMsg` - increments `msgDeliveryOp.opsInProgress` (for suspension tracking) and signals delivery workers via `getDeliveryWorker` **Phase 2 - delivery worker** (`runSmpQueueMsgDelivery`): 1. `throwWhenNoDelivery` - kills the worker thread if the queue's address has been removed from `smpDeliveryWorkers` (prevents delivery to queues replaced during switch) @@ -82,9 +81,9 @@ Message delivery uses a split-phase encryption design: the ratchet advances in t 3. Re-encode the message with `internalSndId`/`prevMsgHash`, then `rcEncryptMsg` to encrypt with the stored MEK (no ratchet access needed) 4. `sendAgentMessage` - per-queue encrypt + SEND to the router -**Connection info messages** (`AM_CONN_INFO`, `AM_CONN_INFO_REPLY`) skip split-phase encryption entirely - they are sent as plaintext confirmation bodies via `sendConfirmation`. +**Connection info messages** (`AM_CONN_INFO`, `AM_CONN_INFO_REPLY`) skip split-phase encryption entirely - they are sent as per-queue E2E encrypted confirmation bodies via `sendConfirmation` (encrypted with `agentCbEncrypt`, not with the double ratchet). -**Retry with dual intervals**: Delivery uses `withRetryLock2`, which maintains two independent retry clocks (slow and fast). A background thread sleeps for the current interval, then signals the delivery worker via `tryPutTMVar`. When the router sends `QCONT` (queue buffer cleared), the agent calls `tryPutTMVar retryLock ()` to wake the delivery thread immediately, avoiding unnecessary delay. +**Retry with dual intervals**: Delivery uses `withRetryLock2`, which maintains two retry interval states (slow and fast) but only one wait is active at a time. A background thread sleeps for the current interval, then signals the delivery worker via `tryPutTMVar`. When the router sends `QCONT` (queue buffer cleared), the agent calls `tryPutTMVar retryLock ()` to wake the delivery thread immediately, avoiding unnecessary delay. 
**Error handling**: - `SMP QUOTA` - switch to slow retry, don't penalize (backpressure from router) @@ -115,7 +114,7 @@ SessSubs - **Active → Pending**: When `setSessionId` is called with a *different* session ID (TLS reconnect), all active subscriptions are atomically demoted to pending. Session ID is updated to the new value. -- **Pending → Removed**: `failSubscriptions` moves permanently-failed queues (non-temporary SMP errors) to `removedSubs`. The removal is tracked for diagnostic reporting via `getSubscriptions`. +- **Pending → Removed**: `failSubscriptions` moves permanently-failed queues (non-temporary SMP errors) to `removedSubs` - a separate `TMap` in `AgentClient`, not part of `TSessionSubs`. The removal is tracked for diagnostic reporting via `getSubscriptions`. **Service-associated queues**: Queues with `serviceAssoc=True` are *not* added to `activeSubs` individually. Instead, the service subscription's count is incremented and its `idsHash` XOR-accumulates the queue's hash. The router tracks individual queues via the service subscription; the agent only tracks the aggregate. Consequence: `hasActiveSub(rId)` returns `False` for service-associated queues - callers must check the service subscription separately. diff --git a/spec/agent/xrcp.md b/spec/agent/xrcp.md index 864b4d8027..883e64c5c9 100644 --- a/spec/agent/xrcp.md +++ b/spec/agent/xrcp.md @@ -23,18 +23,18 @@ The handshake spans `Client.connectRCHost` (controller side, despite the name), 2. **Invitation delivery**: the invitation reaches the host either out-of-band (QR code scan for first pairing) or via encrypted multicast announcement (subsequent sessions - see [Multicast discovery](#multicast-discovery)). -3. **Host connects via TLS**: `connectRCCtrl` establishes a TLS connection. Both sides validate 2-certificate chains (leaf + CA root). On reconnection, the host validates the controller's CA fingerprint against `KnownHostPairing`; on first pairing, it stores the fingerprint. +3. 
**Host connects via TLS**: `connectRCCtrl` establishes a TLS connection. Both sides validate certificate chains. On the controller side, `onClientCertificate` explicitly checks for a 2-certificate chain (leaf + CA root) and validates the host's CA fingerprint against `KnownHostPairing.hostFingerprint` (or stores it on first pairing). On the host side, the controller's CA fingerprint is validated against `RCCtrlPairing.ctrlFingerprint` in `updateCtrlPairing`. -4. **User confirmation barrier**: after TLS connects, the controller extracts the TLS channel binding (`tlsUniq`) as a session code. The application displays this code; the user verifies it on the host. `confirmCtrlSession` uses a double `putTMVar` - the first put signals the decision (accept/reject), the second blocks until the session thread consumes it, creating a synchronization point that prevents the session from proceeding before confirmation completes. +4. **User confirmation barrier**: after TLS connects, both sides extract the TLS channel binding (`tlsUniq`) as a session code. The application displays this code on both devices for the user to verify. On the host side, `confirmCtrlSession` uses a double `putTMVar` - the first put signals the decision (accept/reject), the second blocks until the session thread acknowledges the value, ensuring `confirmCtrlSession` does not return prematurely. 5. **Hello exchange** (asymmetric encryption): - - Controller sends `RCHostEncHello`: DH public key in plaintext + encrypted body containing the KEM encapsulation key, CA fingerprint, and app info. Encrypted with `cbEncrypt` (classical DH secret). - - Host decrypts the hello, performs KEM encapsulation (see [KEM hybrid key exchange](#kem-hybrid-key-exchange)), derives the hybrid session key, and sends `RCCtrlEncHello` encrypted with `sbEncrypt` (post-quantum hybrid key). 
- - The asymmetry is deliberate: at the time the controller sends its hello, KEM hasn't completed yet, so only classical DH encryption is available. After the host encapsulates, both sides have the hybrid key. + - Host sends `RCHostEncHello` (`prepareHostHello`): DH public key in plaintext + encrypted body containing the KEM encapsulation key, CA fingerprint, and app info. Encrypted with `cbEncrypt` (classical DH secret). + - Controller decrypts the hello, performs KEM encapsulation (see [KEM hybrid key exchange](#kem-hybrid-key-exchange)), derives the hybrid session key, initializes a chain via `sbcInit`, and sends `RCCtrlEncHello` (`prepareHostSession`) encrypted with a key derived from the chain (`sbcHkdf` + `sbEncrypt`). + - The asymmetry is deliberate: at the time the host sends its hello, KEM hasn't completed yet, so only classical DH encryption is available. After the controller encapsulates, both sides have the hybrid key. -6. **Chain key initialization**: both sides call `sbcInit` with the hybrid key to derive send/receive chain keys. The controller explicitly **swaps** the key pair (`swap` call in `prepareCtrlSession`) - both sides derive keys in the same order from `sbcInit`, but have opposite send/receive roles, so the controller must reverse them. The host does not swap. +6. **Chain key initialization**: both sides call `sbcInit` with the hybrid key to derive send/receive chain keys. The host explicitly **swaps** the key pair (`swap` call in `prepareCtrlSession`, which runs on the host side despite its name) - both sides derive keys in the same order from `sbcInit`, but have opposite send/receive roles, so the host must reverse them. The controller does not swap. -7. **Error path**: if KEM encapsulation fails, the host sends `RCCtrlEncError` encrypted with the DH key (not the hybrid key, which doesn't exist yet). The controller can decrypt the error because it has the DH secret from step 5. +7. 
**Error path**: if KEM encapsulation fails, the controller sends `RCCtrlEncError` (a variant of `RCCtrlEncHello`) encrypted with the DH key (not the hybrid key, which doesn't exist yet). The host can decrypt the error because it has the DH secret from step 5. Note: this error path is not yet fully implemented in the code. --- @@ -42,23 +42,20 @@ The handshake spans `Client.connectRCHost` (controller side, despite the name), **Source**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs) -The session key combines classical Diffie-Hellman with SNTRUP761 (lattice-based KEM) via `SHA3_256(dhSecret || kemSharedKey)` (`kemHybridSecret` in Client.hs). This provides protection against quantum computers while maintaining classical security as a fallback. +The session key combines classical Diffie-Hellman with SNTRUP761 (lattice-based KEM) via `SHA3_256(dhSecret || kemSharedKey)` (`kemHybridSecret` in `Crypto/SNTRUP761.hs`). This provides protection against quantum computers while maintaining classical security as a fallback. -**First session** - KEM public key is too large for a QR code invitation, so it travels in the encrypted hello body: +The KEM public key is too large for a QR code invitation, so it travels in the encrypted hello body. Fresh KEM keys are generated every session - no KEM state is cached between sessions. -1. Controller generates DH + KEM key pairs, puts KEM encapsulation key in the hello body -2. Host decrypts hello with DH secret, extracts KEM encapsulation key -3. Host encapsulates: produces `(kemCiphertext, kemSharedKey)` -4. Host derives hybrid key: `SHA3_256(dhSecret || kemSharedKey)` -5. Host sends `kemCiphertext` in the controller hello body -6. Controller decapsulates `kemCiphertext` to recover `kemSharedKey`, derives the same hybrid key +1. Host generates a fresh KEM key pair (`prepareHostHello`), puts the KEM public key in the host hello body +2. Controller decrypts hello with DH secret, extracts KEM public key +3. 
Controller encapsulates (`sntrup761Enc`): produces `(kemCiphertext, kemSharedKey)` +4. Controller derives hybrid key: `SHA3_256(dhSecret || kemSharedKey)` +5. Controller sends `kemCiphertext` in the ctrl hello body (`RCCtrlEncHello`) +6. Host decapsulates `kemCiphertext` (`sntrup761Dec`) to recover `kemSharedKey`, derives the same hybrid key -**Subsequent sessions** (via multicast) - the previous session's KEM secret is cached in the pairing: +The KEM exchange is identical for first and subsequent sessions. The only difference between sessions is how the invitation is delivered (QR code vs multicast) and whether TLS fingerprints are stored for the first time or verified against known pairings. -- Both sides already know each other's KEM capabilities from the previous session -- Fresh DH keys are generated per session for forward secrecy -- The hybrid key derivation uses the new DH secret + the cached KEM secret -- `updateKnownHost` (called in `prepareHostSession`) updates the stored DH public key for the next session +`updateKnownHost` (called in `prepareHostSession` on the controller) updates the stored host DH public key (`hostDhPubKey` in `KnownHostPairing`) - this is used for encrypting multicast announcements in subsequent sessions, not for KEM. **Key rotation and `prevDhPrivKey`**: when the host updates its DH key pair for a new session, it retains the previous private key in `RCCtrlPairing.prevDhPrivKey`. This is critical for multicast - during the transition window, the controller may send announcements encrypted with the old public key. `findRCCtrlPairing` tries decryption with both the current and previous DH keys. Without this fallback, key rotation would break multicast discovery. 
@@ -68,23 +65,24 @@ The session key combines classical Diffie-Hellman with SNTRUP761 (lattice-based **Source**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs), [RemoteControl/Invitation.hs](../../src/Simplex/RemoteControl/Invitation.hs), [RemoteControl/Discovery.hs](../../src/Simplex/RemoteControl/Discovery.hs) -For subsequent sessions (after initial QR pairing), the controller announces its presence via UDP multicast so the host can connect without scanning a new QR code. The flow spans `Client.announceRC`, `Client.discoverRCCtrl`, `Client.findRCCtrlPairing`, `Invitation.signInvitation`/`verifySignedInvitation`, and `Discovery.joinMulticast`/`withSender`. +For subsequent sessions (after initial QR pairing), the controller announces its presence via UDP multicast so the host can connect without scanning a new QR code. The flow spans `Client.announceRC`, `Client.discoverRCCtrl`, `Client.findRCCtrlPairing`, `Invitation.signInvitation`/`verifySignedInvitation`, and `Discovery.withListener`/`withSender`. **Announcement creation** (`announceRC`): -1. The invitation is signed with a dual-signature chain: the session key signs the invitation URI, then the identity key signs the URI + session signature concatenated. This chain means a compromised session key alone cannot forge a valid identity-signed announcement - the identity key must also be compromised. +1. The invitation is signed with a dual-signature chain: the session key signs the invitation URI, then the identity key signs the concatenation `URI + "&ssig=" + sessionSignature`. This chain means a compromised session key alone cannot forge a valid identity-signed announcement - the identity key must also be compromised. 2. The signed invitation is encrypted with a DH shared secret between the host's known DH public key and the controller's ephemeral DH private key. 3. The encrypted packet is padded to 900 bytes (privacy: all announcements are indistinguishable by size). 4. 
Sent 60 times at 1-second intervals to multicast group `224.0.0.251:5227`. -5. Runs as a cancellable async task - cancelled in `prepareHostSession` once the session is established. +5. Runs as a cancellable async task - cancelled in `connectRCHost` after `prepareHostSession` returns, once the session is established. **Listener and discovery** (`discoverRCCtrl`): 1. Host calls `joinMulticast` to subscribe to the multicast group. A shared `TMVar Int` counter tracks active listeners - OS-level `IP_ADD_MEMBERSHIP` is only issued on 0→1 transition, `IP_DROP_MEMBERSHIP` on 1→0. This prevents duplicate syscalls when multiple listeners are active. 2. For each received packet, `findRCCtrlPairing` iterates over known pairings and tries decryption with the current DH key, falling back to `prevDhPrivKey` if present. 3. After successful decryption, the invitation's `dh` field is verified against the announcement's `dhPubKey` to prevent relay attacks. -4. Dual signatures are verified: session signature first, then identity signature. -5. 30-second timeout on the entire discovery process (`RCENotDiscovered` on expiry). +4. The source IP address is checked against the invitation's `host` field - prevents re-broadcasting a legitimate announcement from a different host. +5. Dual signatures are verified: session signature first, then identity signature. +6. 30-second timeout on the entire discovery process (`RCENotDiscovered` on expiry). 
--- From 38fa104c7ec11fd8fe35431e255f9db802f10727 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 22:40:10 +0000 Subject: [PATCH 68/91] subscriptions --- spec/topics/subscriptions.md | 222 +++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 spec/topics/subscriptions.md diff --git a/spec/topics/subscriptions.md b/spec/topics/subscriptions.md new file mode 100644 index 0000000000..b7bc9fa1cf --- /dev/null +++ b/spec/topics/subscriptions.md @@ -0,0 +1,222 @@ +# Subscriptions + +How messages reach recipients: router subscription model, subscription-driven delivery, cross-layer subscription flow, and reconnection. This is the cross-cutting view spanning all three layers (router, client, agent). + +For agent-internal subscription tracking (TSessionSubs, pending/active state machine, UP event deduplication), see [agent/infrastructure.md](../agent/infrastructure.md#subscription-tracking). For service subscription lifecycle, see [client-services.md](client-services.md). For the SMP protocol specification, see [simplex-messaging.md](../../protocol/simplex-messaging.md). + +- [Router subscription model](#router-subscription-model) +- [Subscription-driven delivery](#subscription-driven-delivery) +- [Cross-layer subscription flow](#cross-layer-subscription-flow) +- [Reconnection and resubscription](#reconnection-and-resubscription) +- [Service subscriptions](#service-subscriptions) + +--- + +## Router subscription model + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Server/Env/STM.hs](../../src/Simplex/Messaging/Server/Env/STM.hs) + +The router tracks which client connection is subscribed to each queue. At most one client can be subscribed to a given queue at a time - a new subscription displaces the previous one. + +### SubscribedClients - the TVar-of-Maybe pattern + +`SubscribedClients` is a `TMap EntityId (TVar (Maybe (Client s)))`. 
The indirection through `TVar (Maybe ...)` serves two purposes: + +1. **STM re-evaluation**: any transaction reading the TVar automatically re-evaluates when the subscriber changes (disconnects, gets displaced). This is used by `tryDeliverMessage` - if the subscriber disconnects mid-delivery, the STM transaction retries and sees `Nothing`. + +2. **Reconnection continuity**: when a mobile client disconnects and reconnects, the TVar is reused rather than recreated. Subscriptions that were made at any point are never removed from the map - this is a deliberate trade-off for intermittently connected mobile clients. + +The `SubscribedClients` constructor is not exported from `Server/Env/STM.hs` (only the type is). All access goes through `getSubscribedClient` (IO, outside STM) and `upsertSubscribedClient` (STM). This prevents accidental use of `TM.lookup` inside STM transactions, which would add the entire TMap to the transaction's read set. + +Two instances exist: `queueSubscribers` for individually-subscribed queues and `serviceSubscribers` for service-subscribed queues. + +### serverThread - split-STM processing + +`serverThread` processes subscription registration events from `subQ`. It runs separately from the client handler threads and uses a split-STM pattern to reduce contention: + +``` +subQ (TQueue) -- (A) STM: read event + → getServerClient clientId -- (B) IO: lookup client outside STM + → updateSubscribers -- (C) STM: register in SubscribedClients + → endPreviousSubscriptions -- (D) IO: notify displaced clients +``` + +Step (B) is deliberately outside STM. If the client lookup were inside the transaction, the transaction would re-evaluate every time the clients `IntMap` TVar changes (e.g., when any client connects or disconnects). By reading in IO, only the `updateSubscribers` transaction needs to be STM. 
+ +If the client disconnects between steps (B) and (C), `updateSubscribers` handles `Nothing` - it still sends END/DELD to any existing subscriber for the same queue. + +### Subscription displacement + +When `upsertSubscribedClient` finds a different client already subscribed to the same entity, it returns the previous client. `endPreviousSubscriptions` then: + +1. Queues `(entityId, END)` or `(entityId, DELD)` into `pendingEvents` (a `TVar (IntMap (NonEmpty ...))` keyed by client ID). +2. Removes the subscription from the displaced client's local `subscriptions` map and cancels any delivery thread. + +A separate `sendPendingEvtsThread` flushes `pendingEvents` on a timer (`pendingENDInterval`), delivering END/DELD events to displaced clients via their `sndQ`. If the client's `sndQ` is full, it forks a blocking thread rather than stalling the flush. + +For service subscriptions, the displacement event is `ENDS n idsHash` rather than `END`. + +### GET vs SUB mutual exclusion + +When `GET` is used on a queue, the server creates a `ProhibitSub` subscription. This prevents `SUB` on the same queue in the same connection (`CMD PROHIBITED`). Conversely, if `SUB` is active, `GET` is prohibited. GET clients are not added to `ServerSubscribers` and do not receive END events. + +--- + +## Subscription-driven delivery + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs) + +The router delivers at most one unacknowledged message per subscription. The `delivered :: TVar (Maybe (MsgId, SystemSeconds))` in each `Sub` record is the gate: `Just _` means a message is in flight (awaiting ACK), `Nothing` means the next message can be delivered. + +### Three delivery triggers + +**1. SUB** - `subscribeQueueAndDeliver`: after registering the subscription, the server peeks the first pending message (`tryPeekMsg`). If one exists, it is delivered alongside the `SOK` response in the same transmission batch. `setDelivered` records the message ID and timestamp. + +**2. 
ACK** - `acknowledgeMsg`: when the client ACKs a message, the server clears `delivered`, then calls `tryDelPeekMsg` which deletes the ACK'd message AND peeks the next. If a next message exists, it is immediately delivered in the ACK response and `setDelivered` is called again. This means ACK responses can piggyback the next message - minimizing round-trips. + +**3. SEND to empty queue** - `tryDeliverMessage`: when a sender writes a message to a previously empty queue (`wasEmpty = True`), the server attempts to push it to the subscribed recipient immediately. + +### Sync/async split in tryDeliverMessage + +`tryDeliverMessage` has a three-phase structure optimized for the common case: + +**Phase 1 - outside STM**: `getSubscribedClient` reads the `SubscribedClients` TMap via `readTVarIO` (IO, not STM). If no subscriber exists, the function returns immediately without entering any STM transaction. This avoids transaction overhead for queues with no active subscriber. + +**Phase 2 - STM transaction** (`deliverToSub`): reads the client TVar (inside STM, so the transaction re-evaluates if the subscriber changes), checks `subThread == NoSub` and `delivered == Nothing`. Then: + +- If the client's `sndQ` is **not full**: delivers the message directly in the same STM transaction (`writeTBQueue sndQ`), sets `delivered`. No thread is needed. This is the fast path. +- If the client's `sndQ` is **full**: sets `subThread = SubPending` and returns the client + sub for phase 3. + +**Phase 3 - forked thread** (`forkDeliver`): a `deliverThread` is spawned that blocks until the `sndQ` has room. Before delivering, it re-checks that the subscriber is still the same client and `delivered` is still `Nothing` - handling the race where the client disconnected and a new one subscribed between phases 2 and 3. 
+ +### Per-queue encryption + +The server encrypts every message before delivery using `encryptMsg`: `XSalsa20-Poly1305` with the per-queue DH shared secret (`rcvDhSecret` from `QueueRec`) and a nonce derived from the message ID. This is the server-to-recipient transport encryption layer - independent of the end-to-end encryption between sender and recipient. + +--- + +## Cross-layer subscription flow + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs), [Client.hs](../../src/Simplex/Messaging/Client.hs) + +### Subscribe path (agent → router) + +``` +subscribeConnections' + ├── getConnSubs (DB) → load RcvQueueSub per connection + └── subscribeConnections_ + ├── partition: send-only/new → immediate results; duplex/rcv → subscribe + ├── resumeDelivery, resumeConnCmds + └── subscribeQueues + ├── checkQueues (filter GET-locked queues) + ├── batchQueues by SMPTransportSession + ├── addPendingSubs (mark pending in currentSubs) + └── mapConcurrently per session: + subscribeSessQueues_ + ├── getSMPServerClient (get/create TCP connection) + ├── subscribeSMPQueues (protocol client: batch TLS write) + ├── processSubResults (STM: pending → active, record failures) + └── notify UP (for newly active connections) +``` + +**Batching**: `batchQueues` groups queues by `SMPTransportSession = (UserId, SMPServer, Maybe ByteString)`. The third field carries the connection ID in entity-session mode (each connection gets its own TCP session) or `Nothing` in shared mode (all queues to the same server share one session). Per-session batches are subscribed concurrently via `mapConcurrently`. + +**Protocol client**: `subscribeSMPQueues` maps each queue to a `SUB` command, batches them into physical TLS writes (respecting server block size limits via `batchTransmissions'`), and awaits responses concurrently. 
`processSUBResponse_` classifies responses: `OK`/`SOK serviceId` (success), `MSG` (immediate message delivery piggybacked on response), or error. + +### Receive path (router → application) + +``` +Router MSG → TLS → protocol client rcvQ + → processMsg: server push (empty corrId) → STEvent → msgQ + → subscriber thread (Agent.hs): + readTBQueue msgQ → processSMPTransmissions + ├── STEvent MSG → processSMP → withConnLock → decrypt → subQ → Application + ├── STEvent END → removeSubscription → subQ END + ├── STEvent DELD → removeSubscription → subQ DELD + └── STResponse SUB OK → processSubOk → addActiveSub → accumulate UP +``` + +The protocol client's `processMsg` thread classifies each incoming transmission: +- **Non-empty corrId**: response to a pending command - delivered to the waiting `getResponse` caller via `responseVar`. +- **Empty corrId**: server-initiated push (MSG, END, DELD, ENDS) - wrapped as `STEvent` and forwarded to `msgQ`. +- **Expired/unexpected responses**: also forwarded to `msgQ` as `STResponse`. + +The agent's `subscriber` thread reads from `msgQ` and processes all events under `agentOperationBracket AORcvNetwork`. + +### Dual UP event sources + +UP events can originate from two paths: +- **Synchronous** (`subscribeSessQueues_`): after `processSubResults` promotes pending → active, notifies `UP srv connIds` for newly active connections. Used during initial subscription. +- **Asynchronous** (`processSMPTransmissions`): when SUB responses arrive via `msgQ` (e.g., after reconnection), `processSubOk` promotes pending → active and accumulates `upConnIds`, which are batch-notified at the end of the transmission batch. + +Both paths guard against duplicates: they only emit UP for connections that were not already in `activeSubs`. 
+ +--- + +## Reconnection and resubscription + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +### Server-side disconnect cleanup + +When a client disconnects (`clientDisconnected`): + +1. `connected = False` - any STM transaction reading this TVar re-evaluates. +2. All `subscriptions` and `ntfSubscriptions` are swapped to empty maps. +3. Each subscription's delivery thread is killed (`cancelSub`). +4. `deleteSubcribedClient` sets each queue's `TVar (Maybe Client)` to `Nothing` and removes the entry from the `SubscribedClients` map. The `sameClient` check (comparing `clientId`) prevents removing a newer subscriber that connected after the disconnect. +5. The client is removed from `subClients` IntSet. + +After disconnect, the queue's messages remain stored. The next client to SUB the same queue will receive the first pending message in the SUB response. + +### Agent-side reconnection + +When the protocol client detects a TLS disconnect, `smpClientDisconnected` fires in the agent: + +1. `removeSessVar` with CAS check (monotonic `sessionVarId` prevents stale callbacks from removing newer clients). +2. `setSubsPending` demotes all active subscriptions for the matching session to pending in `currentSubs`. +3. `DOWN srv connIds` is sent to the application for affected connections. +4. Resubscription begins - the mechanism depends on transport session mode: + - **Entity-session mode**: `resubscribeSMPSession` spawns a persistent worker thread. + - **Shared mode**: directly calls `subscribeQueues` and `subscribeClientService` without a persistent worker. + +In entity-session mode, the resubscription worker loops with exponential backoff until all pending subscriptions are resubscribed: + +1. Gets or creates a new SMP client connection to the server. +2. Reads pending subscriptions for the session. +3. Calls `subscribeSessQueues_` with `withEvents = True` to re-send SUB commands. +4. 
On success, subscriptions move from pending → active and `UP` events are emitted. +5. On temporary error, backs off and retries. +6. Worker self-cleans on exit via `removeSessVar`. + +### Stale response protection + +Both subscription paths (synchronous `processSubResults` and asynchronous `processSubOk`) verify that the queue is still pending in `currentSubs` for the **current** session before promoting to active. If a session was replaced between sending SUB and receiving the response, the stale response is silently discarded. This prevents a response from an old TLS session from marking a queue as active when it should be pending for the new session. + +--- + +## Service subscriptions + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Protocol.hs](../../src/Simplex/Messaging/Protocol.hs) + +Service subscriptions are a bulk mechanism where one `SUBS n idsHash` command subscribes all queues associated with a service identity. The service identity is derived from a long-term TLS client certificate presented during the transport handshake. + +### How service subscriptions differ from individual subscriptions + +| Aspect | Individual (SUB) | Service (SUBS) | +|--------|------------------|----------------| +| Granularity | One queue per SUB command | All associated queues in one command | +| Subscriber tracking | `queueSubscribers` (keyed by QueueId) | `serviceSubscribers` (keyed by ServiceId) | +| Displacement signal | `END` per queue | `ENDS n idsHash` per service | +| Message delivery | Immediate (first message in SUB response) | Iterative (`deliverServiceMessages` iterates all queues, sends `ALLS` when complete) | +| Association | Implicit (queue + subscriber) | Explicit (`rcvServiceId` in QueueRec, set via `setQueueService`) | + +### SUBS flow on the router + +1. 
`sharedSubscribeService` checks the actual queue count and IDs hash against the stored service state, and enqueues a `CSService` event to `subQ` for `serverThread` to process (registration in `serviceSubscribers` happens asynchronously). +2. If this is a new service subscription (not previously subscribed): `deliverServiceMessages` iterates all service-associated queues via `foldRcvServiceMessages`, creates per-queue `Sub` entries, and delivers pending messages. +3. After iteration completes, `ALLS` is sent to signal the client that all pending messages have been delivered. + +For notification servers, `NSUBS` uses the same `sharedSubscribeService` for registration but does not deliver pending messages (no `deliverServiceMessages` call) - notification subscriptions only register for future `NMSG` events. + +For service certificate lifecycle and agent-side service management, see [client-services.md](client-services.md). From 7d6e319fc23c2075445abf58bdbe7e81685e06ec Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 22:52:16 +0000 Subject: [PATCH 69/91] client services --- spec/topics/client-services.md | 221 +++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 spec/topics/client-services.md diff --git a/spec/topics/client-services.md b/spec/topics/client-services.md new file mode 100644 index 0000000000..998fe51fe3 --- /dev/null +++ b/spec/topics/client-services.md @@ -0,0 +1,221 @@ +# Client Services + +How service certificates enable bulk queue subscriptions: identity lifecycle, queue association, service subscription flow, tracking, reconnection, and notification server usage. This is the cross-cutting view spanning transport, protocol, server, client, agent, and store layers. 
+ +For agent-internal subscription tracking (TSessionSubs service state, active/pending promotion), see [agent/infrastructure.md](../agent/infrastructure.md#subscription-tracking). For the router subscription model and delivery mechanics, see [subscriptions.md](subscriptions.md). For the full implementation reference with types, wire encoding, test gaps, security invariants, and risk analysis, see [rcv-services.md](../rcv-services.md). + +- [Overview](#overview) +- [Service identity lifecycle](#service-identity-lifecycle) +- [Queue-service association](#queue-service-association) +- [Service subscription flow](#service-subscription-flow) +- [Service tracking in TSessionSubs](#service-tracking-in-tsessionsubs) +- [Reconnection and graceful degradation](#reconnection-and-graceful-degradation) +- [Notification server usage](#notification-server-usage) + +--- + +## Overview + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Client.hs](../../src/Simplex/Messaging/Client.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +A **service client** is a high-volume SMP client (notification router, chat relay, directory service) that presents a TLS client certificate during handshake. The router assigns it a persistent `ServiceId` derived from the certificate fingerprint. Individual queues are then associated with this ServiceId via per-queue `SUB` commands carrying a service signature. Once associated, the service client can bulk-subscribe all its queues with a single `SUBS` command instead of O(n) individual `SUB` commands on each reconnection. 
+ +``` +Service client SMP Router + | | + |---- TLS + service cert --------->| Three-way handshake + |<--- ServiceId -------------------| (Transport layer) + | | + |---- SUB + service sig ---------->| Per-queue association + |<--- SOK(ServiceId) --------------| (one-time per queue) + | | + |---- SUBS count idsHash --------->| Bulk subscribe + |<--- SOKS count' idsHash' --------| (server's actual state) + |<--- MSG ... MSG ... MSG ---------| Buffered messages + |<--- ALLS ------------------------| All delivered +``` + +Two version gates control feature availability: `serviceCertsSMPVersion` (v16) enables the service handshake, `SOK`, and dual signatures; `rcvServiceSMPVersion` (v19) adds count+hash parameters to `SUBS`/`NSUBS`/`SOKS`/`ENDS` and enables the messaging service role (`SRMessaging`). Below v19, `SUBS`/`NSUBS` exist but are sent without parameters. + +--- + +## Service identity lifecycle + +**Source**: [Transport.hs](../../src/Simplex/Messaging/Transport.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs), [Agent/Store/AgentStore.hs](../../src/Simplex/Messaging/Agent/Store/AgentStore.hs) + +### Credential generation + +The agent generates a self-signed X.509 certificate per (userId, server) pair on first use via `getServiceCredentials`. The certificate is generated with `genCredentials` using a long validity period and is stored in the `client_services` table along with the private signing key and certificate fingerprint. The `ServiceId` column is NULL until the first successful handshake. + +### Three-way handshake + +Standard SMP handshake is two messages (server sends `SMPServerHandshake`, client sends `SMPClientHandshake`). When the client includes service credentials, an optional third message is added: + +1. **Router -> Client**: standard `SMPServerHandshake` +2. **Client -> Router**: `SMPClientHandshake` with `SMPClientHandshakeService {serviceRole, serviceCertKey}`. 
The `serviceCertKey` contains the TLS client certificate chain plus a proof-of-possession - a fresh per-session Ed25519 key pair signed by the X.509 signing key. +3. **Router -> Client**: `SMPServerHandshakeResponse {serviceId}`. The router verifies the certificate chain matches the TLS peer certificate, extracts the fingerprint, and calls `getCreateService` to find or create a `ServiceId` for that fingerprint. + +The per-session Ed25519 key (not the X.509 key) is used to sign `SUBS`/`NSUBS` commands. This limits exposure - compromising a session key does not compromise the long-term service identity. + +### Dual signature scheme + +When the TLS handshake established a service identity (the client has a `THClientService`) and the command is `NEW`, `SUB`, or `NSUB` (per `useServiceAuth`), `authTransmission` appends two signatures: + +1. The entity key signs over `serviceCertHash || transmission` - binding the service identity to the queue operation +2. The service session key signs over `transmission` alone + +This prevents MITM service substitution within TLS: an attacker cannot replace the service certificate hash without invalidating the entity key signature. + +### Version-gated role filtering + +Messaging services (`SRMessaging`) are suppressed below v19 - `mkClientService` returns `Nothing` for messaging role when the router version is below `rcvServiceSMPVersion`. Notifier services (`SRNotifier`) are sent at v16+. This allows gradual rollout - routers can support notification service certificates before full messaging service support. + +--- + +## Queue-service association + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Server/QueueStore.hs](../../src/Simplex/Messaging/Server/QueueStore.hs) + +Queues are associated with services through per-queue `SUB` commands (with service signature) or at creation time via `NEW`. The router stores `rcvServiceId :: Maybe ServiceId` on each `QueueRec`. 
+ +### sharedSubscribeQueue - four cases + +`sharedSubscribeQueue` handles the intersection of client type and existing association: + +**Case 1: Service client, queue already associated with this service** - Duplicate association (retry after lost response). If no service subscription exists yet, increments the client's service queue count. + +**Case 2: Service client, queue not yet associated** (or different service) - Calls `setQueueService` to persist the association in `QueueRec`, increments client's `serviceSubsCount` by `(1, queueIdHash rId)`. + +**Case 3: Non-service client, queue has service association** - Calls `setQueueService` with `Nothing` to **remove** the association. This is the migration path when a user disables services. + +**Case 4: Non-service client, no service association** - Standard per-queue subscription, no service involvement. + +### Association persistence + +The `setQueueService` function in QueueStore updates `rcvServiceId` on the queue record and maintains the service's aggregate queue set (`STMService.serviceRcvQueues`). The set and its XOR hash are updated atomically. Associations persist across client disconnect - only live subscription state is cleaned up, not the stored `rcvServiceId`. + +### IdsHash - XOR-based drift detection + +`IdsHash` is a 16-byte value computed as XOR of MD5 hashes of individual queue IDs. XOR is self-inverse, so both `addServiceSubs` and `subtractServiceSubs` use the same `<>` (XOR) operator for the hash component. The count field is an additional check rather than a guarantee against collisions - two different queue sets with the same XOR could still have different counts, so a mismatch in either value indicates drift. + +--- + +## Service subscription flow + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Client.hs](../../src/Simplex/Messaging/Client.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +### SUBS command processing + +1. `subscribeServiceMessages` receives `SUBS count idsHash` from the client. +2. 
`sharedSubscribeService` queries `getServiceQueueCountHash` for the router's actual count and hash, sets `clientServiceSubscribed = True`, and enqueues a `CSService` event to `subQ`. `serverThread` processes this asynchronously: adds the client to `subClients`, adjusts `totalServiceSubs`, and upserts into `serviceSubscribers` (displacing any previous subscriber). +3. Returns `SOKS count' idsHash'` immediately - the client can compare expected vs actual to detect drift. + +### deliverServiceMessages and ALLS + +If this is a new subscription (not duplicate), the router forks `deliverServiceMessages`: + +1. `foldRcvServiceMessages` iterates all queues associated with the service. +2. For each queue with a pending message: `getSubscription` creates a `Sub` in the client's `subscriptions` TMap (if not already present), sets `delivered`, and writes the MSG event to `msgQ` immediately. +3. Queue errors are accumulated in a list whose initial value is `[(NoCorrId, NoEntity, ALLS)]`. Errors are prepended, so ALLS ends up as the last event. +4. After the fold completes, the accumulated events (errors plus ALLS) are written to `msgQ` in one batch. + +MSG events are delivered individually during the fold (not accumulated), while ALLS is deferred to the end - this ensures ALLS arrives only after all pending messages have been sent. + +If the subscription is a duplicate (`hasSub` is `True`), `deliverServiceMessages` is NOT forked - only `SOKS` is returned. + +### On-demand Sub creation for new messages + +When a new message arrives for a service-associated queue via `tryDeliverMessage`, the router looks up the subscriber in `serviceSubscribers` (by ServiceId) rather than `queueSubscribers` (by QueueId). If no `Sub` exists in the client's `subscriptions` TMap (the fold hasn't reached this queue yet, or the queue was associated after SUBS), `newServiceDeliverySub` creates one on the fly. The fold's `getSubscription` performs the same check. 
STM serialization ensures at most one path creates the Sub for a given queue. + +### Service displacement + +When a new service client subscribes to the same ServiceId and the previous subscriber is a different, still-connected client, `cancelServiceSubs` atomically zeros out the old client's `clientServiceSubs` counter and prepares an `ENDS count idsHash` event. `endPreviousSubscriptions` then swaps out the old client's individual subscription map, cancels per-queue Subs, and places ENDS in `pendingEvents` for deferred delivery via `sendPendingEvtsThread`. The old client's fold thread (if still running from `deliverServiceMessages`) continues writing to the old client's `msgQ` until ALLS, then exits. + +--- + +## Service tracking in TSessionSubs + +**Source**: [Agent/TSessionSubs.hs](../../src/Simplex/Messaging/Agent/TSessionSubs.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +### Aggregate tracking - service queues are not in activeSubs + +When a queue has both a matching `serviceId` and `serviceAssoc = True`, it is tracked only via the count and hash in `activeServiceSub`, **not** in the `activeSubs` TMap. Callers pre-separate queues into two lists before calling `batchAddActiveSubs`: non-service queues go to `activeSubs`, service-associated queues are counted via `updateActiveService`. A queue on a service-capable session but with `serviceAssoc = False` still lands in `activeSubs` normally. Consequence: `hasActiveSub(rId)` returns `False` for service-associated queues - callers must check the service subscription separately. + +### Session ID gating + +`setActiveServiceSub` only promotes the service subscription from pending to active if the session ID matches the current TLS session. If a reconnection occurred between sending SUBS and receiving SOKS, the stale response is kept as pending rather than promoted. This prevents a response from an old session from corrupting the new session's state. 
+ +### State transitions + +- **setPendingServiceSub**: stores expected `ServiceSub` before SUBS is sent +- **setActiveServiceSub**: promotes to active after SOKS, with session ID validation +- **updateActiveService**: incrementally builds the active service sub as individual queues return `SOK(Just serviceId)` - used when per-queue SUBs succeed with service association +- **setServiceSubPending_**: demotes active to pending on disconnect (called by `setSubsPending`) +- **deleteServiceSub**: clears both active and pending on ENDS + +### Service events + +| Event | When | +|-------|------| +| `SERVICE_UP srv result` | SUBS succeeded; `ServiceSubResult` carries any drift errors (count/hash/serviceId mismatch) | +| `SERVICE_DOWN srv sub` | Client disconnected while service was subscribed | +| `SERVICE_ALL srv` | ALLS received - all buffered messages delivered | +| `SERVICE_END srv sub` | ENDS received - another service client took over | + +All are entity-less (`AENone`) events. + +--- + +## Reconnection and graceful degradation + +**Source**: [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +### updateClientService - credential synchronization + +After each SMP connection, `updateClientService` reconciles the agent's stored ServiceId with the router's: + +- **ServiceId matches**: normal path, no action needed +- **ServiceId changed** (router data was reset): calls `removeRcvServiceAssocs` to clear all queue-service associations for this server, forcing re-association via individual SUBs +- **Router lost service support** (version downgrade): calls `deleteClientService` to remove the local service record entirely +- **Router returned ServiceId without credentials**: logs error (should not happen) + +### Resubscription ordering + +On reconnect, the resubscription worker processes the pending service subscription **before** individual queues. 
This ensures the service context is established before queue-level SUB commands that depend on it (the router uses `clntServiceId` from the TLS session for queue-service association). + +### Fallback to individual subscriptions + +`resubscribeClientService` handles two error classes by falling back to `unassocSubscribeQueues`: + +- `SSErrorServiceId` - the router returned a different ServiceId than expected +- `clientServiceError` - matches `NO_SERVICE`, `SERVICE`, and `PROXY(BROKER NO_SERVICE)` errors + +`unassocSubscribeQueues` deletes the `client_services` row, sets `rcv_service_assoc = 0` on all queues, and resubscribes them individually. This is the nuclear recovery path - service state is fully reset, and the next connection will generate fresh credentials. + +### Agent store triggers + +The agent's `client_services` table tracks `service_queue_count` and `service_queue_ids_hash`. SQLite triggers on `rcv_queues` automatically maintain these counters when `rcv_service_assoc` changes. The triggers use `simplex_xor_md5_combine` - the SQLite equivalent of Haskell's `queueIdHash <>`. On credential update (new cert), `service_id` is set to NULL via `ON CONFLICT DO UPDATE`, forcing a fresh handshake. + +--- + +## Notification server usage + +**Source**: [Notifications/Server.hs](../../src/Simplex/Messaging/Notifications/Server.hs), [Notifications/Server/Env.hs](../../src/Simplex/Messaging/Notifications/Server/Env.hs) + +The notification server is the primary consumer of service certificates for the `SRNotifier` role. It manages thousands to millions of SMP queue subscriptions per SMP router. + +### Credential management + +`NtfServerConfig.useServiceCreds` controls whether the NTF server uses service certificates. On first use per SMP router, `mkDbService` generates a self-signed TLS certificate (stored in the `smp_servers` table) and reuses it across connections. 
+ +### Startup subscription + +If a stored service subscription exists, `subscribeSrvSubs` sends `NSUBS` first (one command for all associated queues), then subscribes all queues individually in batches via `subscribeQueuesNtfs` (including service-associated queues, which were previously associated via `NSUB`). + +### Recovery path + +On `CAServiceUnavailable` (irrecoverable service error, e.g., ServiceId mismatch after cert rotation), `removeServiceAndAssociations` performs nuclear recovery: clears all service credentials, resets counters, removes all `ntf_service_assoc` flags, and resubscribes all queues individually. The Postgres schema uses `xor_combine` triggers (equivalent to the agent's SQLite triggers) to maintain per-SMP-server notifier count and hash. + +### NSUBS vs SUBS + +`NSUBS` uses the same `sharedSubscribeService` for registration in `serviceSubscribers` but does **not** fork `deliverServiceMessages`. Notification delivery is handled by the separate `deliverNtfsThread` which uses `serviceSubscribers` to look up the subscribed service client for each notification queue. Consequently, there is no `ALLS` signal for NSUBS subscriptions. From 9b15cdc5259812dcf41e3ecc29afa7b92e5f3ae6 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sat, 14 Mar 2026 23:34:36 +0000 Subject: [PATCH 70/91] notifications spec --- spec/topics/client-services.md | 8 +- spec/topics/notifications.md | 286 +++++++++++++++++++++++++++++++++ spec/topics/subscriptions.md | 15 +- 3 files changed, 297 insertions(+), 12 deletions(-) create mode 100644 spec/topics/notifications.md diff --git a/spec/topics/client-services.md b/spec/topics/client-services.md index 998fe51fe3..f2e0f287b3 100644 --- a/spec/topics/client-services.md +++ b/spec/topics/client-services.md @@ -107,7 +107,7 @@ The `setQueueService` function in QueueStore updates `rcvServiceId` on the queue ### SUBS command processing 1. 
`subscribeServiceMessages` receives `SUBS count idsHash` from the client. -2. `sharedSubscribeService` queries `getServiceQueueCountHash` for the router's actual count and hash, sets `clientServiceSubscribed = True`, and enqueues a `CSService` event to `subQ`. `serverThread` processes this asynchronously: adds the client to `subClients`, adjusts `totalServiceSubs`, and upserts into `serviceSubscribers` (displacing any previous subscriber). +2. `sharedSubscribeService` queries `getServiceQueueCountHash` for the router's actual count and hash. In one STM transaction, sets `clientServiceSubscribed = True` and swaps the client's service subs counter to the server's actual values (computing a delta). In a separate STM transaction, enqueues a `CSService` event (carrying the delta) to `subQ`. `serverThread` processes this asynchronously: adds the client to `subClients`, subtracts the delta from `totalServiceSubs` (preventing double-counting of per-queue accumulated counts), and upserts into `serviceSubscribers` (displacing any previous subscriber). 3. Returns `SOKS count' idsHash'` immediately - the client can compare expected vs actual to detect drift. ### deliverServiceMessages and ALLS @@ -115,7 +115,7 @@ The `setQueueService` function in QueueStore updates `rcvServiceId` on the queue If this is a new subscription (not duplicate), the router forks `deliverServiceMessages`: 1. `foldRcvServiceMessages` iterates all queues associated with the service. -2. For each queue with a pending message: `getSubscription` creates a `Sub` in the client's `subscriptions` TMap (if not already present), sets `delivered`, and writes the MSG event to `msgQ` immediately. +2. For each queue with a pending message: `getSubscription` creates a `Sub` in the client's `subscriptions` TMap if not already present (returning `Nothing` for duplicates). If a new Sub is created, `setDelivered` records the message and the MSG event is written to `msgQ` immediately. 3. 
Queue errors are accumulated in a list whose initial value is `[(NoCorrId, NoEntity, ALLS)]`. Errors are prepended, so ALLS ends up as the last event. 4. After the fold completes, the accumulated events (errors plus ALLS) are written to `msgQ` in one batch. @@ -129,7 +129,7 @@ When a new message arrives for a service-associated queue via `tryDeliverMessage ### Service displacement -When a new service client subscribes to the same ServiceId and the previous subscriber is a different, still-connected client, `cancelServiceSubs` atomically zeros out the old client's `clientServiceSubs` counter and prepares an `ENDS count idsHash` event. `endPreviousSubscriptions` then swaps out the old client's individual subscription map, cancels per-queue Subs, and places ENDS in `pendingEvents` for deferred delivery via `sendPendingEvtsThread`. The old client's fold thread (if still running from `deliverServiceMessages`) continues writing to the old client's `msgQ` until ALLS, then exits. +When a new service client subscribes to the same ServiceId and the previous subscriber is a different, still-connected client, `cancelServiceSubs` atomically zeros out the old client's service subs counter and prepares an `ENDS count idsHash` event. `endPreviousSubscriptions` first inserts ENDS into `pendingEvents` (for deferred delivery via `sendPendingEvtsThread`), then subtracts the changed subs from `totalServiceSubs`, swaps out the old client's individual subscription map to empty, and cancels per-queue Subs. The old client's fold thread (if still running from `deliverServiceMessages`) continues writing to the old client's `msgQ` until ALLS, then exits. 
--- @@ -214,7 +214,7 @@ If a stored service subscription exists, `subscribeSrvSubs` sends `NSUBS` first ### Recovery path -On `CAServiceUnavailable` (irrecoverable service error, e.g., ServiceId mismatch after cert rotation), `removeServiceAndAssociations` performs nuclear recovery: clears all service credentials, resets counters, removes all `ntf_service_assoc` flags, and resubscribes all queues individually. The Postgres schema uses `xor_combine` triggers (equivalent to the agent's SQLite triggers) to maintain per-SMP-server notifier count and hash. +On `CAServiceUnavailable` (irrecoverable service error, e.g., ServiceId mismatch after cert rotation), `removeServiceAndAssociations` performs nuclear DB cleanup: clears all service credentials, resets counters, and removes all `ntf_service_assoc` flags. The caller then resubscribes all queues individually via `subscribeSrvSubs`. The Postgres schema uses `xor_combine` triggers (equivalent to the agent's SQLite triggers) to maintain per-SMP-server notifier count and hash. ### NSUBS vs SUBS diff --git a/spec/topics/notifications.md b/spec/topics/notifications.md new file mode 100644 index 0000000000..6f086afd24 --- /dev/null +++ b/spec/topics/notifications.md @@ -0,0 +1,286 @@ +# Notifications + +How push notifications work: encryption architecture, SMP server notification infrastructure, NTF server processing, agent subscription supervisor, and push notification delivery. This is the cross-cutting view spanning SMP server, NTF server, agent, and push provider layers. + +For service certificate lifecycle and NSUBS bulk subscription, see [client-services.md](client-services.md). For the router subscription model, see [subscriptions.md](subscriptions.md). For the worker framework used by NtfSubSupervisor, see [agent/infrastructure.md](../agent/infrastructure.md#worker-framework). 
+ +- [End-to-end flow](#end-to-end-flow) +- [Encryption architecture](#encryption-architecture) +- [SMP server notification infrastructure](#smp-server-notification-infrastructure) +- [NTF server](#ntf-server) +- [Agent NtfSubSupervisor](#agent-ntfsubsupervisor) +- [Push notification processing](#push-notification-processing) + +--- + +## End-to-end flow + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Notifications/Server.hs](../../src/Simplex/Messaging/Notifications/Server.hs), [Agent.hs](../../src/Simplex/Messaging/Agent.hs) + +### Setup (one-time per device) + +1. App calls `registerNtfToken` with device token and `NMInstant` mode. +2. Agent sends `TNEW` to NTF server - NTF server sends verification code via APNs. +3. App receives push notification, extracts code, calls `verifyNtfToken` (sends `TVFY`). +4. Token becomes `NTActive`. Agent calls `initializeNtfSubs` for all active connections. + +### Per-connection subscription setup (dual worker pipeline) + +``` +ntfSubQ (NSCCreate) + -> NtfSubSupervisor: partitions queues by SMP server + -> SMP worker: NKEY authKey dhKey -> SMP server + <- SMP server: NID notifierId srvDhKey + -> Agent stores ClientNtfCreds (notifierId, rcvNtfDhSecret) + -> NTF worker: SNEW tknId (server, notifierId) ntfPrivKey -> NTF server + -> NTF server stores sub, sends NSUB to SMP server + <- SMP server registers NTF server as notification subscriber +``` + +### Message notification delivery + +``` +Sender -> SEND msg (notification=True) -> SMP server + -> enqueueNotification: encrypt NMsgMeta with rcvNtfDhSecret -> NtfStore + -> deliverNtfsThread (periodic): NMSG nonce encMeta -> NTF server + -> ntfSubscriber.receiveSMP: PNMessageData -> addTokenLastNtf -> pushQ + -> ntfPush: encrypt PNMessageData list with tknDhSecret -> APNs -> device + -> App wakes, calls getNotificationConns + -> Agent: decrypt with tknDhSecret, then decrypt encMeta with rcvNtfDhSecret + -> App fetches actual message from SMP server +``` + +--- + 
+## Encryption architecture + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Notifications/Server.hs](../../src/Simplex/Messaging/Notifications/Server.hs), [Agent.hs](../../src/Simplex/Messaging/Agent.hs) + +The notification system uses two independent encryption layers so that no intermediary between the SMP server and the recipient (the NTF server or the push provider) can correlate queue identity with message metadata. The SMP server itself necessarily sees both, as it stores the queue and creates the metadata (see the table below). + +### Layer 1: SMP server to recipient (rcvNtfDhSecret) + +When the agent sends `NKEY authKey dhKey` to the SMP server, both sides compute a DH shared secret (`rcvNtfDhSecret`). The SMP server uses this to encrypt `NMsgMeta {msgId, msgTs}` inside each `NMSG`. The NTF server cannot decrypt this - it forwards the encrypted blob opaquely. + +### Layer 2: NTF server to device (tknDhSecret) + +During `TNEW`, the agent and NTF server establish `tknDhSecret` via DH exchange. The NTF server encrypts the entire `PNMessageData` list (containing `smpQueue`, `ntfTs`, `nmsgNonce`, `encNMsgMeta`) with this secret before sending via APNs. + +### What each entity can see + +| Entity | Queue identity | Message metadata | Message content | +|--------|---------------|-----------------|----------------| +| SMP server | Yes (stores queue) | Yes (creates NMsgMeta) | No (E2E encrypted) | +| NTF server | Yes (smpQueue in PNMessageData) | No (encNMsgMeta opaque) | No | +| Push provider (APNs) | No (tknDhSecret encrypted) | No | No | +| Recipient | Yes | Yes (two-layer decrypt) | Yes | + +### Device-side two-layer decryption + +In `getNotificationConns`, the agent decrypts in two steps: +1. Decrypt push payload with `tknDhSecret` (NTF-to-device) to get `PNMessageData` list +2. 
For each entry, decrypt `encNMsgMeta` with `rcvNtfDhSecret` (SMP-to-recipient) to get `NMsgMeta {msgId, msgTs}` + +--- + +## SMP server notification infrastructure + +**Source**: [Server.hs](../../src/Simplex/Messaging/Server.hs), [Server/NtfStore.hs](../../src/Simplex/Messaging/Server/NtfStore.hs) + +### Notifier credentials on queues + +Each queue's `QueueRec` has an optional `notifier :: Maybe NtfCreds` containing: +- `notifierId` - the entity ID the NTF server uses for NSUB +- `notifierKey` - public auth key for verifying NSUB commands +- `rcvNtfDhSecret` - shared secret for encrypting notification metadata +- `ntfServiceId` - optional service association for bulk NSUBS + +`NKEY` creates these credentials (generating the DH shared secret server-side). `NDEL` removes them and deletes pending notifications from NtfStore. + +### Notification generation + +When a sender sends a message with `notification msgFlags == True`, `enqueueNotification` creates a `MsgNtf` containing `NMsgMeta {msgId, msgTs}` encrypted with `rcvNtfDhSecret` and a random nonce. The notification is stored in the in-memory `NtfStore` (a `TMap NotifierId (TVar [MsgNtf])`) - multiple notifications can accumulate per queue. + +### deliverNtfsThread - periodic batch delivery + +Runs every `ntfDeliveryInterval` microseconds. Each cycle: + +1. Reads all pending notifications from `NtfStore`. +2. Calls `getQueueNtfServices` to partition notifications by service association. +3. For service-associated queues: delivers NMSG to the subscribed service client via `serviceSubscribers`. +4. For non-service queues: iterates through `subClients` and delivers to individually-subscribed clients. +5. Each NMSG contains `(ntfNonce, encNMsgMeta)` - the encrypted notification metadata. +6. All pending notifications for a given client are delivered in one cycle (no per-cycle cap). Transmissions are batched into TLS frames by the transport layer. +7. 
Notifications for deleted queues (discovered during partitioning) are cleaned up from `NtfStore`. + +This is periodic, not event-driven - there is a deliberate latency trade-off to reduce overhead. Notifications are not pushed immediately when a message arrives. + +--- + +## NTF server + +**Source**: [Notifications/Server.hs](../../src/Simplex/Messaging/Notifications/Server.hs), [Notifications/Server/Env.hs](../../src/Simplex/Messaging/Notifications/Server/Env.hs) + +### Architecture + +Three main concurrent threads: + +- **ntfSubscriber**: receives NMSG events from SMP servers and SMP client agent state changes +- **ntfPush**: sends push notifications (APNs/Firebase) from a bounded queue +- **periodicNtfsThread**: sends periodic "check messages" background notifications based on per-token cron intervals + +### Token lifecycle + +``` +NTRegistered (after TNEW, verification push sent) + -> NTConfirmed (APNs accepts verification push delivery) + -> NTActive (after TVFY with correct code) + +Any state -> NTInvalid (push provider reports token invalid during any push) +Any state -> NTExpired (provider reports token expired) +``` + +`NTNew` exists only on the agent side (pre-registration); the NTF server creates tokens directly in `NTRegistered`. `NTInvalid` can be reached from any state where a push delivery is attempted (including `NTRegistered` during verification), not only from `NTActive`. + +`allowTokenVerification` permits TVFY from `NTRegistered`, `NTConfirmed`, and `NTActive` states. `TRPL` replaces the device token (e.g., after OS token refresh) while keeping all subscriptions - it resets status to `NTRegistered` and re-sends verification. + +### Subscription handling + +`SNEW tknId (SMPQueueNtf smpServer notifierId) ntfPrivateKey` creates a subscription record and delegates to the SMP subscriber infrastructure: + +1. `subscribeNtfs` gets or creates a per-SMP-server `SMPSubscriber` thread. +2. 
The subscriber thread reads from its queue and calls `subscribeQueuesNtfs`, which sends `NSUB` to the SMP server using the `ntfPrivateKey` provided by the agent. +3. `SCHK` returns the current subscription status; the agent uses this for periodic health checks. + +### ntfSubscriber - receiving from SMP + +Runs two concurrent sub-threads: + +**receiveSMP**: reads from the SMP client agent's `msgQ`: +- `NMSG nmsgNonce encNMsgMeta`: Creates `PNMessageData`, calls `addTokenLastNtf` to look up the owning token and aggregate with other recent notifications, then enqueues `PNMessage` to `pushQ`. +- `END`: Updates subscription status to `NSEnd`. +- `DELD`: Updates subscription status to `NSDeleted`. + +**receiveAgent**: reads from `agentQ` for client state changes: +- `CAConnected`: Logs reconnection (no status update). +- `CADisconnected`: Updates affected subscriptions to `NSInactive`. +- `CASubscribed`: Marks subscriptions as `NSActive`. +- `CASubError`: Updates individual subscription errors. +- `CAServiceDisconnected` / `CAServiceSubError`: Logs only. +- `CAServiceSubscribed`: Logs, warns on count/hash mismatches. +- `CAServiceUnavailable`: Calls `removeServiceAndAssociations` - nuclear recovery (see [client-services.md](client-services.md#notification-server-usage)). + +### Token-level notification batching + +`addTokenLastNtf` is critical for push efficiency. The `last_notifications` table is keyed by `(token_id, subscription_id)` and UPSERT'd - each subscription contributes only its most recent notification. When a push is sent, multiple `PNMessageData` entries for the same token are combined into a single APNs payload. This means one push notification can carry metadata for messages across multiple queues. 
+ +### Push notification types + +| Type | Content | Trigger | +|------|---------|---------| +| `PNVerification` | Encrypted registration code | TNEW / TRPL | +| `PNMessage` | Encrypted `PNMessageData` list | NMSG from SMP server | +| `PNCheckMessages` | `{"checkMessages": true}` | periodicNtfsThread (cron) | + +`PNMessage` is sent as a mutable-content alert ("Encrypted message or another app event"). `PNVerification` and `PNCheckMessages` are silent background notifications. + +--- + +## Agent NtfSubSupervisor + +**Source**: [Agent/NtfSubSupervisor.hs](../../src/Simplex/Messaging/Agent/NtfSubSupervisor.hs), [Agent/Env/SQLite.hs](../../src/Simplex/Messaging/Agent/Env/SQLite.hs) + +### Supervisor structure + +``` +NtfSupervisor + ntfTkn :: TVar (Maybe NtfToken) -- current active token + ntfSubQ :: TBQueue (NtfSupervisorCommand, NonEmpty ConnId) + ntfWorkers :: TMap NtfServer Worker -- per-NTF-server + ntfSMPWorkers :: TMap SMPServer Worker -- per-SMP-server + ntfTknDelWorkers :: TMap NtfServer Worker -- token deletion +``` + +The main loop (`runNtfSupervisor`) reads commands from `ntfSubQ` and dispatches to `processNtfCmd`. Commands are only enqueued when `hasInstantNotifications` is true (active token in `NMInstant` mode). + +### Dual worker pipeline + +SMP workers and NTF workers form a two-stage pipeline, communicating through the DB-persisted `NtfSubAction`: + +**Stage 1 - SMP workers** (`runNtfSMPWorker`): +- `NSASmpKey`: Generates auth+DH key pairs, sends `NKEY` to SMP server, stores `ClientNtfCreds`, then sets action to `NSANtf NSACreate` and kicks NTF workers. +- `NSASmpDelete`: Resets notifier credentials, sends `NDEL` to SMP server, deletes the subscription. + +**Stage 2 - NTF workers** (`runNtfWorker`): +- `NSACreate`: Sends `SNEW` to NTF server, stores `ntfSubId`, schedules first check. +- `NSACheck`: Sends `SCHK` to NTF server. AUTH errors from the check are handled separately - those subscriptions are immediately recreated via `recreateNtfSub`. 
For successful checks, if the subscription is in a subscribe-able status (`NSNew`, `NSPending`, `NSActive`, `NSInactive`), reschedules next check. Any other status (ended, deleted, service error, etc.) also triggers recreation from scratch (resets to `NSASmpKey`). + +### Cross-protocol link + +The SMP workers (`enableQueuesNtfs` / `disableQueuesNtfs` in `Agent/Client.hs`) use the agent's normal SMP client pool to send `NKEY`/`NDEL` to SMP servers. This is the cross-protocol dependency visible in the agent architecture - notification subscription setup requires SMP protocol operations. + +### Subscription state machine + +``` +(new connection, notifications enabled) + -> NSASMP NSASmpKey -- SMP worker: send NKEY to SMP server + -> NSANtf NSACreate -- NTF worker: send SNEW to NTF server + -> NSANtf NSACheck -- NTF worker: periodic SCHK + -> (steady state) + +(notifications disabled or connection deleted) + -> NSASMP NSASmpDelete -- SMP worker: send NDEL to SMP server + -> (subscription deleted) + +(check fails: subscription ended/deleted/auth) + -> NSASMP NSASmpKey -- restart from scratch +``` + +Each action is persisted in the store before execution, so the pipeline resumes after agent restart. Workers use `withRetryInterval` for temporary errors. + +### NotificationsMode + +- **NMInstant**: NTF server maintains active NSUB subscriptions and sends a push as soon as it receives an `NMSG` from the SMP server (the SMP server itself delivers `NMSG` in periodic batches via `deliverNtfsThread`, so delivery is not instantaneous end-to-end). Requires the full dual-worker pipeline. +- **NMPeriodic**: No NSUB subscriptions. NTF server sends periodic `PNCheckMessages` background notifications based on `tknCronInterval` (set via `TCRN`). Device wakes and fetches messages on its own schedule. + +Switching from NMInstant to NMPeriodic triggers `deleteNtfSubs`, which flushes the `ntfSubQ` and sends `NSCSmpDelete` commands through the async worker pipeline to remove all notification subscriptions. 
+ +--- + +## Push notification processing + +**Source**: [Agent.hs](../../src/Simplex/Messaging/Agent.hs), [Notifications/Server.hs](../../src/Simplex/Messaging/Notifications/Server.hs) + +### getNotificationConns - device wake path + +When the device wakes from a push notification, the app calls `getNotificationConns`: + +1. Retrieves the active token's `ntfDhSecret`. +2. Decrypts the push payload using `ntfDhSecret` and the nonce from the APNs notification. +3. Parses the result as `NonEmpty PNMessageData` (semicolon-separated list). +4. For each entry: + - Looks up the `RcvQueue` by `smpQueue` (`SMPServer` + `notifierId`) via `getNtfRcvQueue`. + - Decrypts `encNMsgMeta` using the queue's `rcvNtfDhSecret` and `nmsgNonce` to get `NMsgMeta {msgId, msgTs}`. +5. Filters "init" notifications (all but the last) by comparing `msgTs` against `lastBrokerTs` - notifications with timestamps not newer than the last seen broker timestamp are discarded. If `lastBrokerTs` is not set, the notification passes through. +6. Returns `NonEmpty NotificationInfo` for the app to fetch actual messages. + +### Token registration state machine + +`registerNtfToken` handles multiple states based on `(ntfTokenId, ntfTknAction)`: + +- `(Nothing, Just NTARegister)`: Re-register (first attempt failed after key generation). +- `(Just tknId, Nothing)`: Same device token - re-register; different token - replace via `TRPL`. +- `(Just tknId, Just NTAVerify code)`: Same device token - verify; different token - replace via `TRPL`. +- `(Just tknId, Just NTACheck)`: Same device token - check status, then initialize or delete subscriptions based on mode; different token - replace via `TRPL`. + +All `(Just tknId, ...)` branches check whether the device token changed and fall through to `replaceToken` on mismatch. 
+ +### ntfSubQ writers + +The `ntfSubQ` is written by multiple paths in `Agent.hs`, all via `sendNtfSubCommand`: +- `sendNtfCreate` - during `subscribeConnections_` and `subscribeAllConnections'` (writes both `NSCCreate` and `NSCSmpDelete` depending on per-connection `enableNtfs`) +- `toggleConnectionNtfs'` - when the app enables/disables notifications for a connection +- `initializeNtfSubs` / `deleteNtfSubs` - during token activation and mode switching +- `newQueueNtfSubscription` - when joining a new connection +- `unsubNtfConnIds` - writes `NSCDeleteSub` during connection deletion +- `ICQDelete` async command handler - during queue rotation diff --git a/spec/topics/subscriptions.md b/spec/topics/subscriptions.md index b7bc9fa1cf..54e1573ee2 100644 --- a/spec/topics/subscriptions.md +++ b/spec/topics/subscriptions.md @@ -24,7 +24,7 @@ The router tracks which client connection is subscribed to each queue. At most o 1. **STM re-evaluation**: any transaction reading the TVar automatically re-evaluates when the subscriber changes (disconnects, gets displaced). This is used by `tryDeliverMessage` - if the subscriber disconnects mid-delivery, the STM transaction retries and sees `Nothing`. -2. **Reconnection continuity**: when a mobile client disconnects and reconnects, the TVar is reused rather than recreated. Subscriptions that were made at any point are never removed from the map - this is a deliberate trade-off for intermittently connected mobile clients. +2. **Reconnection continuity**: when a mobile client disconnects and reconnects, the TVar can be reused rather than recreated if a new subscription is established before cleanup. On disconnect, `deleteSubcribedClient` removes entries from the map (with a `sameClient` guard to avoid removing a newer subscriber). The `SubscribedClients` constructor is not exported from `Server/Env/STM.hs` (only the type is). All access goes through `getSubscribedClient` (IO, outside STM) and `upsertSubscribedClient` (STM). 
This prevents accidental use of `TM.lookup` inside STM transactions, which would add the entire TMap to the transaction's read set. @@ -82,12 +82,12 @@ The router delivers at most one unacknowledged message per subscription. The `de **Phase 1 - outside STM**: `getSubscribedClient` reads the `SubscribedClients` TMap via `readTVarIO` (IO, not STM). If no subscriber exists, the function returns immediately without entering any STM transaction. This avoids transaction overhead for queues with no active subscriber. -**Phase 2 - STM transaction** (`deliverToSub`): reads the client TVar (inside STM, so the transaction re-evaluates if the subscriber changes), checks `subThread == NoSub` and `delivered == Nothing`. Then: +**Phase 2 - STM transaction** (`deliverToSub`): reads the client TVar (inside STM, so the transaction re-evaluates if the subscriber changes), checks that `subThread` is `ServerSub` (not `ProhibitSub`), reads the inner `SubscriptionThread` TVar for `NoSub`, and checks `delivered == Nothing`. Then: - If the client's `sndQ` is **not full**: delivers the message directly in the same STM transaction (`writeTBQueue sndQ`), sets `delivered`. No thread is needed. This is the fast path. - If the client's `sndQ` is **full**: sets `subThread = SubPending` and returns the client + sub for phase 3. -**Phase 3 - forked thread** (`forkDeliver`): a `deliverThread` is spawned that blocks until the `sndQ` has room. Before delivering, it re-checks that the subscriber is still the same client and `delivered` is still `Nothing` - handling the race where the client disconnected and a new one subscribed between phases 2 and 3. +**Phase 3 - forked thread** (`forkDeliver`): a `deliverThread` is spawned that blocks until the `sndQ` has room. Before delivering, it re-checks that the subscriber is still the same client and `delivered` is still `Nothing` - handling the race where the client disconnected and a new one subscribed between phases 2 and 3. 
Note: for service-subscribed queues, phase 1 dispatches to `serviceSubscribers` (by ServiceId), but `deliverThread` in phase 3 always uses `queueSubscribers` (by QueueId) - if the queue is only service-subscribed, the phase 3 lookup silently no-ops. ### Per-queue encryption @@ -137,9 +137,9 @@ Router MSG → TLS → protocol client rcvQ ``` The protocol client's `processMsg` thread classifies each incoming transmission: -- **Non-empty corrId**: response to a pending command - delivered to the waiting `getResponse` caller via `responseVar`. +- **Non-empty corrId, matching pending command**: response to a pending command - delivered to the waiting `getResponse` caller via `responseVar`. - **Empty corrId**: server-initiated push (MSG, END, DELD, ENDS) - wrapped as `STEvent` and forwarded to `msgQ`. -- **Expired/unexpected responses**: also forwarded to `msgQ` as `STResponse`. +- **Non-empty corrId, no matching command**: forwarded to `msgQ` as `STUnexpectedError`. Expired responses (command was pending but timed out) are forwarded as `STResponse` only if the entity ID matches. The agent's `subscriber` thread reads from `msgQ` and processes all events under `agentOperationBracket AORcvNetwork`. @@ -173,8 +173,7 @@ After disconnect, the queue's messages remain stored. The next client to SUB the When the protocol client detects a TLS disconnect, `smpClientDisconnected` fires in the agent: -1. `removeSessVar` with CAS check (monotonic `sessionVarId` prevents stale callbacks from removing newer clients). -2. `setSubsPending` demotes all active subscriptions for the matching session to pending in `currentSubs`. +1-2. Atomically (single STM transaction via `removeClientAndSubs`): `removeSessVar` with CAS check (monotonic `sessionVarId` prevents stale callbacks from removing newer clients), then `setSubsPending` demotes all active subscriptions for the matching session to pending in `currentSubs`. 3. `DOWN srv connIds` is sent to the application for affected connections. 4. 
Resubscription begins - the mechanism depends on transport session mode: - **Entity-session mode**: `resubscribeSMPSession` spawns a persistent worker thread. @@ -213,7 +212,7 @@ Service subscriptions are a bulk mechanism where one `SUBS n idsHash` command su ### SUBS flow on the router -1. `sharedSubscribeService` checks the actual queue count and IDs hash against the stored service state, and enqueues a `CSService` event to `subQ` for `serverThread` to process (registration in `serviceSubscribers` happens asynchronously). +1. `sharedSubscribeService` queries the actual queue count and IDs hash from the store, computes drift statistics (for monitoring, not enforcement), and enqueues a `CSService` event to `subQ` for `serverThread` to process (registration in `serviceSubscribers` happens asynchronously). 2. If this is a new service subscription (not previously subscribed): `deliverServiceMessages` iterates all service-associated queues via `foldRcvServiceMessages`, creates per-queue `Sub` entries, and delivers pending messages. 3. After iteration completes, `ALLS` is sent to signal the client that all pending messages have been delivered. From 259e950282b15214a4e2dbf12a9795cab8b9c7b1 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 08:52:52 +0000 Subject: [PATCH 71/91] transport --- spec/topics/transport.md | 266 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 spec/topics/transport.md diff --git a/spec/topics/transport.md b/spec/topics/transport.md new file mode 100644 index 0000000000..2c3de1cf1a --- /dev/null +++ b/spec/topics/transport.md @@ -0,0 +1,266 @@ +# Transport + +How data moves over the wire: TLS infrastructure, protocol handshake family, block framing, transmission encoding, version negotiation, and connection management. 
This is the cross-cutting view spanning TLS setup, protocol-specific handshakes, block-level framing, and the protocol client's thread architecture. + +For service certificate handshake extensions, see [client-services.md](client-services.md). For the protocol client's role in subscription flow, see [subscriptions.md](subscriptions.md). For the SMP protocol specification, see [simplex-messaging.md](../../protocol/simplex-messaging.md). + +- [TLS infrastructure](#tls-infrastructure) +- [Handshake protocol family](#handshake-protocol-family) +- [Block framing](#block-framing) +- [Transmission encoding and signing](#transmission-encoding-and-signing) +- [Version negotiation](#version-negotiation) +- [Connection management](#connection-management) + +--- + +## TLS infrastructure + +**Source**: [Transport.hs](../../src/Simplex/Messaging/Transport.hs), [Transport/Credentials.hs](../../src/Simplex/Messaging/Transport/Credentials.hs), [Transport/Client.hs](../../src/Simplex/Messaging/Transport/Client.hs) + +### Certificate generation + +`genCredentials` generates Ed25519 key pairs and self-signed (or parent-signed) X.509 v3 certificates. Validity periods use an `Hours` offset type. The certificate serial number is always 1; nanoseconds are stripped from timestamps during encoding. + +For CA + leaf chains (used by routers), a root CA certificate signs a leaf certificate. The leaf's private key is used for per-session signing. For self-signed certificates (used by service clients), a single certificate serves both purposes. 
+ +### TLS parameters + +`defaultSupportedParams` configures a minimal, high-security cipher suite: + +| Parameter | Value | +|-----------|-------| +| TLS versions | TLS 1.3, TLS 1.2 | +| TLS 1.3 cipher | CHACHA20-POLY1305-SHA256 | +| TLS 1.2 cipher | ECDHE-ECDSA-CHACHA20-POLY1305-SHA256 | +| Hash-signature pairs | Ed448, Ed25519 (both HashIntrinsic) | +| DH groups | X448, X25519 | +| Secure renegotiation | Disabled | + +`defaultSupportedParamsHTTPS` extends this with browser-compatible ciphers (RSA, ECDSA with SHA256/384/512, FFDHE groups, P521) for XFTP web clients. + +### Session identity + +Both sides derive `sessionId` from the TLS-unique channel binding value (RFC 5929). The server reads `T.getPeerFinished`; the client reads `T.getFinished`. This `sessionId` is used throughout the session - in handshake validation, transmission signing, and block encryption key derivation. + +--- + +## Handshake protocol family + +**Source**: [Transport.hs](../../src/Simplex/Messaging/Transport.hs), [Notifications/Transport.hs](../../src/Simplex/Messaging/Notifications/Transport.hs) + +All three protocols (SMP, NTF, XFTP) use the same TLS transport, but their application-level handshakes differ in complexity. SMP and NTF use a block-based handshake over TLS; XFTP uses HTTP/2 POST with a custom handshake. 
+ +### SMP handshake - two messages plus optional third + +**Message 1 (router to client)**: `SMPServerHandshake` contains: +- `smpVersionRange` - negotiable version range (uses ALPN to select current vs legacy range) +- `sessionId` - TLS-unique channel binding +- `authPubKey` - `CertChainPubKey`: certificate chain plus X25519 public key signed with the certificate's signing key (v7+) + +**Message 2 (client to router)**: `SMPClientHandshake` contains: +- `smpVersion` - agreed maximum version from intersection +- `keyHash` - SHA256 of router's root CA certificate (identity verification) +- `authPubKey` - client's X25519 public key for DH agreement (v7+) +- `proxyServer` - boolean flag to disable transport block encryption (v14+) +- `clientService` - service credentials with `serviceRole` and `serviceCertKey` (v16+) + +**Message 3 (router to client, conditional)**: Sent only when `clientService` is present. The router verifies the TLS peer certificate matches the handshake certificate chain, extracts the fingerprint, creates or retrieves a `ServiceId`, and returns `SMPServerHandshakeResponse {serviceId}` (or `SMPServerHandshakeError` on failure). + +### NTF handshake - simplified two messages + +The NTF handshake follows the same server-first pattern but is simpler: + +| Difference | SMP | NTF | +|-----------|-----|-----| +| Block size | 16384 bytes | 512 bytes | +| Client auth key | X25519 DH public key | None | +| Service certificates | v16+ | Not supported | +| Block encryption | v11+ | Not supported | +| Batching | v4+ | v2+ | +| Version range | v6 - v19 | v1 - v3 | + +`NtfServerHandshake` sends version range, sessionId, and signed X25519 key (present at v2+, absent at v1). `NtfClientHandshake` returns only version and keyHash. No client public key exchange, no service certificates, no block encryption. + +### XFTP handshake - HTTP/2 based + +XFTP does not use the block-based TLS handshake at all. It uses HTTP/2 POST with ALPN `"xftp/1"`. 
The client sends `XFTPClientHello` (optional 32-byte web challenge for identity proof); the server responds with `XFTPServerHandshake` containing a signed challenge response and `CertChainPubKey`. Block size is 16384 bytes (same as SMP). + +### Block encryption setup (SMP only, v11+) + +After the handshake DH agreement, both sides compute a shared `DhSecretX25519`. `blockEncryption` derives chain keys via `sbcInit`: + +``` +sbcInit sessionId dhSecret + -> HKDF-SHA512(salt=sessionId, ikm=dhSecret, info="SimpleXSbChainInit", len=64) + -> split into (sndChainKey, rcvChainKey) +``` + +Each block encryption advances the chain key: +``` +sbcHkdf chainKey + -> HKDF-SHA512(salt="", ikm=chainKey, info="SimpleXSbChain", len=88) + -> split into (newChainKey[32], aesKey[32], nonce[24]) +``` + +This provides per-block forward secrecy - each block uses a different key, and old keys cannot be derived from new ones. The client swaps send/receive keys (its send key = server's receive key). + +Block encryption is disabled when `proxyServer == True` (proxy connections already have their own encryption layer) and when the version is below v11. + +--- + +## Block framing + +**Source**: [Transport.hs](../../src/Simplex/Messaging/Transport.hs), [Protocol.hs](../../src/Simplex/Messaging/Protocol.hs) + +### Block sizes + +| Protocol | Block size | Effective payload | +|----------|-----------|-------------------| +| SMP | 16384 bytes | 16363 (single in batch) or 16382 (unbatched) | +| NTF | 512 bytes | 491 (single in batch) or 510 (unbatched) | +| XFTP | 16384 bytes | Same as SMP | + +Batch overhead: 2 (pad) + 1 (count byte) + 16 (auth tag) + 2 (`Large` Word16 prefix per transmission) = 21 bytes for a single-item batch. + +### Reading and writing blocks + +`tPutBlock` pads the message to exactly `blockSize` bytes: +- **Without block encryption**: `C.pad` writes a 2-byte big-endian length prefix, then the message, then `'#'` characters to fill the block. 
+- **With block encryption** (v11+): `sbEncrypt` with the chain-derived key and nonce. The available payload is reduced by 16 bytes (Poly1305 auth tag). + +`tGetBlock` reads exactly `blockSize` bytes and reverses the process. If the received data is not exactly `blockSize` bytes, an EOF error is raised. + +### Batch format + +When `batch` is enabled (SMP v4+, NTF v2+), multiple transmissions are packed into a single block: + +1. One byte: transmission count (1-255) +2. Each transmission wrapped in `Large` encoding (fixed 2-byte Word16 length prefix + content) +3. Total size of all `Large`-encoded transmissions must fit in `blockSize - 19` bytes (2 pad + 1 count + 16 auth tag) + +`batchTransmissions_` packs transmissions left-to-right into batches. When the next transmission would exceed the remaining space (or the count reaches 255), a new batch starts. Transmissions that individually exceed the batch limit produce a `TBError TELargeMsg`. + +`tPut` encodes a list of transmissions into batches via `batchTransmissions`, then writes each batch as a separate block via `tPutBlock`. Results are collected per-transmission, not per-block. + +--- + +## Transmission encoding and signing + +**Source**: [Protocol.hs](../../src/Simplex/Messaging/Protocol.hs), [Client.hs](../../src/Simplex/Messaging/Client.hs) + +### Wire format + +`encodeTransmission_` produces the core transmission bytes: + +``` +corrId || entityId || encodedCommand +``` + +- `corrId`: variable-length correlation ID (empty for server-initiated pushes) +- `entityId`: queue/entity identifier +- `encodedCommand`: protocol-specific command encoding + +### Session ID handling (`implySessId`) + +For v7+ (`authCmdsSMPVersion`), `implySessId` is `True`. 
This affects how `sessionId` is used: + +- **`tForAuth`** (what gets signed): always includes `sessionId` prefix +- **`tToSend`** (what goes on the wire): excludes `sessionId` when `implySessId == True` + +This saves bandwidth - the session ID is implicit (both sides know it from the TLS handshake) but still covered by the signature, preventing session fixation attacks. + +`tForAuth` is lazy (uses `~ByteString`) to avoid computing the signed representation when no signing key is present. + +### Dual signature scheme + +`authTransmission` produces `TAuthorizations` - a tuple of entity auth plus optional service signature: + +**Entity auth** (always present when key provided): +- X25519 keys: `C.cbAuthenticate` using the server's public key, the per-queue private key, correlation nonce, and the signing content (see below) +- Ed25519/Ed448 keys: standard signature over the signing content + +**Service auth** (v16+, when `serviceAuth == True` and `clientService` exists): +- The signing content becomes `serviceCertHash || tForAuth` (instead of plain `tForAuth`) - binding the service identity to the queue operation, preventing MITM service substitution within TLS +- Service session key additionally signs over `tForAuth` alone + +Without active service auth, the signing content is `tForAuth` directly. + +The dual signature ensures that even within a TLS session, an attacker cannot substitute a different service certificate without invalidating the entity key signature. + +--- + +## Version negotiation + +**Source**: [Version.hs](../../src/Simplex/Messaging/Version.hs), [Transport.hs](../../src/Simplex/Messaging/Transport.hs) + +### Range intersection + +`VersionRange` is an inclusive `(min, max)` pair with nominal typing per protocol (SMP, NTF, XFTP use distinct phantom types via `VersionScope`). + +`compatibleVRange` computes the intersection of two ranges: `max(min1, min2)` to `min(max1, max2)`. 
Returns `Nothing` if the intersection is empty (no compatible version exists). The agreed version is the maximum of the intersection range. + +`compatibleVRange'` caps a range by a single version (used when the peer advertises a specific maximum rather than a range). + +### Version-gated features + +Feature availability is controlled by version constants. Key SMP version gates: + +| Version | Feature | +|---------|---------| +| v4 | Command batching | +| v7 | Authenticated encryption, implied session ID | +| v9 | SKEY for faster sender handshake | +| v11 | Block encryption with forward secrecy | +| v14 | `proxyServer` handshake property | +| v16 | Service certificates | +| v19 | Service subscriptions (SUBS/NSUBS) | + +### Anti-fingerprinting version cap + +`proxiedSMPRelayVersion` (v18) is the maximum version an SMP proxy advertises to destination routers. The proxy's actual version may be higher (currently v19), but by capping the proxied connection, clients behind the proxy cannot be fingerprinted by the destination router based on their SMP version. All proxied clients appear as v18 or below. + +### Proxy version downgrade logic + +When `smpClientHandshake` detects it is acting as a proxy (`proxyServer == True`) and the destination router's maximum version is below v14 (`proxyServerHandshakeSMPVersion`), it caps the negotiated range at v10 (`deletedEventSMPVersion`). This disables transport block encryption between proxy and relay - transport encryption at v11 would increase message size, breaking clients at v10 or earlier. 
+ +--- + +## Connection management + +**Source**: [Client.hs](../../src/Simplex/Messaging/Client.hs), [Transport/KeepAlive.hs](../../src/Simplex/Messaging/Transport/KeepAlive.hs) + +### Four concurrent threads + +Each protocol client connection runs four concurrent threads via `raceAny_` - if any thread exits, all are cancelled and the disconnect handler fires: + +**send**: reads `(Maybe Request, ByteString)` tuples from `sndQ` (bounded `TBQueue`). For requests with a `responseVar`, checks the `pending` flag before sending (a cancelled request is silently skipped). Transport errors on write are delivered to the waiting `responseVar`. + +**receive**: calls `tGetClient` in a loop to read and parse blocks. Updates `lastReceived` timestamp and resets `timeoutErrorCount` to 0 on each successful read. + +**process**: reads parsed transmissions from `rcvQ` and classifies each by correlation ID: +- Empty corrId: server-initiated push - forwarded to `msgQ` as `STEvent` (any response with empty corrId is classified this way; typical types are MSG, END, DELD, ENDS) +- Matching pending command: response - delivered to the command's `responseVar` +- No matching command: forwarded to `msgQ` as `STUnexpectedError` + +**monitor** (optional, disabled when `smpPingInterval == 0`): sends application-level PING when the connection is idle for `smpPingInterval` (default 600 seconds / 10 minutes), but only after `sendPings` is explicitly enabled by the caller. Tracks consecutive timeout errors via `timeoutErrorCount`. Drops the client after `smpPingCount` (default 3) consecutive timeouts, but only if at least 15 minutes have passed since the last received response (recovery window). 
+ +### TCP keep-alive + +`defaultKeepAliveOpts` configures OS-level TCP keep-alive probes: + +| Parameter | Value | Socket option | +|-----------|-------|---------------| +| `keepIdle` | 30 seconds | TCP_KEEPIDLE (Linux) / TCP_KEEPALIVE (macOS) | +| `keepIntvl` | 15 seconds | TCP_KEEPINTVL | +| `keepCnt` | 4 probes | TCP_KEEPCNT | + +TCP keep-alive detects dead connections at the OS level. The application-level PING/PONG provides a higher-level liveness check that also validates the protocol layer. + +### Disconnect and teardown + +All four threads run inside `raceAny_` with `E.finally disconnected`. When any thread exits (network error, timeout, or protocol error), the `finally` handler: + +1. Fires the `disconnected` callback provided by the caller (e.g., `smpClientDisconnected` in the agent) +2. The agent callback demotes subscriptions, fires DOWN events, and initiates resubscription + +The `connected` TVar is set to `True` after the handshake succeeds and before the threads start. Note: in the protocol client, this TVar is not reset on disconnect - disconnect detection relies on thread cancellation via `raceAny_` and the `disconnected` callback, not STM re-evaluation. (The server-side `Client` type has a separate `connected` TVar that is reset in `clientDisconnected`.) From 31158ab02e653eeda9dc50f1b14aacf34e5c75aa Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 09:21:01 +0000 Subject: [PATCH 72/91] update --- spec/topics/transport.md | 67 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/spec/topics/transport.md b/spec/topics/transport.md index 2c3de1cf1a..2cce46f77f 100644 --- a/spec/topics/transport.md +++ b/spec/topics/transport.md @@ -42,6 +42,25 @@ For CA + leaf chains (used by routers), a root CA certificate signs a leaf certi Both sides derive `sessionId` from the TLS-unique channel binding value (RFC 5929). 
The server reads `T.getPeerFinished`; the client reads `T.getFinished`. This `sessionId` is used throughout the session - in handshake validation, transmission signing, and block encryption key derivation. +### Certificate chain semantics + +**Source**: [Transport/Shared.hs](../../src/Simplex/Messaging/Transport/Shared.hs) + +Routers use variable-length certificate chains. The `chainIdCaCerts` function extracts the identity certificate (`idCert`) based on chain length: + +| Chain length | Structure | Identity certificate | +|--------------|-----------|---------------------| +| 0 | `[]` | Rejected as CCEmpty | +| 1 | `[cert]` | Self-signed: `idCert = cert` | +| 2 | `[leaf, ca]` | Current online/offline pattern: `idCert = ca` | +| 3 | `[leaf, id, ca]` | With operator certificate: `idCert = id` (second) | +| 4 | `[leaf, id, net, ca]` | With network certificate: `idCert = id` (second, network cert ignored) | +| 5+ | - | Rejected as CCLong | + +The **router identity** is always determined by `idCert` - its SHA256 fingerprint is compared against the `keyHash` the client expects. For 2-cert chains (the common case), `idCert` equals the CA. For 3+ cert chains, `idCert` is always the **second certificate** (index 1). + +The client verifies the router identity by computing `XV.getFingerprint idCert X.HashSHA256` and comparing against the expected `keyHash`. This allows operators to rotate leaf certificates without changing the router's public identity. 
+ --- ## Handshake protocol family @@ -59,7 +78,7 @@ All three protocols (SMP, NTF, XFTP) use the same TLS transport, but their appli **Message 2 (client to router)**: `SMPClientHandshake` contains: - `smpVersion` - agreed maximum version from intersection -- `keyHash` - SHA256 of router's root CA certificate (identity verification) +- `keyHash` - SHA256 of router's identity certificate (`idCert`, see certificate chain semantics above) - `authPubKey` - client's X25519 public key for DH agreement (v7+) - `proxyServer` - boolean flag to disable transport block encryption (v14+) - `clientService` - service credentials with `serviceRole` and `serviceCertKey` (v16+) @@ -73,7 +92,7 @@ The NTF handshake follows the same server-first pattern but is simpler: | Difference | SMP | NTF | |-----------|-----|-----| | Block size | 16384 bytes | 512 bytes | -| Client auth key | X25519 DH public key | None | +| Client auth key | X25519 DH public key | None (server sends key, client does not) | | Service certificates | v16+ | Not supported | | Block encryption | v11+ | Not supported | | Batching | v4+ | v2+ | @@ -83,7 +102,7 @@ The NTF handshake follows the same server-first pattern but is simpler: ### XFTP handshake - HTTP/2 based -XFTP does not use the block-based TLS handshake at all. It uses HTTP/2 POST with ALPN `"xftp/1"`. The client sends `XFTPClientHello` (optional 32-byte web challenge for identity proof); the server responds with `XFTPServerHandshake` containing a signed challenge response and `CertChainPubKey`. Block size is 16384 bytes (same as SMP). +**Source**: [FileTransfer/Transport.hs](../../src/Simplex/FileTransfer/Transport.hs), [FileTransfer/Server.hs](../../src/Simplex/FileTransfer/Server.hs), [FileTransfer/Client.hs](../../src/Simplex/FileTransfer/Client.hs) + +XFTP does not use the block-based TLS handshake. It uses HTTP/2 POST with ALPN `"xftp/1"`. The handshake has two flows depending on client type. + +**Native client handshake** (standard two-step exchange, followed by server-side validation): + +1.
Client sends POST with no body, server responds with `XFTPServerHandshake`: + - `xftpVersionRange` - negotiable version range + - `sessionId` - TLS-unique channel binding + - `authPubKey` - `CertChainPubKey` (always required, non-optional) + - `webIdentityProof` - absent for native clients + +2. Client sends POST with `XFTPClientHandshake`: + - `xftpVersion` - agreed version + - `keyHash` - SHA256 of router's identity certificate + +3. Server validates `keyHash` against `idCert` fingerprint (currently expects exactly a 2-cert chain: `[leaf, ca]` where `ca` is identity) + +**Web client handshake** (three messages plus client-side verification of the identity proof): + +Web browsers cannot access the TLS certificate chain for verification. The web handshake adds a challenge-response mechanism: + +1. Client sends POST with `xftp-web-hello: 1` header and `XFTPClientHello`: + - `webChallenge` - optional 32-byte random challenge + +2. Server responds with `XFTPServerHandshake`: + - `webIdentityProof` - signature over `(webChallenge || sessionId)` using the router's signing key + +3. Client verifies `webIdentityProof` using the public key from `authPubKey`, confirming server identity without needing TLS certificate access + +4. Client sends POST with `xftp-handshake: 1` header and `XFTPClientHandshake` (same as native step 2) + +The server tracks handshake state per `sessionId` in a `TMap SessionId Handshake`: +- `HandshakeSent pk` - hello received, awaiting client handshake +- `HandshakeAccepted thParams` - handshake complete, ready for commands + +Web hello can be re-sent in any state (server reuses existing X25519 key if already generated). Block size is 16384 bytes (same as SMP).
### Block encryption setup (SMP only, v11+) @@ -99,12 +154,14 @@ Each block encryption advances the chain key: ``` sbcHkdf chainKey -> HKDF-SHA512(salt="", ikm=chainKey, info="SimpleXSbChain", len=88) - -> split into (newChainKey[32], aesKey[32], nonce[24]) + -> split into (newChainKey[32], secretBoxKey[32], nonce[24]) ``` +The keys are used with XSalsa20-Poly1305 (NaCl secret_box), not AES. + This provides per-block forward secrecy - each block uses a different key, and old keys cannot be derived from new ones. The client swaps send/receive keys (its send key = server's receive key). -Block encryption is disabled when `proxyServer == True` (proxy connections already have their own encryption layer) and when the version is below v11. +Block encryption is disabled when `proxyServer == True` (proxy connections already have their own encryption layer), when the version is below v11, or when no DH session secret is available (no `thAuth` or missing `sessSecret`). --- From 73d12aad8a331794f9bd6452c7ce08bc7ca45ed0 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 10:22:24 +0000 Subject: [PATCH 73/91] patterns --- spec/topics/patterns.md | 337 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 spec/topics/patterns.md diff --git a/spec/topics/patterns.md b/spec/topics/patterns.md new file mode 100644 index 0000000000..a5d61500e2 --- /dev/null +++ b/spec/topics/patterns.md @@ -0,0 +1,337 @@ +# Code Patterns + +Cross-cutting patterns used throughout the codebase: exception handling, encoding utilities, compression, concurrent data structures, and batch processing. These patterns provide consistency, type safety, and correctness guarantees across all modules. + +For protocol-specific encoding details, see [transport.md](transport.md). For cryptographic operations, see the inline documentation in [Crypto.hs](../../src/Simplex/Messaging/Crypto.hs). 
+ +- [Exception handling](#exception-handling) +- [Binary encoding](#binary-encoding) +- [String encoding](#string-encoding) +- [Compression](#compression) +- [Concurrent data structures](#concurrent-data-structures) +- [Batch processing](#batch-processing) + +--- + +## Exception handling + +**Source**: [Agent/Protocol.hs](../../src/Simplex/Messaging/Agent/Protocol.hs), [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs), [Agent/Store/SQLite.hs](../../src/Simplex/Messaging/Agent/Store/SQLite.hs) + +### Error type hierarchy + +The codebase uses a hierarchical error type structure: + +**`AgentErrorType`** - top-level error type for agent client responses: +- `CMD` - command/response errors with context string +- `CONN` - connection errors with context (NOT_FOUND, DUPLICATE, SIMPLEX) +- `SMP`/`NTF`/`XFTP` - protocol-specific errors with server address +- `BROKER` - transport-level broker errors +- `AGENT` - internal agent errors (A_DUPLICATE, A_PROHIBITED) +- `INTERNAL` - implementation bugs (should never occur in production) +- `CRITICAL` - critical errors with optional restart offer + +**`StoreError`** - database/storage layer errors: +- `SEInternal` - IO exceptions during database operations +- `SEDatabaseBusy` - database locked/busy (triggers CRITICAL with restart) +- `SEConnNotFound`/`SEUserNotFound` - entity lookup failures +- `SEBadConnType` - wrong connection type for operation + +**`AgentCryptoError`** - cryptographic failures: +- `DECRYPT_AES`/`DECRYPT_CB` - decryption failures +- `RATCHET_HEADER`/`RATCHET_EARLIER Word32`/`RATCHET_SKIPPED Word32`/`RATCHET_SYNC` - double ratchet state issues + +### Monad stack + +``` +AM a = ExceptT AgentErrorType (ReaderT Env IO) a -- full error handling +AM' a = ReaderT Env IO a -- no error handling (for batch ops) +``` + +### Store access patterns + +**Basic operations** lift IO actions into the AM monad: + +```haskell +withStore :: AgentClient -> (DB.Connection -> IO (Either StoreError a)) -> AM a +withStore' 
:: AgentClient -> (DB.Connection -> IO a) -> AM a -- wraps result in Right +``` + +Both wrap the action in a database transaction and convert `StoreError` to `AgentErrorType` via `storeError`. + +**Error mapping** (key cases from `storeError`): +- `SEConnNotFound`/`SERatchetNotFound` -> `CONN NOT_FOUND` +- `SEConnDuplicate` -> `CONN DUPLICATE` +- `SEBadConnType` -> `CONN SIMPLEX` with context +- `SEUserNotFound` -> `NO_USER` +- `SEAgentError e` -> `e` (propagates wrapped error) +- `SEDatabaseBusy` -> `CRITICAL True` (offers restart) +- Other errors -> `INTERNAL` with error message + +### Error recovery patterns + +**tryError** - attempt an operation, handle failure without throwing: +```haskell +tryError (deleteQueue c NRMBackground rq') >>= \case + Left e -> logError e >> continue + Right () -> success +``` + +**catchAllErrors** - catch errors and run cleanup: +```haskell +getQueueMessage c rq `catchAllErrors` \e -> + atomically (releaseGetLock c rq) >> throwError e +``` + +**catchAll_** - catch all exceptions, return default on failure: +```haskell +notices <- liftIO $ withTransaction store (`getClientNotices` servers) `catchAll_` pure [] +``` + +--- + +## Binary encoding + +**Source**: [Encoding.hs](../../src/Simplex/Messaging/Encoding.hs) + +### Encoding typeclass + +```haskell +class Encoding a where + smpEncode :: a -> ByteString -- encode to binary + smpP :: Parser a -- attoparsec parser + smpDecode :: ByteString -> Either String a -- default via parseAll smpP +``` + +### Primitive encoding + +| Type | Wire format | +|------|-------------| +| `Char` | Single byte | +| `Bool` | `'T'` or `'F'` | +| `Word16` | 2-byte big-endian | +| `Word32` | 4-byte big-endian | +| `Int64` | Two `Word32`s combined | +| `ByteString` | 1-byte length prefix + data (max 255 bytes) | +| `Maybe a` | `'0'` (Nothing) or `'1'` + encoded value | +| `(a, b)` | Concatenated encodings (no separator) | + +### Special wrappers + +**`Tail`** - takes remaining bytes without length prefix: 
+```haskell +newtype Tail = Tail {unTail :: ByteString} +-- smpEncode = unTail (no prefix) +-- smpP = takeByteString +``` + +**`Large`** - for ByteStrings > 255 bytes: +```haskell +newtype Large = Large {unLarge :: ByteString} +-- smpEncode = Word16 length prefix + data +-- smpP = read Word16, take that many bytes +``` + +### List encoding + +```haskell +smpEncodeList :: Encoding a => [a] -> ByteString +-- 1-byte count prefix + concatenated encoded items (max 255 items) + +instance Encoding (NonEmpty a) +-- Same format, fails on empty input during parsing +``` + +--- + +## String encoding + +**Source**: [Encoding/String.hs](../../src/Simplex/Messaging/Encoding/String.hs) + +### StrEncoding typeclass + +```haskell +class StrEncoding a where + strEncode :: a -> ByteString -- human-readable encoding + strP :: Parser a -- parser (defaults to base64url) + strDecode :: ByteString -> Either String a +``` + +Used for addresses, keys, and values displayed to users or in URIs. + +### Base64 URL encoding + +`ByteString` instances use base64url encoding (RFC 4648): +- Alphabet: A-Z, a-z, 0-9, `-`, `_` +- No padding by default in output +- Accepts optional `=` padding on input + +### Tuple and list encoding + +**Tuples** use space separation (via `B.unwords`): +```haskell +strEncode (a, b) = B.unwords [strEncode a, strEncode b] +``` + +**Lists** use comma separation: +```haskell +strEncodeList :: StrEncoding a => [a] -> ByteString +strEncodeList = B.intercalate "," . map strEncode +``` + +### Numeric types + +`Int`, `Word16`, `Word32`, `Int64` encode as decimal strings (not binary). + +### JSON conversion utilities + +```haskell +strToJSON :: StrEncoding a => a -> J.Value +strParseJSON :: StrEncoding a => String -> J.Value -> JT.Parser a +``` + +Convert between `StrEncoding` and JSON string values for API serialization. 
+ +--- + +## Compression + +**Source**: [Compression.hs](../../src/Simplex/Messaging/Compression.hs) + +### Algorithm and thresholds + +Uses Zstandard (zstd) compression at level 3 (moderate compression/speed tradeoff). + +```haskell +maxLengthPassthrough :: Int +maxLengthPassthrough = 180 -- messages <= 180 bytes are not compressed +``` + +### Wire format + +```haskell +data Compressed + = Passthrough ByteString -- tag '0' + 1-byte length + data + | Compressed Large -- tag '1' + 2-byte length + zstd data +``` + +### Decompression bomb protection + +`decompress1` requires the compressed data to declare its decompressed size upfront: + +```haskell +decompress1 :: Int -> Compressed -> Either String ByteString +``` + +The function checks `Z1.decompressedSize` before decompressing. If the declared size exceeds the `limit` parameter (or is not specified), decompression is rejected. This prevents zip-bomb attacks where a small compressed payload would expand to exhaust memory. + +Zstd's `decompress` can return `Error`, `Skip` (empty result), or `Decompress bs'` - all cases are handled explicitly. 
+ +--- + +## Concurrent data structures + +**Source**: [TMap.hs](../../src/Simplex/Messaging/TMap.hs) + +### TMap + +A `TVar`-wrapped immutable `Data.Map`, providing atomic read-modify-write operations via STM: + +```haskell +type TMap k a = TVar (Map k a) +``` + +### STM operations (atomic) + +| Operation | Description | +|-----------|-------------| +| `lookup k m` | Read value for key | +| `member k m` | Check key existence | +| `insert k v m` | Insert/update value | +| `delete k m` | Remove key | +| `lookupInsert k v m` | Atomic lookup-then-insert, returns old value | +| `lookupDelete k m` | Atomic lookup-then-delete, returns deleted value | + +### IO operations (non-transactional) + +```haskell +lookupIO :: Ord k => k -> TMap k a -> IO (Maybe a) +memberIO :: Ord k => k -> TMap k a -> IO Bool +``` + +These bypass STM for read-only access when atomicity with other operations is not needed. + +### Usage pattern + +```haskell +-- Within STM transaction (atomic with other STM ops) +atomically $ do + existing <- TM.lookup key map + case existing of + Nothing -> TM.insert key newValue map + Just _ -> pure () + +-- Outside transaction (simple read) +value <- TM.lookupIO key map +``` + +--- + +## Batch processing + +**Source**: [Agent/Client.hs](../../src/Simplex/Messaging/Agent/Client.hs) + +### withStoreBatch + +Executes multiple database operations in a single transaction: + +```haskell +withStoreBatch :: Traversable t + => AgentClient + -> (DB.Connection -> t (IO (Either AgentErrorType a))) + -> AM' (t (Either AgentErrorType a)) +``` + +All operations run within one database transaction, ensuring: +- **Single commit**: All operations share one transaction and are committed together; a failing operation yields its own `Left` result rather than aborting the batch +- **Isolation**: No partial updates visible to other readers +- **Efficiency**: Single transaction overhead instead of per-operation + +### Result semantics + +Each batched operation produces an individual `Either AgentErrorType a`: +- Partial success is possible (some `Right`, some `Left`) +- If the
transaction itself fails, all results become errors +- Fine-grained error handling per operation + +### Common patterns + +**Store multiple items**: +```haskell +void $ withStoreBatch' c $ \db -> + map (storeDelivery db) deliveries +``` + +**Fetch multiple items**: +```haskell +results <- withStoreBatch c $ \db -> + map (getConnection db) connIds +``` + +**Update multiple items**: +```haskell +void $ withStoreBatch' c $ \db -> + map (\connId -> setConnPQSupport db connId PQSupportOn) connIds +``` + +### withStoreBatch' + +Convenience variant that wraps results in `Right`: + +```haskell +withStoreBatch' :: Traversable t + => AgentClient + -> (DB.Connection -> t (IO a)) + -> AM' (t (Either AgentErrorType a)) +``` + +Use when operations cannot fail (or failures should become `INTERNAL` errors). From c0698817d173122e8353af49278fa53a6232d5ea Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 10:54:12 +0000 Subject: [PATCH 74/91] xftp topic --- spec/topics/xftp.md | 394 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 spec/topics/xftp.md diff --git a/spec/topics/xftp.md b/spec/topics/xftp.md new file mode 100644 index 0000000000..7efd884953 --- /dev/null +++ b/spec/topics/xftp.md @@ -0,0 +1,394 @@ +# XFTP + +File transfer protocol for large files: router storage architecture, protocol commands, agent upload/download pipelines, chunk management, and encryption. XFTP enables secure file sharing by splitting files into encrypted chunks stored across multiple routers. + +For XFTP transport handshake details, see [transport.md](transport.md). For the XFTP protocol specification, see [xftp.md](../../protocol/xftp.md). 
+ +- [Protocol overview](#protocol-overview) +- [Router storage](#router-storage) +- [Protocol commands](#protocol-commands) +- [Agent upload pipeline](#agent-upload-pipeline) +- [Agent download pipeline](#agent-download-pipeline) +- [Chunk encryption](#chunk-encryption) +- [Chunk management](#chunk-management) + +--- + +## Protocol overview + +**Source**: [FileTransfer/Protocol.hs](../../src/Simplex/FileTransfer/Protocol.hs) + +XFTP separates file metadata from file content. A sender uploads encrypted chunks to one or more routers, then shares a file description (containing chunk locations, keys, and digests) with recipients via SMP messaging. + +Key properties: +- File encrypted as a single stream with XSalsa20-Poly1305, then split into chunks +- Chunks are byte ranges of the encrypted file (not independently encrypted) +- Chunks can be replicated across multiple routers +- Recipients download chunks directly from routers +- Router never sees plaintext or file metadata + +### Parties + +| Party | Role | Authentication | +|-------|------|----------------| +| Sender | Creates file, uploads chunks, manages recipients | Per-file sender key | +| Recipient | Downloads chunks, acknowledges receipt | Per-recipient key (created by sender) | + +### File description + +The sender generates a `ValidFileDescription` containing: +- Chunk specifications: server address, recipient ID, recipient key, size, digest +- Encryption key and nonce for the full file +- File size and SHA-512 digest +- Optional redirect to another file description + +--- + +## Router storage + +**Source**: [FileTransfer/Server/Store.hs](../../src/Simplex/FileTransfer/Server/Store.hs) + +### In-memory store + +```haskell +data FileStore = FileStore + { files :: TMap SenderId FileRec, + recipients :: TMap RecipientId (SenderId, RcvPublicAuthKey), + usedStorage :: TVar Int64 + } +``` + +- `files` maps sender IDs to file records +- `recipients` maps recipient IDs to (sender, auth key) for download 
authorization +- `usedStorage` tracks total bytes for quota enforcement + +### File record + +```haskell +data FileRec = FileRec + { senderId :: SenderId, + fileInfo :: FileInfo, -- sndKey, size, digest + filePath :: TVar (Maybe FilePath), -- set after upload + recipientIds :: TVar (Set RecipientId), + createdAt :: RoundedFileTime, -- truncated to 1-hour precision + fileStatus :: TVar ServerEntityStatus + } +``` + +The `filePath` is `Nothing` until FPUT completes. The file is stored at `filesPath/<base64url(senderId)>`. + +### Quota management + +File size is reserved atomically when FNEW is processed. If `usedStorage + fileSize > fileSizeQuota`, the request is rejected with QUOTA error. Storage is released when files are deleted or expire. + +### File expiration + +Files expire based on `ttl` configuration (default 48 hours). The expiration thread periodically scans files where `createdAt + fileTimePrecision < threshold`. Expired files are deleted from disk and removed from the store. + +`fileTimePrecision` is 3600 seconds (1 hour), providing k-anonymity for file creation times.
+ +--- + +## Protocol commands + +**Source**: [FileTransfer/Protocol.hs](../../src/Simplex/FileTransfer/Protocol.hs), [FileTransfer/Server.hs](../../src/Simplex/FileTransfer/Server.hs) + +### Command summary + +| Command | Party | Purpose | +|---------|-------|---------| +| FNEW | Sender | Create file with metadata and initial recipient keys | +| FADD | Sender | Add recipient auth keys to existing file | +| FPUT | Sender | Upload encrypted chunk data | +| FDEL | Sender | Delete file from router | +| FGET | Recipient | Download file (initiates DH key exchange) | +| FACK | Recipient | Acknowledge download, remove recipient from file | +| PING | Recipient | Keepalive | + +### FNEW - create file + +Request: `FNEW FileInfo (NonEmpty RcvPublicAuthKey) (Maybe BasicAuth)` + +- `FileInfo`: sender's auth key, chunk size (Word32), SHA-256 digest of the chunk +- Recipient keys: one per intended recipient +- Optional basic auth for servers requiring authorization + +Response: `FRSndIds SenderId (NonEmpty RecipientId)` + +The router generates random sender ID and recipient IDs. The sender uses `SenderId` for subsequent commands; recipients receive their `RecipientId` via file description. + +### FPUT - upload chunk + +Request: `FPUT` with chunk data in HTTP/2 body + +The router: +1. Validates sender authorization +2. Reserves storage quota +3. Receives encrypted chunk with timeout +4. Writes to `filesPath/<base64url(senderId)>` +5. Updates `filePath` in file record + +If the file already has a `filePath` (re-upload), the body is discarded and `FROk` returned immediately. + +### FGET - download chunk + +Request: `FGET RcvPublicDhKey` + +The recipient provides an ephemeral X25519 public key for DH agreement. + +Response: `FRFile SrvPublicDhKey C.CbNonce` (server's ephemeral DH key and nonce) + +The router: +1. Generates ephemeral DH key pair +2. Computes shared secret: `dh'(recipientDhKey, serverPrivKey)` +3. Initializes encryption state with shared secret and nonce +4.
Streams encrypted file in HTTP/2 response body + +The recipient uses the returned server DH key and nonce to decrypt the stream. + +### FACK - acknowledge receipt + +Request: `FACK` + +Removes the recipient from the file's recipient set. Once all recipients have acknowledged, only the sender can access the file (until FDEL or expiration). + +### FDEL - delete file + +Request: `FDEL` + +Deletes the file from disk and store, releases quota. All recipient IDs become invalid. + +--- + +## Agent upload pipeline + +**Source**: [FileTransfer/Agent.hs](../../src/Simplex/FileTransfer/Agent.hs), [FileTransfer/Chunks.hs](../../src/Simplex/FileTransfer/Chunks.hs) + +### Upload state machine + +``` +SFSNew -> SFSEncrypting -> SFSEncrypted -> SFSUploading -> SFSComplete + \-> SFSError +``` + +### Phase 1: File preparation (SFSNew -> SFSEncrypted) + +`prepareFile` encrypts the source file: + +1. Generate random `SbKey` and `CbNonce` +2. Create encrypted file structure: + - 8 bytes: encoded content length + - FileHeader: filename and optional metadata (SMP-encoded) + - File content: encrypted in 64KB streaming chunks + - Padding: `'#'` characters to multiple of 16384 bytes + - Auth tag: 16 bytes (Poly1305) +3. Compute SHA-512 digest of encrypted file +4. Calculate chunk boundaries via `prepareChunkSizes` + +### Chunk size selection + +`prepareChunkSizes` selects chunk sizes based on total file size: + +| File size | Chunk size used | +|-----------|-----------------| +| > 3/4 of 4MB (~3.0MB) | 4MB chunks | +| > 3/4 of 1MB (768KB) | 1MB chunks | +| Otherwise | 64KB or 256KB | + +The last chunk may be smaller than the standard size. + +### Phase 2: Chunk registration + +For each chunk: +1. Select XFTP server (different server per chunk recommended) +2. Send FNEW with chunk's digest and recipient keys +3. Store `SndFileChunkReplica` with server-assigned IDs +4. Status: `SFRSCreated` + +### Phase 3: Upload (SFSUploading -> SFSComplete) + +`uploadFileChunk` for each replica: +1. 
If not all recipients added: send FADD +2. Read chunk from encrypted file at (offset, size) +3. Send FPUT with chunk data +4. Update replica status to `SFRSUploaded` +5. Report progress to agent client + +When all chunks are uploaded: mark file `SFSComplete`, generate file description. + +### Error handling + +- Retry with exponential backoff per `reconnectInterval` +- Track consecutive retries per replica +- After `xftpConsecutiveRetries` failures: mark `SFSError` +- Delay and retry count stored in DB for resumption + +--- + +## Agent download pipeline + +**Source**: [FileTransfer/Agent.hs](../../src/Simplex/FileTransfer/Agent.hs) + +### Download state machine + +``` +RFSReceiving -> RFSReceived -> RFSDecrypting -> RFSComplete + \-> RFSError +``` + +### Phase 1: Chunk download (RFSReceiving -> RFSReceived) + +`downloadFileChunk` for each chunk: +1. Verify server is in approved relays (if relay approval required) +2. Generate ephemeral DH key pair +3. Send FGET with public DH key +4. Receive `FRFile` with server's DH key and nonce +5. Compute shared secret, initialize decryption +6. Stream-decrypt chunk to `tmpPath/chunkNo` +7. Verify chunk's SHA-256 digest matches specification +8. Mark replica as `received` + +Replicas are tried in order; if first fails, try next replica of same chunk. + +### Phase 2: Reassembly (RFSReceived -> RFSComplete) + +`decryptFile` reassembles and decrypts: +1. Concatenate all chunk files in order +2. Validate that the total size and SHA-512 digest match the file description +3. Decrypt with file's `SbKey` and `CbNonce`: + - Parse length prefix and FileHeader + - Stream-decrypt content + - Verify auth tag +4. Write to final destination (`savePath`) +5. Delete temporary chunk files +6. Mark `RFSComplete` + +### Redirect files + +If the file description has a `redirect` field: +1. Decrypt the downloaded content +2. Parse as YAML file description +3. Validate size/digest match redirect specification +4. Register actual chunks from redirect description +5.
Download from redirected sources + +This enables indirection for large file descriptions or server migration. + +--- + +## Chunk encryption + +**Source**: [FileTransfer/Crypto.hs](../../src/Simplex/FileTransfer/Crypto.hs), [Messaging/Crypto/File.hs](../../src/Simplex/Messaging/Crypto/File.hs) + +### File encryption (sender side) + +``` +[8-byte length][FileHeader][file content][padding][16-byte auth tag] +``` + +- Algorithm: XSalsa20-Poly1305 (NaCl secret_box) +- Key: random 32-byte `SbKey` +- Nonce: random 24-byte `CbNonce` +- Streaming: 64KB chunks encrypted incrementally +- Padding: `'#'` characters to align to 16384-byte boundary + +### Chunk transport encryption (FGET) + +Each FGET establishes a fresh DH shared secret: +1. Recipient generates ephemeral X25519 key pair +2. Sends public key in FGET request +3. Router generates ephemeral key pair +4. Both compute: `secret = dh(peerPubKey, ownPrivKey)` +5. Router streams chunk encrypted with `cbInit(secret, nonce)` +6. Recipient decrypts with same parameters + +This provides forward secrecy per-download - compromising the file encryption key does not reveal transport keys. + +### Auth tag verification + +The 16-byte Poly1305 auth tag is verified after receiving all chunks: +- Single chunk: tag appended at end +- Multiple chunks: tag in final chunk, verified after concatenation + +Failed auth tag verification produces `CRYPTO` error. 
+ +--- + +## Chunk management + +**Source**: [FileTransfer/Types.hs](../../src/Simplex/FileTransfer/Types.hs) + +### Sender chunk state + +```haskell +data SndFileChunkReplica = SndFileChunkReplica + { sndChunkReplicaId :: Int64, + server :: XFTPServer, + replicaId :: ChunkReplicaId, + replicaKey :: C.APrivateAuthKey, + rcvIdsKeys :: [(ChunkReplicaId, C.APrivateAuthKey)], + replicaStatus :: SndFileReplicaStatus, + delay :: Maybe Int64, + retries :: Int + } + +data SndFileReplicaStatus = SFRSCreated | SFRSUploaded +``` + +- `SFRSCreated`: FNEW sent, replica registered on server +- `SFRSUploaded`: FPUT complete, chunk data stored +- `rcvIdsKeys`: recipient IDs and keys for this replica + +### Recipient chunk state + +```haskell +data RcvFileChunk = RcvFileChunk + { rcvFileChunkId :: Int64, + chunkNo :: Int, + chunkSize :: Word32, + digest :: ByteString, + replicas :: [RcvFileChunkReplica], + fileTmpPath :: FilePath, + chunkTmpPath :: Maybe FilePath + } + +data RcvFileChunkReplica = RcvFileChunkReplica + { rcvChunkReplicaId :: Int64, + server :: XFTPServer, + replicaId :: ChunkReplicaId, + replicaKey :: C.APrivateAuthKey, + received :: Bool, + delay :: Maybe Int64, + retries :: Int + } +``` + +### Replica selection + +Each chunk can have multiple replicas on different servers. The file description includes all replicas; the recipient: +1. Tries first replica +2. On failure, tries next replica +3. Continues until success or all replicas exhausted + +This provides redundancy against server unavailability. + +### Retry handling + +Retry state is stored per-replica with two fields: +- `delay :: Maybe Int64` - milliseconds until next retry +- `retries :: Int` - consecutive failure count + +On failure, delay increases with exponential backoff. State persists in DB for resumption after agent restart. 
+ +### Chunk sizes + +```haskell +chunkSize0 = kb 64 -- 65536 bytes +chunkSize1 = kb 256 -- 262144 bytes +chunkSize2 = mb 1 -- 1048576 bytes +chunkSize3 = mb 4 -- 4194304 bytes + +serverChunkSizes = [chunkSize0, chunkSize1, chunkSize2, chunkSize3] +``` + +Routers validate that uploaded chunks match one of the allowed sizes. This prevents fingerprinting based on exact file sizes. From f56b5940368c5d8773a4f1534c5f5c87daaa95d4 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 11:03:30 +0000 Subject: [PATCH 75/91] remove old topic stubs --- spec/agent-protocol.md | 13 -- spec/compression.md | 84 --------- spec/crypto-ratchet.md | 13 -- spec/crypto-tls.md | 11 -- spec/crypto.md | 19 --- spec/encoding.md | 332 ------------------------------------ spec/ntf-protocol.md | 15 -- spec/ntf-server.md | 11 -- spec/remote-control.md | 11 -- spec/smp-client.md | 11 -- spec/smp-protocol.md | 13 -- spec/smp-server.md | 13 -- spec/storage-agent.md | 11 -- spec/storage-server.md | 9 - spec/transport-http2.md | 13 -- spec/transport-websocket.md | 7 - spec/transport.md | 11 -- spec/version.md | 177 ------------------- spec/xftp-client.md | 11 -- spec/xftp-protocol.md | 13 -- spec/xftp-server.md | 11 -- spec/xrcp-protocol.md | 13 -- 22 files changed, 822 deletions(-) delete mode 100644 spec/agent-protocol.md delete mode 100644 spec/compression.md delete mode 100644 spec/crypto-ratchet.md delete mode 100644 spec/crypto-tls.md delete mode 100644 spec/crypto.md delete mode 100644 spec/encoding.md delete mode 100644 spec/ntf-protocol.md delete mode 100644 spec/ntf-server.md delete mode 100644 spec/remote-control.md delete mode 100644 spec/smp-client.md delete mode 100644 spec/smp-protocol.md delete mode 100644 spec/smp-server.md delete mode 100644 spec/storage-agent.md delete mode 100644 spec/storage-server.md delete mode 100644 spec/transport-http2.md delete mode 100644 spec/transport-websocket.md delete mode 
100644 spec/transport.md delete mode 100644 spec/version.md delete mode 100644 spec/xftp-client.md delete mode 100644 spec/xftp-protocol.md delete mode 100644 spec/xftp-server.md delete mode 100644 spec/xrcp-protocol.md diff --git a/spec/agent-protocol.md b/spec/agent-protocol.md deleted file mode 100644 index b84ffb9ceb..0000000000 --- a/spec/agent-protocol.md +++ /dev/null @@ -1,13 +0,0 @@ -# Agent Protocol Implementation - -> Implements agent connection procedures, queue rotation, and duplex messaging. - -**Protocol reference**: [`protocol/agent-protocol.md`](../protocol/agent-protocol.md) - -## Types - -## Connection Procedures - -## Queue Rotation - -## Functions diff --git a/spec/compression.md b/spec/compression.md deleted file mode 100644 index faa8c275fb..0000000000 --- a/spec/compression.md +++ /dev/null @@ -1,84 +0,0 @@ -# Compression - -> Zstd compression for SimpleX protocol messages. - -**Source file**: [`Compression.hs`](../src/Simplex/Messaging/Compression.hs) - -## Overview - -Optional Zstd compression for SMP message bodies. Short messages bypass compression entirely to avoid overhead. The `Compressed` type carries a tag byte indicating whether the payload is compressed or passthrough, making it self-describing on the wire. - -## Types - -### `Compressed` - -**Source**: `Compression.hs:17-22` - -```haskell -data Compressed - = Passthrough ByteString -- short messages, left intact - | Compressed Large -- Zstd-compressed, 2-byte length prefix -``` - -Wire encoding (`Compression.hs:30-38`): - -``` -Passthrough → '0' ++ smpEncode ByteString (1-byte tag + 1-byte length + data) -Compressed → '1' ++ smpEncode Large (1-byte tag + 2-byte length + data) -``` - -Tags are `'0'` (0x30) and `'1'` (0x31) — same ASCII convention as `Maybe` encoding. - -`Passthrough` uses standard `ByteString` encoding (max 255 bytes, 1-byte length prefix). 
`Compressed` uses `Large` encoding (max 65535 bytes, 2-byte Word16 length prefix), since compressed output can exceed 255 bytes for larger inputs. - -## Constants - -| Constant | Value | Purpose | Source | -|----------|-------|---------|--------| -| `maxLengthPassthrough` | 180 | Messages at or below this length are not compressed | `Compression.hs:24-25` | -| `compressionLevel` | 3 | Zstd compression level | `Compression.hs:27-28` | - -The 180-byte threshold was "sampled from real client data" — messages above this length show rapidly increasing compression ratio. Below 180 bytes, compression overhead (FFI call, dictionary-less Zstd startup) outweighs savings. - -## Functions - -### `compress1` - -**Source**: `Compression.hs:40-43` - -```haskell -compress1 :: ByteString -> Compressed -``` - -Compress a message body: -- If `B.length bs <= 180` → `Passthrough bs` -- Otherwise → `Compressed (Large (Z1.compress 3 bs))` - -No context or dictionary — each message is independently compressed ("1" in `compress1` refers to single-shot compression). - -### `decompress1` - -**Source**: `Compression.hs:45-53` - -```haskell -decompress1 :: Int -> Compressed -> Either String ByteString -``` - -Decompress with size limit: -- `Passthrough bs` → `Right bs` (no check needed — already bounded by encoding) -- `Compressed (Large bs)` → check `Z1.decompressedSize bs`: - - If size is known and within `limit` → decompress - - If size unknown or exceeds `limit` → `Left` error - -The size limit check happens **before** decompression, using Zstd's frame header (which includes the decompressed size when the compressor wrote it). This prevents decompression bombs — an attacker cannot cause unbounded memory allocation by sending a small compressed payload that expands to gigabytes. 
- -The `Z1.decompress` result is pattern-matched for three cases: -- `Z1.Error e` → `Left e` -- `Z1.Skip` → `Right mempty` (zero-length output) -- `Z1.Decompress bs'` → `Right bs'` - -## Security notes - -- **Decompression bomb protection**: `decompress1` requires an explicit size limit and checks `decompressedSize` before allocating. Callers must pass an appropriate limit (typically the SMP block size). -- **No dictionary/context**: Each message is independently compressed. No shared state between messages that could leak information across compression boundaries. -- **Passthrough for short messages**: Messages ≤ 180 bytes are never compressed, avoiding timing side channels from compression ratio differences on short, potentially-predictable messages. diff --git a/spec/crypto-ratchet.md b/spec/crypto-ratchet.md deleted file mode 100644 index de5af38a28..0000000000 --- a/spec/crypto-ratchet.md +++ /dev/null @@ -1,13 +0,0 @@ -# Double Ratchet & PQDR - -> Implements the double ratchet algorithm with post-quantum extensions (PQDR). - -**Protocol reference**: [`protocol/pqdr.md`](../protocol/pqdr.md) - -## State - -## Transitions - -## Key Derivation - -## Functions diff --git a/spec/crypto-tls.md b/spec/crypto-tls.md deleted file mode 100644 index 9327ae69a7..0000000000 --- a/spec/crypto-tls.md +++ /dev/null @@ -1,11 +0,0 @@ -# TLS & Certificate Chains - -> TLS session setup, certificate chain construction, and server identity validation. - -## TLS Setup - -## Certificate Validation - -## Trust Anchoring - -## Functions diff --git a/spec/crypto.md b/spec/crypto.md deleted file mode 100644 index ec8fb0a497..0000000000 --- a/spec/crypto.md +++ /dev/null @@ -1,19 +0,0 @@ -# Cryptographic Primitives - -> All cryptographic primitives used across SimpleX protocols. 
- -## Ed25519 - -## X25519 - -## NaCl - -## AES-GCM - -## SHA - -## HKDF - -## Key Generation - -## Functions diff --git a/spec/encoding.md b/spec/encoding.md deleted file mode 100644 index f5501cfabb..0000000000 --- a/spec/encoding.md +++ /dev/null @@ -1,332 +0,0 @@ -# Encoding - -> Binary and string encoding used across all SimpleX protocols. - -**Source files**: [`Encoding.hs`](../src/Simplex/Messaging/Encoding.hs), [`Encoding/String.hs`](../src/Simplex/Messaging/Encoding/String.hs), [`Parsers.hs`](../src/Simplex/Messaging/Parsers.hs) - -## Overview - -Two encoding layers serve different purposes: - -- **`Encoding`** — Binary wire format for SMP protocol transmissions. Compact, no delimiters between fields. Used in all on-the-wire protocol messages. -- **`StrEncoding`** — Human-readable string format for configuration, URIs, logs, and JSON serialization. Uses base64url for binary data, decimal for numbers, comma-separated lists, space-separated tuples. - -Both are typeclasses with `MINIMAL` pragmas requiring `encode` + (`decode` | `parser`), with the missing one derived from the other. 
- -## Binary Encoding (`Encoding` class) - -```haskell -class Encoding a where - smpEncode :: a -> ByteString - smpDecode :: ByteString -> Either String a -- default: parseAll smpP - smpP :: Parser a -- default: smpDecode <$?> smpP -``` - -### Length-prefix conventions - -| Type | Prefix | Max size | -|------|--------|----------| -| `ByteString` | 1-byte length (Word8 as Char) | 255 bytes | -| `Large` (newtype) | 2-byte length (Word16 big-endian) | 65535 bytes | -| `Tail` (newtype) | None — consumes rest of input | Unlimited | -| Lists (`smpEncodeList`) | 1-byte count prefix, then concatenated items | 255 items | -| `NonEmpty` | Same as list (fails on count=0) | 255 items | - -### Scalar types - -| Type | Encoding | Bytes | -|------|----------|-------| -| `Char` | Raw byte | 1 | -| `Bool` | `'T'` / `'F'` (0x54 / 0x46) | 1 | -| `Word16` | Big-endian | 2 | -| `Word32` | Big-endian | 4 | -| `Int64` | Two big-endian Word32s (high then low) | 8 | -| `SystemTime` | `systemSeconds` as Int64 (nanoseconds dropped) | 8 | -| `Text` | UTF-8 then ByteString encoding (1-byte length prefix) | 1 + len | -| `String` | `B.pack` then ByteString encoding | 1 + len | - -### `Maybe a` - -``` -Nothing → '0' (0x30) -Just x → '1' (0x31) ++ smpEncode x -``` - -Tags are ASCII characters `'0'`/`'1'`, not binary 0x00/0x01. - -### Tuples - -Tuples (2 through 8) encode as simple concatenation — no length prefix, no separator. Fields are parsed sequentially using each component's `smpP`. This works because each component's parser knows how many bytes to consume (via its own length prefix or fixed size). 
- -### Combinators - -| Function | Signature | Purpose | -|----------|-----------|---------| -| `_smpP` | `Parser a` | Space-prefixed parser (`A.space *> smpP`) | -| `smpEncodeList` | `[a] -> ByteString` | 1-byte count + concatenated items | -| `smpListP` | `Parser [a]` | Parse count then that many items | -| `lenEncode` | `Int -> Char` | Int to single-byte length char | - -## String Encoding (`StrEncoding` class) - -```haskell -class StrEncoding a where - strEncode :: a -> ByteString - strDecode :: ByteString -> Either String a -- default: parseAll strP - strP :: Parser a -- default: strDecode <$?> base64urlP -``` - -Key difference from `Encoding`: the default `strP` parses base64url input first, then applies `strDecode`. This means types that only implement `strDecode` will automatically accept base64url-encoded input. - -### Instance conventions - -| Type | Encoding | -|------|----------| -| `ByteString` | base64url (non-empty required) | -| `Word16`, `Word32` | Decimal string | -| `Int`, `Int64` | Signed decimal | -| `Char`, `Bool` | Delegates to `Encoding` (`smpEncode`/`smpP`) | -| `Maybe a` | Empty string = `Nothing`, otherwise `strEncode a` | -| `Text` | UTF-8 bytes, parsed until space/newline | -| `SystemTime` | `systemSeconds` as Int64 (decimal) | -| `UTCTime` | ISO 8601 string | -| `CertificateChain` | Comma-separated base64url blobs | -| `Fingerprint` | base64url of fingerprint bytes | - -### Collection encoding - -| Type | Separator | -|------|-----------| -| Lists (`strEncodeList`) | Comma `,` | -| `NonEmpty` | Comma (fails on empty) | -| `Set a` | Comma | -| `IntSet` | Comma | -| Tuples (2-6) | Space (` `) | - -### `Str` newtype - -Raw string (not base64url-encoded). Parses until space, consumes trailing space. Used for string-valued protocol fields that should not be base64-encoded. 
- -### `TextEncoding` class - -```haskell -class TextEncoding a where - textEncode :: a -> Text - textDecode :: Text -> Maybe a -``` - -Separate from `StrEncoding` — operates on `Text` rather than `ByteString`. Used for types that need Text representation (e.g., enum display names). - -### JSON bridge functions - -| Function | Purpose | -|----------|---------| -| `strToJSON` | `StrEncoding a => a -> J.Value` via `decodeLatin1 . strEncode` | -| `strToJEncoding` | Same, for Aeson encoding | -| `strParseJSON` | `StrEncoding a => String -> J.Value -> JT.Parser a` — parse JSON string via `strP` | -| `textToJSON` | `TextEncoding a => a -> J.Value` | -| `textToEncoding` | Same, for Aeson encoding | -| `textParseJSON` | `TextEncoding a => String -> J.Value -> JT.Parser a` | - -## Parsers - -**Source**: [`Parsers.hs`](../src/Simplex/Messaging/Parsers.hs) - -### Core parsing functions - -| Function | Signature | Purpose | -|----------|-----------|---------| -| `parseAll` | `Parser a -> ByteString -> Either String a` | Parse consuming all input (fails if bytes remain) | -| `parse` | `Parser a -> e -> ByteString -> Either e a` | `parseAll` with custom error type (discards error string) | -| `parseE` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | `parseAll` lifted into `ExceptT` | -| `parseE'` | `(String -> e) -> Parser a -> ByteString -> ExceptT e IO a` | Like `parseE` but allows trailing input | -| `parseRead1` | `Read a => Parser a` | Parse a word then `readMaybe` it | -| `parseString` | `(ByteString -> Either String a) -> String -> a` | Parse from `String` (errors with `error`) | - -### `base64P` - -Standard base64 parser (not base64url — uses `+`/`/` alphabet). Takes alphanumeric + `+`/`/` characters, optional `=` padding, then decodes. Contrast with `base64urlP` in `Encoding/String.hs` which uses `-`/`_` alphabet. - -### JSON options helpers - -Platform-conditional JSON encoding for cross-platform compatibility (Haskell ↔ Swift). 
- -| Function | Purpose | -|----------|---------| -| `enumJSON` | All-nullary constructors as strings, with tag modifier | -| `sumTypeJSON` | Platform-conditional: `taggedObjectJSON` on non-Darwin, `singleFieldJSON` on Darwin | -| `taggedObjectJSON` | `{"type": "Tag", "data": {...}}` format | -| `singleFieldJSON` | `{"Tag": value}` format | -| `defaultJSON` | Default options with `omitNothingFields = True` | - -Pattern synonyms for JSON field names: -- `TaggedObjectJSONTag = "type"` -- `TaggedObjectJSONData = "data"` -- `SingleFieldJSONTag = "_owsf"` - -### String helpers - -| Function | Purpose | -|----------|---------| -| `fstToLower` | Lowercase first character | -| `dropPrefix` | Remove prefix string, lowercase remainder | -| `textP` | Parse rest of input as UTF-8 `String` | - -## Auxiliary Types and Utilities - -### TMap - -**Source**: [`TMap.hs`](../src/Simplex/Messaging/TMap.hs) - -```haskell -type TMap k a = TVar (Map k a) -``` - -STM-based concurrent map. Wraps `Data.Map.Strict` in a `TVar`. All mutations use `modifyTVar'` (strict) to prevent thunk accumulation. - -| Function | Notes | -|----------|-------| -| `emptyIO` | IO allocation (`newTVarIO`) | -| `singleton` | STM allocation | -| `clear` | Reset to empty | -| `lookup` / `lookupIO` | STM / non-transactional IO read | -| `member` / `memberIO` | STM / non-transactional IO membership | -| `insert` / `insertM` | Insert value / insert from STM action | -| `delete` | Remove key | -| `lookupInsert` | Atomic lookup-then-insert (returns old value) | -| `lookupDelete` | Atomic lookup-then-delete | -| `adjust` / `update` / `alter` / `alterF` | Standard Map operations lifted to STM | -| `union` | Merge `Map` into `TMap` | - -`lookupIO`/`memberIO` use `readTVarIO` — single-read outside STM transaction, useful when you need a snapshot without composing with other STM operations. 
- -### SessionVar - -**Source**: [`Session.hs`](../src/Simplex/Messaging/Session.hs) - -Race-safe session management using TMVar + monotonic ID. - -```haskell -data SessionVar a = SessionVar - { sessionVar :: TMVar a -- result slot - , sessionVarId :: Int -- monotonic ID from TVar counter - , sessionVarTs :: UTCTime -- creation timestamp - } -``` - -| Function | Purpose | -|----------|---------| -| `getSessVar` | Lookup or create session. Returns `Left new` or `Right existing` | -| `removeSessVar` | Delete session only if ID matches (prevents removing a replacement) | -| `tryReadSessVar` | Non-blocking read of session result | - -The ID-match check in `removeSessVar` prevents a race where: -1. Thread A creates session #5, starts work -2. Thread B creates session #6 (replacing #5 in TMap) -3. Thread A finishes, tries to remove — ID mismatch, removal blocked - -### ServiceScheme - -**Source**: [`ServiceScheme.hs`](../src/Simplex/Messaging/ServiceScheme.hs) - -```haskell -data ServiceScheme = SSSimplex | SSAppServer SrvLoc -data SrvLoc = SrvLoc HostName ServiceName -``` - -URI scheme for SimpleX service addresses. `SSSimplex` encodes as `"simplex:"`, `SSAppServer` as `"https://host:port"`. - -`simplexChat` is the constant `SSAppServer (SrvLoc "simplex.chat" "")`. - -### SystemTime - -**Source**: [`SystemTime.hs`](../src/Simplex/Messaging/SystemTime.hs) - -```haskell -newtype RoundedSystemTime (t :: Nat) = RoundedSystemTime { roundedSeconds :: Int64 } -type SystemDate = RoundedSystemTime 86400 -- day precision -type SystemSeconds = RoundedSystemTime 1 -- second precision -``` - -Phantom-typed time rounding. The `Nat` type parameter specifies rounding granularity in seconds. 
- -| Function | Purpose | -|----------|---------| -| `getRoundedSystemTime` | Get current time rounded to `t` seconds | -| `getSystemDate` | Alias for day-rounded time | -| `getSystemSeconds` | Second-precision (no rounding needed, just drops nanoseconds) | -| `roundedToUTCTime` | Convert back to `UTCTime` | - -`RoundedSystemTime` derives `FromField`/`ToField` for SQLite storage and `FromJSON`/`ToJSON` for API serialization. - -### Util - -**Source**: [`Util.hs`](../src/Simplex/Messaging/Util.hs) - -Selected utilities used across the codebase: - -**Monadic combinators**: - -| Function | Signature | Purpose | -|----------|-----------|---------| -| `<$?>` | `MonadFail m => (a -> Either String b) -> m a -> m b` | Lift fallible function into parser | -| `$>>=` | `(Monad m, Monad f, Traversable f) => m (f a) -> (a -> m (f b)) -> m (f b)` | Monadic bind through nested monad | -| `ifM` / `whenM` / `unlessM` | Monadic conditionals | | -| `anyM` | Short-circuit `any` for monadic predicates (strict) | | - -**Error handling**: - -| Function | Purpose | -|----------|---------| -| `tryAllErrors` | Catch all exceptions (including async) into `ExceptT` | -| `catchAllErrors` | Same with handler | -| `tryAllOwnErrors` | Catch only "own" exceptions (re-throws async cancellation) | -| `catchAllOwnErrors` | Same with handler | -| `isOwnException` | `StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded` | -| `isAsyncCancellation` | Any `SomeAsyncException` except own exceptions | -| `catchThrow` | Catch exceptions, wrap in Left | -| `allFinally` | `tryAllErrors` + `final` + `except` (like `finally` for ExceptT) | - -The own-vs-async distinction is critical: `catchOwn`/`tryAllOwnErrors` never swallow async cancellation (`ThreadKilled`, `UserInterrupt`, etc.), only synchronous exceptions and resource exhaustion (`StackOverflow`, `HeapOverflow`, `AllocationLimitExceeded`). 
- -**STM**: - -| Function | Purpose | -|----------|---------| -| `tryWriteTBQueue` | Non-blocking bounded queue write, returns success | - -**Database result helpers**: - -| Function | Purpose | -|----------|---------| -| `firstRow` | Extract first row with transform, or Left error | -| `maybeFirstRow` | Extract first row as Maybe | -| `firstRow'` | Like `firstRow` but transform can also fail | - -**Collection utilities**: - -| Function | Purpose | -|----------|---------| -| `groupOn` | `groupBy` using equality on projected key | -| `groupAllOn` | `groupOn` after `sortOn` (groups non-adjacent elements) | -| `toChunks` | Split list into `NonEmpty` chunks of size n | -| `packZipWith` | Optimized ByteString zipWith (direct memory access) | - -**Miscellaneous**: - -| Function | Purpose | -|----------|---------| -| `safeDecodeUtf8` | Decode UTF-8 replacing errors with `'?'` | -| `bshow` / `tshow` | `show` to `ByteString` / `Text` | -| `threadDelay'` | `Int64` delay (handles overflow by looping) | -| `diffToMicroseconds` / `diffToMilliseconds` | `NominalDiffTime` conversion | -| `labelMyThread` | Label current thread for debugging | -| `encodeJSON` / `decodeJSON` | `ToJSON a => a -> Text` / `FromJSON a => Text -> Maybe a` | -| `traverseWithKey_` | `Map` traversal discarding results | - -## Security notes - -- **Length prefix overflow**: `ByteString` encoding uses 1-byte length — silently truncates strings > 255 bytes. Callers must ensure size bounds before encoding. `Large` extends to 65535 bytes via Word16 prefix. -- **`Tail` unbounded**: `Tail` consumes all remaining input with no size check. Only safe when total message size is already bounded (e.g., within a padded SMP block). -- **base64 vs base64url**: `Parsers.base64P` uses standard alphabet (`+`/`/`), while `String.base64urlP` uses URL-safe alphabet (`-`/`_`). Mixing them causes silent decode failures. -- **`safeDecodeUtf8`**: Replaces invalid UTF-8 with `'?'` rather than failing. 
Suitable for logging/display, not for security-critical string comparison. diff --git a/spec/ntf-protocol.md b/spec/ntf-protocol.md deleted file mode 100644 index c826e7e722..0000000000 --- a/spec/ntf-protocol.md +++ /dev/null @@ -1,15 +0,0 @@ -# NTF Protocol Implementation - -> Implements NTF commands, token registration, and subscription lifecycle for push notifications. - -**Protocol reference**: [`protocol/push-notifications.md`](../protocol/push-notifications.md) - -## Types - -## Commands - -## Token Lifecycle - -## Subscription Lifecycle - -## Functions diff --git a/spec/ntf-server.md b/spec/ntf-server.md deleted file mode 100644 index 4a39957e3b..0000000000 --- a/spec/ntf-server.md +++ /dev/null @@ -1,11 +0,0 @@ -# Notification Server - -> Notification server implementation: token management, subscriptions, and APNS integration. - -## Token Management - -## Subscription Management - -## APNS Integration - -## Functions diff --git a/spec/remote-control.md b/spec/remote-control.md deleted file mode 100644 index 5a064437c8..0000000000 --- a/spec/remote-control.md +++ /dev/null @@ -1,11 +0,0 @@ -# Remote Control (XRCP) - -> XRCP implementation: discovery, invitation, and session management. - -## Discovery - -## Invitation - -## Session Management - -## Functions diff --git a/spec/smp-client.md b/spec/smp-client.md deleted file mode 100644 index 39ae87f9ae..0000000000 --- a/spec/smp-client.md +++ /dev/null @@ -1,11 +0,0 @@ -# SMP Client - -> SMP client implementation: protocol operations, proxy relay, and reconnection logic. - -## Protocol Operations - -## Proxy Relay - -## Reconnection - -## Functions diff --git a/spec/smp-protocol.md b/spec/smp-protocol.md deleted file mode 100644 index 0def979418..0000000000 --- a/spec/smp-protocol.md +++ /dev/null @@ -1,13 +0,0 @@ -# SMP Protocol Implementation - -> Implements SMP commands, types, and binary encoding for the SimpleX Messaging Protocol. 
- -**Protocol reference**: [`protocol/simplex-messaging.md`](../protocol/simplex-messaging.md) - -## Types - -## Commands - -## Encoding - -## Functions diff --git a/spec/smp-server.md b/spec/smp-server.md deleted file mode 100644 index 696d190673..0000000000 --- a/spec/smp-server.md +++ /dev/null @@ -1,13 +0,0 @@ -# SMP Server - -> SMP server implementation: connection handling, queue operations, proxying, and control port. - -## Connection Handling - -## Queue Operations - -## Proxying - -## Control - -## Functions diff --git a/spec/storage-agent.md b/spec/storage-agent.md deleted file mode 100644 index 4ba4c414cb..0000000000 --- a/spec/storage-agent.md +++ /dev/null @@ -1,11 +0,0 @@ -# Agent Storage - -> Agent storage backends: SQLite, Postgres, and migration framework. - -## SQLite Backend - -## Postgres Backend - -## Migration Framework - -## Functions diff --git a/spec/storage-server.md b/spec/storage-server.md deleted file mode 100644 index b2dec18425..0000000000 --- a/spec/storage-server.md +++ /dev/null @@ -1,9 +0,0 @@ -# Server Storage - -> Server storage backends: STM queues and message stores (STM, Journal, Postgres). - -## STM Queues - -## Message Stores (STM, Journal, Postgres) - -## Functions diff --git a/spec/transport-http2.md b/spec/transport-http2.md deleted file mode 100644 index 2594b84311..0000000000 --- a/spec/transport-http2.md +++ /dev/null @@ -1,13 +0,0 @@ -# HTTP/2 Transport - -> HTTP/2 framing, client and server sessions, and file streaming for XFTP. - -## Framing - -## Client Sessions - -## Server Sessions - -## File Streaming - -## Functions diff --git a/spec/transport-websocket.md b/spec/transport-websocket.md deleted file mode 100644 index 182a43c47c..0000000000 --- a/spec/transport-websocket.md +++ /dev/null @@ -1,7 +0,0 @@ -# WebSocket Transport - -> WebSocket adapter for browser-based SimpleX clients. 
- -## Adapter - -## Functions diff --git a/spec/transport.md b/spec/transport.md deleted file mode 100644 index 0e50a67d94..0000000000 --- a/spec/transport.md +++ /dev/null @@ -1,11 +0,0 @@ -# Transport Layer - -> Transport abstraction, handshake protocol, and block padding for metadata privacy. - -## Abstraction - -## Handshake Protocol - -## Block Padding - -## Functions diff --git a/spec/version.md b/spec/version.md deleted file mode 100644 index 19ad786fe2..0000000000 --- a/spec/version.md +++ /dev/null @@ -1,177 +0,0 @@ -# Version Negotiation - -> Version ranges and compatibility checking for protocol evolution. - -**Source files**: [`Version.hs`](../src/Simplex/Messaging/Version.hs), [`Version/Internal.hs`](../src/Simplex/Messaging/Version/Internal.hs) - -## Overview - -All SimpleX protocols use version negotiation during handshake. Each party advertises a `VersionRange` (min..max supported), and negotiation produces a `Compatible` proof value if the ranges overlap — choosing the highest mutually-supported version. - -The `Compatible` newtype can only be constructed internally (constructor is not exported), so the type system enforces that compatibility was actually checked. - -## Types - -### `Version v` - -```haskell -newtype Version v = Version Word16 -``` - -Phantom-typed version number. The phantom `v` distinguishes version spaces (e.g., SMP versions vs Agent versions vs XFTP versions) at the type level, preventing accidental comparison across protocols. - -- `Encoding`: 2 bytes big-endian (via Word16 instance) -- `StrEncoding`: decimal string -- JSON: numeric value -- Derives: `Eq`, `Ord`, `Show` - -The constructor is exported from `Version.Internal` but not from `Version`, so application code cannot fabricate versions — they must come from protocol constants or parsing. 
- -### `VersionRange v` - -```haskell -data VersionRange v = VRange - { minVersion :: Version v - , maxVersion :: Version v - } -``` - -Invariant: `minVersion <= maxVersion` (enforced by smart constructors). - -The `VRange` constructor is not exported — only the pattern synonym `VersionRange` (read-only) is public. - -- `Encoding`: two Word16s concatenated (4 bytes total) -- `StrEncoding`: `"min-max"` or `"v"` if min == max -- JSON: `{"minVersion": n, "maxVersion": n}` - -### `VersionScope v` - -```haskell -class VersionScope v -``` - -Empty typeclass used as a constraint on version operations. Each protocol declares its version scope: - -```haskell -instance VersionScope SMP -instance VersionScope Agent -``` - -This prevents accidentally mixing version ranges from different protocols in negotiation functions. - -### `Compatible a` - -```haskell -newtype Compatible a = Compatible_ a - -pattern Compatible :: a -> Compatible a -pattern Compatible a <- Compatible_ a -``` - -Proof that compatibility was checked. The `Compatible_` constructor is not exported — `Compatible` is a read-only pattern synonym. The only way to obtain a `Compatible` value is through `compatibleVersion`, `compatibleVRange`, `proveCompatible`, or the internal `mkCompatibleIf`. - -### `VersionI` / `VersionRangeI` type classes - -Multi-param typeclasses with functional dependencies for generic version/range operations. 
Allow extension types that wrap `Version` or `VersionRange` to participate in negotiation: - -```haskell -class VersionScope v => VersionI v a | a -> v where - type VersionRangeT v a -- associated type: range form - version :: a -> Version v - toVersionRangeT :: a -> VersionRange v -> VersionRangeT v a - -class VersionScope v => VersionRangeI v a | a -> v where - type VersionT v a -- associated type: version form - versionRange :: a -> VersionRange v - toVersionRange :: a -> VersionRange v -> a - toVersionT :: a -> Version v -> VersionT v a -``` - -Identity instances exist for `Version v` and `VersionRange v` themselves. - -## Functions - -### Construction - -| Function | Signature | Purpose | -|----------|-----------|---------| -| `mkVersionRange` | `Version v -> Version v -> VersionRange v` | Construct range, `error` if min > max | -| `safeVersionRange` | `Version v -> Version v -> Maybe (VersionRange v)` | Safe construction, `Nothing` if invalid | -| `versionToRange` | `Version v -> VersionRange v` | Singleton range (min == max) | - -### Compatibility checking - -### isCompatible - -**Purpose**: Check if a single version falls within a range. - -```haskell -isCompatible :: VersionI v a => a -> VersionRange v -> Bool -``` - -### isCompatibleRange - -**Purpose**: Check if two version ranges overlap: `min1 <= max2 && min2 <= max1`. - -```haskell -isCompatibleRange :: VersionRangeI v a => a -> VersionRange v -> Bool -``` - -### proveCompatible - -**Purpose**: If version is compatible, wrap in `Compatible` proof. Returns `Nothing` if out of range. - -```haskell -proveCompatible :: VersionI v a => a -> VersionRange v -> Maybe (Compatible a) -``` - -### Negotiation - -### compatibleVersion - -**Purpose**: Negotiate a single version from two ranges. Returns `min(max1, max2)` — the highest mutually-supported version. Returns `Nothing` if ranges don't overlap. 
- -```haskell -compatibleVersion :: VersionRangeI v a => a -> VersionRange v -> Maybe (Compatible (VersionT v a)) -``` - -### compatibleVRange - -**Purpose**: Compute the intersection of two version ranges: `(max(min1,min2), min(max1,max2))`. Returns `Nothing` if the intersection is empty. - -```haskell -compatibleVRange :: VersionRangeI v a => a -> VersionRange v -> Maybe (Compatible a) -``` - -### compatibleVRange' - -**Purpose**: Cap a version range's maximum at a given version. Returns `Nothing` if the cap is below the range's minimum. - -```haskell -compatibleVRange' :: VersionRangeI v a => a -> Version v -> Maybe (Compatible a) -``` - -## Protocol version constants - -Version constants for each protocol are defined in their respective Transport modules. For SMP, key gates include: - -- `currentSMPAgentVersion`, `supportedSMPAgentVRange` — current negotiation range -- `serviceCertsSMPVersion = 16` — service certificate handshake -- `rcvServiceSMPVersion = 19` — service subscription commands - -See [`transport.md`](transport.md) and [`rcv-services.md`](rcv-services.md) for protocol-specific version constants. - -## Negotiation protocol - -During handshake: -1. Client sends its `VersionRange` to server -2. Server computes `compatibleVRange clientRange serverRange` -3. If `Nothing` → reject connection (incompatible) -4. If `Just (Compatible agreedRange)` → use `maxVersion agreedRange` as the effective protocol version - -The `Compatible` proof flows through the connection setup, ensuring all subsequent version-gated code paths have evidence that negotiation occurred. - -## Security notes - -- **No downgrade attack protection in negotiation itself** — an active MITM could modify the version range to force a lower version. Protection comes from the TLS layer (authentication prevents MITM) and from servers setting minimum version floors. -- **`mkVersionRange` uses `error`** — only safe for compile-time constants. Runtime construction must use `safeVersionRange`. 
diff --git a/spec/xftp-client.md b/spec/xftp-client.md deleted file mode 100644 index 99306bb73e..0000000000 --- a/spec/xftp-client.md +++ /dev/null @@ -1,11 +0,0 @@ -# XFTP Client - -> XFTP client implementation: file operations, CLI interface, and agent integration. - -## File Operations - -## CLI - -## Agent - -## Functions diff --git a/spec/xftp-protocol.md b/spec/xftp-protocol.md deleted file mode 100644 index 26eb950bee..0000000000 --- a/spec/xftp-protocol.md +++ /dev/null @@ -1,13 +0,0 @@ -# XFTP Protocol Implementation - -> Implements XFTP commands, types, and chunk operations for the SimpleX File Transfer Protocol. - -**Protocol reference**: [`protocol/xftp.md`](../protocol/xftp.md) - -## Types - -## Commands - -## Chunk Operations - -## Functions diff --git a/spec/xftp-server.md b/spec/xftp-server.md deleted file mode 100644 index bdcbbb9aad..0000000000 --- a/spec/xftp-server.md +++ /dev/null @@ -1,11 +0,0 @@ -# XFTP Server - -> XFTP server implementation: chunk storage, recipient management, and control port. - -## Chunk Storage - -## Recipient Management - -## Control - -## Functions diff --git a/spec/xrcp-protocol.md b/spec/xrcp-protocol.md deleted file mode 100644 index 8f084f7ca8..0000000000 --- a/spec/xrcp-protocol.md +++ /dev/null @@ -1,13 +0,0 @@ -# XRCP Protocol Implementation - -> Implements XRCP session handshake and commands for remote control of SimpleX clients. 
- -**Protocol reference**: [`protocol/xrcp.md`](../protocol/xrcp.md) - -## Types - -## Session Handshake - -## Commands - -## Functions From bb72e1a97af98052e29a7655ea9c82df0ddadf2a Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 12:37:33 +0000 Subject: [PATCH 76/91] update --- spec/agent/infrastructure.md | 15 +++++++++++++++ spec/topics/patterns.md | 20 ++++++++++++++++++++ spec/topics/transport.md | 26 ++++++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/spec/agent/infrastructure.md b/spec/agent/infrastructure.md index 5e3c828e6c..784608b625 100644 --- a/spec/agent/infrastructure.md +++ b/spec/agent/infrastructure.md @@ -194,3 +194,18 @@ The agent supports SQLite and PostgreSQL via CPP compilation flags (`#if defined **Store access bracketing**: `withStore` wraps all database operations with `agentOperationBracket AODatabase`, connecting the store to the suspension cascade. `withStoreBatch` / `withStoreBatch'` run multiple operations in a single transaction with per-operation error catching. **Known bug**: `checkConfirmedSndQueueExists_` uses `#if defined(dpPostgres)` (typo - should be `dbPostgres`), so the `FOR UPDATE` clause is never included on either backend. + +### Migration framework + +**Source**: [Agent/Store/Migrations.hs](../../src/Simplex/Messaging/Agent/Store/Migrations.hs), [Agent/Store/Shared.hs](../../src/Simplex/Messaging/Agent/Store/Shared.hs) + +Migrations are Haskell modules under `Agent/Store/SQLite/Migrations/` and `Agent/Store/Postgres/Migrations/`. Each has `up` SQL and optional `down` SQL. + +**Key behaviors**: + +- `migrationsToRun` compares app migrations against the `migrations` table by name. Divergent histories (app has `[a,b]`, DB has `[a,c]`) produce `MTREDifferent` error - manual intervention required. +- Each migration runs in its own transaction with the `migrations` insert *before* the schema change - failure rolls back both. 
+- Downgrades require all intermediate migrations to have `down` SQL; missing any produces `MTRENoDown`. +- `MigrationConfirmation` controls whether upgrades/downgrades auto-apply, prompt, or error. + +**Special case**: `m20220811_onion_hosts` triggers `updateServers` to expand host entries with Tor addresses - this is a data migration, not just a schema change. diff --git a/spec/topics/patterns.md b/spec/topics/patterns.md index a5d61500e2..09bd9a7e10 100644 --- a/spec/topics/patterns.md +++ b/spec/topics/patterns.md @@ -10,6 +10,8 @@ For protocol-specific encoding details, see [transport.md](transport.md). For cr - [Compression](#compression) - [Concurrent data structures](#concurrent-data-structures) - [Batch processing](#batch-processing) +- [Time encoding](#time-encoding) +- [Utilities](#utilities) --- @@ -335,3 +337,21 @@ withStoreBatch' :: Traversable t ``` Use when operations cannot fail (or failures should become `INTERNAL` errors). + +--- + +## Time encoding + +**Source**: [SystemTime.hs](../../src/Simplex/Messaging/SystemTime.hs) + +`RoundedSystemTime t` uses a phantom type-level `Nat` for precision. `SystemDate` (precision 86400) provides k-anonymity for file creation times - all timestamps within a day collapse to the same value, preventing correlation attacks. + +--- + +## Utilities + +**Source**: [Util.hs](../../src/Simplex/Messaging/Util.hs) + +**Functor combinators**: `<$$>` (double fmap), `<$$` (double fmap const), and `<$?>` (maps a fallible `a -> Either String b` function over a `MonadFail` action, calling `fail` on `Left`) are used throughout for nested functor manipulation and fallible parsing chains. + +**`threadDelay'`**: Handles `Int64` delays that exceed `maxBound::Int` by looping with `maxBound`-sized chunks.
diff --git a/spec/topics/transport.md b/spec/topics/transport.md index 2cce46f77f..a9ce85f1b3 100644 --- a/spec/topics/transport.md +++ b/spec/topics/transport.md @@ -10,6 +10,8 @@ For service certificate handshake extensions, see [client-services.md](client-se - [Transmission encoding and signing](#transmission-encoding-and-signing) - [Version negotiation](#version-negotiation) - [Connection management](#connection-management) +- [HTTP/2 sessions](#http2-sessions) +- [WebSocket adapter](#websocket-adapter) --- @@ -321,3 +323,27 @@ All four threads run inside `raceAny_` with `E.finally disconnected`. When any t 2. The agent callback demotes subscriptions, fires DOWN events, and initiates resubscription The `connected` TVar is set to `True` after the handshake succeeds and before the threads start. Note: in the protocol client, this TVar is not reset on disconnect - disconnect detection relies on thread cancellation via `raceAny_` and the `disconnected` callback, not STM re-evaluation. (The server-side `Client` type has a separate `connected` TVar that is reset in `clientDisconnected`.) + +--- + +## HTTP/2 sessions + +**Source**: [Transport/HTTP2/Client.hs](../../src/Simplex/Messaging/Transport/HTTP2/Client.hs), [Transport/HTTP2/Server.hs](../../src/Simplex/Messaging/Transport/HTTP2/Server.hs) + +HTTP/2 is used for XFTP file transfers and notifications to push providers (APNs). + +**Why the request queue**: `sendRequest` serializes requests through a `TBQueue` because the underlying http2 library is not thread-safe for concurrent stream creation. `sendRequestDirect` exists but is explicitly marked unsafe. + +**Inactivity expiration**: Server connections track `activeAt` and are closed by a background thread when idle beyond `checkInterval`. This is necessary because HTTP/2 has no application-level keepalive - abandoned connections would otherwise persist indefinitely. 
+ +--- + +## WebSocket adapter + +**Source**: [Transport/WebSockets.hs](../../src/Simplex/Messaging/Transport/WebSockets.hs) + +WebSocket wraps TLS for browser clients, implementing the `Transport` typeclass. + +**Strict size matching**: Unlike raw TLS, where `cGet` may accumulate multiple reads, WebSocket `cGet` expects a single `receiveData` to return exactly the requested size. Mismatch throws `TEBadBlock` immediately - WebSocket messages are atomic, so partial reads indicate a protocol error. + +**No compression**: `connectionCompressionOptions = NoCompression` because the payload is already encrypted. Ciphertext is effectively incompressible, so compressing it only wastes CPU, and wherever compression does reduce size, the resulting length differences can leak information about the underlying data. From 8dfb59ba88dcc29c294d7f536eb440ed69928647 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:00:27 +0000 Subject: [PATCH 77/91] design notes for main spec files --- spec/agent.md | 6 ++++++ spec/clients.md | 12 +++++++++++- spec/routers.md | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/spec/agent.md b/spec/agent.md index 02f93314b6..af49ac63f6 100644 --- a/spec/agent.md +++ b/spec/agent.md @@ -4,6 +4,12 @@ The SimpleX Agent is the Layer 3 connection manager. It builds duplex encrypted For usage and API overview, see [docs/AGENT.md](../docs/AGENT.md). For protocol specifications, see [Agent Protocol](../protocol/agent-protocol.md), [PQDR](../protocol/pqdr.md). +**Split-phase encryption**: Message sending separates ratchet advancement (API thread, serialized) from body encryption (delivery worker, parallel). This prevents ratchet lock contention across queues while maintaining correct message ordering. See [infrastructure.md](agent/infrastructure.md#message-delivery). + +**Worker taxonomy**: Three worker families handle background operations - delivery workers (per send queue), async command workers (per connection), and NTF workers (per server).
All use the same create-or-reuse pattern with restart rate limiting. See [infrastructure.md](agent/infrastructure.md#worker-framework). + +**Suspension cascade**: Operations drain in dependency order: `AORcvNetwork` → `AOMsgDelivery` → `AOSndNetwork` → `AODatabase`. Suspending receive processing cascades through to database access, ensuring clean shutdown. See [infrastructure.md](agent/infrastructure.md#operation-suspension-cascade). + --- **Module specs**: [Agent](modules/Simplex/Messaging/Agent.md) · [Agent Client](modules/Simplex/Messaging/Agent/Client.md) · [Agent Protocol](modules/Simplex/Messaging/Agent/Protocol.md) · [Store Interface](modules/Simplex/Messaging/Agent/Store/Interface.md) · [NtfSubSupervisor](modules/Simplex/Messaging/Agent/NtfSubSupervisor.md) · [XFTP Agent](modules/Simplex/FileTransfer/Agent.md) · [Ratchet](modules/Simplex/Messaging/Crypto/Ratchet.md) diff --git a/spec/clients.md b/spec/clients.md index 3ab9d58682..acb513e9d7 100644 --- a/spec/clients.md +++ b/spec/clients.md @@ -8,6 +8,10 @@ For deployment and usage, see [docs/CLIENT.md](../docs/CLIENT.md). For protocol ## SMP Client (ProtocolClient) +**Four threads**: Send and receive threads are separate to allow backpressure - a slow receiver doesn't block sending. The process thread decouples parsing from delivery, preventing a slow consumer from stalling the receive loop. The monitor thread provides application-level keepalive beyond TCP - detecting protocol-level stalls. See [transport.md](topics/transport.md#connection-management). + +**Correlation ID lifecycle**: IDs are generated before send and removed on response OR timeout. Removal on timeout prevents unbounded growth of `sentCommands` when the router is unresponsive. 
+ **Module specs**: [Client](modules/Simplex/Messaging/Client.md) · [Protocol](modules/Simplex/Messaging/Protocol.md) · [Transport](modules/Simplex/Messaging/Transport.md) · [Crypto](modules/Simplex/Messaging/Crypto.md) Generic protocol client used for both SMP and NTF connections. Manages a single TLS connection with multiplexed command/response matching via correlation IDs. @@ -59,6 +63,10 @@ sequenceDiagram ## SMPClientAgent +**Dual consumers**: Used by both the SMP router (for proxy connections to relays) and the NTF router (for NSUB subscriptions to SMP routers). Both share the same connection pooling and reconnection logic but issue different command sets. + +**Session ID gating**: Subscription responses are validated against the current TLS session ID. A response from a stale session (connection dropped and reconnected between send and receive) is discarded rather than corrupting state. See [infrastructure.md](agent/infrastructure.md#subscription-tracking). + **Module specs**: [Client Agent](modules/Simplex/Messaging/Client/Agent.md) Connection manager that multiplexes multiple ProtocolClient connections. Tracks subscriptions, handles reconnection with backoff, and forwards server messages and connection events upward. Used by SMP router (proxying) and NTF router (subscriptions). @@ -114,9 +122,11 @@ sequenceDiagram ## XFTP Client +**No subscriptions**: File operations complete independently - no persistent server-side state to track. This allows XFTPClient to be a thin wrapper with no threads of its own. + **Module specs**: [Client](modules/Simplex/FileTransfer/Client.md) · [Protocol](modules/Simplex/FileTransfer/Protocol.md) · [HTTP/2 Client](modules/Simplex/Messaging/Transport/HTTP2/Client.md) -Stateless wrapper around HTTP2Client. XFTPClient adds no threads of its own; each operation is a synchronous HTTP/2 request/response. Serialization and multiplexing happen inside HTTP2Client's internal request queue and process thread. +Stateless wrapper around HTTP2Client.
XFTPClient adds no threads of its own. Serialization and multiplexing happen inside HTTP2Client's internal request queue and process thread. ### XFTP Client components diff --git a/spec/routers.md b/spec/routers.md index b7b8761ef0..d141afaf38 100644 --- a/spec/routers.md +++ b/spec/routers.md @@ -8,6 +8,12 @@ For deployment and configuration, see [docs/ROUTERS.md](../docs/ROUTERS.md). For ## SMP Router +**Thread model**: Client handler threads process commands synchronously, but subscription registration goes through a separate `serverThread` via `subQ`. This split-STM pattern reduces contention - client handlers don't block on the shared `SubscribedClients` map. See [subscriptions.md](topics/subscriptions.md) for details. + +**Store separation**: `QueueStore` holds queue metadata and auth keys; `MsgStore` holds message bodies. Different durability tradeoffs - queue metadata needs consistency (Postgres option), message bodies optimize for throughput (STM/Journal options). + +**Proxy architecture**: The proxy router maintains an `SMPClientAgent` that pools connections to destination relays - one connection per relay server, shared across all proxy sessions to that relay. Each proxy session gets its own `SessionId` (from the relay's TLS session) and DH keys, but the underlying TCP connection is reused. The proxy is stateless for command forwarding - it doesn't subscribe to queues or maintain transaction state, just relays encrypted commands and responses. 
+ **Module specs**: [Server](modules/Simplex/Messaging/Server.md) · [Main](modules/Simplex/Messaging/Server/Main.md) · [QueueStore](modules/Simplex/Messaging/Server/QueueStore.md) · [QueueStore Postgres](modules/Simplex/Messaging/Server/QueueStore/Postgres.md) · [MsgStore](modules/Simplex/Messaging/Server/MsgStore.md) · [StoreLog](modules/Simplex/Messaging/Server/StoreLog.md) · [Control](modules/Simplex/Messaging/Server/Control.md) · [Prometheus](modules/Simplex/Messaging/Server/Prometheus.md) · [Stats](modules/Simplex/Messaging/Server/Stats.md) ### SMP Router components @@ -70,6 +76,10 @@ sequenceDiagram ## XFTP Router +**Stateless operations**: Unlike SMP, XFTP has no subscriptions or delivery threads. Each command completes independently. This simplifies scaling - no subscription state to synchronize across instances. + +**Quota reservation**: File size is reserved atomically on FNEW (before upload), released on deletion or expiration. This prevents overcommit - a client cannot upload more than they reserved. + **Module specs**: [Server](modules/Simplex/FileTransfer/Server.md) · [Main](modules/Simplex/FileTransfer/Server/Main.md) · [Store](modules/Simplex/FileTransfer/Server/Store.md) · [StoreLog](modules/Simplex/FileTransfer/Server/StoreLog.md) · [Stats](modules/Simplex/FileTransfer/Server/Stats.md) · [Transport](modules/Simplex/FileTransfer/Transport.md) ### XFTP Router components @@ -120,6 +130,10 @@ sequenceDiagram ## NTF Router +**Inverted role**: The NTF router is itself an SMP *client* - it maintains NSUB subscriptions to SMP routers, receiving NMSG events when messages arrive. It doesn't serve queues; it subscribes to them. + +**Token batching**: `tokenLastNtfs` aggregates notifications per token before push. Multiple queue notifications for the same device are combined into a single APNs payload, reducing push overhead. 
+ **Module specs**: [Server](modules/Simplex/Messaging/Notifications/Server.md) · [Main](modules/Simplex/Messaging/Notifications/Server/Main.md) · [Store Postgres](modules/Simplex/Messaging/Notifications/Server/Store/Postgres.md) · [APNS](modules/Simplex/Messaging/Notifications/Server/Push/APNS.md) · [Control](modules/Simplex/Messaging/Notifications/Server/Control.md) · [Client](modules/Simplex/Messaging/Notifications/Client.md) · [Protocol](modules/Simplex/Messaging/Notifications/Protocol.md) ### NTF Router components From 486d3251fc1af531bd12899eb97c198c91ef52cd Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:05:55 +0000 Subject: [PATCH 78/91] update readme --- spec/README.md | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/spec/README.md b/spec/README.md index c993f108d9..54eda20d9b 100644 --- a/spec/README.md +++ b/spec/README.md @@ -58,12 +58,30 @@ Module doc entry format: ## Index +### Architecture + +Component topology and message flow diagrams for each layer: + +- [routers.md](routers.md) — Layer 1: SMP, XFTP, NTF routers +- [clients.md](clients.md) — Layer 2: protocol client libraries +- [agent.md](agent.md) — Layer 3: connection manager + ### Topics -- [rcv-services.md](rcv-services.md) — Service certificates for high-volume SMP clients (bulk subscription) -- [encoding.md](encoding.md) — Binary and string encoding -- [version.md](version.md) — Version ranges and negotiation -- [compression.md](compression.md) — Zstd compression +Cross-cutting concerns that span multiple modules: + +- [topics/transport.md](topics/transport.md) — TLS, HTTP/2, WebSocket transport layers +- [topics/patterns.md](topics/patterns.md) — Exception handling, encoding, compression, TMap +- [topics/subscriptions.md](topics/subscriptions.md) — Queue subscriptions and delivery +- [topics/notifications.md](topics/notifications.md) — Push notification flow +- 
[topics/xftp.md](topics/xftp.md) — File transfer protocol +- [topics/client-services.md](topics/client-services.md) — Service certificates for bulk operations + +### Agent internals + +- [agent/infrastructure.md](agent/infrastructure.md) — Workers, store, operation suspension +- [agent/connections.md](agent/connections.md) — Connection lifecycle and states +- [agent/xrcp.md](agent/xrcp.md) — Remote control protocol ### Modules From ae57fef89851342e6f072c11bbeabc49b973719a Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:57:42 +0000 Subject: [PATCH 79/91] encryption schemes --- spec/README.md | 1 + spec/topics/encryption.md | 139 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 spec/topics/encryption.md diff --git a/spec/README.md b/spec/README.md index 54eda20d9b..394eac7724 100644 --- a/spec/README.md +++ b/spec/README.md @@ -76,6 +76,7 @@ Cross-cutting concerns that span multiple modules: - [topics/notifications.md](topics/notifications.md) — Push notification flow - [topics/xftp.md](topics/xftp.md) — File transfer protocol - [topics/client-services.md](topics/client-services.md) — Service certificates for bulk operations +- [topics/encryption.md](topics/encryption.md) — Encryption layers (TODO) ### Agent internals diff --git a/spec/topics/encryption.md b/spec/topics/encryption.md new file mode 100644 index 0000000000..567b2c384e --- /dev/null +++ b/spec/topics/encryption.md @@ -0,0 +1,139 @@ +# Encryption + +TODO - subjects to cover: + +## TLS layer + +**Protocol**: [simplex-messaging.md#tls-transport-encryption](../../protocol/simplex-messaging.md#tls-transport-encryption) +**Code**: [Transport.hs](../../src/Simplex/Messaging/Transport.hs), [Transport/Credentials.hs](../../src/Simplex/Messaging/Transport/Credentials.hs) + +- **Cipher suites**: CHACHA20-POLY1305-SHA256 (TLS 1.3), ECDHE-ECDSA-CHACHA20-POLY1305 (TLS 1.2) +- **Signature 
algorithms**: Ed448, Ed25519 (HashIntrinsic) +- **DH groups**: X448, X25519 +- **Certificate fingerprints**: SHA256 +- **Browser-compatible extension**: RSA, ECDSA-SHA256/384/512, P521 for XFTP web + +## Transport block encryption (optional, v11+) + +**Protocol**: [simplex-messaging.md#transport-handshake](../../protocol/simplex-messaging.md#transport-handshake) +**Code**: [Crypto.hs#sbcInit](../../src/Simplex/Messaging/Crypto.hs), [Transport.hs#tPutBlock](../../src/Simplex/Messaging/Transport.hs) + +- **Algorithm**: XSalsa20-Poly1305 (NaCl secret_box) +- **Key derivation**: `sbcInit` - HKDF-SHA512(salt=sessionId, ikm=dhSecret, info="SimpleXSbChainInit") +- **Chain advancement**: `sbcHkdf` - HKDF-SHA512(salt="", ikm=chainKey, info="SimpleXSbChain") +- **16-byte auth tag** reduces available payload + +## SMP queue layer + +**Protocol**: [simplex-messaging.md#cryptographic-algorithms](../../protocol/simplex-messaging.md#cryptographic-algorithms), [simplex-messaging.md#deniable-client-authentication-scheme](../../protocol/simplex-messaging.md#deniable-client-authentication-scheme) +**RFC** (design rationale): [2026-03-09-deniability.md](../../rfcs/standard/2026-03-09-deniability.md) +**Code**: [Crypto.hs#cbEncrypt](../../src/Simplex/Messaging/Crypto.hs), [Protocol.hs](../../src/Simplex/Messaging/Protocol.hs) + +- **Message body encryption**: NaCl crypto_box (X25519 + XSalsa20-Poly1305) with per-queue DH secret +- **Recipient/notifier commands**: Ed25519/Ed448 signatures +- **Sender commands**: X25519 DH-based `CbAuthenticator` (80 bytes = SHA512 hash encrypted with crypto_box) - provides deniability +- **Nonce**: correlation ID (24 bytes) +- **Server-to-recipient encryption**: `encryptMsg` with XSalsa20-Poly1305, nonce derived from message ID + +## SMP proxy layer + +**Protocol**: [simplex-messaging.md#sending-messages-via-proxy-router](../../protocol/simplex-messaging.md#sending-messages-via-proxy-router) +**Code**: 
[Protocol.hs#PRXY](../../src/Simplex/Messaging/Protocol.hs), [Server.hs](../../src/Simplex/Messaging/Server.hs) + +- **Double encryption**: client encrypts for relay (s2r), proxy adds layer for relay (p2r) +- **Per-session X25519 keys**: PKEY response contains relay's DH key signed by relay certificate +- **Session ID binding**: `tlsunique` from proxy-relay TLS session included in encrypted transmission +- **PFWD/RFWD**: correlation ID (24 bytes) used as crypto_box nonce + +## Agent/E2E layer (double ratchet) + +**Protocol**: [pqdr.md](../../protocol/pqdr.md) +**RFC** (versioning/migration): [2026-03-09-pqdr-version.md](../../rfcs/standard/2026-03-09-pqdr-version.md) +**Code**: [Crypto/Ratchet.hs](../../src/Simplex/Messaging/Crypto/Ratchet.hs), [Crypto/SNTRUP761.hs](../../src/Simplex/Messaging/Crypto/SNTRUP761.hs) + +- **DH algorithm**: X448 (not X25519) - `RatchetX448` +- **Post-quantum KEM**: SNTRUP761, hybrid secret = SHA3-256(DHSecret || KEMSharedKey) +- **Key derivation**: HKDF-SHA512 with context strings +- **Header encryption**: AES-256-GCM with header key (HKs) +- **Body encryption**: AES-256-GCM with message key derived from chain key +- **Associated data**: ratchet AD concatenated with encrypted header +- **Split-phase**: header encryption (API thread, serialized) vs body encryption (delivery worker, parallel) + +## XFTP file layer + +**Protocol**: [xftp.md#cryptographic-algorithms](../../protocol/xftp.md#cryptographic-algorithms) +**Code**: [Crypto/File.hs](../../src/Simplex/Messaging/Crypto/File.hs), [Crypto/Lazy.hs](../../src/Simplex/Messaging/Crypto/Lazy.hs), [FileTransfer/Crypto.hs](../../src/Simplex/FileTransfer/Crypto.hs) + +- **File encryption**: XSalsa20-Poly1305 (NaCl secret_box), random 32-byte key + 24-byte nonce per file +- **File integrity**: SHA512 digest in FileDescription +- **Command signing**: Ed25519 per-chunk keys from FileDescription +- **Transit encryption**: per-download X25519 DH, server returns ephemeral key with FGET response +- 
**Streaming**: Poly1305 state updated per chunk, 16-byte auth tag at end (tail tag pattern) + +## NTF (notifications) + +**Protocol**: [push-notifications.md](../../protocol/push-notifications.md) +**Code**: [Notifications/Protocol.hs](../../src/Simplex/Messaging/Notifications/Protocol.hs), [Notifications/Transport.hs](../../src/Simplex/Messaging/Notifications/Transport.hs) + +- **E2E encryption**: NaCl crypto_box between router and client +- **Key exchange**: X25519 DH (clientDhPubKey in TNEW, routerDhPubKey in response) +- **Command auth**: Ed25519 + +## Short links + +**Protocol**: [agent-protocol.md#short-invitation-links](../../protocol/agent-protocol.md#short-invitation-links) +**Code**: [Crypto/ShortLink.hs](../../src/Simplex/Messaging/Crypto/ShortLink.hs) + +- **Link key derivation**: SHA3-256(fixedLinkData) +- **Data encryption**: NaCl secret_box (XSalsa20-Poly1305) with HKDF-derived key +- **Fixed/user data**: padded to fixed sizes (2008/13784 bytes) for traffic analysis resistance +- **Signatures**: Ed25519 for owner authentication + +## Remote control (XRCP) + +**Protocol**: [xrcp.md](../../protocol/xrcp.md) +**Code**: [RemoteControl/Client.hs](../../src/Simplex/RemoteControl/Client.hs), [Crypto/SNTRUP761.hs](../../src/Simplex/Messaging/Crypto/SNTRUP761.hs) + +- **Session key**: SHA3-256(dhSecret || kemSharedKey) - hybrid DH + SNTRUP761 KEM +- **Chain keys**: `sbcInit` with HKDF-SHA512, keys swapped between controller and host +- **Command signing**: Ed25519 session key + long-term key (dual signature) + +## Service certificates + +**Protocol**: [simplex-messaging.md#service-certificates](../../protocol/simplex-messaging.md#service-certificates) +**RFC** (design rationale): [2026-03-10-client-certificates.md](../../rfcs/standard/2026-03-10-client-certificates.md) +**Code**: [Agent/Client.hs#getServiceCredentials](../../src/Simplex/Messaging/Agent/Client.hs), [Transport/Credentials.hs](../../src/Simplex/Messaging/Transport/Credentials.hs) + +- 
**Certificate type**: X.509 with Ed25519 signing key +- **Per-session keys**: fresh Ed25519 key pair per connection, signed by X.509 key +- **Fingerprint**: SHA256 of identity certificate +- **Proof-of-possession**: session key signed by service certificate + +## Primitives reference + +**Code**: [Crypto.hs](../../src/Simplex/Messaging/Crypto.hs) + +- **NaCl crypto_box** (`cbEncrypt`/`cbDecrypt`): X25519 DH + XSalsa20-Poly1305 +- **NaCl crypto_secretbox** (`sbEncrypt`/`sbDecrypt`): symmetric XSalsa20-Poly1305 +- **AES-256-GCM** (`encryptAEAD`/`decryptAEAD`): for ratchet message bodies +- **SNTRUP761**: post-quantum KEM via C FFI bindings - [Crypto/SNTRUP761.hs](../../src/Simplex/Messaging/Crypto/SNTRUP761.hs) +- **CbAuthenticator**: 80-byte authenticator = crypto_box(SHA512(message)) +- **HKDF**: SHA512-based, used with various context strings +- **Hashes**: SHA256 (fingerprints), SHA512 (authenticators, HKDF), SHA3-256 (hybrid KEM, short links) + +## Padding + +**Code**: [Crypto.hs#pad](../../src/Simplex/Messaging/Crypto.hs) + +- **Message padding** (`pad`/`unPad`): 2-byte big-endian length prefix + '#' fill +- **Short link data**: fixed-size encrypted blobs +- **XFTP hello**: 16384 bytes (indistinguishable from commands) +- **Ratchet header**: padded before encryption to hide KEM state + +## Key type constraints + +**Code**: [Crypto.hs](../../src/Simplex/Messaging/Crypto.hs) + +- `SignatureAlgorithm`: Ed25519, Ed448 only +- `DhAlgorithm`: X25519, X448 only +- `AuthAlgorithm`: Ed25519, Ed448, X25519 (NOT X448) - for queue command auth From 152c30ca6881e3cee8689d13fbee31332b44d1a6 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Sun, 15 Mar 2026 18:19:06 +0000 Subject: [PATCH 80/91] rcv-services issues --- spec/rcv-services.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spec/rcv-services.md b/spec/rcv-services.md index 6518059f20..0e98c06053 100644 --- a/spec/rcv-services.md +++ 
b/spec/rcv-services.md @@ -478,6 +478,8 @@ update (s, idsHash) = | **R-SVC-05** | Fold blocking | Low | `foldRcvServiceMessages` iterates all service queues sequentially, reading queue records and first messages. For services with many queues, this could take significant time. It runs in a forked thread, so it doesn't block the client's command processing, but the ALLS marker is delayed. No progress signal between SOKS and ALLS -- client doesn't know how many messages to expect. | | **R-SVC-06** | XOR hash collision | Very Low | IdsHash uses XOR of MD5 hashes. XOR is commutative and associative, so different queue sets with the same XOR-combined hash would not be detected. Given 16-byte hashes, collision probability is negligible for realistic queue counts, but the hash provides no ordering information. | | **R-SVC-07** | Count underflow in subtractServiceSubs | Very Low | If `n <= n'`, the function returns `(0, mempty)` -- a full reset. This is a defensive fallback but could mask accounting errors. | +| **R-SVC-08** | Big agent service handling diverged from small agent | Medium | Small agent (Client/Agent.hs, NTF-proven) has cleaner service unavailable handling: `notifyUnavailable` clears pending service sub and sends `CAServiceUnavailable` event, triggering queue-by-queue resubscription. Big agent (Agent/Client.hs) lacks equivalent path - errors throw without clearing pending state. TransportSessionMode adds complexity (per-entity vs per-user sessions). Service role validation differs (small agent checks `partyServiceRole`, big agent doesn't). These differences may cause subtle bugs when releasing rcv-services. | +| **R-SVC-09** | Server deferred delivery broken for service queues | Critical | In `tryDeliverMessage` (Server.hs), when a message arrives and the subscribed client's `sndQ` is full, the sync path correctly checks `rcvServiceId qr` to find the service subscriber (lines 1996-1998). 
But the spawned `deliverThread` (line 2043) hardcodes `getSubscribedClient rId (queueSubscribers subscribers)` - it looks in `queueSubscribers` instead of `serviceSubscribers`. For service-subscribed queues, `deliverThread` will never find the client. The message remains marked `SubPending` but is never delivered. Only reconnection or explicit re-subscription will deliver it. Impact: under load when sndQ fills, service clients silently lose message delivery until reconnection. | ### Considered and dismissed @@ -705,3 +707,7 @@ Triggers use `xor_combine` (Postgres equivalent of XOR hash combine) and fire on | **TG-SVC-10** | Medium | No agent-level test for concurrent reconnection — service resubscription racing with individual queue resubscription | | **TG-SVC-11** | Medium | No test for `SERVICE_END` agent event handling — what does the agent do after receiving ENDS? | | **TG-SVC-12** | Low | No test for SQLite trigger correctness — verifying `service_queue_count`/`service_queue_ids_hash` match expected values after insert/delete/update cycles | +| **TG-SVC-13** | High | Big agent lacks `CAServiceUnavailable` equivalent — no clean path to resubscribe all queues individually when service becomes unavailable. Small agent has `notifyUnavailable` which triggers queue-by-queue resubscription; big agent just throws error | +| **TG-SVC-14** | Medium | `pendingServiceSub` not cleared on service errors — small agent clears pending in `notifyUnavailable`; big agent may retain stale pending service subs after `clientServiceError` or `SSErrorServiceId` | +| **TG-SVC-15** | High | Missing `rcvServiceAssoc` cleanup on service unavailable — TODO at Agent/Client.hs:1742 notes this is incomplete. 
When service ID changes or becomes unavailable, queue associations should be cleared in database | +| **TG-SVC-16** | Critical | **Server bug**: `deliverThread` uses wrong subscriber lookup for service queues — At Server.hs:2043, deferred delivery (when sndQ is full) always uses `queueSubscribers`, but service clients are in `serviceSubscribers`. The sync path (lines 1996-1998) correctly checks `rcvServiceId qr`. Messages sent when sndQ is full will never be delivered to service subscribers until reconnection/resubscription. | From 8f4274763b956fb7eed78483387b7fb6db1c8260 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 20 Mar 2026 07:54:26 +0000 Subject: [PATCH 81/91] smp: service fixes (#1737) * smp: deliver service subscription to correct client * tests: more resilient to concurrency * optimize PostgreSQL query * fix service re-association after server "downgrade" * correctly handle service removed from server (and ID changed) * remove unused --------- Co-authored-by: Evgeny @ SimpleX Chat <259188159+evgeny-simplex@users.noreply.github.com> --- src/Simplex/Messaging/Agent.hs | 4 +- src/Simplex/Messaging/Agent/Client.hs | 21 ++++- .../Messaging/Agent/Store/AgentStore.hs | 16 +++- .../Messaging/Notifications/Server/Store.hs | 4 - src/Simplex/Messaging/Server.hs | 2 +- .../Messaging/Server/MsgStore/Postgres.hs | 10 +- tests/AgentTests/FunctionalAPITests.hs | 92 +++++++++++++++++++ tests/ServerTests.hs | 12 ++- 8 files changed, 139 insertions(+), 22 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 9e637ca960..8483bfebfb 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -1633,11 +1633,11 @@ subscribeAllConnections' c onlyNeeded activeUserId_ = handleErr $ do Left e -> do atomically $ writeTBQueue (subQ c) ("", "", AEvt SAEConn $ ERR e) if clientServiceError e - then unassocQueues + then False <$ withStore' c (\db -> unassocUserServerRcvQueueSubs' db userId srv) else pure True where 
unassocQueues :: AM Bool - unassocQueues = False <$ withStore' c (\db -> unassocUserServerRcvQueueSubs' db userId srv) + unassocQueues = False <$ withStore' c (\db -> removeRcvServiceAssocs db userId srv) _ -> pure False subscribeUserServer :: Int -> TVar Int -> ((UserId, SMPServer), ServiceAssoc) -> AM' (Either AgentErrorType Int) subscribeUserServer maxPending currPending ((userId, srv), hasService) = do diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 46a441aaf2..a3178b6b4e 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -1726,20 +1726,33 @@ processClientNotices c@AgentClient {presetServers} tSess notices = do resubscribeClientService :: AgentClient -> SMPTransportSession -> ServiceSub -> AM ServiceSubResult resubscribeClientService c tSess@(userId, srv, _) serviceSub = - tryAllErrors (withServiceClient c tSess $ \smp _ -> subscribeClientService_ c True tSess smp serviceSub) >>= \case + tryAllErrors (withServiceClient c tSess subscribeOrUpdate) >>= \case Right r@(ServiceSubResult e _) -> case e of - Just SSErrorServiceId {} -> unassocSubscribeQueues $> r + Just SSErrorServiceId {} -> + r <$ withStore' c (\db -> removeRcvServiceAssocs db userId srv) _ -> pure r Left e -> do - when (clientServiceError e) $ unassocSubscribeQueues atomically $ writeTBQueue (subQ c) ("", "", AEvt SAEConn $ ERR e) + when (clientServiceError e) $ do + atomically $ SS.deleteServiceSub tSess $ currentSubs c + unassocSubscribeQueues throwE e where + subscribeOrUpdate smp connServiceId + | connServiceId == SMP.smpServiceId serviceSub = + subscribeClientService_ c True tSess smp serviceSub + | otherwise = do + let newServiceSub = SMP.ServiceSub connServiceId 0 mempty + sessId = sessionId $ thParams smp + r = serviceSubResult serviceSub newServiceSub + atomically $ whenM (activeClientSession c tSess sessId) $ + SS.setActiveServiceSub tSess sessId newServiceSub $ currentSubs c + notifySub c $ 
SERVICE_UP srv r + pure r unassocSubscribeQueues = do qs <- withStore' c $ \db -> unassocUserServerRcvQueueSubs db userId srv void $ lift $ subscribeUserServerQueues c userId srv qs --- TODO [certs rcv] update service in the database if it has different ID and re-associate queues, and send event subscribeClientService :: AgentClient -> Bool -> UserId -> SMPServer -> ServiceSub -> AM ServiceSubResult subscribeClientService c withEvent userId srv (ServiceSub _ n idsHash) = withServiceClient c tSess $ \smp smpServiceId -> do diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 32720dd85c..0853bf6269 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -2354,18 +2354,28 @@ getUserServerRcvQueueSubs db userId (SMPServer h p kh) onlyNeeded hasService = unassocUserServerRcvQueueSubs :: DB.Connection -> UserId -> SMPServer -> IO [RcvQueueSub] unassocUserServerRcvQueueSubs db userId srv@(SMPServer h p kh) = do deleteClientService db userId srv +#if defined(dbPostgres) map toRcvQueueSub <$> DB.query db - (removeRcvAssocsQuery <> " " <> returningColums) + (removeRcvAssocsQuery <> " " <> returningColumns) (h, p, userId, kh) where - returningColums = + returningColumns = [sql| RETURNING c.user_id, rcv_queues.conn_id, rcv_queues.host, rcv_queues.port, COALESCE(rcv_queues.server_key_hash, s.key_hash), rcv_queues.rcv_id, rcv_queues.rcv_private_key, rcv_queues.status, c.enable_ntfs, rcv_queues.client_notice_id, rcv_queues.rcv_queue_id, rcv_queues.rcv_primary, rcv_queues.replace_rcv_queue_id |] +#else + qs <- map toRcvQueueSub + <$> DB.query + db + (rcvQueueSubQuery <> " WHERE c.user_id = ? AND q.host = ? AND q.port = ? AND COALESCE(q.server_key_hash, s.key_hash) = ? 
AND q.rcv_service_assoc = 1") + (userId, h, p, kh) + DB.execute db removeRcvAssocsQuery (h, p, userId, kh) + pure qs +#endif unassocUserServerRcvQueueSubs' :: DB.Connection -> UserId -> SMPServer -> IO () unassocUserServerRcvQueueSubs' db userId srv@(SMPServer h p kh) = do @@ -2376,7 +2386,7 @@ unsetQueuesToSubscribe :: DB.Connection -> IO () unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = 0 WHERE to_subscribe = 1" setRcvServiceAssocs :: SMPQueue q => DB.Connection -> [q] -> IO () -setRcvServiceAssocs db rqs = +setRcvServiceAssocs db rqs = do #if defined(dbPostgres) DB.execute db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id IN ?" $ Only $ In (map queueId rqs) #else diff --git a/src/Simplex/Messaging/Notifications/Server/Store.hs b/src/Simplex/Messaging/Notifications/Server/Store.hs index 0486978ecb..294fe04989 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store.hs @@ -26,7 +26,6 @@ module Simplex.Messaging.Notifications.Server.Store stmAddNtfSubscription, stmDeleteNtfSubscription, stmStoreTokenLastNtf, - stmSetNtfService, ) where @@ -205,9 +204,6 @@ stmStoreTokenLastNtf (NtfSTMStore {tokens, tokenLastNtfs}) tknId ntf = do whenM (TM.member tknId tokens) $ TM.insertM tknId (newTVar [ntf]) tokenLastNtfs -stmSetNtfService :: NtfSTMStore -> SMPServer -> Maybe ServiceId -> STM () -stmSetNtfService (NtfSTMStore {ntfServices}) srv serviceId = - maybe (TM.delete srv) (TM.insert srv) serviceId ntfServices data TokenNtfMessageRecord = TNMRv1 NtfTokenId PNMessageData diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 3d977dc8c4..e50416af67 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -2037,7 +2037,7 @@ client labelMyThread $ B.unpack ("client $" <> encode sessionId) <> " deliver/SEND" -- lookup can be outside of STM transaction, -- as long as the check that it is the same client is inside. 
- getSubscribedClient rId (queueSubscribers subscribers) >>= mapM_ deliverIfSame + getSubscribed >>= mapM_ deliverIfSame deliverIfSame rcv = do ts <- getSystemSeconds atomically $ whenM (sameClient rc rcv) $ diff --git a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs index edf7f481cd..77d9973e6b 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs @@ -131,11 +131,13 @@ instance MsgStoreClass PostgresMsgStore where q.status, q.updated_at, q.link_id, q.rcv_service_id, m.msg_id, m.msg_ts, m.msg_quota, m.msg_ntf_flag, m.msg_body FROM msg_queues q - LEFT JOIN ( - SELECT recipient_id, msg_id, msg_ts, msg_quota, msg_ntf_flag, msg_body, - ROW_NUMBER() OVER (PARTITION BY recipient_id ORDER BY message_id ASC) AS row_num + LEFT JOIN LATERAL ( + SELECT msg_id, msg_ts, msg_quota, msg_ntf_flag, msg_body FROM messages - ) m ON q.recipient_id = m.recipient_id AND m.row_num = 1 + WHERE recipient_id = q.recipient_id + ORDER BY message_id ASC + LIMIT 1 + ) m ON true WHERE q.rcv_service_id = ? 
AND q.deleted_at IS NULL; |] (Only serviceId) diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index b824b61c34..b44280316c 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -110,6 +110,7 @@ import Simplex.Messaging.Server.MsgStore.Types (SMSType (..), SQSType (..)) import Simplex.Messaging.Server.QueueStore.QueueInfo import Simplex.Messaging.Server.StoreLog (StoreLogRecord (..)) import Simplex.Messaging.Transport (ASrvTransport, SMPVersion, VersionSMP, authCmdsSMPVersion, currentServerSMPRelayVersion, minClientSMPRelayVersion, minServerSMPRelayVersion, sendingProxySMPVersion, sndAuthKeySMPVersion, alpnSupportedSMPHandshakes, supportedServerSMPRelayVRange) +import Simplex.Messaging.Transport.Server (TransportServerConfig (..)) import Simplex.Messaging.Util (bshow, diffToMicroseconds) import Simplex.Messaging.Version (VersionRange (..)) import qualified Simplex.Messaging.Version as V @@ -491,6 +492,8 @@ functionalAPITests ps = do describe "Client service certificates" $ do it "should connect, subscribe and reconnect as a service" $ testClientServiceConnection ps it "should re-subscribe when service ID changed" $ testClientServiceIDChange ps + it "should clear pending service sub when service unavailable" $ testServiceUnavailableClearsPending ps + it "should recover when service ID changes on reconnect" $ testServiceIdChangeOnReconnect ps it "migrate connections to and from service" $ testMigrateConnectionsToService ps describe "Connection switch" $ do describe "should switch delivery to the new queue" $ @@ -3905,6 +3908,95 @@ testClientServiceIDChange ps@(_, ASType qs _) = do ("", "", UP _ [_]) <- nGet user exchangeGreetingsMsgId 6 notService uId user sId +-- | Test that service subscription is correctly cleared and re-established +-- when server temporarily stops supporting services (askClientCert = False). 
+testServiceUnavailableClearsPending :: HasCallStack => (ASrvTransport, AStoreType) -> IO () +testServiceUnavailableClearsPending (t, msType) = do + -- Same agent across all phases to test pendingServiceSub persistence + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + -- Phase 1: Establish connection with active service subscription on normal server + (_sId, _uId) <- withSmpServerStoreLogOn (t, msType) testPort $ \_ -> runRight $ do + conns@(sId, uId) <- makeConnection service user + exchangeGreetings service uId user sId + pure conns + ("", "", SERVICE_DOWN _ _) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + -- Phase 2: Server without service support: agent gets NO_SERVICE, queue resubscribed without service + let cfgNoService = updateCfg (cfgMS msType) $ \(cfg' :: ServerConfig s) -> + let ServerConfig {transportConfig} = cfg' + in cfg' {transportConfig = transportConfig {askClientCert = False}} :: ServerConfig s + withSmpServerConfigOn t cfgNoService testPort $ \_ -> do + ("", "", ERR (BROKER _ NO_SERVICE)) <- get service + ("", "", UP _ [_]) <- nGet service + ("", "", UP _ [_]) <- nGet user + pure () + ("", "", DOWN _ [_]) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + -- Phase 3: Server with service support restored: only queue subscription, no service subscription + withSmpServerStoreLogOn (t, msType) testPort $ \_ -> do + e1 <- nGet service + case e1 of + ("", "", UP _ [_]) -> pure () -- Fixed: only queue subscription, no service subscription + ("", "", SERVICE_UP _ _) -> + expectationFailure "pendingServiceSub not cleared, service subscription attempted again" + ("", "", SERVICE_ALL _) -> + expectationFailure "pendingServiceSub not cleared, service subscription attempted again" + other -> expectationFailure $ "Unexpected first event: " <> show other + ("", "", UP _ [_]) <- nGet user + pure () + -- Phase 4: After another reconnect cycle, service subscription is 
re-established + ("", "", SERVICE_DOWN _ _) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + withSmpServerStoreLogOn (t, msType) testPort $ \_ -> do + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ _)) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + ("", "", UP _ [_]) <- nGet user + pure () + +-- | Test that service subscription recovers when service ID changes on reconnect. +-- Server restart with deleted service causes new service ID, triggering SSErrorServiceId. +-- Queues should be unassociated, resubscribed, and re-associated with new service. +testServiceIdChangeOnReconnect :: HasCallStack => (ASrvTransport, AStoreType) -> IO () +testServiceIdChangeOnReconnect ps@(_, ASType qs _) = do + withAgentClientsServers2 (agentCfg, initAgentServersClientService) (agentCfg, initAgentServers) $ \service user -> do + -- Phase 1: Establish connection with active service subscription + (_sId, _uId) <- withSmpServerStoreLogOn ps testPort $ \_ -> runRight $ do + conns@(sId, uId) <- makeConnection service user + exchangeGreetings service uId user sId + pure conns + ("", "", SERVICE_DOWN _ _) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + -- Delete service from server storage, keeping queues + _ :: () <- case qs of + SQSPostgres -> do +#if defined(dbServerPostgres) + st <- either (error . show) pure =<< Postgres.createDBStore testStoreDBOpts serverMigrations (MigrationConfig MCError Nothing) + void $ Postgres.withTransaction st (`PSQL.execute_` "DELETE FROM services") +#else + pure () +#endif + SQSMemory -> do + s <- readFile testStoreLogFile + removeFile testStoreLogFile + writeFile testStoreLogFile $ unlines $ filter (not . 
("NEW_SERVICE" `isPrefixOf`)) $ lines s + -- Phase 2: Server restart with deleted service - new service ID, SSErrorServiceId + withSmpServerStoreLogOn ps testPort $ \_ -> do + ("", "", SERVICE_UP _ _) <- nGet service + ("", "", UP _ [_]) <- nGet user + pure () + -- Phase 3: Normal reconnect - service should subscribe normally + ("", "", SERVICE_DOWN _ _) <- nGet service + ("", "", DOWN _ [_]) <- nGet user + withSmpServerStoreLogOn ps testPort $ \_ -> do + liftIO $ getInAnyOrder service + [ \case ("", "", AEvt SAENone (SERVICE_UP _ _)) -> True; _ -> False, + \case ("", "", AEvt SAENone (SERVICE_ALL _)) -> True; _ -> False + ] + ("", "", UP _ [_]) <- nGet user + pure () + testMigrateConnectionsToService :: HasCallStack => (ASrvTransport, AStoreType) -> IO () testMigrateConnectionsToService ps = do (((sId1, uId1), (uId2, sId2)), ((sId3, uId3), (uId4, sId4)), ((sId5, uId5), (uId6, sId6))) <- diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs index 27a72d2ac1..deace417ee 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -717,7 +717,7 @@ testServiceDeliverSubscribe = signSend_ sh aServicePK Nothing ("11", serviceId, SUBS 1 idsHash) [mId3] <- fmap catMaybes $ - receiveInAnyOrder -- race between SOKS and MSG, clients can handle it + receiveInAnyOrder -- race between SOKS, MSG and ALLS (sndQ and msgQ are separate threads) sh [ \case Resp "11" serviceId' (SOKS n idsHash') -> do @@ -731,9 +731,11 @@ testServiceDeliverSubscribe = rId'' `shouldBe` rId dec mId3 msg3 `shouldBe` Right "hello 3" pure $ Just $ Just mId3 + _ -> pure Nothing, + \case + Resp "" NoEntity ALLS -> pure $ Just Nothing _ -> pure Nothing ] - Resp "" NoEntity ALLS <- tGet1 sh Resp "12" _ OK <- signSendRecv sh rKey ("12", rId, ACK mId3) Resp "14" _ OK <- signSendRecv h sKey ("14", sId, _SEND "hello 4") Resp "" _ (Msg mId4 msg4) <- tGet1 sh @@ -811,7 +813,7 @@ testServiceUpgradeAndDowngrade = signSend_ sh aServicePK Nothing ("14", serviceId, SUBS 3 idsHash) [(rKey3_1, rId3_1, mId3_1), 
(rKey3_2, rId3_2, mId3_2)] <- fmap catMaybes $ - receiveInAnyOrder -- race between SOKS and MSG, clients can handle it + receiveInAnyOrder -- race between SOKS, MSG and ALLS (sndQ and msgQ are separate threads) sh [ \case Resp "14" serviceId' (SOKS n idsHash') -> do @@ -829,9 +831,11 @@ testServiceUpgradeAndDowngrade = Resp "" rId'' (Msg mId3 msg3) | rId'' == rId2 -> do dec2 mId3 msg3 `shouldBe` Right "hello 3.2" pure $ Just $ Just (rKey2, rId2, mId3) + _ -> pure Nothing, + \case + Resp "" NoEntity ALLS -> pure $ Just Nothing _ -> pure Nothing ] - Resp "" NoEntity ALLS <- tGet1 sh Resp "15" _ OK <- signSendRecv sh rKey3_1 ("15", rId3_1, ACK mId3_1) Resp "16" _ OK <- signSendRecv sh rKey3_2 ("16", rId3_2, ACK mId3_2) pure () From e762e84f46a49c73eac83c00ee842306e2a8baa3 Mon Sep 17 00:00:00 2001 From: sh <37271604+shumvgolove@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:11:29 +0000 Subject: [PATCH 82/91] prometheus: fix metrics names (#1747) --- src/Simplex/Messaging/Server/Prometheus.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Simplex/Messaging/Server/Prometheus.hs b/src/Simplex/Messaging/Server/Prometheus.hs index 33ccbd0be8..76e288afe2 100644 --- a/src/Simplex/Messaging/Server/Prometheus.hs +++ b/src/Simplex/Messaging/Server/Prometheus.hs @@ -391,13 +391,13 @@ prometheusMetrics sm rtm ts = \# TYPE simplex_smp_ntf_services_queues_count gauge\n\ \simplex_smp_ntf_services_queues_count " <> mshow (ntfServiceQueuesCount entityCounts) <> "\n# ntfServiceQueuesCount\n\ \\n\ - \# HELP simplex_smp_rcv_services_sub_msg The count of subscribed service queues with messages.\n\ - \# TYPE simplex_smp_rcv_services_sub_msg counter\n\ - \simplex_smp_rcv_services_sub_msg " <> mshow _rcvServicesSubMsg <> "\n# rcvServicesSubMsg\n\ + \# HELP simplex_smp_rcv_services_sub_msg_count The count of subscribed service queues with messages.\n\ + \# TYPE simplex_smp_rcv_services_sub_msg_count counter\n\ + \simplex_smp_rcv_services_sub_msg_count " 
<> mshow _rcvServicesSubMsg <> "\n# rcvServicesSubMsg\n\ \\n\ - \# HELP simplex_smp_rcv_services_sub_duplicate The count of duplicate subscribed service queues.\n\ - \# TYPE simplex_smp_rcv_services_sub_duplicate counter\n\ - \simplex_smp_rcv_services_sub_duplicate " <> mshow _rcvServicesSubDuplicate <> "\n# rcvServicesSubDuplicate\n\ + \# HELP simplex_smp_rcv_services_sub_duplicate_count The count of duplicate subscribed service queues.\n\ + \# TYPE simplex_smp_rcv_services_sub_duplicate_count counter\n\ + \simplex_smp_rcv_services_sub_duplicate_count " <> mshow _rcvServicesSubDuplicate <> "\n# rcvServicesSubDuplicate\n\ \\n" <> showServices _rcvServices "rcv" "receiving" <> showServices _ntfServices "ntf" "notification" From a54518afe1bd487f9c25340993193b5b259154c3 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:12:16 +0000 Subject: [PATCH 83/91] test: rcv service re-association on restart (#1746) --- tests/AgentTests/FunctionalAPITests.hs | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/AgentTests/FunctionalAPITests.hs b/tests/AgentTests/FunctionalAPITests.hs index b44280316c..e435e59f96 100644 --- a/tests/AgentTests/FunctionalAPITests.hs +++ b/tests/AgentTests/FunctionalAPITests.hs @@ -494,6 +494,7 @@ functionalAPITests ps = do it "should re-subscribe when service ID changed" $ testClientServiceIDChange ps it "should clear pending service sub when service unavailable" $ testServiceUnavailableClearsPending ps it "should recover when service ID changes on reconnect" $ testServiceIdChangeOnReconnect ps + it "should handle service unavailable on startup" $ testServiceUnavailableOnStartup ps it "migrate connections to and from service" $ testMigrateConnectionsToService ps describe "Connection switch" $ do describe "should switch delivery to the new queue" $ @@ -3997,6 +3998,42 @@ testServiceIdChangeOnReconnect ps@(_, ASType qs _) = do ("", "", UP _ 
[_]) <- nGet user pure () +-- | Test that subscribeAllConnections handles service unavailable on startup. +-- Agent has service credentials but server doesn't support services (askClientCert = False). +testServiceUnavailableOnStartup :: HasCallStack => (ASrvTransport, AStoreType) -> IO () +testServiceUnavailableOnStartup (t, msType) = do + let srv = initAgentServersClientService + noSrv = initAgentServers + -- Phase 1: Establish connection with service + (sId, uId) <- withAgentClientsServers2 (agentCfg, srv) (agentCfg, noSrv) $ \service user -> + withSmpServerStoreLogOn (t, msType) testPort $ \_ -> runRight $ do + conns@(sId, uId) <- makeConnection service user + exchangeGreetings service uId user sId + pure conns + -- Phase 2: Server without service support, new service agent + let cfgNoService = updateCfg (cfgMS msType) $ \(cfg' :: ServerConfig s) -> + let ServerConfig {transportConfig} = cfg' + in cfg' {transportConfig = transportConfig {askClientCert = False}} :: ServerConfig s + -- Phase 2: Server without service support, service agent gets NO_SERVICE + withAgentClientsServers2 (agentCfg, srv) (agentCfg, noSrv) $ \service user -> + withSmpServerConfigOn t cfgNoService testPort $ \_ -> runRight $ do + subscribeAllConnections service False Nothing + ("", "", ERR (BROKER _ NO_SERVICE)) <- get service + ("", "", UP _ [_]) <- nGet service + subscribeAllConnections user False Nothing + ("", "", UP _ [_]) <- nGet user + exchangeGreetingsMsgId 4 service uId user sId + -- Phase 3: Normal server - cert was deleted, new cert generated, + -- no service sub in DB yet, queues subscribed individually + withAgentClientsServers2 (agentCfg, srv) (agentCfg, noSrv) $ \service user -> + withSmpServerStoreLogOn (t, msType) testPort $ \_ -> runRight $ do + liftIO $ threadDelay 250000 + subscribeAllConnections service False Nothing + ("", "", UP _ [_]) <- nGet service + subscribeAllConnections user False Nothing + ("", "", UP _ [_]) <- nGet user + exchangeGreetingsMsgId 6 service uId 
user sId + testMigrateConnectionsToService :: HasCallStack => (ASrvTransport, AStoreType) -> IO () testMigrateConnectionsToService ps = do (((sId1, uId1), (uId2, sId2)), ((sId3, uId3), (uId4, sId4)), ((sId5, uId5), (uId6, sId6))) <- From 2012236f6594ef7bbd7e8f494915e4d239467b27 Mon Sep 17 00:00:00 2001 From: "Evgeny @ SimpleX Chat" <259188159+evgeny-simplex@users.noreply.github.com> Date: Mon, 23 Mar 2026 14:02:16 +0000 Subject: [PATCH 84/91] agent: correct log message --- src/Simplex/Messaging/Agent/Client.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index a3178b6b4e..c1f7ab6859 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -1710,7 +1710,7 @@ processRcvServiceAssocs :: SMPQueue q => AgentClient -> [q] -> AM' () processRcvServiceAssocs _ [] = pure () processRcvServiceAssocs c serviceQs = withStore' c (`setRcvServiceAssocs` serviceQs) `catchAllErrors'` \e -> do - logError $ "processClientNotices error: " <> tshow e + logError $ "processRcvServiceAssocs error: " <> tshow e notifySub' c "" $ ERR e processClientNotices :: AgentClient -> SMPTransportSession -> [(RcvQueueSub, Maybe ClientNotice)] -> AM' () From 909c974445812ee2189397dec8a8cfa752929826 Mon Sep 17 00:00:00 2001 From: Evgeny Poberezkin Date: Thu, 26 Mar 2026 20:48:09 +0000 Subject: [PATCH 85/91] docs: update whitepaper --- protocol/overview-tjr.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protocol/overview-tjr.md b/protocol/overview-tjr.md index 2fa8f717e7..f3198e2546 100644 --- a/protocol/overview-tjr.md +++ b/protocol/overview-tjr.md @@ -225,13 +225,13 @@ For encryption primitives, threat model, and detailed security analysis, see [Se SimpleX provides these security properties: -- **End-to-end encryption** with forward secrecy via double ratchet protocol, with optional post-quantum protection. 
+- **End-to-end encryption** using Double Ratchet algorithm with forward secrecy and post-quantum cryptography. - **No shared identifiers** across connections — contacts cannot prove they communicate with the same user. - **Sender deniability** — neither routers nor recipients can cryptographically prove message origin. -- **Transport metadata protection** — fixed-size blocks, 2-hop onion routing, and connection isolation frustrate traffic correlation. +- **Transport metadata protection** — fixed-size blocks, 2-hop onion routing, and optional connection isolation frustrate traffic correlation. - **Out-of-band key exchange** — connection requests passed outside the network protect against MITM attacks. From 3134d6206d97e07252cbc22b9015184051fc59d4 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sat, 28 Mar 2026 09:12:23 +0000 Subject: [PATCH 86/91] smp: fix messaging client service issues (#1751) * services: fix minor issues * fix accounting for subscribed service queues, add prometheus stats * fix uncorrelated subquery * fix potential race condition when inserting service defensively, as it is also prevented by how client is created --------- Co-authored-by: Evgeny @ SimpleX Chat <259188159+evgeny-simplex@users.noreply.github.com> --- src/Simplex/Messaging/Agent.hs | 2 +- src/Simplex/Messaging/Agent/Client.hs | 4 +- .../Messaging/Agent/Store/AgentStore.hs | 32 +++---- .../Notifications/Server/Store/Postgres.hs | 2 +- src/Simplex/Messaging/Server.hs | 11 +-- src/Simplex/Messaging/Server/Prometheus.hs | 11 ++- tests/ServerTests.hs | 83 +++++++++++++++++++ 7 files changed, 118 insertions(+), 27 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 8483bfebfb..7e5c804364 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -828,7 +828,7 @@ setUserService' c userId enable = do let changed = enable /= wasEnabled when changed $ TM.insert userId enable $ useClientServices c pure (True, changed) - unless 
ok $ throwE $ CMD PROHIBITED "setNetworkConfig" + unless ok $ throwE $ CMD PROHIBITED "setUserService" when (changed && not enable) $ withStore' c (`deleteClientServices` userId) newConnAsync :: ConnectionModeI c => AgentClient -> UserId -> ACorrId -> Bool -> SConnectionMode c -> CR.InitialKeys -> SubscriptionMode -> AM ConnId diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index c1f7ab6859..92de1dd49c 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -625,9 +625,7 @@ getServiceCredentials c userId srv = Just service -> pure service Nothing -> do cred <- genCredentials g Nothing (25, 24 * 999999) "simplex" - let tlsCreds = tlsCredentials [cred] - createClientService db userId srv tlsCreds - pure (tlsCreds, Nothing) + createClientService db userId srv $ tlsCredentials [cred] serviceSignKey <- liftEitherWith INTERNAL $ C.x509ToPrivate' $ snd serviceCreds let creds = ServiceCredentials {serviceRole = SRMessaging, serviceCreds, serviceCertHash = XV.Fingerprint kh, serviceSignKey} pure (creds, serviceId_) diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 32e5e6aceb..f6d1daebe7 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -410,23 +410,23 @@ deleteUsersWithoutConns db = do forM_ userIds $ DB.execute db "DELETE FROM users WHERE user_id = ?" . 
Only pure userIds -createClientService :: DB.Connection -> UserId -> SMPServer -> (C.KeyHash, TLS.Credential) -> IO () -createClientService db userId srv (kh, (cert, pk)) = do +createClientService :: DB.Connection -> UserId -> SMPServer -> (C.KeyHash, TLS.Credential) -> IO ((C.KeyHash, TLS.Credential), Maybe ServiceId) +createClientService db userId srv tlsCreds@(kh, (cert, pk)) = do serverKeyHash_ <- createServer db srv - DB.execute - db - [sql| - INSERT INTO client_services - (user_id, host, port, server_key_hash, service_cert_hash, service_cert, service_priv_key) - VALUES (?,?,?,?,?,?,?) - ON CONFLICT (user_id, host, port, server_key_hash) - DO UPDATE SET - service_cert_hash = EXCLUDED.service_cert_hash, - service_cert = EXCLUDED.service_cert, - service_priv_key = EXCLUDED.service_priv_key, - service_id = NULL - |] - (userId, host srv, port srv, serverKeyHash_, kh, cert, pk) + (rs :: [Only Int]) <- + DB.query + db + [sql| + INSERT INTO client_services + (user_id, host, port, server_key_hash, service_cert_hash, service_cert, service_priv_key) + VALUES (?,?,?,?,?,?,?) 
+ ON CONFLICT (user_id, host, port, server_key_hash) DO NOTHING + RETURNING 1 + |] + (userId, host srv, port srv, serverKeyHash_, kh, cert, pk) + if null rs + then fromMaybe (tlsCreds, Nothing) <$> getClientServiceCredentials db userId srv + else pure (tlsCreds, Nothing) getClientServiceCredentials :: DB.Connection -> UserId -> SMPServer -> IO (Maybe ((C.KeyHash, TLS.Credential), Maybe ServiceId)) getClientServiceCredentials db userId srv = diff --git a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs index aca573d21f..7cf5a438b2 100644 --- a/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs +++ b/src/Simplex/Messaging/Notifications/Server/Store/Postgres.hs @@ -270,7 +270,7 @@ getUsedSMPServers st = smp_host, smp_port, smp_keyhash, smp_server_id, ntf_service_id, smp_notifier_count, smp_notifier_ids_hash FROM smp_servers - WHERE EXISTS (SELECT 1 FROM subscriptions WHERE status IN ?) + WHERE EXISTS (SELECT 1 FROM subscriptions WHERE smp_server_id = smp_servers.smp_server_id AND status IN ?) 
|] (Only (In subscribeNtfStatuses)) where diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index e50416af67..ab001bbc3b 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -292,7 +292,7 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt pure $ as ++ as' CSService serviceId changedSubs -> do modifyTVar' subClients $ IS.insert clntId -- add ID to server's subscribed cients - modifyTVar' totalServiceSubs $ subtractServiceSubs changedSubs -- server count and IDs hash for all services + modifyTVar' totalServiceSubs $ addServiceSubs changedSubs -- server count and IDs hash for all services cancelServiceSubs serviceId =<< upsertSubscribedClient serviceId c serviceSubscribers updateSubDisconnected = case clntSub of -- do not insert client if it is already disconnected, but send END/DELD to any other client subscribed to this queue or service @@ -701,12 +701,13 @@ smpServer started cfg@ServerConfig {transports, transportConfig = tCfg, startOpt loadedCounts <- loadedQueueCounts $ fromMsgStore ms pure RealTimeMetrics {socketStats, threadsCount, clientsCount, deliveredSubs, deliveredTimes, smpSubs, ntfSubs, loadedCounts} where - getSubscribersMetrics ServerSubscribers {queueSubscribers, serviceSubscribers, subClients} = do + getSubscribersMetrics ServerSubscribers {queueSubscribers, serviceSubscribers, totalServiceSubs, subClients} = do subsCount <- M.size <$> getSubscribedClients queueSubscribers subClientsCount <- IS.size <$> readTVarIO subClients subServicesCount <- M.size <$> getSubscribedClients serviceSubscribers - pure RTSubscriberMetrics {subsCount, subClientsCount, subServicesCount} - getDeliveredMetrics ts' = foldM countClnt (RTSubscriberMetrics 0 0 0, emptyTimeBuckets) =<< getServerClients srv + subServiceSubsCount <- fst <$> readTVarIO totalServiceSubs + pure RTSubscriberMetrics {subsCount, subClientsCount, subServicesCount, subServiceSubsCount} + getDeliveredMetrics 
ts' = foldM countClnt (RTSubscriberMetrics 0 0 0 0, emptyTimeBuckets) =<< getServerClients srv where countClnt acc@(metrics, times) Client {subscriptions} = do (cnt, times') <- foldM countSubs (0, times) =<< readTVarIO subscriptions @@ -1863,7 +1864,7 @@ client let incSrvStat sel n = liftIO $ atomicModifyIORef'_ (sel $ servicesSel stats) (+ n) diff = fromIntegral $ count' - count if -- `count == -1` only for subscriptions by old NTF servers - | count == -1 && (diff == 0 && idsHash == idsHash') -> incSrvStat srvSubOk 1 + | count == -1 || (diff == 0 && idsHash == idsHash') -> incSrvStat srvSubOk 1 | diff > 0 -> incSrvStat srvSubMore 1 >> incSrvStat srvSubMoreTotal diff | diff < 0 -> incSrvStat srvSubFewer 1 >> incSrvStat srvSubFewerTotal (- diff) | otherwise -> incSrvStat srvSubDiff 1 diff --git a/src/Simplex/Messaging/Server/Prometheus.hs b/src/Simplex/Messaging/Server/Prometheus.hs index 76e288afe2..32e8bd9a10 100644 --- a/src/Simplex/Messaging/Server/Prometheus.hs +++ b/src/Simplex/Messaging/Server/Prometheus.hs @@ -52,7 +52,8 @@ data RealTimeMetrics = RealTimeMetrics data RTSubscriberMetrics = RTSubscriberMetrics { subsCount :: Int, subClientsCount :: Int, - subServicesCount :: Int + subServicesCount :: Int, + subServiceSubsCount :: Int64 } {-# FOURMOLU_DISABLE\n#-} @@ -517,6 +518,10 @@ prometheusMetrics sm rtm ts = \# TYPE simplex_smp_subscribtion_services_total gauge\n\ \simplex_smp_subscribtion_services_total " <> mshow (subServicesCount smpSubs) <> "\n# smp.subServicesCount\n\ \\n\ + \# HELP simplex_smp_subscribtion_service_subs_total Total queues subscribed via services\n\ + \# TYPE simplex_smp_subscribtion_service_subs_total gauge\n\ + \simplex_smp_subscribtion_service_subs_total " <> mshow (subServiceSubsCount smpSubs) <> "\n# smp.subServiceSubsCount\n\ + \\n\ \# HELP simplex_smp_subscription_ntf_total Total notification subscripbtions (from ntf server)\n\ \# TYPE simplex_smp_subscription_ntf_total gauge\n\ \simplex_smp_subscription_ntf_total " <> mshow 
(subsCount ntfSubs) <> "\n# ntf.subsCount\n\ @@ -529,6 +534,10 @@ prometheusMetrics sm rtm ts = \# TYPE simplex_smp_subscribtion_nts_services_total gauge\n\ \simplex_smp_subscribtion_nts_services_total " <> mshow (subServicesCount ntfSubs) <> "\n# ntf.subServicesCount\n\ \\n\ + \# HELP simplex_smp_subscription_ntf_service_subs_total Total queues subscribed via NTF services\n\ + \# TYPE simplex_smp_subscription_ntf_service_subs_total gauge\n\ + \simplex_smp_subscription_ntf_service_subs_total " <> mshow (subServiceSubsCount ntfSubs) <> "\n# ntf.subServiceSubsCount\n\ + \\n\ \# HELP simplex_smp_loaded_queues_queue_count Total loaded queues count (all queues for memory/journal storage)\n\ \# TYPE simplex_smp_loaded_queues_queue_count gauge\n\ \simplex_smp_loaded_queues_queue_count " <> mshow (loadedQueueCount loadedCounts) <> "\n# loadedCounts.loadedQueueCount\n\ diff --git a/tests/ServerTests.hs b/tests/ServerTests.hs index deace417ee..7f342f6ae3 100644 --- a/tests/ServerTests.hs +++ b/tests/ServerTests.hs @@ -33,8 +33,10 @@ import Data.Foldable (foldrM) import Data.Hashable (hash) import qualified Data.IntSet as IS import Data.List.NonEmpty (NonEmpty) +import Data.List (isPrefixOf) import Data.Maybe (catMaybes) import Data.String (IsString (..)) +import Text.Read (readMaybe) import Data.Type.Equality import qualified Data.X509.Validation as XV import GHC.Stack (withFrozenCallStack) @@ -90,6 +92,7 @@ serverTests = do describe "Service message subscriptions" $ do testServiceDeliverSubscribe testServiceUpgradeAndDowngrade + testServiceSubsTotalCount describe "Store log" testWithStoreLog describe "Restore messages" testRestoreMessages describe "Restore messages (old / v2)" testRestoreExpireMessages @@ -862,6 +865,86 @@ testServiceUpgradeAndDowngrade = Resp "25" _ OK <- signSendRecv sh rKey ("25", rId, ACK mId6) pure () +testServiceSubsTotalCount :: SpecWith (ASrvTransport, AStoreType) +testServiceSubsTotalCount = + it "should track totalServiceSubs correctly via SUBS 
and SUB" $ \(at@(ATransport t), msType) -> do + g <- C.newRandom + creds <- genCredentials g Nothing (0, 2400) "localhost" + let (_fp, tlsCred) = tlsCredentials [creds] + serviceKeys@(_, servicePK) <- atomically $ C.generateKeyPair g + let aServicePK = C.APrivateAuthKey C.SEd25519 servicePK + cfg' = updateCfg (cfgMS msType) $ \cfg_ -> cfg_ {prometheusInterval = Just 1} + withSmpServerConfigOn at cfg' testPort $ \_ -> runSMPClient t $ \h -> do + -- Phase 1: create 2 queues as service, reconnect with SUBS, check metric = 2 + (rPub1, rKey1) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub1, _ :: C.PrivateKeyX25519) <- atomically $ C.generateKeyPair g + (rPub2, rKey2) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub2, _ :: C.PrivateKeyX25519) <- atomically $ C.generateKeyPair g + + (rId1, rId2, serviceId) <- runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + Resp "1" NoEntity (Ids_ rId1 _sId1 _srvDh1 serviceId) <- serviceSignSendRecv sh rKey1 servicePK ("1", NoEntity, New rPub1 dhPub1) + Resp "2" NoEntity (Ids_ rId2 _sId2 _srvDh2 serviceId') <- serviceSignSendRecv sh rKey2 servicePK ("2", NoEntity, New rPub2 dhPub2) + serviceId' `shouldBe` serviceId + pure (rId1, rId2, serviceId) + + runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + let idsHash = queueIdsHash [rId1, rId2] + signSend_ sh aServicePK Nothing ("3", serviceId, SUBS 2 idsHash) + void $ + receiveInAnyOrder sh + [ \case + Resp "3" serviceId' (SOKS n idsHash') -> do + n `shouldBe` 2 + idsHash' `shouldBe` idsHash + serviceId' `shouldBe` serviceId + pure $ Just () + _ -> pure Nothing, + \case + Resp "" NoEntity ALLS -> pure $ Just () + _ -> pure Nothing + ] + threadDelay 1500000 + readFile testPrometheusMetricsFile >>= \m -> readServiceSubsMetric m `shouldBe` Just 2 + + -- Phase 2: associate 1 more queue via SUB, reconnect with SUBS 3, check metric = 3 + (rPub3, rKey3) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + (dhPub3, _ :: C.PrivateKeyX25519) <- atomically $ 
C.generateKeyPair g + (sPub3, sKey3) <- atomically $ C.generateAuthKeyPair C.SEd25519 g + Resp "4" NoEntity (Ids rId3 sId3 _) <- signSendRecv h rKey3 ("4", NoEntity, New rPub3 dhPub3) + Resp "5" _ OK <- signSendRecv h sKey3 ("5", sId3, SKEY sPub3) + + runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + Resp "6" _ (SOK (Just serviceId')) <- serviceSignSendRecv sh rKey3 servicePK ("6", rId3, SUB) + serviceId' `shouldBe` serviceId + + runSMPServiceClient t (tlsCred, serviceKeys) $ \sh -> do + let idsHash = queueIdsHash [rId1, rId2, rId3] + signSend_ sh aServicePK Nothing ("7", serviceId, SUBS 3 idsHash) + void $ + receiveInAnyOrder sh + [ \case + Resp "7" serviceId' (SOKS n idsHash') -> do + n `shouldBe` 3 + idsHash' `shouldBe` idsHash + serviceId' `shouldBe` serviceId + pure $ Just () + _ -> pure Nothing, + \case + Resp "" NoEntity ALLS -> pure $ Just () + _ -> pure Nothing + ] + threadDelay 1500000 + readFile testPrometheusMetricsFile >>= \m -> readServiceSubsMetric m `shouldBe` Just 3 + +readServiceSubsMetric :: String -> Maybe Int +readServiceSubsMetric content = + case filter ("simplex_smp_subscribtion_service_subs_total " `isPrefixOf`) (lines content) of + (line : _) -> case words line of + [_, val, _] -> readMaybe val + [_, val] -> readMaybe val + _ -> Nothing + [] -> Nothing + receiveInAnyOrder :: (HasCallStack, Transport c) => THandleSMP c 'TClient -> [(CorrId, EntityId, Either ErrorType BrokerMsg) -> IO (Maybe b)] -> IO [b] receiveInAnyOrder h = fmap reverse . 
go [] where From 0ebea155065cdd666efdea841e041e7bff09a002 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 31 Mar 2026 23:57:50 +0100 Subject: [PATCH 87/91] agent: refactor cleanup if no pending subs (#1757) --- src/Simplex/Messaging/Client/Agent.hs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Simplex/Messaging/Client/Agent.hs b/src/Simplex/Messaging/Client/Agent.hs index e92af737a5..76b2a7cf93 100644 --- a/src/Simplex/Messaging/Client/Agent.hs +++ b/src/Simplex/Messaging/Client/Agent.hs @@ -47,6 +47,7 @@ import Crypto.Random (ChaChaDRG) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B import Data.Constraint (Dict (..)) +import Data.Functor (($>)) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as L import Data.Map.Strict (Map) @@ -328,11 +329,12 @@ reconnectClient ca@SMPClientAgent {active, agentCfg, smpSubWorkers, workerSeq} s atomically $ putTMVar (sessionVar v) a runSubWorker v = withRetryInterval (reconnectInterval agentCfg) $ \_ loop -> do - subs <- atomically $ do - s <- getPending TM.lookup readTVar - when (noPending s) $ cleanup v - pure s - unless (noPending subs) $ whenM (readTVarIO active) $ do + subs_ <- atomically $ do + s <- getPending TM.lookup readTVar + if noPending s + then cleanup v $> Nothing + else pure $ Just s + forM_ subs_ $ \subs -> whenM (readTVarIO active) $ do void $ netTimeoutInt tcpConnectTimeout NRMBackground `timeout` runExceptT (reconnectSMPClient ca srv subs) loop ProtocolClientConfig {networkConfig = NetworkConfig {tcpConnectTimeout}} = smpCfg agentCfg From fe30d69ec0832a2830d36abd116c421fa50144a9 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 1 Apr 2026 21:55:43 +0100 Subject: [PATCH 88/91] smp server: batch processing of subscription messages (#1753) * smp server: batch processing of subscription messages * refactor * empty line * fix --------- Co-authored-by: Evgeny @ SimpleX Chat 
<259188159+evgeny-simplex@users.noreply.github.com> --- ...260328_01_server_batched_sub_processing.md | 152 ++++++++++++++++++ rfcs/2026-03-28-subscription-performance.md | 90 +++++++++++ src/Simplex/Messaging/Server.hs | 31 ++-- .../Messaging/Server/MsgStore/Postgres.hs | 21 ++- .../Messaging/Server/MsgStore/Types.hs | 7 +- 5 files changed, 287 insertions(+), 14 deletions(-) create mode 100644 plans/20260328_01_server_batched_sub_processing.md create mode 100644 rfcs/2026-03-28-subscription-performance.md diff --git a/plans/20260328_01_server_batched_sub_processing.md b/plans/20260328_01_server_batched_sub_processing.md new file mode 100644 index 0000000000..411968de42 --- /dev/null +++ b/plans/20260328_01_server_batched_sub_processing.md @@ -0,0 +1,152 @@ +# Server: batched SUB command processing + +Implementation plan for Part 1 of [RFC 2026-03-28-subscription-performance](../rfcs/2026-03-28-subscription-performance.md). + +## Current state + +When a batch of ~135 SUB commands arrives, the server already batches: +- Queue record lookups (`getQueueRecs` in `receive`, Server.hs:1151) +- Command verification (`verifyLoadedQueue`, Server.hs:1152) + +But command processing is per-command (`foldrM process` in `client`, Server.hs:1372-1375). Each SUB calls `subscribeQueueAndDeliver` which calls `tryPeekMsg` - one DB query per queue. For Postgres, that's ~135 individual `SELECT ... FROM messages WHERE recipient_id = ? ORDER BY message_id ASC LIMIT 1` queries per batch. + +## Goal + +Replace ~135 individual message peek queries with 1 batched query per batch. No protocol changes. + +## Implementation + +### Step 1: Add `tryPeekMsgs` to MsgStoreClass + +File: `src/Simplex/Messaging/Server/MsgStore/Types.hs` + +Add to `MsgStoreClass`: + +```haskell +tryPeekMsgs :: s -> [StoreQueue s] -> ExceptT ErrorType IO (Map RecipientId Message) +``` + +Returns a map from recipient ID to earliest pending message for each queue that has one. 
Queues with no messages are absent from the map. + +### Step 2: Parameterize `deliver` to accept pre-fetched message + +File: `src/Simplex/Messaging/Server.hs` + +Currently `deliver` (inside `subscribeQueueAndDeliver`, line 1641) calls `tryPeekMsg ms q`. Add a parameter for an optional pre-fetched message: + +```haskell +deliver :: Maybe Message -> (Bool, Maybe Sub) -> M s ResponseAndMessage +deliver prefetchedMsg (hasSub, sub_) = do + stats <- asks serverStats + fmap (either ((,Nothing) . err) id) $ liftIO $ runExceptT $ do + msg_ <- maybe (tryPeekMsg ms q) (pure . Just) prefetchedMsg + ... +``` + +When `Nothing` is passed, falls back to individual `tryPeekMsg` (existing behavior). When `Just msg` is passed, uses it directly (batched path). + +### Step 3: Pre-fetch messages before the processing loop + +File: `src/Simplex/Messaging/Server.hs` + +Currently (lines 1372-1375): + +```haskell +forever $ + atomically (readTBQueue rcvQ) + >>= foldrM process ([], []) + >>= \(rs_, msgs) -> ... +``` + +Add a pre-fetch step before the existing loop: + +```haskell +forever $ do + batch <- atomically (readTBQueue rcvQ) + msgMap <- prefetchMsgs batch + foldrM (process msgMap) ([], []) batch + >>= \(rs_, msgs) -> ... +``` + +`prefetchMsgs` scans the batch, collects queues from SUB commands that have a verified queue (`q_ = Just (q, _)`), calls `tryPeekMsgs` once, returns the map. For batches with no SUBs it returns an empty map (no DB call). + +`process` passes the looked-up message (or Nothing) through to `processCommand` and down to `deliver`. + +The `foldrM process` loop, `processCommand`, `subscribeQueueAndDeliver`, and all other command handlers stay structurally the same. Only `deliver` gains one parameter, and the `client` loop gains one pre-fetch call. + +### Step 4: Review + +Review the typeclass signature and server usage. Confirm the interface has the right shape before implementing store backends. 
+ +### Step 5: Implement for each store backend + +#### Postgres + +File: `src/Simplex/Messaging/Server/MsgStore/Postgres.hs` + +Single query using `DISTINCT ON`: + +```sql +SELECT DISTINCT ON (recipient_id) + recipient_id, msg_id, msg_ts, msg_quota, msg_ntf_flag, msg_body +FROM messages +WHERE recipient_id IN ? +ORDER BY recipient_id, message_id ASC +``` + +Build `Map RecipientId Message` from results. + +#### STM + +File: `src/Simplex/Messaging/Server/MsgStore/STM.hs` + +Loop over queues, call `tryPeekMsg` for each, collect into map. + +#### Journal + +File: `src/Simplex/Messaging/Server/MsgStore/Journal.hs` + +Loop over queues, call `tryPeekMsg` for each, collect into map. + +### Step 6: Handle edge cases + +1. **Mixed batches**: `prefetchMsgs` collects only SUB queues. Non-SUB commands get Nothing for the pre-fetched message and process unchanged. + +2. **Already-subscribed queues**: Include in pre-fetch - `deliver` is called for re-SUBs too (delivers pending message). + +3. **Service subscriptions**: The pre-fetch doesn't care about service state. `sharedSubscribeQueue` handles service association in STM; message peek is the same. + +4. **Error queues**: Verification errors from `receive` are Left values in the batch. `prefetchMsgs` only looks at Right values with SUB commands. + +5. **Empty pre-fetch**: If batch has no SUBs (e.g., all ACKs), `prefetchMsgs` returns empty map, no DB call made. + +### Step 7: Batch other commands (future, not in scope) + +The same pattern (pre-fetch before loop, parameterize handler) can extend to: +- `ACK` with `tryDelPeekMsg` - batch delete+peek +- `GET` with `tryPeekMsg` - same map lookup + +Lower priority since these don't have the N-at-once pattern of subscriptions. 
+ +## File changes summary + +| File | Change | +|---|---| +| `src/Simplex/Messaging/Server/MsgStore/Types.hs` | Add `tryPeekMsgs` to typeclass | +| `src/Simplex/Messaging/Server/MsgStore/Postgres.hs` | Implement `tryPeekMsgs` with batch SQL | +| `src/Simplex/Messaging/Server/MsgStore/STM.hs` | Implement `tryPeekMsgs` as loop | +| `src/Simplex/Messaging/Server/MsgStore/Journal.hs` | Implement `tryPeekMsgs` as loop | +| `src/Simplex/Messaging/Server.hs` | Add `prefetchMsgs`, parameterize `deliver` | + +## Testing + +1. Existing server tests must pass unchanged (correctness preserved). +2. Add a test that subscribes a batch of queues (some with pending messages, some without) and verifies all get correct SOK + MSG responses. +3. Prometheus metrics: existing `qSub` stat should still increment correctly. + +## Performance expectation + +For 300K queues across ~2200 batches: +- Before: ~300K individual DB queries +- After: ~2200 batched DB queries (one per batch of ~135) +- ~136x reduction in DB round-trips diff --git a/rfcs/2026-03-28-subscription-performance.md b/rfcs/2026-03-28-subscription-performance.md new file mode 100644 index 0000000000..e0af42bf89 --- /dev/null +++ b/rfcs/2026-03-28-subscription-performance.md @@ -0,0 +1,90 @@ +# Subscription performance + +No protocol changes. This is an implementation RFC addressing subscription performance bottlenecks in both the SMP router and the agent. + +## Problem + +Subscribing large numbers of queues is slow. A messaging client with ~300K queues per router across 3 routers takes over 1 hour to subscribe. For comparison, the NTF server with ~1M queues per router across 12 routers took 20-30 minutes (prior to NTF client services, now in master). + +Even on fast networks (cloud VMs), a client with 1.1M active subscriptions needed ~1.5M attempts (commands sent) to fully subscribe - ~36% retry rate caused by the timeout cascade described below. + +### Root causes + +#### 1. 
Router: per-command processing in batches + +Batch verification and queue lookups are already done efficiently for the whole batch in `Server.hs`. But `processCommand` is called per-command in a loop - each SUB does its own individual DB query for message peek/delivery. With ~135 SUBs per batch (current SMP version), that's 135 individual DB queries per batch instead of 1 batched query. + +For 300K queues, that's ~2200 batches x 135 queries = ~300K individual DB queries on the router, which is the dominant bottleneck when using PostgreSQL storage. + +NSUB is cheaper because it just registers for notifications without message delivery - no per-queue DB query. + +#### 2. Agent: all queues read and sent at once + +`getUserServerRcvQueueSubs` reads all queues for a `(userId, server)` pair in one query with no LIMIT. For 300K queues, the entire result set is loaded into memory, then all ~2200 batches are queued to send without waiting for responses. + +The NTF server agent uses cursor-style reading with configurable batch sizes (900 subs per chunk, 90K per DB fetch) and waits for each chunk to be processed before fetching the next. + +#### 3. No backpressure on sends + +`nonBlockingWriteTBQueue` bypasses the `sndQ` bound by forking a thread when the queue is full. All batches are queued immediately, and all their response timers start simultaneously. A 30-second per-response timeout means later batches time out not because the router is slow to respond to them specifically, but because they're waiting in the router's receive queue behind thousands of earlier commands. + +This causes cascading timeouts: timed-out responses trigger `resubscribeSMPSession`, which retries all pending subs. Three consecutive timeouts can trigger connection drop via the monitor thread, causing a full reconnection and retry of everything. 
+ +## Solution + +### Part 1: Router - batched command processing + +Move the per-command processing loop inside command handlers so that commands of the same type within a batch can be processed together. + +Current flow: +``` +receive batch -> verify all -> lookup queues all -> for each command: processCommand (individual DB query) +``` + +Proposed flow: +``` +receive batch -> verify all -> lookup queues all -> group by command type -> process group: + SUB group: one batched message peek query for all queues + NSUB group: batch registration (already cheap, but can batch DB writes) + other commands: process individually as before +``` + +For SUB, the batched processing would: +1. Collect all queue IDs from the SUB group +2. Perform a single DB query to peek messages for all queues +3. Distribute results back to individual responses + +This reduces ~135 DB queries per batch to 1, cutting router-side DB load by ~100x for subscriptions. + +Commands where batching doesn't matter (SEND, ACK, KEY, etc.) continue to be processed individually. + +### Part 2: Agent - cursor-based subscription with backpressure + +Replace the all-at-once fetch-and-send pattern with cursor-style batching, similar to what the NTF server agent does. + +Changes to `subscribeUserServer`: +1. Fetch queues in fixed-size batches (e.g., configurable, default ~1000) using LIMIT/OFFSET or cursor-based pagination. +2. Send each batch and wait for responses before sending the next. +3. Remove the use of `nonBlockingWriteTBQueue` for subscription batches - use blocking writes or structured backpressure so response timers don't start until the batch is actually sent. 
+ +This ensures: +- Memory usage is bounded (not 300K queue records in memory at once) +- Response timeouts are meaningful (timer starts when the batch is actually sent to the router, not when it's queued locally) +- Retries are scoped to the failed batch, not all pending subs +- Works on slow/lossy networks by naturally pacing sends + +### Part 3: Response timeout for batches + +The current per-response 30-second timeout doesn't account for batch processing time. Options: + +1. **Stagger deadlines**: later responses in a batch get proportionally more time. The `rcvConcurrency` field was designed for this but is never used. +2. **Per-batch timeout**: instead of timing individual responses, timeout the entire batch with a budget proportional to batch size. +3. **No timeout for subscription responses**: since subscriptions are sent as batches with backpressure (Part 2), and the connection is monitored by pings, individual response timeouts may not be needed. A subscription that doesn't get a response will be retried on reconnect. + +## Priority and ordering + +Part 1 (router batching) gives the biggest improvement and is independent of Parts 2/3. + +Part 2 (agent cursor + backpressure) eliminates the retry cascade and is critical for slow networks. + +Part 3 (timeout handling) is a refinement that can be addressed after Parts 1 and 2. diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index ab001bbc3b..66100e97d8 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -1366,14 +1366,20 @@ client labelMyThread . 
B.unpack $ "client $" <> encode sessionId <> " commands" let THandleParams {thVersion} = thParams' clntServiceId = (\THClientService {serviceId} -> serviceId) <$> (peerClientService =<< thAuth thParams') - process t acc@(rs, msgs) = + process msgMap t acc@(rs, msgs) = (maybe acc (\(!r, !msg_) -> (r : rs, maybe msgs (: msgs) msg_))) - <$> processCommand clntServiceId thVersion t - forever $ - atomically (readTBQueue rcvQ) - >>= foldrM process ([], []) + <$> processCommand clntServiceId thVersion msgMap t + forever $ do + batch <- atomically (readTBQueue rcvQ) + msgMap <- prefetchMsgs batch + foldrM (process msgMap) ([], []) batch >>= \(rs_, msgs) -> mapM_ (atomically . writeTBQueue sndQ . (,msgs)) (L.nonEmpty rs_) where + prefetchMsgs :: NonEmpty (VerifiedTransmission s) -> M s (Either ErrorType (Map RecipientId Message)) + prefetchMsgs batch = + let subQs = [q | (Just (q, _), (_, _, Cmd SRecipient SUB)) <- L.toList batch] + in if null subQs then pure $ Right M.empty else liftIO $ runExceptT $ tryPeekMsgs ms subQs + processProxiedCmd :: Transmission (Command 'ProxiedClient) -> M s (Maybe ResponseAndMessage) processProxiedCmd (corrId, EntityId sessId, command) = (\t -> ((corrId, EntityId sessId, t), Nothing)) <$$> case command of PRXY srv auth -> ifM allowProxy getRelay (pure $ Just $ ERR $ PROXY BASIC_AUTH) @@ -1454,8 +1460,8 @@ client mkIncProxyStats ps psOwn own sel = do incStat $ sel ps when own $ incStat $ sel psOwn - processCommand :: Maybe ServiceId -> VersionSMP -> VerifiedTransmission s -> M s (Maybe ResponseAndMessage) - processCommand clntServiceId clntVersion (q_, (corrId, entId, cmd)) = case cmd of + processCommand :: Maybe ServiceId -> VersionSMP -> Either ErrorType (Map RecipientId Message) -> VerifiedTransmission s -> M s (Maybe ResponseAndMessage) + processCommand clntServiceId clntVersion msgMap (q_, (corrId, entId, cmd)) = case cmd of Cmd SProxiedClient command -> processProxiedCmd (corrId, entId, command) Cmd SSender command -> case command of 
SKEY k -> withQueue $ \q qr -> checkMode QMMessaging qr $ secureQueue_ q k @@ -1479,7 +1485,9 @@ client pure $ allowNewQueues && maybe True ((== auth_) . Just) newQueueBasicAuth Cmd SRecipient command -> case command of - SUB -> withQueue' subscribeQueueAndDeliver + SUB -> case msgMap of + Left e -> pure $ Just (err e, Nothing) + Right msgs -> withQueue' $ subscribeQueueAndDeliver (M.lookup entId msgs) GET -> withQueue getMessage ACK msgId -> withQueue $ acknowledgeMsg msgId KEY sKey -> withQueue $ \q _ -> either err (corrId,entId,) <$> secureQueue_ q sKey @@ -1620,8 +1628,8 @@ client suspendQueue_ :: (StoreQueue s, QueueRec) -> M s (Transmission BrokerMsg) suspendQueue_ (q, _) = liftIO $ either err (const ok) <$> suspendQueue (queueStore ms) q - subscribeQueueAndDeliver :: StoreQueue s -> QueueRec -> M s ResponseAndMessage - subscribeQueueAndDeliver q qr@QueueRec {rcvServiceId} = + subscribeQueueAndDeliver :: Maybe Message -> StoreQueue s -> QueueRec -> M s ResponseAndMessage + subscribeQueueAndDeliver msg_ q qr@QueueRec {rcvServiceId} = liftIO (TM.lookupIO entId $ subscriptions clnt) >>= \case Nothing -> sharedSubscribeQueue q SRecipientService rcvServiceId subscribers subscriptions serviceSubsCount (newSubscription NoSub) rcvServices >>= \case @@ -1642,7 +1650,6 @@ client deliver (hasSub, sub_) = do stats <- asks serverStats fmap (either ((,Nothing) . err) id) $ liftIO $ runExceptT $ do - msg_ <- tryPeekMsg ms q msg' <- forM msg_ $ \msg -> liftIO $ do ts <- getSystemSeconds sub <- maybe (atomically getSub) pure sub_ @@ -2087,7 +2094,7 @@ client -- rejectOrVerify filters allowed commands, no need to repeat it here. -- INTERNAL is used because processCommand never returns Nothing for sender commands (could be extracted for better types). 
-- `fst` removes empty message that is only returned for `SUB` command - Right t''@(_, (corrId', entId', _)) -> maybe (corrId', entId', ERR INTERNAL) fst <$> lift (processCommand Nothing fwdVersion t'') + Right t''@(_, (corrId', entId', _)) -> maybe (corrId', entId', ERR INTERNAL) fst <$> lift (processCommand Nothing fwdVersion (Right M.empty) t'') -- encode response r' <- case batchTransmissions clntTHParams [Right (Nothing, encodeTransmission clntTHParams r)] of [] -> throwE INTERNAL -- at least 1 item is guaranteed from NonEmpty/Right diff --git a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs index 77d9973e6b..b855346d4a 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Postgres.hs @@ -41,7 +41,7 @@ import Data.List (intersperse) import qualified Data.Map.Strict as M import Data.Text (Text) import Data.Time.Clock.System (SystemTime (..)) -import Database.PostgreSQL.Simple (Binary (..), Only (..), (:.) (..)) +import Database.PostgreSQL.Simple (Binary (..), In (..), Only (..), (:.) (..)) import qualified Database.PostgreSQL.Simple as DB import qualified Database.PostgreSQL.Simple.Copy as DB import Database.PostgreSQL.Simple.SqlQQ (sql) @@ -246,6 +246,25 @@ instance MsgStoreClass PostgresMsgStore where tryPeekMsg ms q = isolateQueue ms q "tryPeekMsg" $ tryPeekMsg_ q () {-# INLINE tryPeekMsg #-} + tryPeekMsgs :: PostgresMsgStore -> [PostgresQueue] -> ExceptT ErrorType IO (M.Map RecipientId Message) + tryPeekMsgs _ms [] = pure M.empty + tryPeekMsgs ms qs = + uninterruptibleMask_ $ + withDB' "tryPeekMsgs" (queueStore_ ms) $ \db -> + M.fromList . map toRcvMsg <$> + DB.query + db + [sql| + SELECT DISTINCT ON (recipient_id) + recipient_id, msg_id, msg_ts, msg_quota, msg_ntf_flag, msg_body + FROM messages + WHERE recipient_id IN ? + ORDER BY recipient_id, message_id ASC + |] + (Only (In (map recipientId' qs))) + where + toRcvMsg (Only rId :. 
msg) = (rId, toMessage msg) + tryDelMsg :: PostgresMsgStore -> PostgresQueue -> MsgId -> ExceptT ErrorType IO (Maybe Message) tryDelMsg ms q msgId = uninterruptibleMask_ $ diff --git a/src/Simplex/Messaging/Server/MsgStore/Types.hs b/src/Simplex/Messaging/Server/MsgStore/Types.hs index acb661a408..a14dfd4242 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Types.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Types.hs @@ -41,7 +41,9 @@ import Control.Monad.Trans.Except import Data.Functor (($>)) import Data.Int (Int64) import Data.Kind -import Data.Maybe (fromMaybe) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as M +import Data.Maybe (catMaybes, fromMaybe) import Data.Text (Text) import Data.Time.Clock.System (SystemTime (systemSeconds)) import Simplex.Messaging.Protocol @@ -91,6 +93,9 @@ class (Monad (StoreMonad s), QueueStoreClass (StoreQueue s) (QueueStore s)) => M tryPeekMsg :: s -> StoreQueue s -> ExceptT ErrorType IO (Maybe Message) tryPeekMsg st q = snd <$$> withPeekMsgQueue st q "tryPeekMsg" pure {-# INLINE tryPeekMsg #-} + + tryPeekMsgs :: s -> [StoreQueue s] -> ExceptT ErrorType IO (Map RecipientId Message) + tryPeekMsgs st qs = M.fromList . 
catMaybes <$> mapM (\q -> (recipientId q,) <$$> tryPeekMsg st q) qs tryDelMsg :: s -> StoreQueue s -> MsgId -> ExceptT ErrorType IO (Maybe Message) tryDelMsg st q msgId' = From 8bd3193280da6b4decf790bb57b470780c2576ba Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 8 May 2026 09:36:35 +0100 Subject: [PATCH 89/91] smp: batch queue association updates on subscriptions (#1760) * smp: batch queue association updates on subscriptions * refactor to fused batching * simpler * batch assoc functions * clean up * fix --------- Co-authored-by: Evgeny @ SimpleX Chat <259188159+evgeny-simplex@users.noreply.github.com> --- plans/20260401_01_batch_queue_associations.md | 126 ++++++++++++++++++ src/Simplex/Messaging/Server.hs | 94 +++++++------ .../Messaging/Server/MsgStore/Journal.hs | 2 + .../Messaging/Server/QueueStore/Postgres.hs | 28 +++- .../Messaging/Server/QueueStore/STM.hs | 4 + .../Messaging/Server/QueueStore/Types.hs | 2 + 6 files changed, 214 insertions(+), 42 deletions(-) create mode 100644 plans/20260401_01_batch_queue_associations.md diff --git a/plans/20260401_01_batch_queue_associations.md b/plans/20260401_01_batch_queue_associations.md new file mode 100644 index 0000000000..07f794df2f --- /dev/null +++ b/plans/20260401_01_batch_queue_associations.md @@ -0,0 +1,126 @@ +# Server: batch queue service associations + +When a batch of SUB or NSUB commands arrives from a service client, each command that needs a new or removed service association calls `setQueueService` individually - one DB write per command. For 135 commands per batch, that's 135 individual `UPDATE msg_queues` queries. + +## Goal + +Reduce to at most 2 DB queries per batch (one for rcv associations, one for ntf associations), using `UPDATE ... RETURNING recipient_id` to identify which queues were actually updated. + +Also fuse message pre-fetch and association batching into a single batch preparation step with a clean contract. 
+ +## Contract + +```haskell +prepareBatch :: Maybe ServiceId -> NonEmpty (VerifiedTransmission s) -> M s (Either ErrorType (Map RecipientId (Maybe Message, Maybe (Either ErrorType ())))) +``` + +`Left e` = batch-level failure (message pre-fetch or association query failed entirely). All SUBs/NSUBs in the batch get this error. + +`Right map` = per-queue results as a tuple: +- `Maybe Message` - pre-fetched message for SUB queues, `Nothing` for NSUB or no message +- `Maybe (Either ErrorType ())` - association result. `Nothing` = no update needed. `Just (Right ())` = update succeeded. `Just (Left e)` = update failed for this queue. + +One map, one lookup per queue. `processCommand` passes both values to `subscribeQueueAndDeliver` / `subscribeNotifications` -> `sharedSubscribeQueue`. + +Queues not in the map (non-SUB/NSUB commands, failed verification) are not affected. + +## prepareBatch implementation + +One accumulating fold over the batch, collecting three lists: +- `subMsgQs :: [StoreQueue s]` - SUB queues for message pre-fetch +- `rcvAssocQs :: [StoreQueue s]` - SUB queues needing `rcv_service_id` update (`clntServiceId /= rcvServiceId qr`) +- `ntfAssocQs :: [StoreQueue s]` - NSUB queues needing `ntf_service_id` update (`clntServiceId /= ntfServiceId` from `NtfCreds`) + +Classification reads from the already-loaded `QueueRec` in `VerifiedTransmission` - no extra DB query. + +Then three store calls (each skipped if its list is empty): +1. `tryPeekMsgs ms subMsgQs` -> `Map RecipientId Message` +2. `setRcvQueueServices (queueStore ms) clntServiceId rcvAssocQs` -> `Set RecipientId` +3. 
`setNtfQueueServices (queueStore ms) clntServiceId ntfAssocQs` -> `Set RecipientId` + +Then one pass to merge results into `Map RecipientId (Maybe Message, Maybe (Either ErrorType ()))`: +- For each SUB queue: `(M.lookup rId msgMap, assocResult rId rcvUpdated rcvAssocQs)` +- For each NSUB queue: `(Nothing, assocResult rId ntfUpdated ntfAssocQs)` + +Where `assocResult rId updated assocQs` = if the queue was in `assocQs` (needed update), then `Just (Right ())` if `rId` is in `updated`, else `Just (Left AUTH)`. If not in `assocQs` (no update needed), `Nothing`. + +If any of the three calls fails entirely, return `Left e`. + +## Store interface + +Replace the polymorphic `setQueueServices` with two plain functions in `QueueStoreClass`: + +```haskell +setRcvQueueServices :: s -> Maybe ServiceId -> [q] -> IO (Set RecipientId) +setNtfQueueServices :: s -> Maybe ServiceId -> [q] -> IO (Set RecipientId) +``` + +No `SParty p` polymorphism. Each function knows its column. + +### Postgres implementation + +`setRcvQueueServices`: +```sql +UPDATE msg_queues SET rcv_service_id = ? +WHERE recipient_id IN ? AND deleted_at IS NULL +RETURNING recipient_id +``` + +`setNtfQueueServices`: +```sql +UPDATE msg_queues SET ntf_service_id = ? +WHERE recipient_id IN ? AND notifier_id IS NOT NULL AND deleted_at IS NULL +RETURNING recipient_id +``` + +After each batch query, for each queue in the returned set: +1. Read QueueRec TVar, update with new serviceId +2. Write store log entry + +### STM implementation + +Loop over queues, call existing per-item logic, collect succeeded `RecipientId`s into a Set. + +## Downstream changes in Server.hs + +### processCommand + +Gains one parameter: `Map RecipientId (Maybe Message, Maybe (Either ErrorType ()))`. + +SUB case: `M.lookup entId prepared` gives `Just (msg_, assocResult)` or `Nothing`. Pass both to `subscribeQueueAndDeliver`. + +NSUB case: `M.lookup entId prepared` gives `Just (Nothing, assocResult)` or `Nothing`. 
Pass `assocResult` to `subscribeNotifications`. + +Forwarded commands: pass `M.empty`. + +### subscribeQueueAndDeliver + +Takes `Maybe Message` and `Maybe (Either ErrorType ())` as before. No change in how it uses them. + +### sharedSubscribeQueue + +Takes `Maybe (Either ErrorType ())`. On paths needing association update: +- `Just (Left e)` -> return error +- `Just (Right ())` -> skip `setQueueService`, proceed with STM work +- `Nothing` -> no update needed, proceed with existing logic + +## Implementation order (top-down) + +1. Define the `prepareBatch` contract and thread one map through `processCommand` -> `subscribeQueueAndDeliver` / `subscribeNotifications` -> `sharedSubscribeQueue` (Server.hs) +2. Implement `prepareBatch` with the fold, three calls, and merge (Server.hs) +3. Add `setRcvQueueServices` and `setNtfQueueServices` to `QueueStoreClass` (Types.hs) +4. Implement for Postgres with batch `UPDATE ... RETURNING` (Postgres.hs) +5. Implement for STM as loop (STM.hs) +6. Implement for Journal as delegation (Journal.hs) + +At step 2, store functions can initially be stubs returning empty sets. Steps 3-6 fill in the real implementations. + +## Files changed + +| File | Change | +|---|---| +| `src/Simplex/Messaging/Server.hs` | `prepareBatch` with fold + merge; one map parameter through `processCommand` -> `subscribeQueueAndDeliver` / `subscribeNotifications` -> `sharedSubscribeQueue` | +| `src/Simplex/Messaging/Server/QueueStore/Types.hs` | Add `setRcvQueueServices`, `setNtfQueueServices` to `QueueStoreClass` | +| `src/Simplex/Messaging/Server/QueueStore/Postgres.hs` | Implement with batch `UPDATE ... 
RETURNING` + per-item TVar/log updates | +| `src/Simplex/Messaging/Server/QueueStore/STM.hs` | Implement as loop | +| `src/Simplex/Messaging/Server/MsgStore/Journal.hs` | Delegate to underlying store | diff --git a/src/Simplex/Messaging/Server.hs b/src/Simplex/Messaging/Server.hs index 66100e97d8..1b7d920ac5 100644 --- a/src/Simplex/Messaging/Server.hs +++ b/src/Simplex/Messaging/Server.hs @@ -1366,19 +1366,37 @@ client labelMyThread . B.unpack $ "client $" <> encode sessionId <> " commands" let THandleParams {thVersion} = thParams' clntServiceId = (\THClientService {serviceId} -> serviceId) <$> (peerClientService =<< thAuth thParams') - process msgMap t acc@(rs, msgs) = + process batchSubs t acc@(rs, msgs) = (maybe acc (\(!r, !msg_) -> (r : rs, maybe msgs (: msgs) msg_))) - <$> processCommand clntServiceId thVersion msgMap t + <$> processCommand clntServiceId thVersion batchSubs t forever $ do batch <- atomically (readTBQueue rcvQ) - msgMap <- prefetchMsgs batch - foldrM (process msgMap) ([], []) batch + batchSubs <- prepareBatchSubs clntServiceId batch + foldrM (process batchSubs) ([], []) batch >>= \(rs_, msgs) -> mapM_ (atomically . writeTBQueue sndQ . 
(,msgs)) (L.nonEmpty rs_) where - prefetchMsgs :: NonEmpty (VerifiedTransmission s) -> M s (Either ErrorType (Map RecipientId Message)) - prefetchMsgs batch = - let subQs = [q | (Just (q, _), (_, _, Cmd SRecipient SUB)) <- L.toList batch] - in if null subQs then pure $ Right M.empty else liftIO $ runExceptT $ tryPeekMsgs ms subQs + prepareBatchSubs :: + Maybe ServiceId -> + NonEmpty (VerifiedTransmission s) -> + M s (Either ErrorType (Map RecipientId Message, Map RecipientId (Either ErrorType ()), Map RecipientId (Either ErrorType ()))) + prepareBatchSubs clntServiceId_ batch = do + let (subMsgQs, rcvAssocQs, ntfAssocQs) = foldr partitionSubs ([], [], []) batch + partitionSubs t (msgQs, rcvQs, ntfQs) = case t of + (Just (q, qr), (_, _, Cmd SRecipient SUB)) + | clntServiceId_ /= rcvServiceId qr -> (q : msgQs, q : rcvQs, ntfQs) + | otherwise -> (q : msgQs, rcvQs, ntfQs) + (Just (q, qr), (_, _, Cmd SNotifier NSUB)) + | clntServiceId_ /= (notifier qr >>= ntfServiceId) -> (msgQs, rcvQs, q : ntfQs) + _ -> (msgQs, rcvQs, ntfQs) + liftIO $ runExceptT $ do + rcvAssocs <- ifNotNull rcvAssocQs $ setService SRecipientService clntServiceId_ + ntfAssocs <- ifNotNull ntfAssocQs $ setService SNotifierService clntServiceId_ + msgs <- ifNotNull subMsgQs $ tryPeekMsgs ms + pure (msgs, rcvAssocs, ntfAssocs) + where + ifNotNull qs f = if null qs then pure M.empty else f qs + setService :: (PartyI p, ServiceParty p) => SParty p -> Maybe ServiceId -> [StoreQueue s] -> ExceptT ErrorType IO (Map RecipientId (Either ErrorType ())) + setService party sId = ExceptT . 
setQueueServices (queueStore ms) party sId processProxiedCmd :: Transmission (Command 'ProxiedClient) -> M s (Maybe ResponseAndMessage) processProxiedCmd (corrId, EntityId sessId, command) = (\t -> ((corrId, EntityId sessId, t), Nothing)) <$$> case command of @@ -1460,8 +1478,8 @@ client mkIncProxyStats ps psOwn own sel = do incStat $ sel ps when own $ incStat $ sel psOwn - processCommand :: Maybe ServiceId -> VersionSMP -> Either ErrorType (Map RecipientId Message) -> VerifiedTransmission s -> M s (Maybe ResponseAndMessage) - processCommand clntServiceId clntVersion msgMap (q_, (corrId, entId, cmd)) = case cmd of + processCommand :: Maybe ServiceId -> VersionSMP -> Either ErrorType (Map RecipientId Message, Map RecipientId (Either ErrorType ()), Map RecipientId (Either ErrorType ())) -> VerifiedTransmission s -> M s (Maybe ResponseAndMessage) + processCommand clntServiceId clntVersion batchSubs (q_, (corrId, entId, cmd)) = case cmd of Cmd SProxiedClient command -> processProxiedCmd (corrId, entId, command) Cmd SSender command -> case command of SKEY k -> withQueue $ \q qr -> checkMode QMMessaging qr $ secureQueue_ q k @@ -1472,7 +1490,9 @@ client LKEY k -> withQueue $ \q qr -> checkMode QMMessaging qr $ secureQueue_ q k $>> getQueueLink_ q qr LGET -> withQueue $ \q qr -> checkContact qr $ getQueueLink_ q qr Cmd SNotifier NSUB -> response . (corrId,entId,) <$> case q_ of - Just (q, QueueRec {notifier = Just ntfCreds}) -> subscribeNotifications q ntfCreds + Just (q, QueueRec {notifier = Just ntfCreds}) -> + either (pure . ERR) (\_ -> subscribeNotifications q ntfCreds) + $ batchSubs >>= \(_, _, ntfAssocs) -> sequence (M.lookup (recipientId q) ntfAssocs) _ -> pure $ ERR INTERNAL Cmd SNotifierService (NSUBS n idsHash) -> response . (corrId,entId,) <$> case clntServiceId of Just serviceId -> subscribeServiceNotifications serviceId (n, idsHash) @@ -1485,9 +1505,9 @@ client pure $ allowNewQueues && maybe True ((== auth_) . 
Just) newQueueBasicAuth Cmd SRecipient command -> case command of - SUB -> case msgMap of + SUB -> case batchSubs >>= \(msgs, rcvAssocs, _) -> sequence (M.lookup entId rcvAssocs) $> msgs of Left e -> pure $ Just (err e, Nothing) - Right msgs -> withQueue' $ subscribeQueueAndDeliver (M.lookup entId msgs) + Right msgs -> withQueue' $ subscribeQueueAndDeliver $ M.lookup entId msgs GET -> withQueue getMessage ACK msgId -> withQueue $ acknowledgeMsg msgId KEY sKey -> withQueue $ \q _ -> either err (corrId,entId,) <$> secureQueue_ q sKey @@ -1632,9 +1652,7 @@ client subscribeQueueAndDeliver msg_ q qr@QueueRec {rcvServiceId} = liftIO (TM.lookupIO entId $ subscriptions clnt) >>= \case Nothing -> - sharedSubscribeQueue q SRecipientService rcvServiceId subscribers subscriptions serviceSubsCount (newSubscription NoSub) rcvServices >>= \case - Left e -> pure (err e, Nothing) - Right s -> deliver s + deliver =<< sharedSubscribeQueue q rcvServiceId subscribers subscriptions serviceSubsCount (newSubscription NoSub) rcvServices Just s@Sub {subThread} -> do stats <- asks serverStats case subThread of @@ -1735,26 +1753,22 @@ client else liftIO (updateQueueTime (queueStore ms) q t) >>= either (pure . err') (action q) subscribeNotifications :: StoreQueue s -> NtfCreds -> M s BrokerMsg - subscribeNotifications q NtfCreds {ntfServiceId} = - sharedSubscribeQueue q SNotifierService ntfServiceId ntfSubscribers ntfSubscriptions ntfServiceSubsCount (pure ()) ntfServices >>= \case - Left e -> pure $ ERR e - Right (hasSub, _) -> do - when (isNothing clntServiceId) $ - asks serverStats >>= incStat . (if hasSub then ntfSubDuplicate else ntfSub) - pure $ SOK clntServiceId + subscribeNotifications q NtfCreds {ntfServiceId} = do + (hasSub, _) <- sharedSubscribeQueue q ntfServiceId ntfSubscribers ntfSubscriptions ntfServiceSubsCount (pure ()) ntfServices + when (isNothing clntServiceId) $ + asks serverStats >>= incStat . 
(if hasSub then ntfSubDuplicate else ntfSub) + pure $ SOK clntServiceId sharedSubscribeQueue :: - (PartyI p, ServiceParty p) => StoreQueue s -> - SParty p -> Maybe ServiceId -> ServerSubscribers s -> (Client s -> TMap QueueId sub) -> (Client s -> TVar (Int64, IdsHash)) -> STM sub -> (ServerStats -> ServiceStats) -> - M s (Either ErrorType (Bool, Maybe sub)) - sharedSubscribeQueue q party queueServiceId srvSubscribers clientSubs clientServiceSubs mkSub servicesSel = do + M s (Bool, Maybe sub) + sharedSubscribeQueue q queueServiceId srvSubscribers clientSubs clientServiceSubs mkSub servicesSel = do stats <- asks serverStats let incSrvStat sel = incStat $ sel $ servicesSel stats writeSub = writeTQueue (subQ srvSubscribers) (CSClient entId queueServiceId clntServiceId, clientId) @@ -1768,25 +1782,23 @@ client incSrvStat srvSubCount incSrvStat srvSubQueues incSrvStat srvAssocDuplicate - pure $ Right (hasSub, Nothing) - | otherwise -> runExceptT $ do - -- new or updated queue-service association - ExceptT $ setQueueService (queueStore ms) q party (Just serviceId) + pure (hasSub, Nothing) + | otherwise -> do + -- association already done in prepareBatchSubs hasSub <- atomically $ (<$ incServiceQueueSubs) =<< hasServiceSub atomically writeSub - liftIO $ do - unless hasSub $ incSrvStat srvSubCount - incSrvStat srvSubQueues - incSrvStat $ maybe srvAssocNew (const srvAssocUpdated) queueServiceId + unless hasSub $ incSrvStat srvSubCount + incSrvStat srvSubQueues + incSrvStat $ maybe srvAssocNew (const srvAssocUpdated) queueServiceId pure (hasSub, Nothing) where hasServiceSub = ((0 /=) . fst) <$> readTVar (clientServiceSubs clnt) -- This function is used when queue association with the service is created. 
- incServiceQueueSubs = modifyTVar' (clientServiceSubs clnt) $ addServiceSubs (1, queueIdHash (recipientId q)) -- service count and IDs hash + incServiceQueueSubs = modifyTVar' (clientServiceSubs clnt) $ addServiceSubs (1, queueIdHash (recipientId q)) -- service count and IDs hash Nothing -> case queueServiceId of - Just _ -> runExceptT $ do - ExceptT $ setQueueService (queueStore ms) q party Nothing - liftIO $ incSrvStat srvAssocRemoved + Just _ -> do + -- unassociation already done in prepareBatchSubs + incSrvStat srvAssocRemoved -- getSubscription may be Just for receiving service, where clientSubs also hold active deliveries for service subscriptions. -- For notification service it can only be Just if storage and session states diverge. r <- atomically $ getSubscription >>= newSub @@ -1795,7 +1807,7 @@ client Nothing -> do r@(hasSub, _) <- atomically $ getSubscription >>= newSub unless hasSub $ atomically writeSub - pure $ Right r + pure r where getSubscription = TM.lookup entId $ clientSubs clnt newSub = \case @@ -2094,7 +2106,7 @@ client -- rejectOrVerify filters allowed commands, no need to repeat it here. -- INTERNAL is used because processCommand never returns Nothing for sender commands (could be extracted for better types). 
-- `fst` removes empty message that is only returned for `SUB` command - Right t''@(_, (corrId', entId', _)) -> maybe (corrId', entId', ERR INTERNAL) fst <$> lift (processCommand Nothing fwdVersion (Right M.empty) t'') + Right t''@(_, (corrId', entId', _)) -> maybe (corrId', entId', ERR INTERNAL) fst <$> lift (processCommand Nothing fwdVersion (Right (M.empty, M.empty, M.empty)) t'') -- encode response r' <- case batchTransmissions clntTHParams [Right (Nothing, encodeTransmission clntTHParams r)] of [] -> throwE INTERNAL -- at least 1 item is guaranteed from NonEmpty/Right diff --git a/src/Simplex/Messaging/Server/MsgStore/Journal.hs b/src/Simplex/Messaging/Server/MsgStore/Journal.hs index c65660c93b..185c113b78 100644 --- a/src/Simplex/Messaging/Server/MsgStore/Journal.hs +++ b/src/Simplex/Messaging/Server/MsgStore/Journal.hs @@ -353,6 +353,8 @@ instance QueueStoreClass (JournalQueue s) (QStore s) where {-# INLINE getCreateService #-} setQueueService = withQS setQueueService {-# INLINE setQueueService #-} + setQueueServices = withQS setQueueServices + {-# INLINE setQueueServices #-} getQueueNtfServices = withQS (getQueueNtfServices @(JournalQueue s)) {-# INLINE getQueueNtfServices #-} getServiceQueueCountHash = withQS (getServiceQueueCountHash @(JournalQueue s)) diff --git a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs index a8c8c040aa..ce1cf25db4 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Postgres.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Postgres.hs @@ -91,7 +91,7 @@ import Simplex.Messaging.SystemTime import Simplex.Messaging.TMap (TMap) import qualified Simplex.Messaging.TMap as TM import Simplex.Messaging.Transport (SMPServiceRole (..)) -import Simplex.Messaging.Util (eitherToMaybe, firstRow, ifM, maybeFirstRow, maybeFirstRow', tshow, (<$$>)) +import Simplex.Messaging.Util (eitherToMaybe, firstRow, ifM, maybeFirstRow, maybeFirstRow', tshow, (<$$>), ($>>=)) import System.Exit 
(exitFailure) import System.IO (IOMode (..), hFlush, stdout) import UnliftIO.STM @@ -504,6 +504,32 @@ instance StoreQueueClass q => QueueStoreClass q (PostgresQueueStore q) where atomically $ writeTVar (queueRec sq) $ Just q' withLog "setQueueService" st $ \sl -> logQueueService sl rId party serviceId + setQueueServices :: (PartyI p, ServiceParty p) => PostgresQueueStore q -> SParty p -> Maybe ServiceId -> [q] -> IO (Either ErrorType (M.Map RecipientId (Either ErrorType ()))) + setQueueServices _ _ _ [] = pure $ Right M.empty + setQueueServices st party serviceId qs = E.uninterruptibleMask_ $ runExceptT $ do + updated <- S.fromList <$> withDB' "setQueueServices" st (\db -> + map fromOnly <$> DB.query db updateQuery (serviceId, In (map recipientId qs))) + results <- liftIO $ forM qs $ \sq -> do + let rId = recipientId sq + (rId,) <$> if S.member rId updated + then readQueueRecIO (queueRec sq) $>>= \q -> do + atomically $ writeTVar (queueRec sq) $ Just $ updateRec q + withLog "setQueueServices" st $ \sl -> logQueueService sl rId party serviceId + pure $ Right () + else pure $ Left AUTH + pure $ M.fromList results + where + updateQuery = case party of + SRecipientService -> + "UPDATE msg_queues SET rcv_service_id = ? WHERE recipient_id IN ? AND deleted_at IS NULL RETURNING recipient_id" + SNotifierService -> + "UPDATE msg_queues SET ntf_service_id = ? WHERE recipient_id IN ? 
AND notifier_id IS NOT NULL AND deleted_at IS NULL RETURNING recipient_id" + updateRec q = case party of + SRecipientService -> q {rcvServiceId = serviceId} + SNotifierService -> case notifier q of + Just nc -> q {notifier = Just nc {ntfServiceId = serviceId}} + Nothing -> q + getQueueNtfServices :: PostgresQueueStore q -> [(NotifierId, a)] -> IO (Either ErrorType ([(Maybe ServiceId, [(NotifierId, a)])], [(NotifierId, a)])) getQueueNtfServices st ntfs = E.uninterruptibleMask_ $ runExceptT $ do snIds <- diff --git a/src/Simplex/Messaging/Server/QueueStore/STM.hs b/src/Simplex/Messaging/Server/QueueStore/STM.hs index 3a236076c4..583bbf3384 100644 --- a/src/Simplex/Messaging/Server/QueueStore/STM.hs +++ b/src/Simplex/Messaging/Server/QueueStore/STM.hs @@ -337,6 +337,10 @@ instance StoreQueueClass q => QueueStoreClass q (STMQueueStore q) where mapM_ (removeServiceQueue st serviceSel qId) prevSrvId mapM_ (addServiceQueue st serviceSel qId) serviceId + setQueueServices st party serviceId qs = Right . 
M.fromList <$> mapM setQueue qs + where + setQueue sq = (recipientId sq,) <$> setQueueService st sq party serviceId + getQueueNtfServices :: STMQueueStore q -> [(NotifierId, a)] -> IO (Either ErrorType ([(Maybe ServiceId, [(NotifierId, a)])], [(NotifierId, a)])) getQueueNtfServices st ntfs = do ss <- readTVarIO (services st) diff --git a/src/Simplex/Messaging/Server/QueueStore/Types.hs b/src/Simplex/Messaging/Server/QueueStore/Types.hs index 7d1d439bdc..415a5f33cb 100644 --- a/src/Simplex/Messaging/Server/QueueStore/Types.hs +++ b/src/Simplex/Messaging/Server/QueueStore/Types.hs @@ -16,6 +16,7 @@ import Control.Concurrent.STM import Control.Monad import Data.Int (Int64) import Data.List.NonEmpty (NonEmpty) +import Data.Map.Strict (Map) import Data.Text (Text) import Simplex.Messaging.Protocol import Simplex.Messaging.Server.QueueStore @@ -51,6 +52,7 @@ class StoreQueueClass q => QueueStoreClass q s where deleteStoreQueue :: s -> q -> IO (Either ErrorType QueueRec) getCreateService :: s -> ServiceRec -> IO (Either ErrorType ServiceId) setQueueService :: (PartyI p, ServiceParty p) => s -> q -> SParty p -> Maybe ServiceId -> IO (Either ErrorType ()) + setQueueServices :: (PartyI p, ServiceParty p) => s -> SParty p -> Maybe ServiceId -> [q] -> IO (Either ErrorType (Map RecipientId (Either ErrorType ()))) getQueueNtfServices :: s -> [(NotifierId, a)] -> IO (Either ErrorType ([(Maybe ServiceId, [(NotifierId, a)])], [(NotifierId, a)])) getServiceQueueCountHash :: (PartyI p, ServiceParty p) => s -> SParty p -> ServiceId -> IO (Either ErrorType (Int64, IdsHash)) From 07cdc42d460a33d041f1c3dfdb7818cdb7de426e Mon Sep 17 00:00:00 2001 From: Evgeny Poberezkin Date: Fri, 8 May 2026 09:48:05 +0100 Subject: [PATCH 90/91] = --- docs/ROUTERS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ROUTERS.md b/docs/ROUTERS.md index d938dc40f7..39735dd86f 100644 --- a/docs/ROUTERS.md +++ b/docs/ROUTERS.md @@ -12,7 +12,7 @@ The SMP router provides messaging queues - 
unidirectional, ordered sequences of `smp-server.ini` is created during initialization and controls all runtime behavior. -**Message persistence**: when store log is enabled (`enable: on`), the server saves undelivered messages on exit and restores them on start. This only works with SIGINT (keyboard interrupt); SIGTERM does not trigger message saving. The `restore_messages` setting can be used to override this behavior independently of the store log setting. +**Message persistence**: when store log is enabled (`enable = on`), the server saves undelivered messages on exit and restores them on start. This only works with SIGINT (keyboard interrupt); SIGTERM does not trigger message saving. The `restore_messages` setting can be used to override this behavior independently of the store log setting. **Tor onion addresses**: the server can have both a public hostname and an onion hostname, allowing two users to connect when only one is using Tor. Configure as: `smp://@,`. See [`scripts/tor/`](../scripts/tor/) for setup instructions. From 118a8e89bb654d6546f2b0ab6e1cf6b91fa47763 Mon Sep 17 00:00:00 2001 From: sh <37271604+shumvgolove@users.noreply.github.com> Date: Wed, 20 May 2026 12:56:55 +0000 Subject: [PATCH 91/91] agent: use primary key index in setRcvServiceAssocs (#1783) * agent: use primary key index in setRcvServiceAssocs Previous WHERE rcv_id = ? did not match the (host, port, rcv_id) primary key prefix and fell back to a table scan via idx_rcv_queues_client_notice_id. With ~390k rows per queue, each update in a 1350-row batch scanned the whole table, yielding ~290s per batch and a multi-hour rcv-services migration. * agent: pass SMPServer explicitly to setRcvServiceAssocs Avoid extracting host/port from the first queue inside setRcvServiceAssocs. The caller already has SMPServer in scope (from tSess) and the call chain is short, so threading it through is simpler than inspecting the list. 
Removes the empty-list guard from setRcvServiceAssocs (it remains in processRcvServiceAssocs). --- src/Simplex/Messaging/Agent.hs | 2 +- src/Simplex/Messaging/Agent/Client.hs | 10 +++++----- src/Simplex/Messaging/Agent/Store/AgentStore.hs | 14 ++++++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/Simplex/Messaging/Agent.hs b/src/Simplex/Messaging/Agent.hs index 39f6fd15f9..bd77b892a1 100644 --- a/src/Simplex/Messaging/Agent.hs +++ b/src/Simplex/Messaging/Agent.hs @@ -3106,7 +3106,7 @@ processSMPTransmissions c@AgentClient {subQ} (tSess@(userId, srv, _), THandlePar unless (null connIds) $ do notify' "" $ UP srv connIds atomically $ incSMPServerStat' c userId srv connSubscribed $ length connIds - readTVarIO serviceRQs >>= processRcvServiceAssocs c + readTVarIO serviceRQs >>= processRcvServiceAssocs c srv where withRcvConn :: SMP.RecipientId -> (forall c. RcvQueue -> Connection c -> AM ()) -> AM' () withRcvConn rId a = do diff --git a/src/Simplex/Messaging/Agent/Client.hs b/src/Simplex/Messaging/Agent/Client.hs index 1039c5d757..d33794006b 100644 --- a/src/Simplex/Messaging/Agent/Client.hs +++ b/src/Simplex/Messaging/Agent/Client.hs @@ -1692,7 +1692,7 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c unless (null notices) $ takeTMVar $ clientNoticesLock c pure r unless (null serviceQs) $ void $ - processRcvServiceAssocs c serviceQs `runReaderT` agentEnv c + processRcvServiceAssocs c srv serviceQs `runReaderT` agentEnv c unless (null notices) $ void $ (processClientNotices c tSess notices `runReaderT` agentEnv c) `E.finally` atomically (putTMVar (clientNoticesLock c) ()) @@ -1714,10 +1714,10 @@ subscribeSessQueues_ c withEvents qs = sendClientBatch_ "SUB" False subscribe_ c tSess = transportSession' smp sessId = sessionId $ thParams smp -processRcvServiceAssocs :: SMPQueue q => AgentClient -> [q] -> AM' () -processRcvServiceAssocs _ [] = pure () -processRcvServiceAssocs c serviceQs = - withStore' c 
(`setRcvServiceAssocs` serviceQs) `catchAllErrors'` \e -> do +processRcvServiceAssocs :: SMPQueue q => AgentClient -> SMPServer -> [q] -> AM' () +processRcvServiceAssocs _ _ [] = pure () +processRcvServiceAssocs c srv serviceQs = + withStore' c (\db -> setRcvServiceAssocs db srv serviceQs) `catchAllErrors'` \e -> do logError $ "processRcvServiceAssocs error: " <> tshow e notifySub' c "" $ ERR e diff --git a/src/Simplex/Messaging/Agent/Store/AgentStore.hs b/src/Simplex/Messaging/Agent/Store/AgentStore.hs index 6a369ee2c6..e3ea1671b4 100644 --- a/src/Simplex/Messaging/Agent/Store/AgentStore.hs +++ b/src/Simplex/Messaging/Agent/Store/AgentStore.hs @@ -2399,12 +2399,18 @@ unassocUserServerRcvQueueSubs' db userId srv@(SMPServer h p kh) = do unsetQueuesToSubscribe :: DB.Connection -> IO () unsetQueuesToSubscribe db = DB.execute_ db "UPDATE rcv_queues SET to_subscribe = 0 WHERE to_subscribe = 1" -setRcvServiceAssocs :: SMPQueue q => DB.Connection -> [q] -> IO () -setRcvServiceAssocs db rqs = do +setRcvServiceAssocs :: SMPQueue q => DB.Connection -> SMPServer -> [q] -> IO () +setRcvServiceAssocs db ProtocolServer {host, port} rqs = #if defined(dbPostgres) - DB.execute db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id IN ?" $ Only $ In (map queueId rqs) + DB.execute + db + "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE host = ? AND port = ? AND rcv_id IN ?" + (host, port, In (map queueId rqs)) #else - DB.executeMany db "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE rcv_id = ?" $ map (Only . queueId) rqs + DB.executeMany + db + "UPDATE rcv_queues SET rcv_service_assoc = 1 WHERE host = ? AND port = ? AND rcv_id = ?" + (map (\q -> (host, port, queueId q)) rqs) #endif removeRcvServiceAssocs :: DB.Connection -> UserId -> SMPServer -> IO ()