From e2846ccfdf925c6ac514ca8f3cede3bcf091274c Mon Sep 17 00:00:00 2001 From: ryardley Date: Mon, 23 Feb 2026 10:10:02 +0000 Subject: [PATCH 01/50] draft refactor to generic reqres --- crates/net/src/events.rs | 62 ++++++++++-------------- crates/net/src/net_interface.rs | 64 +++++++++++------------- crates/net/src/net_sync_manager.rs | 78 +++++++++++++++++++----------- 3 files changed, 104 insertions(+), 100 deletions(-) diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index c172f1f595..d9293cb184 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -8,8 +8,8 @@ use crate::ContentHash; use actix::Message; use anyhow::{bail, Context, Result}; use e3_events::{ - AggregateId, CorrelationId, DocumentMeta, EnclaveEvent, EventContextAccessors, EventSource, - Sequenced, Unsequenced, + CorrelationId, DocumentMeta, EnclaveEvent, EventContextAccessors, EventSource, Sequenced, + Unsequenced, }; use e3_utils::{ArcBytes, OnceTake}; use libp2p::{ @@ -28,6 +28,9 @@ use std::{ use tokio::sync::{broadcast, mpsc}; use tracing::{error, trace, warn}; +pub type RequestPayload = Vec; +pub type ResponsePayload = Vec; + /// Incoming/Outgoing GossipData. We disambiguate on concerns relative to the net package. 
#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] pub enum GossipData { @@ -67,34 +70,23 @@ impl TryFrom for EnclaveEvent { } } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct SyncRequestValue { - pub since: HashMap, -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct SyncResponseValue { - pub events: Vec, - pub ts: u128, -} - #[derive(Message, Clone, Debug)] #[rtype("()")] -pub struct SyncRequestReceived { +pub struct IncomingRequest { pub request_id: InboundRequestId, - pub value: SyncRequestValue, - pub channel: OnceTake>, + pub payload: RequestPayload, + pub channel: OnceTake>, } #[derive(Message, Clone, Debug)] #[rtype("()")] -pub struct OutgoingSyncRequestSucceeded { - pub value: SyncResponseValue, +pub struct OutgoingRequestSucceeded { + pub payload: ResponsePayload, pub correlation_id: CorrelationId, } #[derive(Debug, Clone)] -pub struct OutgoingSyncRequestFailed { +pub struct OutgoingRequestFailed { pub correlation_id: CorrelationId, pub error: String, } @@ -126,16 +118,15 @@ pub enum NetCommand { DhtRemoveRecords { keys: Vec }, /// Shutdown signal Shutdown, - /// Called from the syning node to request libp2p events from a random peer node starting - /// from the given timestamp. - OutgoingSyncRequest { + /// Send a request to a peer and await response + OutgoingRequest { correlation_id: CorrelationId, - value: SyncRequestValue, + payload: RequestPayload, }, - /// Send libp2p events back to a peer that requested a sync. - SyncResponse { - value: SyncResponseValue, - channel: OnceTake>, + /// Send response back to a peer that made a request + Response { + payload: ResponsePayload, + channel: OnceTake>, }, } @@ -146,7 +137,7 @@ impl NetCommand { N::DhtPutRecord { correlation_id, .. } => Some(*correlation_id), N::DhtGetRecord { correlation_id, .. } => Some(*correlation_id), N::GossipPublish { correlation_id, .. 
} => Some(*correlation_id), - N::OutgoingSyncRequest { correlation_id, .. } => Some(*correlation_id), + N::OutgoingRequest { correlation_id, .. } => Some(*correlation_id), _ => None, } } @@ -207,12 +198,11 @@ pub enum NetEvent { count: usize, topic: TopicHash, }, - /// A peer node is requesting gossipsub events since the given timestamp. - /// Use the provided channel to send a `SyncResponse - SyncRequestReceived(SyncRequestReceived), - /// Received gossipsub events from a peer in response to a `SyncRequest`. - OutgoingSyncRequestSucceeded(OutgoingSyncRequestSucceeded), - OutgoingSyncRequestFailed(OutgoingSyncRequestFailed), + /// A peer made a request to this node + IncomingRequest(IncomingRequest), + /// Received response from a peer in response to an outgoing request + OutgoingRequestSucceeded(OutgoingRequestSucceeded), + OutgoingRequestFailed(OutgoingRequestFailed), AllPeersDialed, } @@ -232,8 +222,8 @@ impl NetEvent { N::DhtGetRecordSucceeded { correlation_id, .. } => Some(*correlation_id), N::DhtPutRecordError { correlation_id, .. } => Some(*correlation_id), N::DhtPutRecordSucceeded { correlation_id, .. 
} => Some(*correlation_id), - N::OutgoingSyncRequestSucceeded(msg) => Some(msg.correlation_id), - N::OutgoingSyncRequestFailed(msg) => Some(msg.correlation_id), + N::OutgoingRequestSucceeded(msg) => Some(msg.correlation_id), + N::OutgoingRequestFailed(msg) => Some(msg.correlation_id), _ => None, } } diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 156b8a867d..d784b275c0 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -21,8 +21,8 @@ use libp2p::{ Record, RecordKey, }, request_response::{ - self, cbor::Behaviour as CborRequestResponse, Event as RequestResponseEvent, - Message as RequestResponseMessage, ProtocolSupport, ResponseChannel, + self, cbor, Event as RequestResponseEvent, Message as RequestResponseMessage, + ProtocolSupport, ResponseChannel, }, swarm::{dial_opts::DialOpts, DialError, NetworkBehaviour, SwarmEvent}, PeerId, StreamProtocol, Swarm, @@ -46,9 +46,8 @@ const MAX_CONSECUTIVE_DIAL_FAILURES: u32 = 3; use crate::{ dialer::dial_peers, events::{ - estimate_hashmap_size, GossipData, NetCommand, NetEvent, OutgoingSyncRequestFailed, - OutgoingSyncRequestSucceeded, PutOrStoreError, SyncRequestReceived, SyncRequestValue, - SyncResponseValue, + GossipData, IncomingRequest, NetCommand, NetEvent, OutgoingRequestFailed, + OutgoingRequestSucceeded, PutOrStoreError, RequestPayload, ResponsePayload, }, ContentHash, }; @@ -59,7 +58,7 @@ pub struct NodeBehaviour { kademlia: KademliaBehaviour, connection_limits: connection_limits::Behaviour, identify: IdentifyBehaviour, - sync: CborRequestResponse, + sync: cbor::Behaviour, } /// Manage the peer to peer connection. 
This struct wraps a libp2p Swarm and enables communication @@ -210,7 +209,7 @@ fn create_behaviour( let request_response_config = request_response::Config::default().with_request_timeout(Duration::from_secs(30)); - let sync = CborRequestResponse::::new( + let sync = cbor::Behaviour::::new( [( StreamProtocol::new("/enclave/sync/0.0.1"), ProtocolSupport::Full, @@ -430,10 +429,10 @@ async fn process_swarm_event( debug!("Incoming sync request received (id={})", request_id); // received a request for events - event_tx.send(NetEvent::SyncRequestReceived(SyncRequestReceived { + event_tx.send(NetEvent::IncomingRequest(IncomingRequest { request_id, channel: OnceTake::new(channel), - value: request, + payload: request, }))?; } @@ -449,9 +448,9 @@ async fn process_swarm_event( debug!("Outgoing sync response received (id={request_id})"); let correlation_id = correlator.expire(request_id)?; debug!("Correlated sync response: {correlation_id}"); - event_tx.send(NetEvent::OutgoingSyncRequestSucceeded( - OutgoingSyncRequestSucceeded { - value: response, + event_tx.send(NetEvent::OutgoingRequestSucceeded( + OutgoingRequestSucceeded { + payload: response, correlation_id, }, ))?; @@ -469,12 +468,10 @@ async fn process_swarm_event( peer, request_id, error ); let correlation_id = correlator.expire(request_id)?; - event_tx.send(NetEvent::OutgoingSyncRequestFailed( - OutgoingSyncRequestFailed { - correlation_id, - error: format!("Outbound sync request failed: {:?}", error), - }, - ))?; + event_tx.send(NetEvent::OutgoingRequestFailed(OutgoingRequestFailed { + correlation_id, + error: format!("Outbound sync request failed: {:?}", error), + }))?; } SwarmEvent::Behaviour(NodeBehaviourEvent::Sync(RequestResponseEvent::InboundFailure { @@ -550,15 +547,15 @@ async fn process_swarm_command( handle_remove_records(swarm, keys); Ok(()) } - NetCommand::OutgoingSyncRequest { + NetCommand::OutgoingRequest { correlation_id, - value, + payload, } => { - handle_outgoing_sync_request(swarm, correlator, 
correlation_id, value)?; + handle_outgoing_request(swarm, correlator, correlation_id, payload)?; Ok(()) } - NetCommand::SyncResponse { value, channel } => { - handle_sync_response(swarm, channel, value)?; + NetCommand::Response { payload, channel } => { + handle_response(swarm, channel, payload)?; Ok(()) } NetCommand::Shutdown => { @@ -753,11 +750,11 @@ fn handle_shutdown(swarm: &mut Swarm) -> Result<()> { Ok(()) } -fn handle_outgoing_sync_request( +fn handle_outgoing_request( swarm: &mut Swarm, correlator: &mut Correlator, correlation_id: CorrelationId, - value: SyncRequestValue, + payload: RequestPayload, ) -> Result<()> { debug!("Outgoing sync request (cid={})", correlation_id); // TODO: @@ -777,13 +774,10 @@ fn handle_outgoing_sync_request( bail!("No peer found on swarm!") }; - debug!( - "Sync request payload size: {:?}", - estimate_hashmap_size(&value.since) - ); + debug!("Sync request payload size: {:?}", payload.len()); // Request events - let query_id = swarm.behaviour_mut().sync.send_request(&peer, value); + let query_id = swarm.behaviour_mut().sync.send_request(&peer, payload); debug!( "Sync request sent: query_id={}, correlation_id={}", query_id, correlation_id @@ -792,15 +786,15 @@ fn handle_outgoing_sync_request( Ok(()) } -fn handle_sync_response( +fn handle_response( swarm: &mut Swarm, - channel: OnceTake>, - value: SyncResponseValue, + channel: OnceTake>, + payload: ResponsePayload, ) -> Result<()> { debug!("Sending sync response"); let channel = channel.try_take()?; - if let Err(value) = swarm.behaviour_mut().sync.send_response(channel, value) { - error!("Failed to send sync response: {:?}", value); + if let Err(payload) = swarm.behaviour_mut().sync.send_response(channel, payload) { + error!("Failed to send sync response: {:?}", payload); } Ok(()) } diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index eb2291bdc6..3275be3a28 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs 
@@ -5,7 +5,7 @@ // or FITNESS FOR A PARTICULAR PURPOSE. use actix::{Actor, Addr, AsyncContext, Handler, Message, Recipient, ResponseFuture}; -use anyhow::{anyhow, bail, Result}; +use anyhow::{anyhow, bail, Context, Result}; use e3_events::{ prelude::*, trap, trap_fut, AggregateId, BusHandle, CorrelationId, EType, EnclaveEvent, EnclaveEventData, EventSource, EventStoreQueryBy, EventStoreQueryResponse, EventType, @@ -14,15 +14,27 @@ use e3_events::{ use e3_utils::{retry_with_backoff, to_retry, OnceTake, MAILBOX_LIMIT}; use futures::TryFutureExt; use libp2p::request_response::ResponseChannel; +use serde::{Deserialize, Serialize}; use std::{collections::HashMap, sync::Arc, time::Duration}; use tokio::sync::{broadcast, mpsc}; use tracing::{debug, info, warn}; use crate::events::{ - await_event, call_and_await_response, NetCommand, NetEvent, OutgoingSyncRequestSucceeded, - SyncRequestReceived, SyncRequestValue, SyncResponseValue, + await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, + OutgoingRequestSucceeded, ResponsePayload, }; +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SyncRequestValue { + pub since: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SyncResponseValue { + pub events: Vec, + pub ts: u128, +} + pub struct NetSyncManager { /// Enclave EventBus bus: BusHandle, @@ -31,7 +43,7 @@ pub struct NetSyncManager { /// NetEvents receiver to receive events rx: Arc>, eventstore: Recipient>, - requests: HashMap>>, + requests: HashMap>>, peers_ready: bool, } @@ -72,7 +84,7 @@ impl NetSyncManager { debug!("Received event {:?}", event); match event { // Someone is asking for our sync - NetEvent::SyncRequestReceived(value) => addr.do_send(value), + NetEvent::IncomingRequest(value) => addr.do_send(value), NetEvent::AllPeersDialed => addr.do_send(AllPeersDialed), _ => (), } @@ -128,26 +140,27 @@ impl Handler> for NetSyncManager { } /// We have received the sync response from the remote peer -impl 
Handler> for NetSyncManager { +impl Handler> for NetSyncManager { type Result = (); fn handle( &mut self, - msg: TypedEvent, + msg: TypedEvent, _: &mut Self::Context, ) -> Self::Result { trap(EType::Net, &self.bus.with_ec(msg.get_ctx()), || { let (msg, ctx) = msg.into_components(); + let response: SyncResponseValue = bincode::deserialize(&msg.payload) + .context("failed to deserialize sync response")?; self.bus.publish_from_remote_as_response( NetSyncEventsReceived { - events: msg - .value + events: response .events .iter() .cloned() .map(|data| data.try_into()) .collect::>>>()?, }, - msg.value.ts, + response.ts, ctx, None, EventSource::Net, @@ -159,18 +172,20 @@ impl Handler> for NetSyncManager { } /// We have received a sync request from a remote peer -impl Handler for NetSyncManager { +impl Handler for NetSyncManager { type Result = (); - fn handle(&mut self, msg: SyncRequestReceived, ctx: &mut Self::Context) -> Self::Result { + fn handle(&mut self, msg: IncomingRequest, ctx: &mut Self::Context) -> Self::Result { trap(EType::Net, &self.bus, || { info!("GOT SyncRequestReceived"); + let request: SyncRequestValue = + bincode::deserialize(&msg.payload).context("failed to deserialize sync request")?; let id = CorrelationId::new(); info!("STORING channel in requests map..."); self.requests.insert(id, msg.channel); info!("QUERYING eventstore..."); self.eventstore.try_send(EventStoreQueryBy::::new( id, - msg.value.since, + request.since, ctx.address().recipient(), ))?; Ok(()) @@ -188,16 +203,19 @@ impl Handler for NetSyncManager { bail!("request not found with {}", msg.id()); }; debug!("Sending SyncResponse with channel={:?}", channel); - if let Err(e) = self.tx.try_send(NetCommand::SyncResponse { - value: SyncResponseValue { - events: msg - .into_events() - .into_iter() - .filter(|e| e.source() == EventSource::Net) - .map(|ev| ev.try_into()) - .collect::>()?, - ts: self.bus.ts()?, // NOTE: We are storing a local timestamp on this response - }, + let response = 
SyncResponseValue { + events: msg + .into_events() + .into_iter() + .filter(|e| e.source() == EventSource::Net) + .map(|ev| ev.try_into()) + .collect::>()?, + ts: self.bus.ts()?, // NOTE: We are storing a local timestamp on this response + }; + let payload = + bincode::serialize(&response).context("failed to serialize sync response")?; + if let Err(e) = self.tx.try_send(NetCommand::Response { + payload, channel: channel.to_owned(), }) { warn!("Failed to send SyncResponse (channel full or closed): {e}"); @@ -226,19 +244,21 @@ async fn sync_request( net_cmds: mpsc::Sender, net_events: Arc>, since: HashMap, -) -> Result { +) -> Result { info!("RUNNING sync request..."); let id = CorrelationId::new(); + let payload = bincode::serialize(&SyncRequestValue { since }) + .context("failed to serialize sync request")?; call_and_await_response( net_cmds, net_events, - NetCommand::OutgoingSyncRequest { + NetCommand::OutgoingRequest { correlation_id: id, - value: SyncRequestValue { since }, + payload, }, |e| match e.clone() { - NetEvent::OutgoingSyncRequestSucceeded(value) => Some(Ok(value)), - NetEvent::OutgoingSyncRequestFailed(error) => { + NetEvent::OutgoingRequestSucceeded(value) => Some(Ok(value)), + NetEvent::OutgoingRequestFailed(error) => { Some(Err(anyhow!("Outgoing sync request failed: {:?}", error))) } _ => None, @@ -252,7 +272,7 @@ async fn handle_sync_request_event( net_cmds: mpsc::Sender, net_events: Arc>, event: TypedEvent, - address: impl Into>>, + address: impl Into>>, wait_for_event: bool, ) -> Result<()> { info!("Sync request event received"); From 3062690ed722c6eea1c061a79db3091be91a6e84 Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 04:18:20 +0000 Subject: [PATCH 02/50] tidy up debug comments --- crates/net/src/events.rs | 23 +++-- crates/net/src/net_interface.rs | 140 +++++++++++++++-------------- crates/net/src/net_sync_manager.rs | 5 +- 3 files changed, 88 insertions(+), 80 deletions(-) diff --git a/crates/net/src/events.rs 
b/crates/net/src/events.rs index d9293cb184..fee97d0c83 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -28,8 +28,13 @@ use std::{ use tokio::sync::{broadcast, mpsc}; use tracing::{error, trace, warn}; -pub type RequestPayload = Vec; -pub type ResponsePayload = Vec; +use libp2p::PeerId; + +#[derive(Clone, Debug)] +pub enum PeerTarget { + Random, + Specific(PeerId), +} /// Incoming/Outgoing GossipData. We disambiguate on concerns relative to the net package. #[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)] @@ -74,14 +79,14 @@ impl TryFrom for EnclaveEvent { #[rtype("()")] pub struct IncomingRequest { pub request_id: InboundRequestId, - pub payload: RequestPayload, - pub channel: OnceTake>, + pub payload: Vec, + pub channel: OnceTake>>, } #[derive(Message, Clone, Debug)] #[rtype("()")] pub struct OutgoingRequestSucceeded { - pub payload: ResponsePayload, + pub payload: Vec, pub correlation_id: CorrelationId, } @@ -121,12 +126,12 @@ pub enum NetCommand { /// Send a request to a peer and await response OutgoingRequest { correlation_id: CorrelationId, - payload: RequestPayload, + payload: Vec, + target: PeerTarget, }, - /// Send response back to a peer that made a request Response { - payload: ResponsePayload, - channel: OnceTake>, + payload: Vec, + channel: OnceTake>>, }, } diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index d784b275c0..2bf9535be1 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -5,7 +5,7 @@ // or FITNESS FOR A PARTICULAR PURPOSE. 
use crate::correlator::Correlator; -use anyhow::{bail, Result}; +use anyhow::{Context, Result}; use e3_events::CorrelationId; use e3_utils::{ArcBytes, OnceTake}; use libp2p::{ @@ -47,7 +47,7 @@ use crate::{ dialer::dial_peers, events::{ GossipData, IncomingRequest, NetCommand, NetEvent, OutgoingRequestFailed, - OutgoingRequestSucceeded, PutOrStoreError, RequestPayload, ResponsePayload, + OutgoingRequestSucceeded, PeerTarget, PutOrStoreError, }, ContentHash, }; @@ -58,7 +58,7 @@ pub struct NodeBehaviour { kademlia: KademliaBehaviour, connection_limits: connection_limits::Behaviour, identify: IdentifyBehaviour, - sync: cbor::Behaviour, + request_response: cbor::Behaviour, Vec>, } /// Manage the peer to peer connection. This struct wraps a libp2p Swarm and enables communication @@ -209,7 +209,7 @@ fn create_behaviour( let request_response_config = request_response::Config::default().with_request_timeout(Duration::from_secs(30)); - let sync = cbor::Behaviour::::new( + let request_response = cbor::Behaviour::, Vec>::new( [( StreamProtocol::new("/enclave/sync/0.0.1"), ProtocolSupport::Full, @@ -227,9 +227,6 @@ fn create_behaviour( max_provided_keys: DHT_MAX_RECORDS, }; let store = MemoryStore::with_config(peer_id, store_config); - // Force Server mode: in a private network all nodes should fully participate - // in DHT routing. Auto-detect (None) would classify containerized/NAT'd nodes - // as Clients, preventing peer discovery and record replication. let mut kademlia = KademliaBehaviour::with_config(peer_id, store, config); kademlia.set_mode(Some(kad::Mode::Server)); @@ -238,7 +235,7 @@ fn create_behaviour( kademlia, connection_limits, identify, - sync, + request_response, }) } @@ -417,16 +414,18 @@ async fn process_swarm_event( event_tx.send(NetEvent::GossipSubscribed { count, topic })?; } - SwarmEvent::Behaviour(NodeBehaviourEvent::Sync(RequestResponseEvent::Message { - message: - RequestResponseMessage::Request { - request, - channel, - request_id, - }, - .. 
- })) => { - debug!("Incoming sync request received (id={})", request_id); + SwarmEvent::Behaviour(NodeBehaviourEvent::RequestResponse( + RequestResponseEvent::Message { + message: + RequestResponseMessage::Request { + request, + channel, + request_id, + }, + .. + }, + )) => { + debug!("Incoming request received (id={})", request_id); // received a request for events event_tx.send(NetEvent::IncomingRequest(IncomingRequest { @@ -436,18 +435,20 @@ async fn process_swarm_event( }))?; } - SwarmEvent::Behaviour(NodeBehaviourEvent::Sync(RequestResponseEvent::Message { - message: - RequestResponseMessage::Response { - request_id, - response, - .. - }, - .. - })) => { - debug!("Outgoing sync response received (id={request_id})"); + SwarmEvent::Behaviour(NodeBehaviourEvent::RequestResponse( + RequestResponseEvent::Message { + message: + RequestResponseMessage::Response { + request_id, + response, + .. + }, + .. + }, + )) => { + debug!("Response received (id={request_id})"); let correlation_id = correlator.expire(request_id)?; - debug!("Correlated sync response: {correlation_id}"); + debug!("Correlated response: {correlation_id}"); event_tx.send(NetEvent::OutgoingRequestSucceeded( OutgoingRequestSucceeded { payload: response, @@ -456,7 +457,7 @@ async fn process_swarm_event( ))?; } - SwarmEvent::Behaviour(NodeBehaviourEvent::Sync( + SwarmEvent::Behaviour(NodeBehaviourEvent::RequestResponse( RequestResponseEvent::OutboundFailure { peer, request_id, @@ -464,32 +465,33 @@ async fn process_swarm_event( }, )) => { warn!( - "Outbound sync request failed: peer={}, id={}, error={:?}", + "Outbound request failed: peer={}, id={}, error={:?}", peer, request_id, error ); let correlation_id = correlator.expire(request_id)?; event_tx.send(NetEvent::OutgoingRequestFailed(OutgoingRequestFailed { correlation_id, - error: format!("Outbound sync request failed: {:?}", error), + error: format!("Outbound request failed: {:?}", error), }))?; } - 
SwarmEvent::Behaviour(NodeBehaviourEvent::Sync(RequestResponseEvent::InboundFailure { - peer, - request_id, - error, - })) => { + SwarmEvent::Behaviour(NodeBehaviourEvent::RequestResponse( + RequestResponseEvent::InboundFailure { + peer, + request_id, + error, + }, + )) => { warn!( - "Inbound sync request failed: peer={}, id={}, error={:?}", + "Inbound request failed: peer={}, id={}, error={:?}", peer, request_id, error ); } - SwarmEvent::Behaviour(NodeBehaviourEvent::Sync(RequestResponseEvent::ResponseSent { - peer, - request_id, - })) => { - debug!("Sync response sent to peer={}, id={}", peer, request_id); + SwarmEvent::Behaviour(NodeBehaviourEvent::RequestResponse( + RequestResponseEvent::ResponseSent { peer, request_id }, + )) => { + debug!("Response sent to peer={}, id={}", peer, request_id); } unknown => { @@ -550,8 +552,9 @@ async fn process_swarm_command( NetCommand::OutgoingRequest { correlation_id, payload, + target, } => { - handle_outgoing_request(swarm, correlator, correlation_id, payload)?; + handle_outgoing_request(swarm, correlator, correlation_id, payload, target)?; Ok(()) } NetCommand::Response { payload, channel } => { @@ -754,32 +757,27 @@ fn handle_outgoing_request( swarm: &mut Swarm, correlator: &mut Correlator, correlation_id: CorrelationId, - payload: RequestPayload, + payload: Vec, + target: PeerTarget, ) -> Result<()> { - debug!("Outgoing sync request (cid={})", correlation_id); - // TODO: - // This is a first pass. - // Lots of stuff to work through here: - // How can I know events are correct? - // How can I trust this peer? - // Can I validate events with another peer? - // Should I use an OrderedSet with a hash and request the hash from a second peer? 
- - // Pick a random peer - let Some(peer) = swarm - .connected_peers() - .choose(&mut rand::thread_rng()) - .copied() - else { - bail!("No peer found on swarm!") + let peer = match target { + PeerTarget::Random => swarm + .connected_peers() + .choose(&mut rand::thread_rng()) + .copied() + .context("No connected peers available")?, + PeerTarget::Specific(peer_id) => peer_id, }; - debug!("Sync request payload size: {:?}", payload.len()); + debug!("Outgoing request payload size: {:?}", payload.len()); // Request events - let query_id = swarm.behaviour_mut().sync.send_request(&peer, payload); + let query_id = swarm + .behaviour_mut() + .request_response + .send_request(&peer, payload); debug!( - "Sync request sent: query_id={}, correlation_id={}", + "Outgoing request sent: query_id={}, correlation_id={}", query_id, correlation_id ); correlator.track(query_id, correlation_id); @@ -788,13 +786,17 @@ fn handle_outgoing_request( fn handle_response( swarm: &mut Swarm, - channel: OnceTake>, - payload: ResponsePayload, + channel: OnceTake>>, + payload: Vec, ) -> Result<()> { - debug!("Sending sync response"); + debug!("Sending response"); let channel = channel.try_take()?; - if let Err(payload) = swarm.behaviour_mut().sync.send_response(channel, payload) { - error!("Failed to send sync response: {:?}", payload); + if let Err(payload) = swarm + .behaviour_mut() + .request_response + .send_response(channel, payload) + { + error!("Failed to send response: {:?}", payload); } Ok(()) } diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 3275be3a28..e7a1d9f1d4 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -21,7 +21,7 @@ use tracing::{debug, info, warn}; use crate::events::{ await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, - OutgoingRequestSucceeded, ResponsePayload, + OutgoingRequestSucceeded, PeerTarget, }; #[derive(Debug, Clone, Serialize, Deserialize)] @@ 
-43,7 +43,7 @@ pub struct NetSyncManager { /// NetEvents receiver to receive events rx: Arc>, eventstore: Recipient>, - requests: HashMap>>, + requests: HashMap>>>, peers_ready: bool, } @@ -255,6 +255,7 @@ async fn sync_request( NetCommand::OutgoingRequest { correlation_id: id, payload, + target: PeerTarget::Random, }, |e| match e.clone() { NetEvent::OutgoingRequestSucceeded(value) => Some(Ok(value)), From d8bf5ed51463e3ec03fecaefaa27f5341808bc8e Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 04:57:10 +0000 Subject: [PATCH 03/50] update abstraction --- crates/net/src/events.rs | 42 ++++++++++++++++---- crates/net/src/net_interface.rs | 6 +-- crates/net/src/net_sync_manager.rs | 64 ++++++++++++++++++++---------- 3 files changed, 81 insertions(+), 31 deletions(-) diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index fee97d0c83..3da552771d 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -6,7 +6,7 @@ use crate::ContentHash; use actix::Message; -use anyhow::{bail, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use e3_events::{ CorrelationId, DocumentMeta, EnclaveEvent, EventContextAccessors, EventSource, Sequenced, Unsequenced, @@ -83,6 +83,38 @@ pub struct IncomingRequest { pub channel: OnceTake>>, } +#[derive(Debug, Clone)] +pub struct OutgoingRequest { + pub correlation_id: CorrelationId, + pub payload: Vec, + pub target: PeerTarget, +} + +impl OutgoingRequest { + pub fn new_with_correlation( + id: CorrelationId, + target: PeerTarget, + payload: impl TryInto>, + ) -> Result { + Ok(Self { + correlation_id: id, + payload: payload.try_into().map_err(|_| { + anyhow!( + "could not serialize payload for outgoing request with correlation_id={id} and target={target:?}." 
+ ) + })?, + target, + }) + } + pub fn to_random_peer(payload: impl TryInto>) -> Result { + Self::new_with_correlation(CorrelationId::new(), PeerTarget::Random, payload) + } + + pub fn new(target: PeerId, payload: impl TryInto>) -> Result { + Self::new_with_correlation(CorrelationId::new(), PeerTarget::Specific(target), payload) + } +} + #[derive(Message, Clone, Debug)] #[rtype("()")] pub struct OutgoingRequestSucceeded { @@ -124,11 +156,7 @@ pub enum NetCommand { /// Shutdown signal Shutdown, /// Send a request to a peer and await response - OutgoingRequest { - correlation_id: CorrelationId, - payload: Vec, - target: PeerTarget, - }, + OutgoingRequest(OutgoingRequest), Response { payload: Vec, channel: OnceTake>>, @@ -142,7 +170,7 @@ impl NetCommand { N::DhtPutRecord { correlation_id, .. } => Some(*correlation_id), N::DhtGetRecord { correlation_id, .. } => Some(*correlation_id), N::GossipPublish { correlation_id, .. } => Some(*correlation_id), - N::OutgoingRequest { correlation_id, .. } => Some(*correlation_id), + N::OutgoingRequest(OutgoingRequest { correlation_id, .. }) => Some(*correlation_id), _ => None, } } diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 2bf9535be1..ca944ea71c 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -4,7 +4,7 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
-use crate::correlator::Correlator; +use crate::{correlator::Correlator, events::OutgoingRequest}; use anyhow::{Context, Result}; use e3_events::CorrelationId; use e3_utils::{ArcBytes, OnceTake}; @@ -549,11 +549,11 @@ async fn process_swarm_command( handle_remove_records(swarm, keys); Ok(()) } - NetCommand::OutgoingRequest { + NetCommand::OutgoingRequest(OutgoingRequest { correlation_id, payload, target, - } => { + }) => { handle_outgoing_request(swarm, correlator, correlation_id, payload, target)?; Ok(()) } diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index e7a1d9f1d4..3fba1ed929 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -15,13 +15,13 @@ use e3_utils::{retry_with_backoff, to_retry, OnceTake, MAILBOX_LIMIT}; use futures::TryFutureExt; use libp2p::request_response::ResponseChannel; use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, sync::Arc, time::Duration}; +use std::{collections::HashMap, convert::TryInto, sync::Arc, time::Duration}; use tokio::sync::{broadcast, mpsc}; use tracing::{debug, info, warn}; use crate::events::{ await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, - OutgoingRequestSucceeded, PeerTarget, + OutgoingRequest, }; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -29,12 +29,41 @@ pub struct SyncRequestValue { pub since: HashMap, } +impl TryInto> for SyncRequestValue { + type Error = anyhow::Error; + + fn try_into(self) -> Result, Self::Error> { + bincode::serialize(&self).context("failed to serialize sync request") + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SyncResponseValue { pub events: Vec, pub ts: u128, } +impl TryInto> for SyncResponseValue { + type Error = anyhow::Error; + + fn try_into(self) -> Result, Self::Error> { + bincode::serialize(&self).context("failed to serialize sync response") + } +} + +impl TryFrom> for SyncResponseValue { + type Error = anyhow::Error; + 
+ fn try_from(value: Vec) -> Result { + bincode::deserialize(&value).context("failed to deserialize sync response") + } +} + +#[derive(Debug, Clone)] +pub struct SyncRequestSucceeded { + pub response: SyncResponseValue, +} + pub struct NetSyncManager { /// Enclave EventBus bus: BusHandle, @@ -140,17 +169,16 @@ impl Handler> for NetSyncManager { } /// We have received the sync response from the remote peer -impl Handler> for NetSyncManager { +impl Handler> for NetSyncManager { type Result = (); fn handle( &mut self, - msg: TypedEvent, + msg: TypedEvent, _: &mut Self::Context, ) -> Self::Result { trap(EType::Net, &self.bus.with_ec(msg.get_ctx()), || { let (msg, ctx) = msg.into_components(); - let response: SyncResponseValue = bincode::deserialize(&msg.payload) - .context("failed to deserialize sync response")?; + let response = msg.response; self.bus.publish_from_remote_as_response( NetSyncEventsReceived { events: response @@ -210,10 +238,9 @@ impl Handler for NetSyncManager { .filter(|e| e.source() == EventSource::Net) .map(|ev| ev.try_into()) .collect::>()?, - ts: self.bus.ts()?, // NOTE: We are storing a local timestamp on this response + ts: self.bus.ts()?, }; - let payload = - bincode::serialize(&response).context("failed to serialize sync response")?; + let payload: Vec = response.try_into()?; if let Err(e) = self.tx.try_send(NetCommand::Response { payload, channel: channel.to_owned(), @@ -244,19 +271,12 @@ async fn sync_request( net_cmds: mpsc::Sender, net_events: Arc>, since: HashMap, -) -> Result { +) -> Result { info!("RUNNING sync request..."); - let id = CorrelationId::new(); - let payload = bincode::serialize(&SyncRequestValue { since }) - .context("failed to serialize sync request")?; - call_and_await_response( + let response = call_and_await_response( net_cmds, net_events, - NetCommand::OutgoingRequest { - correlation_id: id, - payload, - target: PeerTarget::Random, - }, + NetCommand::OutgoingRequest(OutgoingRequest::to_random_peer(SyncRequestValue { 
since })?), |e| match e.clone() { NetEvent::OutgoingRequestSucceeded(value) => Some(Ok(value)), NetEvent::OutgoingRequestFailed(error) => { @@ -266,14 +286,16 @@ async fn sync_request( }, SYNC_REQUEST_TIMEOUT, ) - .await + .await?; + let response: SyncResponseValue = response.payload.try_into()?; + Ok(SyncRequestSucceeded { response }) } async fn handle_sync_request_event( net_cmds: mpsc::Sender, net_events: Arc>, event: TypedEvent, - address: impl Into>>, + address: impl Into>>, wait_for_event: bool, ) -> Result<()> { info!("Sync request event received"); From 063f139ddce4ee6fef2c5d61285974e1dc35fede Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 06:47:03 +0000 Subject: [PATCH 04/50] update direct requester --- crates/net/src/direct_requester.rs | 235 +++++++++++++++++++++++++++++ crates/net/src/events.rs | 2 +- crates/net/src/lib.rs | 2 + crates/net/src/net_event_batch.rs | 26 ++++ 4 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 crates/net/src/direct_requester.rs create mode 100644 crates/net/src/net_event_batch.rs diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs new file mode 100644 index 0000000000..82f074c4d4 --- /dev/null +++ b/crates/net/src/direct_requester.rs @@ -0,0 +1,235 @@ +use std::{fmt, sync::Arc, time::Duration}; + +use anyhow::{anyhow, Result}; +use e3_events::CorrelationId; +use e3_utils::{retry_with_backoff, to_retry}; +use tokio::sync::{broadcast, mpsc}; + +use crate::events::{call_and_await_response, NetCommand, NetEvent, OutgoingRequest, PeerTarget}; + +pub trait DirectRequesterOutput: TryFrom> + Send + Sync + 'static {} + +pub trait DirectRequesterInput: + TryInto> + Clone + Send + Sync + fmt::Debug + 'static +{ +} + +impl DirectRequesterOutput for T where T: TryFrom> + Send + Sync + 'static {} + +impl DirectRequesterInput for T where + T: TryInto> + Clone + Send + Sync + fmt::Debug + 'static +{ +} + +pub struct DirectRequester { + net_cmds: mpsc::Sender, + net_events: 
Arc>, + request_timeout: Duration, + max_retries: u32, + retry_timeout: Duration, +} + +impl DirectRequester { + /// Creates a new DirectRequester with custom timeouts. + /// + /// # Arguments + /// * `net_cmds` - Channel to send network commands + /// * `net_events` - Channel to receive network events + /// * `request_timeout` - Timeout for each individual request attempt + /// * `max_retries` - Maximum number of retry attempts + /// * `retry_timeout` - Total timeout budget for all retries (used for backoff calculation) + pub fn new( + net_cmds: mpsc::Sender, + net_events: Arc>, + request_timeout: Duration, + max_retries: u32, + retry_timeout: Duration, + ) -> Self { + Self { + net_cmds, + net_events, + request_timeout, + max_retries, + retry_timeout, + } + } + + /// Creates a new DirectRequester with default timeouts (30s request, 4 retries, 5s total retry budget). + pub fn with_defaults( + net_cmds: mpsc::Sender, + net_events: Arc>, + ) -> Self { + Self::new( + net_cmds, + net_events, + Duration::from_secs(30), + 4, + Duration::from_millis(5000), + ) + } + + /// Sends a request to a peer and retries on failure. + /// + /// Uses exponential backoff with the configured `max_retries` and `retry_timeout_ms`. + /// Each attempt times out after `request_timeout`. 
+ /// + /// # Arguments + /// * `request` - The request payload (must implement `DirectRequesterInput`) + /// * `peer` - The target peer to send the request to + /// + /// # Returns + /// The response deserialized as type `T` (must implement `DirectRequesterOutput`) + pub async fn request(&self, request: R, peer: PeerTarget) -> Result + where + T: DirectRequesterOutput, + R: DirectRequesterInput, + { + let payload: Vec = request + .clone() + .try_into() + .map_err(|_| anyhow!("Request serialization failed for request: {:?}", request))?; + + let response = self.request_with_retry(payload, peer).await?; + + let response: T = response + .try_into() + .map_err(|_| anyhow!("Response conversion failed"))?; + + Ok(response) + } + + async fn request_with_retry(&self, payload: Vec, peer: PeerTarget) -> Result> { + let request_timeout = self.request_timeout; + retry_with_backoff( + || { + let net_cmds = self.net_cmds.clone(); + let net_events = self.net_events.clone(); + let payload = payload.clone(); + let request_timeout = request_timeout; + async move { + do_request(net_cmds, net_events, peer, payload, request_timeout) + .await + .map_err(to_retry) + } + }, + self.max_retries, + self.retry_timeout.as_millis() as u64, + ) + .await + } +} + +async fn do_request( + net_cmds: mpsc::Sender, + net_events: Arc>, + target: PeerTarget, + payload: Vec, + timeout: Duration, +) -> Result> { + let correlation_id = CorrelationId::new(); + + let response: Vec = call_and_await_response( + net_cmds, + net_events, + NetCommand::OutgoingRequest(OutgoingRequest { + correlation_id, + payload, + target, + }), + |e| match e { + NetEvent::OutgoingRequestSucceeded(value) => { + if value.correlation_id == correlation_id { + Some(Ok(value.payload.clone())) + } else { + None + } + } + NetEvent::OutgoingRequestFailed(value) => { + if value.correlation_id == correlation_id { + Some(Err(anyhow!("Request failed: {}", value.error))) + } else { + None + } + } + _ => None, + }, + timeout, + ) + .await + 
.map_err(|e| anyhow!("Request failed: {}", e))?; + + Ok(response) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::events::{OutgoingRequestSucceeded, PeerTarget}; + use tokio::sync::broadcast; + + #[tokio::test] + async fn test_successful_request() { + let (net_cmds_tx, mut net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = Arc::new(net_events_rx); + + let requester = DirectRequester::with_defaults( + net_cmds_tx.clone(), + net_events.clone(), + ); + + let net_events_tx_clone = net_events_tx.clone(); + let handle = tokio::spawn(async move { + let cmd = net_cmds_rx.recv().await.unwrap(); + if let NetCommand::OutgoingRequest(req) = cmd { + let response = OutgoingRequestSucceeded { + payload: vec![2, 2, 2], + correlation_id: req.correlation_id, + }; + net_events_tx_clone + .send(NetEvent::OutgoingRequestSucceeded(response)) + .unwrap(); + } + }); + + let response: Vec = requester + .request(vec![1, 1, 1], PeerTarget::Random) + .await + .unwrap(); + + handle.await.unwrap(); + + assert_eq!(response, vec![2, 2, 2]); + } + + #[tokio::test] + async fn test_request_with_peer_target() { + let (net_cmds_tx, mut net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = Arc::new(net_events_rx); + + let requester = DirectRequester::with_defaults(net_cmds_tx, net_events); + + let net_events_tx_clone = net_events_tx.clone(); + let handle = tokio::spawn(async move { + let cmd = net_cmds_rx.recv().await.unwrap(); + if let NetCommand::OutgoingRequest(req) = cmd { + assert!(matches!(req.target, PeerTarget::Random)); + let response = OutgoingRequestSucceeded { + payload: vec![], + correlation_id: req.correlation_id, + }; + net_events_tx_clone + .send(NetEvent::OutgoingRequestSucceeded(response)) + .unwrap(); + } + }); + + let _: Vec = requester + .request(vec![1], PeerTarget::Random) + .await + .unwrap(); + + handle.await.unwrap(); + } +} diff 
--git a/crates/net/src/events.rs b/crates/net/src/events.rs index 3da552771d..85c059eb08 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -30,7 +30,7 @@ use tracing::{error, trace, warn}; use libp2p::PeerId; -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug)] pub enum PeerTarget { Random, Specific(PeerId), diff --git a/crates/net/src/lib.rs b/crates/net/src/lib.rs index 45f44453c2..3f196d2e63 100644 --- a/crates/net/src/lib.rs +++ b/crates/net/src/lib.rs @@ -7,8 +7,10 @@ mod cid; mod correlator; mod dialer; +pub mod direct_requester; mod document_publisher; pub mod events; +mod net_event_batch; mod net_event_buffer; mod net_event_translator; mod net_interface; diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs new file mode 100644 index 0000000000..a6ce3e9979 --- /dev/null +++ b/crates/net/src/net_event_batch.rs @@ -0,0 +1,26 @@ +use std::sync::Arc; + +use anyhow::Result; +use e3_events::{EnclaveEvent, Unsequenced}; +use tokio::sync::{broadcast, mpsc}; + +use crate::events::{NetCommand, NetEvent}; + +pub enum BatchCursor { + Done, + Next(u128), +} + +pub struct EventBatch { + pub events: Vec>, + pub next: BatchCursor, +} + +pub async fn fetch_net_events_since( + _net_cmds: mpsc::Sender, + _net_events: Arc>, + _since_hlc: u128, + _limit: u16, +) -> Result { + todo!("fetch_net_events_since implementation") +} From ed2e0ed2b164ec404b14175a049bf52a284bb5e4 Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 07:52:38 +0000 Subject: [PATCH 05/50] formatting --- crates/net/src/direct_requester.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index 82f074c4d4..c93b649b2d 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -173,10 +173,7 @@ mod tests { let (net_events_tx, net_events_rx) = broadcast::channel::(16); let net_events = Arc::new(net_events_rx); - let requester 
= DirectRequester::with_defaults( - net_cmds_tx.clone(), - net_events.clone(), - ); + let requester = DirectRequester::with_defaults(net_cmds_tx.clone(), net_events.clone()); let net_events_tx_clone = net_events_tx.clone(); let handle = tokio::spawn(async move { From 675447b152ab2395dc5d327421fb86b9a2c1a832 Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 07:54:37 +0000 Subject: [PATCH 06/50] headers --- crates/net/src/direct_requester.rs | 6 ++++++ crates/net/src/net_event_batch.rs | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index c93b649b2d..e9df85d5f7 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -1,3 +1,9 @@ +// SPDX-License-Identifier: LGPL-3.0-only +// +// This file is provided WITHOUT ANY WARRANTY; +// without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. + use std::{fmt, sync::Arc, time::Duration}; use anyhow::{anyhow, Result}; diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs index a6ce3e9979..bc457bdd4e 100644 --- a/crates/net/src/net_event_batch.rs +++ b/crates/net/src/net_event_batch.rs @@ -1,3 +1,9 @@ +// SPDX-License-Identifier: LGPL-3.0-only +// +// This file is provided WITHOUT ANY WARRANTY; +// without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. 
+ use std::sync::Arc; use anyhow::Result; From afa256839cabdc7d929673441cc9fad147e023dc Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 11:29:07 +0000 Subject: [PATCH 07/50] make it easy to test with direct requester tester --- crates/net/src/direct_requester.rs | 397 ++++++++++++++++++++++------- crates/net/src/net_event_batch.rs | 168 +++++++++++- 2 files changed, 465 insertions(+), 100 deletions(-) diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index e9df85d5f7..b2bc17b5b9 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -4,14 +4,17 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. -use std::{fmt, sync::Arc, time::Duration}; +use std::{fmt, marker::PhantomData, sync::Arc, time::Duration}; use anyhow::{anyhow, Result}; use e3_events::CorrelationId; use e3_utils::{retry_with_backoff, to_retry}; use tokio::sync::{broadcast, mpsc}; -use crate::events::{call_and_await_response, NetCommand, NetEvent, OutgoingRequest, PeerTarget}; +use crate::events::{ + call_and_await_response, NetCommand, NetEvent, OutgoingRequest, OutgoingRequestFailed, + OutgoingRequestSucceeded, PeerTarget, +}; pub trait DirectRequesterOutput: TryFrom> + Send + Sync + 'static {} @@ -27,65 +30,48 @@ impl DirectRequesterInput for T where { } -pub struct DirectRequester { +pub struct WithoutPeer; +pub struct WithPeer(PeerTarget); + +pub struct DirectRequester { net_cmds: mpsc::Sender, net_events: Arc>, request_timeout: Duration, max_retries: u32, retry_timeout: Duration, + peer: PeerTarget, + _state: PhantomData, } -impl DirectRequester { - /// Creates a new DirectRequester with custom timeouts. 
- /// - /// # Arguments - /// * `net_cmds` - Channel to send network commands - /// * `net_events` - Channel to receive network events - /// * `request_timeout` - Timeout for each individual request attempt - /// * `max_retries` - Maximum number of retry attempts - /// * `retry_timeout` - Total timeout budget for all retries (used for backoff calculation) - pub fn new( +impl DirectRequester { + pub fn builder( net_cmds: mpsc::Sender, net_events: Arc>, - request_timeout: Duration, - max_retries: u32, - retry_timeout: Duration, - ) -> Self { - Self { - net_cmds, - net_events, - request_timeout, - max_retries, - retry_timeout, + ) -> DirectRequesterBuilder { + DirectRequesterBuilder { + net_cmds: Some(net_cmds), + net_events: Some(net_events), + request_timeout: Some(Duration::from_secs(30)), + max_retries: Some(4), + retry_timeout: Some(Duration::from_millis(5000)), } } - /// Creates a new DirectRequester with default timeouts (30s request, 4 retries, 5s total retry budget). - pub fn with_defaults( - net_cmds: mpsc::Sender, - net_events: Arc>, - ) -> Self { - Self::new( - net_cmds, - net_events, - Duration::from_secs(30), - 4, - Duration::from_millis(5000), - ) + pub fn to(&self, peer: PeerTarget) -> DirectRequester { + DirectRequester { + net_cmds: self.net_cmds.clone(), + net_events: self.net_events.clone(), + request_timeout: self.request_timeout, + max_retries: self.max_retries, + retry_timeout: self.retry_timeout, + peer, + _state: PhantomData, + } } +} - /// Sends a request to a peer and retries on failure. - /// - /// Uses exponential backoff with the configured `max_retries` and `retry_timeout_ms`. - /// Each attempt times out after `request_timeout`. 
- /// - /// # Arguments - /// * `request` - The request payload (must implement `DirectRequesterInput`) - /// * `peer` - The target peer to send the request to - /// - /// # Returns - /// The response deserialized as type `T` (must implement `DirectRequesterOutput`) - pub async fn request(&self, request: R, peer: PeerTarget) -> Result +impl DirectRequester { + pub async fn request(&self, request: R) -> Result where T: DirectRequesterOutput, R: DirectRequesterInput, @@ -95,7 +81,7 @@ impl DirectRequester { .try_into() .map_err(|_| anyhow!("Request serialization failed for request: {:?}", request))?; - let response = self.request_with_retry(payload, peer).await?; + let response = self.request_with_retry(payload).await?; let response: T = response .try_into() @@ -104,8 +90,9 @@ impl DirectRequester { Ok(response) } - async fn request_with_retry(&self, payload: Vec, peer: PeerTarget) -> Result> { + async fn request_with_retry(&self, payload: Vec) -> Result> { let request_timeout = self.request_timeout; + let peer = self.peer; retry_with_backoff( || { let net_cmds = self.net_cmds.clone(); @@ -125,6 +112,43 @@ impl DirectRequester { } } +pub struct DirectRequesterBuilder { + net_cmds: Option>, + net_events: Option>>, + request_timeout: Option, + max_retries: Option, + retry_timeout: Option, +} + +impl DirectRequesterBuilder { + pub fn request_timeout(mut self, request_timeout: Duration) -> Self { + self.request_timeout = Some(request_timeout); + self + } + + pub fn max_retries(mut self, max_retries: u32) -> Self { + self.max_retries = Some(max_retries); + self + } + + pub fn retry_timeout(mut self, retry_timeout: Duration) -> Self { + self.retry_timeout = Some(retry_timeout); + self + } + + pub fn build(self) -> DirectRequester { + DirectRequester { + net_cmds: self.net_cmds.expect("net_cmds is required"), + net_events: self.net_events.expect("net_events is required"), + request_timeout: self.request_timeout.unwrap_or(Duration::from_secs(30)), + max_retries: 
self.max_retries.unwrap_or(4), + retry_timeout: self.retry_timeout.unwrap_or(Duration::from_millis(5000)), + peer: PeerTarget::Random, + _state: PhantomData, + } + } +} + async fn do_request( net_cmds: mpsc::Sender, net_events: Arc>, @@ -167,72 +191,273 @@ async fn do_request( Ok(response) } +struct Expectation { + expected_request: Vec, + response: Result, String>, +} + +pub(crate) struct DirectRequesterTester { + net_cmds_rx: mpsc::Receiver, + net_events_tx: broadcast::Sender, + respond_with: Option>, + responses: Vec>, + expectations: Vec, + error_on: Option, + num_requests: Option, +} + +pub(crate) struct ExpectationBuilder { + tester: DirectRequesterTester, + expected_request: Vec, +} + +impl ExpectationBuilder { + pub fn respond_with>>(mut self, payload: T) -> DirectRequesterTester + where + >>::Error: std::fmt::Debug, + { + self.tester.expectations.push(Expectation { + expected_request: self.expected_request, + response: Ok(payload.try_into().unwrap()), + }); + self.tester + } + + pub fn error_with(mut self, error: impl Into) -> DirectRequesterTester { + self.tester.expectations.push(Expectation { + expected_request: self.expected_request, + response: Err(error.into()), + }); + self.tester + } +} + +impl DirectRequesterTester { + pub fn new( + net_cmds_rx: mpsc::Receiver, + net_events_tx: broadcast::Sender, + ) -> Self { + Self { + net_cmds_rx, + net_events_tx, + respond_with: None, + responses: Vec::new(), + expectations: Vec::new(), + error_on: None, + num_requests: None, + } + } + + pub fn expect_request>>(self, payload: T) -> ExpectationBuilder + where + >>::Error: std::fmt::Debug, + { + ExpectationBuilder { + tester: self, + expected_request: payload.try_into().unwrap(), + } + } + + pub fn respond_with>>(mut self, payload: T) -> Self + where + >>::Error: std::fmt::Debug, + { + self.respond_with = Some(payload.try_into().unwrap()); + self + } + + pub fn respond_with_each>>( + mut self, + payloads: impl IntoIterator, + ) -> Self + where + >>::Error: 
std::fmt::Debug, + { + self.responses = payloads + .into_iter() + .map(|p| p.try_into().unwrap()) + .collect(); + self + } + + pub fn error_with(mut self, error: impl Into) -> Self { + self.error_on = Some(error.into()); + self + } + + pub fn num_requests(mut self, n: usize) -> Self { + self.num_requests = Some(n); + self + } + + pub fn spawn(mut self) -> tokio::task::JoinHandle<()> { + let num_requests = self.num_requests.unwrap_or_else(|| { + if !self.expectations.is_empty() { + self.expectations.len() + } else { + usize::MAX + } + }); + // Reverse expectations so we can pop from the back in order. + self.expectations.reverse(); + + tokio::spawn(async move { + let mut remaining = num_requests; + while remaining > 0 { + if let Some(cmd) = self.net_cmds_rx.recv().await { + if let NetCommand::OutgoingRequest(req) = cmd { + let response = if let Some(expectation) = self.expectations.pop() { + assert_eq!( + req.payload, expectation.expected_request, + "DirectRequesterTester: expected request {:?} but got {:?}", + expectation.expected_request, req.payload, + ); + match expectation.response { + Ok(payload) => { + NetEvent::OutgoingRequestSucceeded(OutgoingRequestSucceeded { + payload, + correlation_id: req.correlation_id, + }) + } + Err(error) => { + NetEvent::OutgoingRequestFailed(OutgoingRequestFailed { + error, + correlation_id: req.correlation_id, + }) + } + } + } else if let Some(payload) = self.respond_with.clone() { + NetEvent::OutgoingRequestSucceeded(OutgoingRequestSucceeded { + payload, + correlation_id: req.correlation_id, + }) + } else if let Some(payload) = self.responses.pop() { + NetEvent::OutgoingRequestSucceeded(OutgoingRequestSucceeded { + payload, + correlation_id: req.correlation_id, + }) + } else if let Some(error) = self.error_on.clone() { + NetEvent::OutgoingRequestFailed(OutgoingRequestFailed { + error, + correlation_id: req.correlation_id, + }) + } else { + panic!("DirectRequesterTester: no response configured"); + }; + let _ = 
self.net_events_tx.send(response); + } + remaining -= 1; + } else { + break; + } + } + }) + } +} + #[cfg(test)] mod tests { use super::*; - use crate::events::{OutgoingRequestSucceeded, PeerTarget}; + use crate::events::PeerTarget; use tokio::sync::broadcast; #[tokio::test] async fn test_successful_request() { - let (net_cmds_tx, mut net_cmds_rx) = mpsc::channel::(16); + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); let (net_events_tx, net_events_rx) = broadcast::channel::(16); let net_events = Arc::new(net_events_rx); - let requester = DirectRequester::with_defaults(net_cmds_tx.clone(), net_events.clone()); - - let net_events_tx_clone = net_events_tx.clone(); - let handle = tokio::spawn(async move { - let cmd = net_cmds_rx.recv().await.unwrap(); - if let NetCommand::OutgoingRequest(req) = cmd { - let response = OutgoingRequestSucceeded { - payload: vec![2, 2, 2], - correlation_id: req.correlation_id, - }; - net_events_tx_clone - .send(NetEvent::OutgoingRequestSucceeded(response)) - .unwrap(); - } - }); + let requester = DirectRequester::builder(net_cmds_tx, net_events).build(); + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .respond_with(b"world".to_vec()) + .num_requests(1) + .spawn(); let response: Vec = requester - .request(vec![1, 1, 1], PeerTarget::Random) + .to(PeerTarget::Random) + .request(b"hello".to_vec()) .await .unwrap(); handle.await.unwrap(); - assert_eq!(response, vec![2, 2, 2]); + assert_eq!(response, b"world"); } #[tokio::test] async fn test_request_with_peer_target() { - let (net_cmds_tx, mut net_cmds_rx) = mpsc::channel::(16); + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); let (net_events_tx, net_events_rx) = broadcast::channel::(16); let net_events = Arc::new(net_events_rx); - let requester = DirectRequester::with_defaults(net_cmds_tx, net_events); - - let net_events_tx_clone = net_events_tx.clone(); - let handle = tokio::spawn(async move { - let cmd = net_cmds_rx.recv().await.unwrap(); - if let 
NetCommand::OutgoingRequest(req) = cmd { - assert!(matches!(req.target, PeerTarget::Random)); - let response = OutgoingRequestSucceeded { - payload: vec![], - correlation_id: req.correlation_id, - }; - net_events_tx_clone - .send(NetEvent::OutgoingRequestSucceeded(response)) - .unwrap(); - } - }); + let requester = DirectRequester::builder(net_cmds_tx, net_events).build(); + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .respond_with(b"pong".to_vec()) + .num_requests(1) + .spawn(); let _: Vec = requester - .request(vec![1], PeerTarget::Random) + .to(PeerTarget::Random) + .request(b"ping".to_vec()) .await .unwrap(); handle.await.unwrap(); } + + #[tokio::test] + async fn test_peer_requester_reuse_across_requests() { + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = Arc::new(net_events_rx); + + let requester = DirectRequester::builder(net_cmds_tx, net_events) + .request_timeout(Duration::from_secs(10)) + .max_retries(3) + .retry_timeout(Duration::from_secs(5)) + .build(); + + let peer_requester = requester.to(PeerTarget::Random); + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .respond_with(b"ok".to_vec()) + .num_requests(2) + .spawn(); + + let response1: Vec = peer_requester.request(b"first".to_vec()).await.unwrap(); + let response2: Vec = peer_requester.request(b"second".to_vec()).await.unwrap(); + + handle.await.unwrap(); + + assert_eq!(response1, b"ok"); + assert_eq!(response2, b"ok"); + } + + #[tokio::test] + async fn test_expect_request() { + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = Arc::new(net_events_rx); + + let requester = DirectRequester::builder(net_cmds_tx, net_events).build(); + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .expect_request(b"hello".to_vec()) + .respond_with(b"world".to_vec()) + 
.expect_request(b"ping".to_vec()) + .respond_with(b"pong".to_vec()) + .spawn(); + + let peer = requester.to(PeerTarget::Random); + + let r1: Vec = peer.request(b"hello".to_vec()).await.unwrap(); + let r2: Vec = peer.request(b"ping".to_vec()).await.unwrap(); + + handle.await.unwrap(); + + assert_eq!(r1, b"world"); + assert_eq!(r2, b"pong"); + } } diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs index bc457bdd4e..6182d4d9ae 100644 --- a/crates/net/src/net_event_batch.rs +++ b/crates/net/src/net_event_batch.rs @@ -4,29 +4,169 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. -use std::sync::Arc; +use std::fmt::Debug; -use anyhow::Result; -use e3_events::{EnclaveEvent, Unsequenced}; -use tokio::sync::{broadcast, mpsc}; +use anyhow::{Context, Result}; +use e3_events::AggregateId; -use crate::events::{NetCommand, NetEvent}; +use crate::{ + direct_requester::{DirectRequester, WithPeer, WithoutPeer}, + events::PeerTarget, +}; +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] pub enum BatchCursor { Done, Next(u128), } -pub struct EventBatch { - pub events: Vec>, +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +pub struct EventBatch { + pub events: Vec, pub next: BatchCursor, + pub aggregate_id: AggregateId, } -pub async fn fetch_net_events_since( - _net_cmds: mpsc::Sender, - _net_events: Arc>, - _since_hlc: u128, - _limit: u16, -) -> Result { - todo!("fetch_net_events_since implementation") +impl TryFrom> for EventBatch +where + E: serde::de::DeserializeOwned, +{ + type Error = anyhow::Error; + + fn try_from(value: Vec) -> Result { + bincode::deserialize(&value).context("failed to deserialize EventBatch") + } +} + +impl TryFrom> for Vec +where + E: serde::Serialize, +{ + type Error = anyhow::Error; + + fn try_from(value: EventBatch) -> Result { + bincode::serialize(&value).context("failed to serialize EventBatch") + } +} + +#[derive(Clone, Debug, 
serde::Deserialize, serde::Serialize)] +pub struct FetchEventsSince { + aggregate_id: AggregateId, + since: u128, + limit: u16, +} + +impl FetchEventsSince { + pub fn new(aggregate_id: AggregateId, since: u128, limit: u16) -> Self { + Self { + aggregate_id, + since, + limit, + } + } +} + +impl TryFrom for Vec { + type Error = anyhow::Error; + + fn try_from(value: FetchEventsSince) -> Result { + bincode::serialize(&value).context("failed to serialize FetchEventsSince") + } +} + +impl TryFrom> for FetchEventsSince { + type Error = anyhow::Error; + + fn try_from(value: Vec) -> Result { + bincode::deserialize(&value).context("failed to deserialize FetchEventsSince") + } +} + +pub async fn fetch_events_since( + requester: DirectRequester, + request: FetchEventsSince, +) -> Result> +where + E: TryFrom> + Send + Sync + 'static, + EventBatch: TryFrom>, +{ + requester.request(request).await +} + +pub async fn fetch_all_batched_events( + requester: DirectRequester, + peer: PeerTarget, + aggregate_id: AggregateId, + since: i128, +) -> Result> +where + E: TryFrom> + Send + Sync + 'static, + EventBatch: TryFrom>, +{ + let requester = requester.to(peer); + let mut all_events = Vec::new(); + let mut cursor: u128 = since as u128; + let limit = u16::MAX; + + loop { + let request = FetchEventsSince::new(aggregate_id, cursor, limit); + let batch: EventBatch = requester.request(request).await?; + + all_events.extend(batch.events); + + match batch.next { + BatchCursor::Done => break, + BatchCursor::Next(next_cursor) => cursor = next_cursor, + } + } + + Ok(all_events) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::direct_requester::DirectRequesterTester; + use crate::events::{NetCommand, NetEvent, PeerTarget}; + use std::sync::Arc; + use tokio::sync::{broadcast, mpsc}; + + #[tokio::test] + async fn test_fetch_all_batched_events() { + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = 
Arc::new(net_events_rx); + + let requester = DirectRequester::builder(net_cmds_tx, net_events).build(); + + let batch1 = EventBatch { + events: vec![b"event1".to_vec(), b"event2".to_vec()], + next: BatchCursor::Next(100), + aggregate_id: AggregateId::new(1), + }; + let batch2 = EventBatch { + events: vec![b"event3".to_vec()], + next: BatchCursor::Done, + aggregate_id: AggregateId::new(1), + }; + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .expect_request(FetchEventsSince::new(AggregateId::new(1), 0, u16::MAX)) + .respond_with(batch1) + .expect_request(FetchEventsSince::new(AggregateId::new(1), 100, u16::MAX)) + .respond_with(batch2) + .spawn(); + + let events: Vec> = + fetch_all_batched_events(requester, PeerTarget::Random, AggregateId::new(1), 0) + .await + .unwrap(); + + handle.await.unwrap(); + + assert_eq!( + events, + vec![b"event1".to_vec(), b"event2".to_vec(), b"event3".to_vec(),] + ); + } } From 72810881de75f324326ca72dc315804fdddcb1e6 Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 11:44:12 +0000 Subject: [PATCH 08/50] add failure test --- crates/net/src/direct_requester.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index b2bc17b5b9..7b4dab6f9c 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -460,4 +460,30 @@ mod tests { assert_eq!(r1, b"world"); assert_eq!(r2, b"pong"); } + + #[tokio::test] + async fn test_request_failure() { + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = Arc::new(net_events_rx); + + let requester = DirectRequester::builder(net_cmds_tx, net_events) + .max_retries(0) + .build(); + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .error_with("connection refused") + .num_requests(1) + .spawn(); + + let result: std::result::Result, _> = 
requester + .to(PeerTarget::Random) + .request(b"hello".to_vec()) + .await; + + handle.await.unwrap(); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("connection refused")); + } } From 42931a0de55e47f06916523439ccf2bc71584516 Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 20:34:07 +0000 Subject: [PATCH 09/50] fix bad type --- crates/net/src/net_event_batch.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs index 6182d4d9ae..d38bbd0cdf 100644 --- a/crates/net/src/net_event_batch.rs +++ b/crates/net/src/net_event_batch.rs @@ -97,7 +97,8 @@ pub async fn fetch_all_batched_events( requester: DirectRequester, peer: PeerTarget, aggregate_id: AggregateId, - since: i128, + since: u128, + batch_size: u16, ) -> Result> where E: TryFrom> + Send + Sync + 'static, @@ -105,11 +106,10 @@ where { let requester = requester.to(peer); let mut all_events = Vec::new(); - let mut cursor: u128 = since as u128; - let limit = u16::MAX; + let mut cursor = since; loop { - let request = FetchEventsSince::new(aggregate_id, cursor, limit); + let request = FetchEventsSince::new(aggregate_id, cursor, batch_size); let batch: EventBatch = requester.request(request).await?; all_events.extend(batch.events); @@ -151,14 +151,14 @@ mod tests { }; let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) - .expect_request(FetchEventsSince::new(AggregateId::new(1), 0, u16::MAX)) + .expect_request(FetchEventsSince::new(AggregateId::new(1), 0, 100)) .respond_with(batch1) - .expect_request(FetchEventsSince::new(AggregateId::new(1), 100, u16::MAX)) + .expect_request(FetchEventsSince::new(AggregateId::new(1), 100, 100)) .respond_with(batch2) .spawn(); let events: Vec> = - fetch_all_batched_events(requester, PeerTarget::Random, AggregateId::new(1), 0) + fetch_all_batched_events(requester, PeerTarget::Random, AggregateId::new(1), 0, 100) .await 
.unwrap(); From c2a9eb8ea35a3a0c270d772459530d98725b93cb Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 24 Feb 2026 20:35:12 +0000 Subject: [PATCH 10/50] formatting --- crates/net/src/direct_requester.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index 7b4dab6f9c..d153a914bc 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -484,6 +484,9 @@ mod tests { handle.await.unwrap(); assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("connection refused")); + assert!(result + .unwrap_err() + .to_string() + .contains("connection refused")); } } From fbb75ccb6de1d112027c797baae5e902398e3424 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 06:01:50 +0000 Subject: [PATCH 11/50] refactor: add DirectResponder and ProtocolResponse for structured request handling - Add DirectResponder helper for responding to incoming libp2p requests - Add ProtocolResponse enum (Ok, BadRequest, Error) for structured responses - Update DirectRequester to handle ProtocolResponse variants - Refactor NetSyncManager to use DirectResponder instead of raw channels - Update net_interface to integrate DirectResponder into request handling --- crates/net/src/direct_requester.rs | 70 +++++++++-- crates/net/src/direct_responder.rs | 194 +++++++++++++++++++++++++++++ crates/net/src/events.rs | 48 +++++-- crates/net/src/lib.rs | 1 + crates/net/src/net_interface.rs | 47 +++---- crates/net/src/net_sync_manager.rs | 60 +++++---- 6 files changed, 347 insertions(+), 73 deletions(-) create mode 100644 crates/net/src/direct_responder.rs diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index d153a914bc..4248700596 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -13,7 +13,7 @@ use tokio::sync::{broadcast, mpsc}; use crate::events::{ call_and_await_response, 
NetCommand, NetEvent, OutgoingRequest, OutgoingRequestFailed, - OutgoingRequestSucceeded, PeerTarget, + OutgoingRequestSucceeded, PeerTarget, ProtocolResponse, }; pub trait DirectRequesterOutput: TryFrom> + Send + Sync + 'static {} @@ -33,6 +33,28 @@ impl DirectRequesterInput for T where pub struct WithoutPeer; pub struct WithPeer(PeerTarget); +/// DirectRequester is used to send direct requests to a specific peer. +/// +/// # Example +/// +/// ```ignore +/// use crate::net::direct_requester::DirectRequester; +/// use crate::events::PeerTarget; +/// use libp2p::PeerId; +/// +/// // Create a requester with default settings +/// let requester = DirectRequester::builder(net_cmds, net_events) +/// .request_timeout(Duration::from_secs(30)) +/// .max_retries(4) +/// .build(); +/// +/// // Target a specific peer (use Random to pick a random peer) +/// let peer_id: PeerId = "12D3KooWEZiPVmEZkwCFEWYxPL6xts6LnPHRFqsSEDGmt1vQ17By".parse().unwrap(); +/// let peer_requester = requester.to(PeerTarget::Specific(peer_id)); +/// +/// // Make a request (any type implementing TryInto> and TryFrom> works) +/// let response: MyResponse = peer_requester.request(my_request).await?; +/// ``` pub struct DirectRequester { net_cmds: mpsc::Sender, net_events: Arc>, @@ -44,6 +66,12 @@ pub struct DirectRequester { } impl DirectRequester { + /// Creates a new DirectRequester builder. + /// + /// Default settings: + /// - request_timeout: 30 seconds + /// - max_retries: 4 + /// - retry_timeout: 5 seconds pub fn builder( net_cmds: mpsc::Sender, net_events: Arc>, @@ -57,6 +85,10 @@ impl DirectRequester { } } + /// Sets the target peer for requests. + /// + /// Use `PeerTarget::Random` to send to a random peer, or + /// `PeerTarget::Specific(peer_id)` to target a specific peer. 
pub fn to(&self, peer: PeerTarget) -> DirectRequester { DirectRequester { net_cmds: self.net_cmds.clone(), @@ -71,6 +103,15 @@ impl DirectRequester { } impl DirectRequester { + /// Sends a direct request to the peer and waits for a response. + /// + /// The request type must implement `TryInto>` with `Clone + Send + Sync + Debug`. + /// The response type must implement `TryFrom>`. + /// + /// # Errors + /// + /// Returns an error if request serialization fails, the peer responds with an error, + /// or if the request times out after retries. pub async fn request(&self, request: R) -> Result where T: DirectRequesterOutput, @@ -83,14 +124,16 @@ impl DirectRequester { let response = self.request_with_retry(payload).await?; - let response: T = response - .try_into() - .map_err(|_| anyhow!("Response conversion failed"))?; - - Ok(response) + match response { + ProtocolResponse::Ok(data) => Ok(data + .try_into() + .map_err(|_| anyhow!("Could not deserialize ProtocolResponse"))?), + ProtocolResponse::BadRequest(msg) => Err(anyhow!("BadRequest: {}", msg)), + ProtocolResponse::Error(msg) => Err(anyhow!("ProtocolError: {}", msg)), + } } - async fn request_with_retry(&self, payload: Vec) -> Result> { + async fn request_with_retry(&self, payload: Vec) -> Result { let request_timeout = self.request_timeout; let peer = self.peer; retry_with_backoff( @@ -121,16 +164,19 @@ pub struct DirectRequesterBuilder { } impl DirectRequesterBuilder { + /// Sets the timeout for each request attempt. pub fn request_timeout(mut self, request_timeout: Duration) -> Self { self.request_timeout = Some(request_timeout); self } + /// Sets the maximum number of retry attempts. pub fn max_retries(mut self, max_retries: u32) -> Self { self.max_retries = Some(max_retries); self } + /// Sets the timeout between retry attempts. 
pub fn retry_timeout(mut self, retry_timeout: Duration) -> Self { self.retry_timeout = Some(retry_timeout); self @@ -155,10 +201,10 @@ async fn do_request( target: PeerTarget, payload: Vec, timeout: Duration, -) -> Result> { +) -> Result { let correlation_id = CorrelationId::new(); - let response: Vec = call_and_await_response( + let response = call_and_await_response( net_cmds, net_events, NetCommand::OutgoingRequest(OutgoingRequest { @@ -315,7 +361,7 @@ impl DirectRequesterTester { match expectation.response { Ok(payload) => { NetEvent::OutgoingRequestSucceeded(OutgoingRequestSucceeded { - payload, + payload: ProtocolResponse::Ok(payload), correlation_id: req.correlation_id, }) } @@ -328,12 +374,12 @@ impl DirectRequesterTester { } } else if let Some(payload) = self.respond_with.clone() { NetEvent::OutgoingRequestSucceeded(OutgoingRequestSucceeded { - payload, + payload: ProtocolResponse::Ok(payload), correlation_id: req.correlation_id, }) } else if let Some(payload) = self.responses.pop() { NetEvent::OutgoingRequestSucceeded(OutgoingRequestSucceeded { - payload, + payload: ProtocolResponse::Ok(payload), correlation_id: req.correlation_id, }) } else if let Some(error) = self.error_on.clone() { diff --git a/crates/net/src/direct_responder.rs b/crates/net/src/direct_responder.rs new file mode 100644 index 0000000000..6cbd0e48ef --- /dev/null +++ b/crates/net/src/direct_responder.rs @@ -0,0 +1,194 @@ +use crate::events::{IncomingResponse, NetCommand, ProtocolResponse, ProtocolResponseChannel}; +use anyhow::{anyhow, Context, Result}; +use e3_utils::OnceTake; +use libp2p::request_response::InboundRequestId; +use tokio::sync::mpsc; + +/// Helper trait to extract id from libp2p things like InboundRequestId +pub trait IntoId { + fn into_id(self) -> u64; +} + +impl IntoId for u64 { + fn into_id(self) -> u64 { + self + } +} + +impl IntoId for InboundRequestId { + fn into_id(self) -> u64 { + format!("{:?}", self) + .chars() + .filter(|c| c.is_ascii_digit()) + .collect::() + 
.parse::() + .expect("Failed to extract u64 from InboundRequestId") + } +} +#[derive(Debug)] +/// DirectResponder is used to respond to incoming libp2p requests. +/// +/// # Example +/// +/// ``` +/// # use tokio::sync::mpsc; +/// use e3_net::direct_responder::DirectResponder; +/// # fn main() -> anyhow::Result<()> { +/// # let request_id = 6; +/// # let channel_orig = String::from("channel"); +/// # let channel = channel_orig.clone(); +/// let (cmd_tx, _) = mpsc::channel(16); +/// +/// // We create a responder and send it over our event channel +/// let responder = DirectResponder::new( +/// // request_id comes from libp2p anything that looks like a u64 will work +/// request_id, +/// // Likely ResponseChannel from libp2p event but does not matter will just get passed on +/// channel, +/// // Our NetCommand channel Sender +/// &cmd_tx +/// ); +/// +/// // Now in our handlers we can respond with ok() or bad_request() this will consume the responder +/// responder.ok(String::from("Something that implements TryInto>"))?; +/// // or: +/// # let responder = DirectResponder::new(request_id,channel_orig,&cmd_tx); +/// responder.bad_request("Hey something went wrong!")?; +/// # Ok(()) +/// # } +/// ``` +pub struct DirectResponder { + id: u64, + response: Option, + channel: OnceTake, + net_cmds: mpsc::Sender>, +} + +impl Clone for DirectResponder { + fn clone(&self) -> Self { + Self { + id: self.id.clone(), + response: self.response.clone(), + channel: self.channel.clone(), + net_cmds: self.net_cmds.clone(), + } + } +} + +impl DirectResponder { + /// Creates a new responder for an incoming request. + /// + /// * `id` - is the request identifier used for debugging (e.g., `InboundRequestId` or `u64`). + /// * `channel` - is usually the response channel provided by libp2p but can be anything that is passed along with the response + /// * `net_cmds` - sender is used to send the response back to the net interface. 
+ pub fn new(id: impl IntoId, channel: C, net_cmds: &mpsc::Sender>) -> Self { + Self { + id: id.into_id(), + response: None, + channel: OnceTake::new(channel), + net_cmds: net_cmds.clone(), + } + } + + /// Extract the payload information to send to swarm + pub fn to_response(mut self) -> Result<(C, ProtocolResponse)> { + let channel = self.channel.try_take()?; + let response = self + .response + .take() + .context("No response found on responder")?; + Ok((channel, response)) + } + + /// Consumes self and responds + pub fn respond(mut self, value: ProtocolResponse) -> Result<()> { + let response = value; + self.response = Some(response); + let cmds = self.net_cmds.clone(); + let incoming = IncomingResponse::::new(self); + Ok(cmds + .clone() + .try_send(NetCommand::::IncomingResponse(incoming)) + .map_err(|_| anyhow!("Failed to send response command"))?) + } + + /// Request is ok returning response + pub fn ok>>(self, data: T) -> Result<()> { + let bytes: Vec = data + .try_into() + .map_err(|_| anyhow!("Could not serialize response."))?; + self.respond(ProtocolResponse::Ok(bytes)) + } + + /// Return a bad request + pub fn bad_request(self, reason: impl Into) -> Result<()> { + self.respond(ProtocolResponse::BadRequest(reason.into())) + } + + /// Get the id (for logging purposes) + pub fn id(&self) -> u64 { + self.id + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::sync::mpsc; + + fn make_responder() -> (DirectResponder, mpsc::Receiver>) { + let (tx, rx) = mpsc::channel::>(16); + let responder = DirectResponder::new(42u64, "test_channel".to_string(), &tx); + (responder, rx) + } + + fn extract_response( + rx: &mut mpsc::Receiver>, + ) -> Result<(String, ProtocolResponse)> { + let cmd = rx.try_recv().unwrap(); + match cmd { + NetCommand::IncomingResponse(incoming) => incoming.responder.to_response(), + other => panic!("Expected IncomingResponse, got {:?}", other), + } + } + + #[test] + fn to_response_fails_without_response_set() { + let (responder, _rx) = 
make_responder(); + assert!(responder.to_response().is_err()); + } + + #[test] + fn channel_can_only_be_taken_once() { + let (mut responder, _rx) = make_responder(); + responder.response = Some(ProtocolResponse::Ok(Vec::new())); + let cloned = responder.clone(); + let _ = responder.to_response().unwrap(); + assert!(cloned.to_response().is_err()); + } + + #[test] + fn ok_sends_serialized_payload() { + let (responder, mut rx) = make_responder(); + responder.ok(b"foo".to_vec()).unwrap(); + let (channel, response) = extract_response(&mut rx).unwrap(); + assert_eq!(channel, "test_channel"); + assert!(matches!(response, ProtocolResponse::Ok(v) if v == b"foo")); + } + + #[test] + fn respond_sends_bad_request() { + let (responder, mut rx) = make_responder(); + responder.bad_request("bad").unwrap(); + let (channel, response) = extract_response(&mut rx).unwrap(); + assert_eq!(channel, "test_channel"); + assert!(matches!(response, ProtocolResponse::BadRequest(r) if r == "bad")); + } + + #[test] + fn respond_fails_when_receiver_dropped() { + let (responder, rx) = make_responder(); + drop(rx); + assert!(responder.respond(ProtocolResponse::Ok(vec![])).is_err()); + } +} diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index 85c059eb08..c76497ec59 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -4,18 +4,18 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
-use crate::ContentHash; +use crate::{direct_responder::DirectResponder, ContentHash}; use actix::Message; use anyhow::{anyhow, bail, Context, Result}; use e3_events::{ CorrelationId, DocumentMeta, EnclaveEvent, EventContextAccessors, EventSource, Sequenced, Unsequenced, }; -use e3_utils::{ArcBytes, OnceTake}; +use e3_utils::ArcBytes; use libp2p::{ gossipsub::{MessageId, PublishError, TopicHash}, kad::{store, GetRecordError, PutRecordError}, - request_response::{InboundRequestId, ResponseChannel}, + request_response::ResponseChannel, swarm::{dial_opts::DialOpts, ConnectionId, DialError}, }; use serde::{Deserialize, Serialize}; @@ -75,12 +75,31 @@ impl TryFrom for EnclaveEvent { } } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum ProtocolResponse { + Ok(Vec), + BadRequest(String), + Error(String), +} + +pub type ProtocolResponseChannel = ResponseChannel; + #[derive(Message, Clone, Debug)] #[rtype("()")] pub struct IncomingRequest { - pub request_id: InboundRequestId, - pub payload: Vec, - pub channel: OnceTake>>, + pub responder: DirectResponder, + pub request: Vec, +} + +#[derive(Clone, Debug)] +pub struct IncomingResponse>> { + pub responder: DirectResponder, +} + +impl IncomingResponse { + pub fn new(responder: DirectResponder) -> Self { + Self { responder } + } } #[derive(Debug, Clone)] @@ -118,7 +137,7 @@ impl OutgoingRequest { #[derive(Message, Clone, Debug)] #[rtype("()")] pub struct OutgoingRequestSucceeded { - pub payload: Vec, + pub payload: ProtocolResponse, pub correlation_id: CorrelationId, } @@ -130,7 +149,11 @@ pub struct OutgoingRequestFailed { /// NetInterface Commands are sent to the network peer over a mspc channel #[derive(Debug)] -pub enum NetCommand { +// The generics here aid testing allowing us to avoid constructing complex types +// This is probably not an issue aside from complex types that are actively hidden from +// clone such as passing around a response channel which we don't control. 
+// Basically this helps us test and I don't expect this list to grow much. +pub enum NetCommand { /// Publish message to gossipsub GossipPublish { topic: String, @@ -152,15 +175,14 @@ pub enum NetCommand { key: ContentHash, }, /// Remove DHT records associated with a completed E3 - DhtRemoveRecords { keys: Vec }, + DhtRemoveRecords { + keys: Vec, + }, /// Shutdown signal Shutdown, /// Send a request to a peer and await response OutgoingRequest(OutgoingRequest), - Response { - payload: Vec, - channel: OnceTake>>, - }, + IncomingResponse(IncomingResponse), } impl NetCommand { diff --git a/crates/net/src/lib.rs b/crates/net/src/lib.rs index 3f196d2e63..1f97f2270d 100644 --- a/crates/net/src/lib.rs +++ b/crates/net/src/lib.rs @@ -8,6 +8,7 @@ mod cid; mod correlator; mod dialer; pub mod direct_requester; +pub mod direct_responder; mod document_publisher; pub mod events; mod net_event_batch; diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index ca944ea71c..130dd839df 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -4,10 +4,14 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
-use crate::{correlator::Correlator, events::OutgoingRequest}; +use crate::{ + correlator::Correlator, + direct_responder::DirectResponder, + events::{IncomingResponse, OutgoingRequest, ProtocolResponse}, +}; use anyhow::{Context, Result}; use e3_events::CorrelationId; -use e3_utils::{ArcBytes, OnceTake}; +use e3_utils::ArcBytes; use libp2p::{ connection_limits::{self, ConnectionLimits}, futures::StreamExt, @@ -22,7 +26,7 @@ use libp2p::{ }, request_response::{ self, cbor, Event as RequestResponseEvent, Message as RequestResponseMessage, - ProtocolSupport, ResponseChannel, + ProtocolSupport, }, swarm::{dial_opts::DialOpts, DialError, NetworkBehaviour, SwarmEvent}, PeerId, StreamProtocol, Swarm, @@ -58,7 +62,8 @@ pub struct NodeBehaviour { kademlia: KademliaBehaviour, connection_limits: connection_limits::Behaviour, identify: IdentifyBehaviour, - request_response: cbor::Behaviour, Vec>, + /// Send bytes reply with enumeration for errors + request_response: cbor::Behaviour, ProtocolResponse>, } /// Manage the peer to peer connection. 
This struct wraps a libp2p Swarm and enables communication @@ -145,6 +150,7 @@ impl NetInterface { trace!("Peers to dial: {:?}", self.peers); tokio::spawn({ let event_tx = event_tx.clone(); + let cmd_tx = cmd_tx.clone(); let peers = self.peers.clone(); async move { dial_peers(&cmd_tx, &event_tx, &peers).await?; @@ -170,7 +176,7 @@ impl NetInterface { } // Process events event = self.swarm.select_next_some() => { - match process_swarm_event(&mut self.swarm, &event_tx, &mut correlator, &mut peer_failures, event).await { + match process_swarm_event(&mut self.swarm, &event_tx, &cmd_tx, &mut correlator, &mut peer_failures, event).await { Ok(_) => (), Err(e) => error!("Error processing NetEvent: {e}") } @@ -209,7 +215,7 @@ fn create_behaviour( let request_response_config = request_response::Config::default().with_request_timeout(Duration::from_secs(30)); - let request_response = cbor::Behaviour::, Vec>::new( + let request_response = cbor::Behaviour::, ProtocolResponse>::new( [( StreamProtocol::new("/enclave/sync/0.0.1"), ProtocolSupport::Full, @@ -243,6 +249,7 @@ fn create_behaviour( async fn process_swarm_event( swarm: &mut Swarm, event_tx: &broadcast::Sender, + cmd_tx: &mpsc::Sender, correlator: &mut Correlator, peer_failures: &mut PeerFailureTracker, event: SwarmEvent, @@ -426,12 +433,12 @@ async fn process_swarm_event( }, )) => { debug!("Incoming request received (id={})", request_id); + let responder = DirectResponder::new(request_id, channel, &cmd_tx); // received a request for events event_tx.send(NetEvent::IncomingRequest(IncomingRequest { - request_id, - channel: OnceTake::new(channel), - payload: request, + responder, + request, }))?; } @@ -557,8 +564,8 @@ async fn process_swarm_command( handle_outgoing_request(swarm, correlator, correlation_id, payload, target)?; Ok(()) } - NetCommand::Response { payload, channel } => { - handle_response(swarm, channel, payload)?; + NetCommand::IncomingResponse(IncomingResponse { responder }) => { + handle_response(swarm, 
responder)?; Ok(()) } NetCommand::Shutdown => { @@ -784,20 +791,14 @@ fn handle_outgoing_request( Ok(()) } -fn handle_response( - swarm: &mut Swarm, - channel: OnceTake>>, - payload: Vec, -) -> Result<()> { - debug!("Sending response"); - let channel = channel.try_take()?; - if let Err(payload) = swarm +fn handle_response(swarm: &mut Swarm, responder: DirectResponder) -> Result<()> { + debug!("Sending response to {}", responder.id()); + let (channel, response) = responder.to_response()?; + swarm .behaviour_mut() .request_response - .send_response(channel, payload) - { - error!("Failed to send response: {:?}", payload); - } + .send_response(channel, response) + .map_err(|payload| anyhow::anyhow!("Failed to send response: {:?}", payload))?; Ok(()) } diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 3fba1ed929..6b696bc154 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -19,9 +19,12 @@ use std::{collections::HashMap, convert::TryInto, sync::Arc, time::Duration}; use tokio::sync::{broadcast, mpsc}; use tracing::{debug, info, warn}; -use crate::events::{ - await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, - OutgoingRequest, +use crate::{ + direct_responder::DirectResponder, + events::{ + await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, + OutgoingRequest, ProtocolResponse, + }, }; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -33,7 +36,15 @@ impl TryInto> for SyncRequestValue { type Error = anyhow::Error; fn try_into(self) -> Result, Self::Error> { - bincode::serialize(&self).context("failed to serialize sync request") + bincode::serialize(&self).context("failed to serialize SyncRequestValue") + } +} + +impl TryFrom> for SyncRequestValue { + type Error = anyhow::Error; + + fn try_from(value: Vec) -> Result { + bincode::deserialize(&value).context("failed to deserialize SyncRequestValue") } } @@ -72,7 +83,7 
@@ pub struct NetSyncManager { /// NetEvents receiver to receive events rx: Arc>, eventstore: Recipient>, - requests: HashMap>>>, + requests: HashMap, peers_ready: bool, } @@ -204,13 +215,13 @@ impl Handler for NetSyncManager { type Result = (); fn handle(&mut self, msg: IncomingRequest, ctx: &mut Self::Context) -> Self::Result { trap(EType::Net, &self.bus, || { - info!("GOT SyncRequestReceived"); - let request: SyncRequestValue = - bincode::deserialize(&msg.payload).context("failed to deserialize sync request")?; let id = CorrelationId::new(); - info!("STORING channel in requests map..."); - self.requests.insert(id, msg.channel); - info!("QUERYING eventstore..."); + info!("Processing incoming request with correlation={}", id); + let request: SyncRequestValue = msg + .request + .try_into() + .context("Failed to parse SyncRequestValue for id={id}")?; + self.requests.insert(id, msg.responder); self.eventstore.try_send(EventStoreQueryBy::::new( id, request.since, @@ -227,11 +238,11 @@ impl Handler for NetSyncManager { fn handle(&mut self, msg: EventStoreQueryResponse, _: &mut Self::Context) -> Self::Result { trap(EType::Net, &self.bus.clone(), || { info!("Received response from eventstore."); - let Some(channel) = self.requests.get(&msg.id()) else { - bail!("request not found with {}", msg.id()); + let Some(responder) = self.requests.remove(&msg.id()) else { + bail!("responder not found for {}", msg.id()); }; - debug!("Sending SyncResponse with channel={:?}", channel); - let response = SyncResponseValue { + + responder.ok(SyncResponseValue { events: msg .into_events() .into_iter() @@ -239,14 +250,7 @@ impl Handler for NetSyncManager { .map(|ev| ev.try_into()) .collect::>()?, ts: self.bus.ts()?, - }; - let payload: Vec = response.try_into()?; - if let Err(e) = self.tx.try_send(NetCommand::Response { - payload, - channel: channel.to_owned(), - }) { - warn!("Failed to send SyncResponse (channel full or closed): {e}"); - } + })?; Ok(()) }) @@ -287,8 +291,14 @@ async fn 
sync_request( SYNC_REQUEST_TIMEOUT, ) .await?; - let response: SyncResponseValue = response.payload.try_into()?; - Ok(SyncRequestSucceeded { response }) + match response.payload { + ProtocolResponse::Ok(data) => { + let response: SyncResponseValue = data.try_into()?; + Ok(SyncRequestSucceeded { response }) + } + ProtocolResponse::BadRequest(msg) => Err(anyhow!("BadRequest: {}", msg)), + ProtocolResponse::Error(msg) => Err(anyhow!("ProtocolError: {}", msg)), + } } async fn handle_sync_request_event( From 1ec5d44a0f74e75b399642e38c8de8f456fab6a6 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 06:04:09 +0000 Subject: [PATCH 12/50] headers --- crates/net/src/direct_responder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/net/src/direct_responder.rs b/crates/net/src/direct_responder.rs index 6cbd0e48ef..c7657382db 100644 --- a/crates/net/src/direct_responder.rs +++ b/crates/net/src/direct_responder.rs @@ -1,3 +1,9 @@ +// SPDX-License-Identifier: LGPL-3.0-only +// +// This file is provided WITHOUT ANY WARRANTY; +// without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. 
+ use crate::events::{IncomingResponse, NetCommand, ProtocolResponse, ProtocolResponseChannel}; use anyhow::{anyhow, Context, Result}; use e3_utils::OnceTake; From d2729710c2d5f2d0a96441de746df28aece4f736 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 06:55:43 +0000 Subject: [PATCH 13/50] tidy up imports --- crates/net/src/net_sync_manager.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 6b696bc154..1fc828b1b1 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -11,13 +11,12 @@ use e3_events::{ EnclaveEventData, EventSource, EventStoreQueryBy, EventStoreQueryResponse, EventType, HistoricalNetSyncStart, NetSyncEventsReceived, TsAgg, TypedEvent, Unsequenced, }; -use e3_utils::{retry_with_backoff, to_retry, OnceTake, MAILBOX_LIMIT}; +use e3_utils::{retry_with_backoff, to_retry, MAILBOX_LIMIT}; use futures::TryFutureExt; -use libp2p::request_response::ResponseChannel; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, convert::TryInto, sync::Arc, time::Duration}; use tokio::sync::{broadcast, mpsc}; -use tracing::{debug, info, warn}; +use tracing::{debug, info}; use crate::{ direct_responder::DirectResponder, From 2dcf48c48306be340587483232351d4afc42e30d Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 07:39:12 +0000 Subject: [PATCH 14/50] first attempt at batch query --- crates/net/src/direct_responder.rs | 28 +++++++++++++++++++++ crates/net/src/events.rs | 3 ++- crates/net/src/net_event_batch.rs | 14 ++++++++++- crates/net/src/net_interface.rs | 7 ++---- crates/net/src/net_sync_manager.rs | 40 ++++++++++++++++++++---------- 5 files changed, 72 insertions(+), 20 deletions(-) diff --git a/crates/net/src/direct_responder.rs b/crates/net/src/direct_responder.rs index c7657382db..7b16fb6e31 100644 --- a/crates/net/src/direct_responder.rs +++ b/crates/net/src/direct_responder.rs @@ 
-65,6 +65,7 @@ impl IntoId for InboundRequestId { /// ``` pub struct DirectResponder { id: u64, + request: Vec, response: Option, channel: OnceTake, net_cmds: mpsc::Sender>, @@ -74,6 +75,7 @@ impl Clone for DirectResponder { fn clone(&self) -> Self { Self { id: self.id.clone(), + request: self.request.clone(), response: self.response.clone(), channel: self.channel.clone(), net_cmds: self.net_cmds.clone(), @@ -90,12 +92,38 @@ impl DirectResponder { pub fn new(id: impl IntoId, channel: C, net_cmds: &mpsc::Sender>) -> Self { Self { id: id.into_id(), + request: Vec::new(), response: None, channel: OnceTake::new(channel), net_cmds: net_cmds.clone(), } } + /// Sets the request data on the responder. + /// + /// This should be called when creating a responder for an incoming request, + /// passing the raw request bytes. + pub fn with_request(mut self, request: Vec) -> Self { + self.request = request; + self + } + + /// Get the request data + pub fn request(&self) -> Vec { + self.request.clone() + } + + /// Get the request data + pub fn try_request_into(&self) -> Result + where + T: TryFrom>, + { + self.request + .clone() + .try_into() + .map_err(|_| anyhow!("Could not serialize request bytes")) + } + /// Extract the payload information to send to swarm pub fn to_response(mut self) -> Result<(C, ProtocolResponse)> { let channel = self.channel.try_take()?; diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index c76497ec59..bc3bdf5022 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -86,12 +86,13 @@ pub type ProtocolResponseChannel = ResponseChannel; #[derive(Message, Clone, Debug)] #[rtype("()")] +/// Remote has sent us a request pub struct IncomingRequest { pub responder: DirectResponder, - pub request: Vec, } #[derive(Clone, Debug)] +/// We are responding to a remote request pub struct IncomingResponse>> { pub responder: DirectResponder, } diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs index 
d38bbd0cdf..7abce64751 100644 --- a/crates/net/src/net_event_batch.rs +++ b/crates/net/src/net_event_batch.rs @@ -53,7 +53,7 @@ where pub struct FetchEventsSince { aggregate_id: AggregateId, since: u128, - limit: u16, + limit: usize, } impl FetchEventsSince { @@ -64,6 +64,18 @@ impl FetchEventsSince { limit, } } + + pub fn aggregate_id(&self) -> AggregateId { + self.aggregate_id + } + + pub fn since(&self) -> u128 { + self.since + } + + pub fn limit(&self) -> usize { + self.limit + } } impl TryFrom for Vec { diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 130dd839df..78424d7226 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -433,13 +433,10 @@ async fn process_swarm_event( }, )) => { debug!("Incoming request received (id={})", request_id); - let responder = DirectResponder::new(request_id, channel, &cmd_tx); + let responder = DirectResponder::new(request_id, channel, &cmd_tx).with_request(request); // received a request for events - event_tx.send(NetEvent::IncomingRequest(IncomingRequest { - responder, - request, - }))?; + event_tx.send(NetEvent::IncomingRequest(IncomingRequest { responder }))?; } SwarmEvent::Behaviour(NodeBehaviourEvent::RequestResponse( diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 1fc828b1b1..41cce67ec9 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -24,6 +24,7 @@ use crate::{ await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, OutgoingRequest, ProtocolResponse, }, + net_event_batch::{BatchCursor, EventBatch, FetchEventsSince}, }; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -216,14 +217,13 @@ impl Handler for NetSyncManager { trap(EType::Net, &self.bus, || { let id = CorrelationId::new(); info!("Processing incoming request with correlation={}", id); - let request: SyncRequestValue = msg - .request - .try_into() - .context("Failed to parse 
SyncRequestValue for id={id}")?; + let fetch_request: FetchEventsSince = msg.responder.try_request_into()?; self.requests.insert(id, msg.responder); + let query: HashMap = + HashMap::from([(fetch_request.aggregate_id(), fetch_request.since())]); self.eventstore.try_send(EventStoreQueryBy::::new( id, - request.since, + query, ctx.address().recipient(), ))?; Ok(()) @@ -241,14 +241,28 @@ impl Handler for NetSyncManager { bail!("responder not found for {}", msg.id()); }; - responder.ok(SyncResponseValue { - events: msg - .into_events() - .into_iter() - .filter(|e| e.source() == EventSource::Net) - .map(|ev| ev.try_into()) - .collect::>()?, - ts: self.bus.ts()?, + let fetch_request: FetchEventsSince = responder.try_request_into()?; + let limit = fetch_request.limit(); + let aggregate_id = fetch_request.aggregate_id(); + let events: Vec> = msg + .into_events() + .into_iter() + .filter(|e| e.source() == EventSource::Net) + .take(limit) + .map(|ev| ev.clone_unsequenced()) + .collect(); + + let next = if events.len() == limit { + let last_event_ts = events.get(limit - 1).map(|e| e.ts()).unwrap_or(0); + BatchCursor::Next(last_event_ts) + } else { + BatchCursor::Done + }; + + responder.ok(EventBatch { + events, + next, + aggregate_id, })?; Ok(()) From c9417a16443a2156e8723e7f43a2f36d6a812c7f Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 07:43:23 +0000 Subject: [PATCH 15/50] fix type --- crates/net/src/net_event_batch.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs index 7abce64751..81be21ce40 100644 --- a/crates/net/src/net_event_batch.rs +++ b/crates/net/src/net_event_batch.rs @@ -57,7 +57,7 @@ pub struct FetchEventsSince { } impl FetchEventsSince { - pub fn new(aggregate_id: AggregateId, since: u128, limit: u16) -> Self { + pub fn new(aggregate_id: AggregateId, since: u128, limit: usize) -> Self { Self { aggregate_id, since, @@ -110,7 +110,7 @@ pub async fn 
fetch_all_batched_events( peer: PeerTarget, aggregate_id: AggregateId, since: u128, - batch_size: u16, + batch_size: usize, ) -> Result> where E: TryFrom> + Send + Sync + 'static, From 70d6aad92a2f6a205fd12a310f1002fd7081e773 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 09:41:33 +0000 Subject: [PATCH 16/50] feat: add limit and filter to EventStoreQueryBy, make EventLog and StoreEvent generic --- crates/events/src/events.rs | 90 +++++++++++++++++++++++++- crates/events/src/eventstore.rs | 84 +++++++++++++++--------- crates/events/src/eventstore_router.rs | 22 +++++-- crates/events/src/traits.rs | 6 +- 4 files changed, 161 insertions(+), 41 deletions(-) diff --git a/crates/events/src/events.rs b/crates/events/src/events.rs index 3ff819c7ea..b39ff5eaf3 100644 --- a/crates/events/src/events.rs +++ b/crates/events/src/events.rs @@ -8,7 +8,13 @@ use std::collections::HashMap; use actix::{Message, Recipient}; -use crate::{AggregateId, CorrelationId, EnclaveEvent, Sequenced, Unsequenced}; +use crate::traits::EventContextAccessors; +use crate::{AggregateId, CorrelationId, EnclaveEvent, EventSource, Sequenced, Unsequenced}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum EventStoreFilter { + Source(EventSource), +} /// Direct event received by the EventStore to store an event #[derive(Message, Debug)] @@ -98,6 +104,8 @@ pub struct EventStoreQueryBy { correlation_id: CorrelationId, query: Q::Shape, sender: Recipient, + limit: Option, + filter: Option, } impl EventStoreQueryBy { @@ -110,12 +118,32 @@ impl EventStoreQueryBy { correlation_id, query, sender: sender.into(), + limit: None, + filter: None, } } pub fn query(&self) -> &HashMap { &self.query } + + pub fn limit(&self) -> Option { + self.limit + } + + pub fn filter(&self) -> Option<&EventStoreFilter> { + self.filter.as_ref() + } + + pub fn with_limit(mut self, limit: u64) -> Self { + self.limit = Some(limit); + self + } + + pub fn with_filter(mut self, filter: EventStoreFilter) -> Self { + 
self.filter = Some(filter); + self + } } impl EventStoreQueryBy { @@ -128,12 +156,32 @@ impl EventStoreQueryBy { correlation_id, query, sender: sender.into(), + limit: None, + filter: None, } } pub fn query(&self) -> &HashMap { &self.query } + + pub fn limit(&self) -> Option { + self.limit + } + + pub fn filter(&self) -> Option<&EventStoreFilter> { + self.filter.as_ref() + } + + pub fn with_limit(mut self, limit: u64) -> Self { + self.limit = Some(limit); + self + } + + pub fn with_filter(mut self, filter: EventStoreFilter) -> Self { + self.filter = Some(filter); + self + } } impl EventStoreQueryBy { @@ -146,12 +194,32 @@ impl EventStoreQueryBy { correlation_id, query, sender: sender.into(), + limit: None, + filter: None, } } pub fn query(&self) -> u128 { self.query } + + pub fn limit(&self) -> Option { + self.limit + } + + pub fn filter(&self) -> Option<&EventStoreFilter> { + self.filter.as_ref() + } + + pub fn with_limit(mut self, limit: u64) -> Self { + self.limit = Some(limit); + self + } + + pub fn with_filter(mut self, filter: EventStoreFilter) -> Self { + self.filter = Some(filter); + self + } } impl EventStoreQueryBy { @@ -164,12 +232,32 @@ impl EventStoreQueryBy { correlation_id, query, sender: sender.into(), + limit: None, + filter: None, } } pub fn query(&self) -> u64 { self.query } + + pub fn limit(&self) -> Option { + self.limit + } + + pub fn filter(&self) -> Option<&EventStoreFilter> { + self.filter.as_ref() + } + + pub fn with_limit(mut self, limit: u64) -> Self { + self.limit = Some(limit); + self + } + + pub fn with_filter(mut self, filter: EventStoreFilter) -> Self { + self.filter = Some(filter); + self + } } impl EventStoreQueryBy { diff --git a/crates/events/src/eventstore.rs b/crates/events/src/eventstore.rs index fb52fe74c4..ee4dcc3e6a 100644 --- a/crates/events/src/eventstore.rs +++ b/crates/events/src/eventstore.rs @@ -6,8 +6,8 @@ use crate::{ events::{StoreEventRequested, StoreEventResponse}, - EventContextAccessors, EventLog, 
EventStoreQueryBy, EventStoreQueryResponse, Seq, - SequenceIndex, Ts, + EnclaveEvent, EventContextAccessors, EventLog, EventStoreFilter, EventStoreQueryBy, + EventStoreQueryResponse, Seq, SequenceIndex, Sequenced, Ts, Unsequenced, }; use actix::{Actor, Handler}; use anyhow::{bail, Result}; @@ -15,13 +15,13 @@ use tracing::{error, warn}; const MAX_STORAGE_ERRORS: u64 = 10; -pub struct EventStore { +pub struct EventStore>> { index: I, log: L, storage_errors: u64, } -impl EventStore { +impl>> EventStore { pub fn handle_store_event_requested(&mut self, msg: StoreEventRequested) -> Result<()> { let event = msg.event; let sender = msg.sender; @@ -43,39 +43,57 @@ impl EventStore { Ok(()) } + fn query_events( + &self, + iter: Box)>>, + filter: Option, + limit: Option, + ) -> Vec> { + let iter = iter.map(|(s, e)| e.into_sequenced(s)); + + match filter { + Some(EventStoreFilter::Source(source)) => { + let iter = iter.filter(move |e| e.get_ctx().source() == source); + match limit { + Some(lim) => iter.take(lim as usize).collect(), + None => iter.collect(), + } + } + None => match limit { + Some(lim) => iter.take(lim as usize).collect(), + None => iter.collect(), + }, + } + } + pub fn handle_event_store_query_ts(&mut self, msg: EventStoreQueryBy) -> Result<()> { - // if there are no events after the timestamp return an empty vector let id = msg.id(); - let Some(seq) = self.index.seek(msg.query())? else { - msg.sender() - .try_send(EventStoreQueryResponse::new(id, vec![]))?; + let query = msg.query(); + let filter = msg.filter().cloned(); + let limit = msg.limit(); + let sender = msg.sender(); + + let Some(seq) = self.index.seek(query)? 
else { + sender.try_send(EventStoreQueryResponse::new(id, vec![]))?; return Ok(()); }; - // read and return the events - let evts = self - .log - .read_from(seq) - .map(|(s, e)| e.into_sequenced(s)) - .collect::>(); - - msg.sender() - .try_send(EventStoreQueryResponse::new(id, evts))?; + + let evts = self.query_events(self.log.read_from(seq), filter, limit); + + sender.try_send(EventStoreQueryResponse::new(id, evts))?; Ok(()) } pub fn handle_event_store_query_seq(&mut self, msg: EventStoreQueryBy) -> Result<()> { - // if there are no events after the timestamp return an empty vector let id = msg.id(); + let query = msg.query(); + let filter = msg.filter().cloned(); + let limit = msg.limit(); + let sender = msg.sender(); - // read and return the events - let evts = self - .log - .read_from(msg.query()) - .map(|(s, e)| e.into_sequenced(s)) - .collect::>(); + let evts = self.query_events(self.log.read_from(query), filter, limit); - msg.sender() - .try_send(EventStoreQueryResponse::new(id, evts))?; + sender.try_send(EventStoreQueryResponse::new(id, evts))?; Ok(()) } } @@ -90,11 +108,13 @@ impl EventStore { } } -impl Actor for EventStore { +impl>> Actor for EventStore { type Context = actix::Context; } -impl Handler for EventStore { +impl>> Handler + for EventStore +{ type Result = (); fn handle(&mut self, msg: StoreEventRequested, _: &mut Self::Context) -> Self::Result { if let Err(e) = self.handle_store_event_requested(msg) { @@ -105,7 +125,9 @@ impl Handler for EventStore< } } -impl Handler> for EventStore { +impl>> Handler> + for EventStore +{ type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, _: &mut Self::Context) -> Self::Result { if let Err(e) = self.handle_event_store_query_ts(msg) { @@ -114,7 +136,9 @@ impl Handler> for EventStor } } -impl Handler> for EventStore { +impl>> Handler> + for EventStore +{ type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, _: &mut Self::Context) -> Self::Result { if let Err(e) = 
self.handle_event_store_query_seq(msg) { diff --git a/crates/events/src/eventstore_router.rs b/crates/events/src/eventstore_router.rs index 3f3da61f3b..f58b224fba 100644 --- a/crates/events/src/eventstore_router.rs +++ b/crates/events/src/eventstore_router.rs @@ -8,7 +8,9 @@ use crate::{ events::{EventStoreQueryResponse, StoreEventRequested}, AggregateId, EventContextAccessors, EventLog, SequenceIndex, }; -use crate::{CorrelationId, Die, EnclaveEvent, EventStoreQueryBy, Seq, SeqAgg, Ts, TsAgg}; +use crate::{ + CorrelationId, Die, EnclaveEvent, EventStoreQueryBy, Seq, SeqAgg, Ts, TsAgg, Unsequenced, +}; use actix::{Actor, ActorContext, Addr, AsyncContext, Context, Handler, Recipient}; use anyhow::Result; use e3_utils::MAILBOX_LIMIT_LARGE; @@ -84,11 +86,11 @@ impl Handler for QueryAggregator { } /// EventStoreRouter - routes events and spawns query aggregators to handle eventstore queries -pub struct EventStoreRouter { +pub struct EventStoreRouter>> { stores: HashMap>>, } -impl EventStoreRouter { +impl>> EventStoreRouter { pub fn new(stores: HashMap>>) -> Self { debug!("Making eventstore router..."); let stores = stores @@ -199,7 +201,7 @@ impl EventStoreRouter { } } -impl Actor for EventStoreRouter { +impl>> Actor for EventStoreRouter { type Context = Context; fn started(&mut self, ctx: &mut Self::Context) { @@ -207,7 +209,9 @@ impl Actor for EventStoreRouter { } } -impl Handler for EventStoreRouter { +impl>> Handler + for EventStoreRouter +{ type Result = (); fn handle(&mut self, msg: StoreEventRequested, _: &mut Self::Context) -> Self::Result { @@ -215,7 +219,9 @@ impl Handler for EventStoreR } } -impl Handler> for EventStoreRouter { +impl>> Handler> + for EventStoreRouter +{ type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, ctx: &mut Self::Context) -> Self::Result { @@ -225,7 +231,9 @@ impl Handler> for EventS } } -impl Handler> for EventStoreRouter { +impl>> Handler> + for EventStoreRouter +{ type Result = (); fn handle(&mut self, msg: 
EventStoreQueryBy, ctx: &mut Self::Context) -> Self::Result { diff --git a/crates/events/src/traits.rs b/crates/events/src/traits.rs index 5ac78dd3ed..631abf56b2 100644 --- a/crates/events/src/traits.rs +++ b/crates/events/src/traits.rs @@ -169,11 +169,11 @@ pub trait SequenceIndex: Unpin + 'static { } /// Store and retrieve events from a write ahead log -pub trait EventLog: Unpin + 'static { +pub trait EventLog>: Unpin + 'static { /// Append an event to the log, returning its sequence number - fn append(&mut self, event: &EnclaveEvent) -> Result; + fn append(&mut self, event: &E) -> Result; /// Read all events starting from the given sequence number (inclusive) - fn read_from(&self, from: u64) -> Box)>>; + fn read_from(&self, from: u64) -> Box>; } /// EventContext allows consumers to extract infrastructure metadata from event objects From d6c507617fe3bfa833b9e3e799b35bd8cff04898 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 12:23:28 +0000 Subject: [PATCH 17/50] refactor: simplify EventLog trait by removing type parameter Remove generic parameter from EventLog trait, making it work directly with EnclaveEvent instead of any event type. This simplifies the implementation and allows EventStore to be more generic. Add comprehensive unit tests for store_event, query_by_seq, and query_by_ts. 
--- crates/events/src/eventstore.rs | 457 ++++++++++++++++++++++--- crates/events/src/eventstore_router.rs | 22 +- crates/events/src/traits.rs | 6 +- 3 files changed, 416 insertions(+), 69 deletions(-) diff --git a/crates/events/src/eventstore.rs b/crates/events/src/eventstore.rs index ee4dcc3e6a..93751e3aa5 100644 --- a/crates/events/src/eventstore.rs +++ b/crates/events/src/eventstore.rs @@ -15,16 +15,19 @@ use tracing::{error, warn}; const MAX_STORAGE_ERRORS: u64 = 10; -pub struct EventStore>> { +pub struct EventStore { index: I, log: L, storage_errors: u64, } -impl>> EventStore { - pub fn handle_store_event_requested(&mut self, msg: StoreEventRequested) -> Result<()> { - let event = msg.event; - let sender = msg.sender; +impl EventStore { + /// Attempt to store an event. Returns the sequenced event on success, + /// `None` if the event was a duplicate, or an error on failure. + pub fn store_event( + &mut self, + event: EnclaveEvent, + ) -> Result>> { let ts = event.ts(); if let Some(_) = self.index.get(ts)? { warn!("Event already stored at timestamp {ts}! This might happen when recovering from a snapshot. Skipping storage"); @@ -35,15 +38,14 @@ impl>> EventStore self.storage_errors ); } - return Ok(()); + return Ok(None); } let seq = self.log.append(&event)?; self.index.insert(ts, seq)?; - sender.do_send(StoreEventResponse(event.into_sequenced(seq))); - Ok(()) + Ok(Some(event.into_sequenced(seq))) } - fn query_events( + fn collect_events( &self, iter: Box)>>, filter: Option, @@ -66,35 +68,27 @@ impl>> EventStore } } - pub fn handle_event_store_query_ts(&mut self, msg: EventStoreQueryBy) -> Result<()> { - let id = msg.id(); - let query = msg.query(); - let filter = msg.filter().cloned(); - let limit = msg.limit(); - let sender = msg.sender(); - + /// Query events by timestamp. Returns events at or after the given timestamp. + pub fn query_by_ts( + &self, + query: u128, + filter: Option, + limit: Option, + ) -> Result>> { let Some(seq) = self.index.seek(query)? 
else { - sender.try_send(EventStoreQueryResponse::new(id, vec![]))?; - return Ok(()); + return Ok(vec![]); }; - - let evts = self.query_events(self.log.read_from(seq), filter, limit); - - sender.try_send(EventStoreQueryResponse::new(id, evts))?; - Ok(()) + Ok(self.collect_events(self.log.read_from(seq), filter, limit)) } - pub fn handle_event_store_query_seq(&mut self, msg: EventStoreQueryBy) -> Result<()> { - let id = msg.id(); - let query = msg.query(); - let filter = msg.filter().cloned(); - let limit = msg.limit(); - let sender = msg.sender(); - - let evts = self.query_events(self.log.read_from(query), filter, limit); - - sender.try_send(EventStoreQueryResponse::new(id, evts))?; - Ok(()) + /// Query events by sequence number. Returns events at or after the given sequence. + pub fn query_by_seq( + &self, + query: u64, + filter: Option, + limit: Option, + ) -> Vec> { + self.collect_events(self.log.read_from(query), filter, limit) } } @@ -108,41 +102,402 @@ impl EventStore { } } -impl>> Actor for EventStore { +impl Actor for EventStore { type Context = actix::Context; } -impl>> Handler - for EventStore -{ +impl Handler for EventStore { type Result = (); fn handle(&mut self, msg: StoreEventRequested, _: &mut Self::Context) -> Self::Result { - if let Err(e) = self.handle_store_event_requested(msg) { - // Log append or index insert failed — storage is broken. 
- error!("Event storage failed: {e}"); - panic!("Unrecoverable event storage failure: {e}"); + match self.store_event(msg.event) { + Ok(Some(sequenced)) => { + msg.sender.do_send(StoreEventResponse(sequenced)); + } + Ok(None) => {} // duplicate — already warned inside store_event + Err(e) => { + error!("Event storage failed: {e}"); + panic!("Unrecoverable event storage failure: {e}"); + } } } } -impl>> Handler> - for EventStore -{ +impl Handler> for EventStore { type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, _: &mut Self::Context) -> Self::Result { - if let Err(e) = self.handle_event_store_query_ts(msg) { - error!("{e}"); + let query = msg.query(); + let id = msg.id(); + let limit = msg.limit(); + let filter = msg.filter().cloned(); + let sender = msg.sender(); + match self.query_by_ts(query, filter, limit) { + Ok(evts) => { + if let Err(e) = sender.try_send(EventStoreQueryResponse::new(id, evts)) { + error!("{e}"); + } + } + Err(e) => error!("{e}"), } } } -impl>> Handler> - for EventStore -{ +impl Handler> for EventStore { type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, _: &mut Self::Context) -> Self::Result { - if let Err(e) = self.handle_event_store_query_seq(msg) { + let id = msg.id(); + let query = msg.query(); + let limit = msg.limit(); + let filter = msg.filter().cloned(); + let sender = msg.sender(); + let evts = self.query_by_seq(query, filter, limit); + if let Err(e) = sender.try_send(EventStoreQueryResponse::new(id, evts)) { error!("{e}"); } } } + +#[cfg(test)] +mod tests { + use crate::{EventConstructorWithTimestamp, EventSource, TestEvent}; + + use super::*; + use anyhow::Result; + use std::collections::BTreeMap; + + // --------------------------------------------------------------------------- + // Mock SequenceIndex backed by BTreeMap + // --------------------------------------------------------------------------- + struct MockIndex(BTreeMap); + + impl MockIndex { + fn new() -> Self { + Self(BTreeMap::new()) + } + } + + 
impl SequenceIndex for MockIndex { + fn insert(&mut self, key: u128, value: u64) -> Result<()> { + self.0.insert(key, value); + Ok(()) + } + + fn get(&self, key: u128) -> Result> { + Ok(self.0.get(&key).copied()) + } + + fn seek(&self, key: u128) -> Result> { + Ok(self.0.range(key..).next().map(|(_, &v)| v)) + } + } + + // --------------------------------------------------------------------------- + // Mock EventLog backed by Vec + // --------------------------------------------------------------------------- + struct MockLog(Vec>); + + impl MockLog { + fn new() -> Self { + Self(Vec::new()) + } + } + + impl EventLog for MockLog { + fn append(&mut self, event: &EnclaveEvent) -> Result { + let seq = self.0.len() as u64; + self.0.push(event.clone()); + Ok(seq) + } + + fn read_from( + &self, + from: u64, + ) -> Box)>> { + let items: Vec<_> = self + .0 + .iter() + .enumerate() + .filter(move |(i, _)| *i as u64 >= from) + .map(|(i, e)| (i as u64, e.clone())) + .collect(); + Box::new(items.into_iter()) + } + } + + // --------------------------------------------------------------------------- + // Test helpers + // --------------------------------------------------------------------------- + fn make_event(ts: u128, source: EventSource) -> EnclaveEvent { + EnclaveEvent::::new_with_timestamp( + TestEvent::new("test", 1).into(), + None, + ts, + None, + source, + ) + } + + fn make_local_event(ts: u128) -> EnclaveEvent { + make_event(ts, EventSource::Local) + } + + fn make_network_event(ts: u128) -> EnclaveEvent { + make_event(ts, EventSource::Net) + } + + fn new_store() -> EventStore { + EventStore::new(MockIndex::new(), MockLog::new()) + } + + fn populated_store(events: &[EnclaveEvent]) -> EventStore { + let mut store = new_store(); + for event in events { + store.store_event(event.clone()).unwrap(); + } + store + } + + // =========================================================================== + // store_event + // 
=========================================================================== + + #[test] + fn store_event_returns_sequenced_event() { + let mut store = new_store(); + let event = make_local_event(100); + + let result = store.store_event(event).unwrap().unwrap(); + + assert_eq!(result.get_ctx().ts(), 100); + } + + #[test] + fn store_event_assigns_incrementing_sequence_numbers() { + let mut store = new_store(); + + let _a = store.store_event(make_local_event(100)).unwrap().unwrap(); + let _b = store.store_event(make_local_event(200)).unwrap().unwrap(); + let _c = store.store_event(make_local_event(300)).unwrap().unwrap(); + + assert_eq!(store.index.get(100).unwrap(), Some(0)); + assert_eq!(store.index.get(200).unwrap(), Some(1)); + assert_eq!(store.index.get(300).unwrap(), Some(2)); + } + + #[test] + fn store_event_appends_to_log() { + let mut store = new_store(); + store.store_event(make_local_event(100)).unwrap(); + store.store_event(make_local_event(200)).unwrap(); + + let logged: Vec<_> = store.log.read_from(0).collect(); + assert_eq!(logged.len(), 2); + } + + #[test] + fn store_event_returns_none_for_duplicate_timestamp() { + let mut store = new_store(); + store.store_event(make_local_event(100)).unwrap(); + + let result = store.store_event(make_local_event(100)).unwrap(); + + assert!(result.is_none()); + assert_eq!(store.storage_errors, 1); + // Log should still have only one event + assert_eq!(store.log.read_from(0).count(), 1); + } + + #[test] + fn store_event_bails_after_max_storage_errors() { + let mut store = new_store(); + store.store_event(make_local_event(100)).unwrap(); + + for _ in 0..MAX_STORAGE_ERRORS { + let result = store.store_event(make_local_event(100)).unwrap(); + assert!(result.is_none()); + } + + assert_eq!(store.storage_errors, MAX_STORAGE_ERRORS); + + let result = store.store_event(make_local_event(100)); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("too many storage errors")); + } + + // 
=========================================================================== + // query_by_seq + // =========================================================================== + + #[test] + fn seq_query_returns_all_events() { + let store = populated_store(&[ + make_local_event(100), + make_local_event(200), + make_local_event(300), + ]); + + let events = store.query_by_seq(0, None, None); + + assert_eq!(events.len(), 3); + } + + #[test] + fn seq_query_reads_from_given_offset() { + let store = populated_store(&[ + make_local_event(100), + make_local_event(200), + make_local_event(300), + make_local_event(400), + ]); + + let events = store.query_by_seq(2, None, None); + + assert_eq!(events.len(), 2); + } + + #[test] + fn seq_query_with_source_filter() { + let store = populated_store(&[ + make_local_event(100), + make_network_event(200), + make_local_event(300), + make_network_event(400), + ]); + + let events = + store.query_by_seq(0, Some(EventStoreFilter::Source(EventSource::Local)), None); + + assert_eq!(events.len(), 2); + for e in &events { + assert_eq!(e.get_ctx().source(), EventSource::Local); + } + } + + #[test] + fn seq_query_with_limit() { + let store = populated_store(&[ + make_local_event(100), + make_local_event(200), + make_local_event(300), + make_local_event(400), + make_local_event(500), + ]); + + let events = store.query_by_seq(0, None, Some(2)); + + assert_eq!(events.len(), 2); + } + + #[test] + fn seq_query_with_filter_and_limit() { + let store = populated_store(&[ + make_local_event(100), + make_network_event(200), + make_local_event(300), + make_local_event(400), + make_network_event(500), + ]); + + let events = store.query_by_seq( + 0, + Some(EventStoreFilter::Source(EventSource::Local)), + Some(2), + ); + + assert_eq!(events.len(), 2); + for e in &events { + assert_eq!(e.get_ctx().source(), EventSource::Local); + } + } + + #[test] + fn seq_query_on_empty_log_returns_empty() { + let store = new_store(); + + let events = store.query_by_seq(0, 
None, None); + + assert!(events.is_empty()); + } + + // =========================================================================== + // query_by_ts + // =========================================================================== + + #[test] + fn ts_query_returns_events_from_exact_timestamp() { + let store = populated_store(&[ + make_local_event(100), + make_local_event(200), + make_local_event(300), + make_local_event(400), + ]); + + let events = store.query_by_ts(200, None, None).unwrap(); + + assert_eq!(events.len(), 3); + } + + #[test] + fn ts_query_seeks_to_nearest_future_timestamp() { + let store = populated_store(&[ + make_local_event(100), + make_local_event(300), + make_local_event(500), + ]); + + // ts=200 has no match; seek finds ts=300 onwards + let events = store.query_by_ts(200, None, None).unwrap(); + + assert_eq!(events.len(), 2); + } + + #[test] + fn ts_query_returns_empty_when_no_matching_timestamp() { + let store = new_store(); + + let events = store.query_by_ts(999, None, None).unwrap(); + + assert!(events.is_empty()); + } + + #[test] + fn ts_query_returns_empty_when_past_all_events() { + let store = populated_store(&[make_local_event(100), make_local_event(200)]); + + let events = store.query_by_ts(999, None, None).unwrap(); + + assert!(events.is_empty()); + } + + #[test] + fn ts_query_with_filter() { + let store = populated_store(&[ + make_local_event(100), + make_network_event(200), + make_local_event(300), + ]); + + let events = store + .query_by_ts(100, Some(EventStoreFilter::Source(EventSource::Net)), None) + .unwrap(); + + assert_eq!(events.len(), 1); + assert_eq!(events[0].get_ctx().source(), EventSource::Net); + } + + #[test] + fn ts_query_with_limit() { + let store = populated_store(&[ + make_local_event(100), + make_local_event(200), + make_local_event(300), + make_local_event(400), + ]); + + let events = store.query_by_ts(100, None, Some(2)).unwrap(); + + assert_eq!(events.len(), 2); + } +} diff --git 
a/crates/events/src/eventstore_router.rs b/crates/events/src/eventstore_router.rs index f58b224fba..3f3da61f3b 100644 --- a/crates/events/src/eventstore_router.rs +++ b/crates/events/src/eventstore_router.rs @@ -8,9 +8,7 @@ use crate::{ events::{EventStoreQueryResponse, StoreEventRequested}, AggregateId, EventContextAccessors, EventLog, SequenceIndex, }; -use crate::{ - CorrelationId, Die, EnclaveEvent, EventStoreQueryBy, Seq, SeqAgg, Ts, TsAgg, Unsequenced, -}; +use crate::{CorrelationId, Die, EnclaveEvent, EventStoreQueryBy, Seq, SeqAgg, Ts, TsAgg}; use actix::{Actor, ActorContext, Addr, AsyncContext, Context, Handler, Recipient}; use anyhow::Result; use e3_utils::MAILBOX_LIMIT_LARGE; @@ -86,11 +84,11 @@ impl Handler for QueryAggregator { } /// EventStoreRouter - routes events and spawns query aggregators to handle eventstore queries -pub struct EventStoreRouter>> { +pub struct EventStoreRouter { stores: HashMap>>, } -impl>> EventStoreRouter { +impl EventStoreRouter { pub fn new(stores: HashMap>>) -> Self { debug!("Making eventstore router..."); let stores = stores @@ -201,7 +199,7 @@ impl>> EventStoreRouter< } } -impl>> Actor for EventStoreRouter { +impl Actor for EventStoreRouter { type Context = Context; fn started(&mut self, ctx: &mut Self::Context) { @@ -209,9 +207,7 @@ impl>> Actor for EventSt } } -impl>> Handler - for EventStoreRouter -{ +impl Handler for EventStoreRouter { type Result = (); fn handle(&mut self, msg: StoreEventRequested, _: &mut Self::Context) -> Self::Result { @@ -219,9 +215,7 @@ impl>> Handler>> Handler> - for EventStoreRouter -{ +impl Handler> for EventStoreRouter { type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, ctx: &mut Self::Context) -> Self::Result { @@ -231,9 +225,7 @@ impl>> Handler>> Handler> - for EventStoreRouter -{ +impl Handler> for EventStoreRouter { type Result = (); fn handle(&mut self, msg: EventStoreQueryBy, ctx: &mut Self::Context) -> Self::Result { diff --git a/crates/events/src/traits.rs 
b/crates/events/src/traits.rs index 631abf56b2..5ac78dd3ed 100644 --- a/crates/events/src/traits.rs +++ b/crates/events/src/traits.rs @@ -169,11 +169,11 @@ pub trait SequenceIndex: Unpin + 'static { } /// Store and retrieve events from a write ahead log -pub trait EventLog>: Unpin + 'static { +pub trait EventLog: Unpin + 'static { /// Append an event to the log, returning its sequence number - fn append(&mut self, event: &E) -> Result; + fn append(&mut self, event: &EnclaveEvent) -> Result; /// Read all events starting from the given sequence number (inclusive) - fn read_from(&self, from: u64) -> Box>; + fn read_from(&self, from: u64) -> Box)>>; } /// EventContext allows consumers to extract infrastructure metadata from event objects From c7346fb1b29f6377f11be270cc488262c4956839 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 12:25:21 +0000 Subject: [PATCH 18/50] tidy up formatting --- crates/net/src/net_interface.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 78424d7226..899732e6f9 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -433,7 +433,8 @@ async fn process_swarm_event( }, )) => { debug!("Incoming request received (id={})", request_id); - let responder = DirectResponder::new(request_id, channel, &cmd_tx).with_request(request); + let responder = + DirectResponder::new(request_id, channel, &cmd_tx).with_request(request); // received a request for events event_tx.send(NetEvent::IncomingRequest(IncomingRequest { responder }))?; From 80080520ba64ebf7de406951b1a701a2e024a297 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 12:49:19 +0000 Subject: [PATCH 19/50] fix: improve doc examples and add error context in DirectResponder --- crates/net/src/direct_responder.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/net/src/direct_responder.rs 
b/crates/net/src/direct_responder.rs index 7b16fb6e31..227e901e59 100644 --- a/crates/net/src/direct_responder.rs +++ b/crates/net/src/direct_responder.rs @@ -43,7 +43,7 @@ impl IntoId for InboundRequestId { /// # let request_id = 6; /// # let channel_orig = String::from("channel"); /// # let channel = channel_orig.clone(); -/// let (cmd_tx, _) = mpsc::channel(16); +/// # let (cmd_tx, _rx) = mpsc::channel(400); /// /// // We create a responder and send it over our event channel /// let responder = DirectResponder::new( @@ -57,9 +57,9 @@ impl IntoId for InboundRequestId { /// /// // Now in our handlers we can respond with ok() or bad_request() this will consume the responder /// responder.ok(String::from("Something that implements TryInto>"))?; -/// // or: -/// # let responder = DirectResponder::new(request_id,channel_orig,&cmd_tx); -/// responder.bad_request("Hey something went wrong!")?; +/// # let responder = DirectResponder::new(request_id, channel_orig, &cmd_tx); +/// // or +/// responder.bad_request("It was pretty bad.")?; /// # Ok(()) /// # } /// ``` @@ -70,7 +70,6 @@ pub struct DirectResponder { channel: OnceTake, net_cmds: mpsc::Sender>, } - impl Clone for DirectResponder { fn clone(&self) -> Self { Self { @@ -143,7 +142,7 @@ impl DirectResponder { Ok(cmds .clone() .try_send(NetCommand::::IncomingResponse(incoming)) - .map_err(|_| anyhow!("Failed to send response command"))?) + .map_err(|e| anyhow!("Failed to send response command {:?}", e))?) 
} /// Request is ok returning response From 37f4aff71babcb1755d02ace360eca24d86e5976 Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 13:04:10 +0000 Subject: [PATCH 20/50] feat: propagate limit and filter options in EventStoreRouter --- crates/events/src/events.rs | 10 ++++++++++ crates/events/src/eventstore_router.rs | 10 ++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/crates/events/src/events.rs b/crates/events/src/events.rs index b39ff5eaf3..24d6b33142 100644 --- a/crates/events/src/events.rs +++ b/crates/events/src/events.rs @@ -268,4 +268,14 @@ impl EventStoreQueryBy { pub fn sender(self) -> Recipient { self.sender } + + pub fn with_options(mut self, limit: Option, filter: Option) -> Self { + if let Some(l) = limit { + self.limit = Some(l); + } + if let Some(f) = filter { + self.filter = Some(f); + } + self + } } diff --git a/crates/events/src/eventstore_router.rs b/crates/events/src/eventstore_router.rs index 3f3da61f3b..47431bc33a 100644 --- a/crates/events/src/eventstore_router.rs +++ b/crates/events/src/eventstore_router.rs @@ -119,6 +119,8 @@ impl EventStoreRouter { debug!("Received request for timestamp query."); let parent_id = msg.id(); let query = msg.query().clone(); + let limit = msg.limit(); + let filter = msg.filter().cloned(); let sender = msg.sender(); let sub_queries: Vec<_> = query @@ -145,7 +147,8 @@ impl EventStoreRouter { for (aggregate_id, ts, sub_query_id, store_addr) in sub_queries { let get_events_msg = - EventStoreQueryBy::::new(sub_query_id, ts, aggregator_addr.clone().recipient()); + EventStoreQueryBy::::new(sub_query_id, ts, aggregator_addr.clone().recipient()) + .with_options(limit, filter.clone()); debug!("Sending query for aggregate {:?}", aggregate_id); store_addr.do_send(get_events_msg); } @@ -161,6 +164,8 @@ impl EventStoreRouter { debug!("Received request for sequence query."); let parent_id = msg.id(); let query = msg.query().clone(); + let limit = msg.limit(); + let filter = 
msg.filter().cloned(); let sender = msg.sender(); let sub_queries: Vec<_> = query @@ -190,7 +195,8 @@ impl EventStoreRouter { sub_query_id, seq, aggregator_addr.clone().recipient(), - ); + ) + .with_options(limit, filter.clone()); debug!("Sending query for aggregate {:?}", aggregate_id); store_addr.do_send(get_events_msg); } From 090abb4d591d7d9f33e20e4160c954dff57fec7b Mon Sep 17 00:00:00 2001 From: ryardley Date: Thu, 26 Feb 2026 13:45:48 +0000 Subject: [PATCH 21/50] avoid spurious intermediate event states --- crates/events/src/enclave_event/mod.rs | 8 ++ crates/net/src/net_event_batch.rs | 17 +++- crates/net/src/net_sync_manager.rs | 131 +++++++++---------------- 3 files changed, 70 insertions(+), 86 deletions(-) diff --git a/crates/events/src/enclave_event/mod.rs b/crates/events/src/enclave_event/mod.rs index 30ecdde43f..3136e8ca05 100644 --- a/crates/events/src/enclave_event/mod.rs +++ b/crates/events/src/enclave_event/mod.rs @@ -373,6 +373,14 @@ impl EnclaveEvent { } } +impl TryFrom> for EnclaveEvent { + type Error = bincode::Error; + + fn try_from(value: Vec) -> Result { + EnclaveEvent::from_bytes(&value) + } +} + #[cfg(feature = "test-helpers")] impl EnclaveEvent { /// test-helpers only utility function to create a new sequenced event diff --git a/crates/net/src/net_event_batch.rs b/crates/net/src/net_event_batch.rs index 81be21ce40..b1b934c02c 100644 --- a/crates/net/src/net_event_batch.rs +++ b/crates/net/src/net_event_batch.rs @@ -8,6 +8,7 @@ use std::fmt::Debug; use anyhow::{Context, Result}; use e3_events::AggregateId; +use tracing::info; use crate::{ direct_requester::{DirectRequester, WithPeer, WithoutPeer}, @@ -95,7 +96,7 @@ impl TryFrom> for FetchEventsSince { } pub async fn fetch_events_since( - requester: DirectRequester, + requester: &DirectRequester, request: FetchEventsSince, ) -> Result> where @@ -122,7 +123,17 @@ where loop { let request = FetchEventsSince::new(aggregate_id, cursor, batch_size); - let batch: EventBatch = 
requester.request(request).await?; + info!( + "Fetching batch aggregate={} cursor={} batch_size={}", + aggregate_id, cursor, batch_size + ); + let batch = fetch_events_since(&requester, request).await?; + info!( + "Batch received with {} events for aggregate={} cursor={}", + batch.events.len(), + aggregate_id, + cursor + ); all_events.extend(batch.events); @@ -132,6 +143,8 @@ where } } + info!("Batch is done returning {} events", all_events.len()); + Ok(all_events) } diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 41cce67ec9..050d2f2d2a 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -5,52 +5,28 @@ // or FITNESS FOR A PARTICULAR PURPOSE. use actix::{Actor, Addr, AsyncContext, Handler, Message, Recipient, ResponseFuture}; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{bail, Context, Result}; use e3_events::{ prelude::*, trap, trap_fut, AggregateId, BusHandle, CorrelationId, EType, EnclaveEvent, EnclaveEventData, EventSource, EventStoreQueryBy, EventStoreQueryResponse, EventType, HistoricalNetSyncStart, NetSyncEventsReceived, TsAgg, TypedEvent, Unsequenced, }; -use e3_utils::{retry_with_backoff, to_retry, MAILBOX_LIMIT}; -use futures::TryFutureExt; +use e3_utils::MAILBOX_LIMIT; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, convert::TryInto, sync::Arc, time::Duration}; use tokio::sync::{broadcast, mpsc}; use tracing::{debug, info}; use crate::{ + direct_requester::DirectRequester, direct_responder::DirectResponder, - events::{ - await_event, call_and_await_response, GossipData, IncomingRequest, NetCommand, NetEvent, - OutgoingRequest, ProtocolResponse, - }, - net_event_batch::{BatchCursor, EventBatch, FetchEventsSince}, + events::{await_event, IncomingRequest, NetCommand, NetEvent, PeerTarget}, + net_event_batch::{fetch_all_batched_events, BatchCursor, EventBatch, FetchEventsSince}, }; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct 
SyncRequestValue { - pub since: HashMap, -} - -impl TryInto> for SyncRequestValue { - type Error = anyhow::Error; - - fn try_into(self) -> Result, Self::Error> { - bincode::serialize(&self).context("failed to serialize SyncRequestValue") - } -} - -impl TryFrom> for SyncRequestValue { - type Error = anyhow::Error; - - fn try_from(value: Vec) -> Result { - bincode::deserialize(&value).context("failed to deserialize SyncRequestValue") - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SyncResponseValue { - pub events: Vec, + pub events: Vec>, pub ts: u128, } @@ -192,12 +168,7 @@ impl Handler> for NetSyncManager { let response = msg.response; self.bus.publish_from_remote_as_response( NetSyncEventsReceived { - events: response - .events - .iter() - .cloned() - .map(|data| data.try_into()) - .collect::>>>()?, + events: response.events.iter().cloned().collect(), }, response.ts, ctx, @@ -282,38 +253,6 @@ impl Handler for NetSyncManager { #[rtype(result = "()")] struct AllPeersDialed; -const SYNC_REQUEST_TIMEOUT: Duration = Duration::from_secs(30); - -async fn sync_request( - net_cmds: mpsc::Sender, - net_events: Arc>, - since: HashMap, -) -> Result { - info!("RUNNING sync request..."); - let response = call_and_await_response( - net_cmds, - net_events, - NetCommand::OutgoingRequest(OutgoingRequest::to_random_peer(SyncRequestValue { since })?), - |e| match e.clone() { - NetEvent::OutgoingRequestSucceeded(value) => Some(Ok(value)), - NetEvent::OutgoingRequestFailed(error) => { - Some(Err(anyhow!("Outgoing sync request failed: {:?}", error))) - } - _ => None, - }, - SYNC_REQUEST_TIMEOUT, - ) - .await?; - match response.payload { - ProtocolResponse::Ok(data) => { - let response: SyncResponseValue = data.try_into()?; - Ok(SyncRequestSucceeded { response }) - } - ProtocolResponse::BadRequest(msg) => Err(anyhow!("BadRequest: {}", msg)), - ProtocolResponse::Error(msg) => Err(anyhow!("ProtocolError: {}", msg)), - } -} - async fn handle_sync_request_event( net_cmds: 
mpsc::Sender, net_events: Arc>, @@ -341,24 +280,48 @@ async fn handle_sync_request_event( } info!("handle_sync_request_event: All peers have been dialed."); - // Make the sync request - // value returned includes the timestamp from the remote peer - let value = retry_with_backoff( - || { - info!("Running SYNC REQUEST!!"); - sync_request( - net_cmds.clone(), - net_events.clone(), - event.since.clone().into_iter().collect(), - ) - .map_err(to_retry) + let mut all_events: Vec> = Vec::new(); + let mut latest_timestamp: u128 = 0; + + for (aggregate_id, since) in event.since.iter() { + info!( + "Requesting batched events for aggregate_id={} since={}", + aggregate_id, since + ); + let requester = DirectRequester::builder(net_cmds.clone(), net_events.clone()).build(); + let events: Vec> = + fetch_all_batched_events(requester, PeerTarget::Random, *aggregate_id, *since, 100) + .await?; + + info!( + "Received {} events for aggregate_id={}", + events.len(), + aggregate_id + ); + + for enclave_event in events { + let ts = enclave_event.ts(); + if ts > latest_timestamp { + latest_timestamp = ts; + } + all_events.push(enclave_event); + } + } + + info!( + "Sync complete: collected {} events across {} aggregates, latest_timestamp={}", + all_events.len(), + event.since.len(), + latest_timestamp + ); + + let value = SyncRequestSucceeded { + response: SyncResponseValue { + events: all_events, + ts: latest_timestamp, }, - 4, - 5000, - ) - .await?; + }; - // send the sync request succeeded to ourselves address.into().try_send(TypedEvent::new(value, ctx))?; Ok(()) } From 76e8b906a0b0ce523f8d0a21f2d8b8b1ecd3e96d Mon Sep 17 00:00:00 2001 From: ryardley Date: Fri, 27 Feb 2026 07:07:06 +0000 Subject: [PATCH 22/50] add test event builder for testing sync events --- crates/events/src/enclave_event/mod.rs | 113 +++++++++++++++ crates/sync/src/sync.rs | 187 +++++++++++++++++++------ 2 files changed, 259 insertions(+), 41 deletions(-) diff --git a/crates/events/src/enclave_event/mod.rs 
b/crates/events/src/enclave_event/mod.rs index 3136e8ca05..4374f7b2ac 100644 --- a/crates/events/src/enclave_event/mod.rs +++ b/crates/events/src/enclave_event/mod.rs @@ -640,3 +640,116 @@ impl EventConstructorWithTimestamp for EnclaveEvent { } } } + +// Add a test_event function on the EnclaveEvent for testing. This is available in production code +// for now as we cannot expose for other packages without setting up a feature flag although we +// should limit this with a feature flag +impl EnclaveEvent { + pub fn test_event(label: &str) -> TestEventBuilder { + TestEventBuilder::::test_event(label) + } +} + +/// Build out a test event +pub struct TestEventBuilder { + label: String, + seq: S::Seq, + id: Option, + data: Option, + aggregate_id: Option, + e3_id: Option, + ts: Option, +} + +impl TestEventBuilder { + pub fn test_event(label: &str) -> Self { + Self { + label: label.to_owned(), + seq: (), + id: None, + aggregate_id: None, + data: None, + e3_id: None, + ts: None, + } + } + pub fn seq(self, seq: u64) -> TestEventBuilder { + TestEventBuilder:: { + seq, + label: self.label, + id: self.id, + data: self.data, + aggregate_id: self.aggregate_id, + e3_id: self.e3_id, + ts: self.ts, + } + } +} + +impl TestEventBuilder { + pub fn id(mut self, id: u64) -> Self { + self.id = Some(id); + self + } + + pub fn aggregate_id(mut self, id: u64) -> Self { + self.aggregate_id = Some(id); + self + } + + pub fn e3_id(mut self, e3_id: E3id) -> Self { + self.e3_id = Some(e3_id); + self + } + + pub fn ts(mut self, ts: u128) -> Self { + self.ts = Some(ts); + self + } + pub fn data(mut self, data: impl Into) -> Self { + self.data = Some(data.into()); + self + } + + pub fn get_built_event(self) -> EnclaveEvent { + let event = self.data.unwrap_or( + TestEvent { + msg: self.label, + entropy: self.id.unwrap_or(0), + e3_id: resolve_e3_id(self.e3_id, self.id, self.aggregate_id), + } + .into(), + ); + + EnclaveEvent::::new_with_timestamp( + event, + None, + self.ts.unwrap_or(0), + None, + 
EventSource::Evm, + ) + } +} + +impl TestEventBuilder { + pub fn build(self) -> EnclaveEvent { + self.get_built_event() + } +} +impl TestEventBuilder { + pub fn build(self) -> EnclaveEvent { + let seq = self.seq; + let unseq = self.get_built_event(); + unseq.into_sequenced(seq) + } +} + +fn resolve_e3_id(e3_id: Option, id: Option, aggregate_id: Option) -> Option { + match (e3_id, id, aggregate_id) { + (Some(_), Some(id), Some(agg)) if agg != 0 => Some(E3id::new(id.to_string(), agg)), + (Some(e3), Some(id), _) => Some(E3id::new(id.to_string(), e3.chain_id())), + (Some(e3), _, Some(agg)) if agg != 0 => Some(E3id::new(e3.e3_id(), agg)), + (None, Some(id), Some(agg)) if agg != 0 => Some(E3id::new(id.to_string(), agg)), + (e3, _, _) => e3, + } +} diff --git a/crates/sync/src/sync.rs b/crates/sync/src/sync.rs index 19d97b3dfc..c896dcbc88 100644 --- a/crates/sync/src/sync.rs +++ b/crates/sync/src/sync.rs @@ -9,7 +9,7 @@ use actix::{Message, Recipient}; use anyhow::Result; use e3_data::Repositories; use e3_events::{ - AggregateConfig, AggregateId, BusHandle, CorrelationId, EffectsEnabled, EnclaveEvent, + AggregateConfig, AggregateId, BusHandle, CorrelationId, E3id, EffectsEnabled, EnclaveEvent, EnclaveEventData, Event, EventContextAccessors, EventPublisher, EventStoreQueryBy, EventStoreQueryResponse, EvmEventConfig, EvmEventConfigChain, HistoricalEvmEventsReceived, HistoricalEvmSyncStart, HistoricalNetEventsReceived, HistoricalNetSyncStart, SeqAgg, SyncEnded, @@ -92,22 +92,21 @@ pub async fn sync( "{} historical blockchain events loaded.", historical_evm_events.len() ); - - // XXX: Skipping as we have bugs in libp2p netevent requests + let net_config = find_net_hlc(&historical_evm_events); // 6. 
Load the historical libp2p events to memory - // info!("Loading historical libp2p events..."); - // let (addr, rx) = actix_toolbox::oneshot::(); - // bus.publish_without_context(HistoricalNetSyncStart::new(addr, net_config.clone()))?; - // let historical_net_events = rx.await?.events; - // info!( - // "{} historical libp2p events loaded.", - // historical_net_events.len() - // ); + info!("Loading historical libp2p events..."); + let (addr, rx) = actix_toolbox::oneshot::(); + bus.publish_without_context(HistoricalNetSyncStart::new(addr, net_config.clone()))?; + let historical_net_events = rx.await?.events; + info!( + "{} historical libp2p events loaded.", + historical_net_events.len() + ); // 7. Sort both the evm and libp2p events together by HLC timestamp let mut historical = historical_evm_events .into_iter() - // .chain(historical_net_events) // Commenting out to skip + .chain(historical_net_events) .collect::>(); historical.sort_by_key(|event| event.ts()); @@ -178,6 +177,28 @@ pub async fn collect_historical_evm_events( results } +fn find_net_hlc(events: &[EnclaveEvent]) -> BTreeMap { + // find all E3s that are closed + let e3s: Vec = events + .iter() + .filter_map(|e| match e.get_data() { + EnclaveEventData::E3Failed(d) => Some(d.e3_id.clone()), + EnclaveEventData::E3RequestComplete(d) => Some(d.e3_id.clone()), + _ => None, + }) + .collect(); + events + .to_vec() + .into_iter() + .filter(|e| e.get_e3_id().map_or(true, |id| !e3s.contains(&id))) + .fold(BTreeMap::new(), |mut acc, e| { + acc.entry(e.aggregate_id()) + .and_modify(|ts| *ts = (*ts).max(e.ts())) + .or_insert(e.ts()); + acc + }) +} + /// Latest event information in store #[derive(Clone)] pub struct AggregateState { @@ -279,29 +300,16 @@ impl SnapshotLoaded { Self { snapshot } } } - #[cfg(test)] mod tests { - use super::is_infrastructure_event; + use super::*; use e3_ciphernode_builder::EventSystem; use e3_events::{ - EffectsEnabled, EnclaveEvent, EnclaveEventData, Event, EventConstructorWithTimestamp, 
- EventSource, EvmEventConfig, HistoricalEvmSyncStart, SyncEnded, TakeEvents, TestEvent, + E3Failed, E3RequestComplete, E3Stage, E3id, EffectsEnabled, EnclaveEvent, EnclaveEventData, + Event, EvmEventConfig, FailureReason, HistoricalEvmSyncStart, SyncEnded, TakeEvents, Unsequenced, }; - fn make_sequenced(data: impl Into, seq: u64) -> EnclaveEvent { - EnclaveEvent::::new_with_timestamp( - data.into(), - None, - 1000, - None, - EventSource::Local, - ) - .into_sequenced(seq) - } - - /// `sender` is `Option>` — `None` is safe here since we're not dispatching. fn make_historical_evm_sync_start() -> HistoricalEvmSyncStart { HistoricalEvmSyncStart { evm_config: EvmEventConfig::new(), @@ -311,10 +319,22 @@ mod tests { #[test] fn infrastructure_events_are_detected() { - let sync_ended = make_sequenced(SyncEnded::new(), 1); - let effects_enabled = make_sequenced(EffectsEnabled::new(), 2); - let evm_sync_start = make_sequenced(make_historical_evm_sync_start(), 3); - let test_event = make_sequenced(TestEvent::new("hello", 42), 4); + let sync_ended = EnclaveEvent::::test_event("sync") + .data(SyncEnded::new()) + .seq(1) + .build(); + let effects_enabled = EnclaveEvent::::test_event("fx") + .data(EffectsEnabled::new()) + .seq(2) + .build(); + let evm_sync_start = EnclaveEvent::::test_event("evm") + .data(make_historical_evm_sync_start()) + .seq(3) + .build(); + let test_event = EnclaveEvent::::test_event("hello") + .id(42) + .seq(4) + .build(); assert!(is_infrastructure_event(&sync_ended)); assert!(is_infrastructure_event(&effects_enabled)); @@ -322,9 +342,6 @@ mod tests { assert!(!is_infrastructure_event(&test_event)); } - /// Regression: infrastructure events replayed from the EventStore must be filtered before - /// they reach the bus. If they aren't, the bloom-filter deduplicates the copy that `sync()` - /// re-publishes later, causing it to be silently dropped. 
#[actix::test] async fn infrastructure_events_are_filtered_during_replay() -> anyhow::Result<()> { let system = EventSystem::new().with_fresh_bus(); @@ -332,11 +349,26 @@ mod tests { let history = bus.history(); let events: Vec = vec![ - make_sequenced(TestEvent::new("before", 1), 1), - make_sequenced(SyncEnded::new(), 2), - make_sequenced(EffectsEnabled::new(), 3), - make_sequenced(make_historical_evm_sync_start(), 4), - make_sequenced(TestEvent::new("after", 2), 5), + EnclaveEvent::::test_event("before") + .id(1) + .seq(1) + .build(), + EnclaveEvent::::test_event("sync") + .data(SyncEnded::new()) + .seq(2) + .build(), + EnclaveEvent::::test_event("fx") + .data(EffectsEnabled::new()) + .seq(3) + .build(), + EnclaveEvent::::test_event("evm") + .data(make_historical_evm_sync_start()) + .seq(4) + .build(), + EnclaveEvent::::test_event("after") + .id(2) + .seq(5) + .build(), ]; for event in events { @@ -371,8 +403,81 @@ mod tests { } }) .collect(); - assert_eq!(msgs, vec!["before", "after"]); + assert_eq!(msgs, vec!["before", "after"]); Ok(()) } + + #[test] + fn test_find_net_hlc() { + let closed_1 = E3id::new("1", 1); + let closed_2 = E3id::new("2", 2); + let open_1 = E3id::new("3", 3); + let open_2 = E3id::new("4", 4); + + let events = vec![ + // closed e3s -> should be filtered out + EnclaveEvent::::test_event("a") + .e3_id(closed_1.clone()) + .ts(1000) + .build(), + EnclaveEvent::::test_event("a") + .e3_id(closed_1.clone()) + .ts(2000) + .build(), + EnclaveEvent::::test_event("complete") + .data(E3RequestComplete { + e3_id: closed_1.clone(), + }) + .ts(3000) + .build(), + EnclaveEvent::::test_event("b") + .e3_id(closed_2.clone()) + .ts(1500) + .build(), + EnclaveEvent::::test_event("failed") + .data(E3Failed { + e3_id: closed_2.clone(), + failed_at_stage: E3Stage::CommitteeFinalized, + reason: FailureReason::InsufficientCommitteeMembers, + }) + .ts(2500) + .build(), + // open e3s -> should be kept + EnclaveEvent::::test_event("c") + .e3_id(open_1.clone()) + 
.ts(4000) + .build(), + EnclaveEvent::::test_event("c") + .e3_id(open_1.clone()) + .ts(5000) + .build(), + EnclaveEvent::::test_event("d") + .e3_id(open_2.clone()) + .ts(6000) + .build(), + // no e3_id -> aggregate 0, always kept + EnclaveEvent::::test_event("e") + .ts(7000) + .build(), + EnclaveEvent::::test_event("e") + .ts(8000) + .build(), + ]; + + let result = find_net_hlc(&events); + + // closed e3s excluded + assert!(!result.contains_key(&AggregateId::new(1))); + assert!(!result.contains_key(&AggregateId::new(2))); + + // open e3s kept with max ts + assert_eq!(result[&AggregateId::new(3)], 5000); + assert_eq!(result[&AggregateId::new(4)], 6000); + + // no-e3 events kept with max ts + assert_eq!(result[&AggregateId::new(0)], 8000); + + assert_eq!(result.len(), 3); + } } From 85c0e1e67834a9f65fba69e73e9e20f5d6f70dc9 Mon Sep 17 00:00:00 2001 From: ryardley Date: Fri, 27 Feb 2026 07:15:50 +0000 Subject: [PATCH 23/50] add tests and use feature flag --- crates/events/src/enclave_event/mod.rs | 24 ++++++++++++++++++------ crates/sync/Cargo.toml | 1 + 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/crates/events/src/enclave_event/mod.rs b/crates/events/src/enclave_event/mod.rs index 4374f7b2ac..16f599b1f4 100644 --- a/crates/events/src/enclave_event/mod.rs +++ b/crates/events/src/enclave_event/mod.rs @@ -641,12 +641,11 @@ impl EventConstructorWithTimestamp for EnclaveEvent { } } -// Add a test_event function on the EnclaveEvent for testing. 
This is available in production code -// for now as we cannot expose for other packages without setting up a feature flag although we -// should limit this with a feature flag +#[cfg(feature = "test-helpers")] impl EnclaveEvent { + /// Create a test event using the TestEventBuilder struct pub fn test_event(label: &str) -> TestEventBuilder { - TestEventBuilder::::test_event(label) + TestEventBuilder::::new(label) } } @@ -662,7 +661,8 @@ pub struct TestEventBuilder { } impl TestEventBuilder { - pub fn test_event(label: &str) -> Self { + /// Create a new test event + pub fn new(label: &str) -> Self { Self { label: label.to_owned(), seq: (), @@ -673,6 +673,8 @@ impl TestEventBuilder { ts: None, } } + + /// make it a sequenced event pub fn seq(self, seq: u64) -> TestEventBuilder { TestEventBuilder:: { seq, @@ -687,31 +689,38 @@ impl TestEventBuilder { } impl TestEventBuilder { + /// Add an e3_id based on a u64; this takes precedence over e3_id() pub fn id(mut self, id: u64) -> Self { self.id = Some(id); self } + /// Ensure the event holds the given aggregate_id; this takes precedence over e3_id() pub fn aggregate_id(mut self, id: u64) -> Self { self.aggregate_id = Some(id); self } + /// Ensure the event holds the given e3_id. pub fn e3_id(mut self, e3_id: E3id) -> Self { self.e3_id = Some(e3_id); self } + /// Ensure the event holds a ts pub fn ts(mut self, ts: u128) -> Self { self.ts = Some(ts); self } + + /// Ensure the event holds the given EnclaveEventData object.
This overrides all other params + /// aside from seq(n) pub fn data(mut self, data: impl Into) -> Self { self.data = Some(data.into()); self } - pub fn get_built_event(self) -> EnclaveEvent { + fn get_built_event(self) -> EnclaveEvent { let event = self.data.unwrap_or( TestEvent { msg: self.label, @@ -732,11 +741,14 @@ impl TestEventBuilder { } impl TestEventBuilder { + /// Build the event pub fn build(self) -> EnclaveEvent { self.get_built_event() } } + impl TestEventBuilder { + /// Build the event pub fn build(self) -> EnclaveEvent { let seq = self.seq; let unseq = self.get_built_event(); diff --git a/crates/sync/Cargo.toml b/crates/sync/Cargo.toml index 268d7b1aa3..c06369c819 100644 --- a/crates/sync/Cargo.toml +++ b/crates/sync/Cargo.toml @@ -17,5 +17,6 @@ tokio.workspace = true tracing.workspace = true [dev-dependencies] +e3-events = { workspace = true, features = ["test-helpers"] } e3-ciphernode-builder.workspace = true e3-test-helpers.workspace = true From 0b72951a90a0c60d10754b28e71ebbc6280af6a2 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sat, 28 Feb 2026 11:38:09 +0000 Subject: [PATCH 24/50] update HistoricalSyncNetEventsReceived --- crates/events/src/bus_handle.rs | 13 ++++++++-- .../src/enclave_event/enable_effects.rs | 2 +- crates/events/src/enclave_event/mod.rs | 12 ++++++---- ...t_sync_events_received.rs => net_ready.rs} | 18 +++++++------- crates/events/src/enclave_event/sync_end.rs | 2 +- crates/events/src/enclave_event/sync_start.rs | 22 ++++------------- crates/events/src/traits.rs | 5 +++- crates/net/src/dialer.rs | 9 ++++--- crates/net/src/events.rs | 4 ++++ crates/net/src/net_event_translator.rs | 6 ++--- crates/net/src/net_sync_manager.rs | 18 +++++++++----- crates/sync/src/sync.rs | 24 ++++++++++++++----- crates/utils/src/retry.rs | 2 +- 13 files changed, 83 insertions(+), 54 deletions(-) rename crates/events/src/enclave_event/{net_sync_events_received.rs => net_ready.rs} (61%) diff --git a/crates/events/src/bus_handle.rs
b/crates/events/src/bus_handle.rs index 94ccc6ba25..e08a1cf1c5 100644 --- a/crates/events/src/bus_handle.rs +++ b/crates/events/src/bus_handle.rs @@ -7,8 +7,8 @@ use actix::{Actor, Addr, Handler, Recipient}; use anyhow::Result; use derivative::Derivative; -use e3_utils::MAILBOX_LIMIT; -use std::marker::PhantomData; +use e3_utils::{actix::channel::oneshot, MAILBOX_LIMIT}; +use std::{future::Future, marker::PhantomData, pin::Pin}; use tracing::error; use crate::{ @@ -287,6 +287,15 @@ impl EventSubscriber> for BusHandle { self.event_bus .do_send(Unsubscribe::new(event_type, recipient)); } + + fn wait_for( + &self, + event_type: EventType, + ) -> Pin>> + Send>> { + let (addr, rx) = oneshot::>(); + self.subscribe(event_type, addr.clone()); + Box::pin(async move { Ok(rx.await?) }) + } } impl EventContextManager for BusHandle { diff --git a/crates/events/src/enclave_event/enable_effects.rs b/crates/events/src/enclave_event/enable_effects.rs index 4657fab0bd..e6830661b4 100644 --- a/crates/events/src/enclave_event/enable_effects.rs +++ b/crates/events/src/enclave_event/enable_effects.rs @@ -13,7 +13,7 @@ use std::fmt::{self, Display}; #[derive(Message, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[rtype(result = "()")] pub struct EffectsEnabled { - pub correlation_id: CorrelationId, + correlation_id: CorrelationId, } impl EffectsEnabled { diff --git a/crates/events/src/enclave_event/mod.rs b/crates/events/src/enclave_event/mod.rs index abf37df8a8..9cc6fcee0a 100644 --- a/crates/events/src/enclave_event/mod.rs +++ b/crates/events/src/enclave_event/mod.rs @@ -28,7 +28,7 @@ mod encryption_key_created; mod encryption_key_pending; mod encryption_key_received; mod keyshare_created; -mod net_sync_events_received; +mod net_ready; mod operator_activation_changed; mod outgoing_sync_requested; mod pk_generation_proof_signed; @@ -77,7 +77,7 @@ pub use encryption_key_created::*; pub use encryption_key_pending::*; pub use encryption_key_received::*; pub use 
keyshare_created::*; -pub use net_sync_events_received::*; +pub use net_ready::*; pub use operator_activation_changed::*; pub use outgoing_sync_requested::*; pub use pk_generation_proof_signed::*; @@ -239,12 +239,13 @@ pub enum EnclaveEventData { ComputeRequestError(ComputeRequestError), // ComputeRequestFailed SignedProofFailed(SignedProofFailed), OutgoingSyncRequested(OutgoingSyncRequested), - NetSyncEventsReceived(NetSyncEventsReceived), HistoricalEvmSyncStart(HistoricalEvmSyncStart), HistoricalNetSyncStart(HistoricalNetSyncStart), + HistoricalNetSyncEventsReceived(HistoricalNetSyncEventsReceived), SyncEffect(SyncEffect), SyncEnded(SyncEnded), EffectsEnabled(EffectsEnabled), + NetReady(NetReady), /// This is a test event to use in testing TestEvent(TestEvent), } @@ -569,12 +570,13 @@ impl_event_types!( ComputeRequestError, SignedProofFailed, OutgoingSyncRequested, - NetSyncEventsReceived, HistoricalEvmSyncStart, HistoricalNetSyncStart, + HistoricalNetSyncEventsReceived, SyncEffect, SyncEnded, - EffectsEnabled + EffectsEnabled, + NetReady ); impl TryFrom<&EnclaveEvent> for EnclaveError { diff --git a/crates/events/src/enclave_event/net_sync_events_received.rs b/crates/events/src/enclave_event/net_ready.rs similarity index 61% rename from crates/events/src/enclave_event/net_sync_events_received.rs rename to crates/events/src/enclave_event/net_ready.rs index eefe1828fb..c8205df15c 100644 --- a/crates/events/src/enclave_event/net_sync_events_received.rs +++ b/crates/events/src/enclave_event/net_ready.rs @@ -4,25 +4,27 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
+use crate::CorrelationId; use actix::Message; use serde::{Deserialize, Serialize}; use std::fmt::{self, Display}; -use super::{EnclaveEvent, Unsequenced}; - +/// Dispatched once all peers have been dialed and the net layer is ready for historical sync requests #[derive(Message, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[rtype(result = "()")] -pub struct NetSyncEventsReceived { - pub events: Vec>, +pub struct NetReady { + correlation_id: CorrelationId, } -impl NetSyncEventsReceived { - pub fn new(events: Vec>) -> Self { - Self { events } +impl NetReady { + pub fn new() -> Self { + Self { + correlation_id: CorrelationId::new(), + } } } -impl Display for NetSyncEventsReceived { +impl Display for NetReady { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self) } diff --git a/crates/events/src/enclave_event/sync_end.rs b/crates/events/src/enclave_event/sync_end.rs index 881cf0dd71..5507705815 100644 --- a/crates/events/src/enclave_event/sync_end.rs +++ b/crates/events/src/enclave_event/sync_end.rs @@ -13,7 +13,7 @@ use std::fmt::{self, Display}; #[derive(Message, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[rtype(result = "()")] pub struct SyncEnded { - pub correlation_id: CorrelationId, + correlation_id: CorrelationId, } impl SyncEnded { diff --git a/crates/events/src/enclave_event/sync_start.rs b/crates/events/src/enclave_event/sync_start.rs index 89993094ed..1db06bd508 100644 --- a/crates/events/src/enclave_event/sync_start.rs +++ b/crates/events/src/enclave_event/sync_start.rs @@ -63,23 +63,11 @@ impl Display for HistoricalEvmSyncStart { #[rtype(result = "()")] pub struct HistoricalNetSyncStart { pub since: BTreeMap, - #[serde(skip)] - /// We include the sender here so that the evm can communicate directly with the sync actor - pub sender: Option>, // Must be Option to allow serde deserialize on - // EnclaveEvent as Default is required to be - // implemented this is fine as this event is never - // shared } impl
HistoricalNetSyncStart { - pub fn new( - sender: impl Into>, - since: BTreeMap, - ) -> Self { - Self { - since, - sender: Some(sender.into()), - } + pub fn new(since: BTreeMap) -> Self { + Self { since } } } @@ -110,17 +98,17 @@ impl Display for HistoricalEvmEventsReceived { #[derive(Message, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[rtype(result = "()")] -pub struct HistoricalNetEventsReceived { +pub struct HistoricalNetSyncEventsReceived { pub events: Vec>, } -impl HistoricalNetEventsReceived { +impl HistoricalNetSyncEventsReceived { pub fn new(events: Vec>) -> Self { Self { events } } } -impl Display for HistoricalNetEventsReceived { +impl Display for HistoricalNetSyncEventsReceived { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self) } diff --git a/crates/events/src/traits.rs b/crates/events/src/traits.rs index 5ac78dd3ed..599562d12f 100644 --- a/crates/events/src/traits.rs +++ b/crates/events/src/traits.rs @@ -6,8 +6,9 @@ use actix::{Message, Recipient}; use anyhow::Result; -use std::fmt::Display; use std::hash::Hash; +use std::pin::Pin; +use std::{fmt::Display, future::Future}; use crate::{ event_context::{AggregateId, EventContext}, @@ -140,6 +141,8 @@ pub trait EventSubscriber { fn subscribe_all(&self, event_types: &[EventType], recipient: Recipient); /// Subscribe the recipient to events matching the given event type fn unsubscribe(&self, event_type: &str, recipient: Recipient); + /// Return a future that waits for a specific event + fn wait_for(&self, event_type: EventType) -> Pin> + Send>>; } /// Trait to create an event with a timestamp from its associated type data diff --git a/crates/net/src/dialer.rs b/crates/net/src/dialer.rs index bae685397a..3201622323 100644 --- a/crates/net/src/dialer.rs +++ b/crates/net/src/dialer.rs @@ -19,7 +19,10 @@ use tracing::trace; use tracing::warn; use crate::events::{NetCommand, NetEvent}; -use e3_utils::{retry_with_backoff, to_retry, RetryError, BACKOFF_DELAY, 
BACKOFF_MAX_RETRIES}; +use e3_utils::{retry_with_backoff, to_retry, RetryError}; + +const DIAL_DELAY: u64 = 3000; +const DIAL_RETRIES: u32 = 10; /// Dial a single Multiaddr with retries and return an error should those retries not work async fn dial_multiaddr( @@ -31,8 +34,8 @@ async fn dial_multiaddr( info!("Now dialing in to {}", multiaddr); retry_with_backoff( || attempt_connection(cmd_tx, event_tx, multiaddr), - BACKOFF_MAX_RETRIES, - BACKOFF_DELAY, + DIAL_RETRIES, + DIAL_DELAY, ) .await?; Ok(()) diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index bc3bdf5022..9d73fc3019 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -148,6 +148,10 @@ pub struct OutgoingRequestFailed { pub error: String, } +#[derive(Message, Debug, Clone)] +#[rtype("()")] +pub struct AllPeersDialed; + /// NetInterface Commands are sent to the network peer over a mspc channel #[derive(Debug)] // The generics here aid testing allowing us to avoid constructing complex types diff --git a/crates/net/src/net_event_translator.rs b/crates/net/src/net_event_translator.rs index 709d68e410..4747f63723 100644 --- a/crates/net/src/net_event_translator.rs +++ b/crates/net/src/net_event_translator.rs @@ -11,13 +11,13 @@ use anyhow::Result; use bloom::{BloomFilter, ASMS}; use e3_events::{ prelude::*, trap, BusHandle, CorrelationId, EType, EnclaveEvent, EnclaveEventData, Event, - EventContextAccessors, EventSource, EventType, Unsequenced, + EventContextAccessors, EventSource, EventType, NetReady, Unsequenced, }; use e3_utils::MAILBOX_LIMIT; use std::sync::Arc; use tokio::sync::broadcast; use tokio::sync::mpsc; -use tracing::{trace, warn}; +use tracing::{info, trace, warn}; // TODO: store event filtering here on this actor instead of is_local_only() on the event. 
We // should do this as this functionality is not global and ramifications should stay local to here @@ -65,7 +65,7 @@ impl NetEventTranslator { // Listen on all events bus.subscribe(EventType::All, addr.clone().recipient()); - + info!("NetEventTranslator is running"); tokio::spawn({ let addr = addr.clone(); async move { diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 050d2f2d2a..70df7cc002 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -9,7 +9,8 @@ use anyhow::{bail, Context, Result}; use e3_events::{ prelude::*, trap, trap_fut, AggregateId, BusHandle, CorrelationId, EType, EnclaveEvent, EnclaveEventData, EventSource, EventStoreQueryBy, EventStoreQueryResponse, EventType, - HistoricalNetSyncStart, NetSyncEventsReceived, TsAgg, TypedEvent, Unsequenced, + HistoricalNetSyncEventsReceived, HistoricalNetSyncStart, NetReady, TsAgg, TypedEvent, + Unsequenced, }; use e3_utils::MAILBOX_LIMIT; use serde::{Deserialize, Serialize}; @@ -164,10 +165,11 @@ impl Handler> for NetSyncManager { _: &mut Self::Context, ) -> Self::Result { trap(EType::Net, &self.bus.with_ec(msg.get_ctx()), || { + info!("SYNC REQUEST SUCCEEDED"); let (msg, ctx) = msg.into_components(); let response = msg.response; self.bus.publish_from_remote_as_response( - NetSyncEventsReceived { + HistoricalNetSyncEventsReceived { events: response.events.iter().cloned().collect(), }, response.ts, @@ -244,8 +246,12 @@ impl Handler for NetSyncManager { impl Handler for NetSyncManager { type Result = (); fn handle(&mut self, _: AllPeersDialed, _: &mut Self::Context) -> Self::Result { - info!("Received handler: All peers dialed"); - self.peers_ready = true; + trap(EType::Sync, &self.bus.clone(), || { + info!("NetSyncManager: AllPeersDialed"); + self.peers_ready = true; + self.bus.publish_without_context(NetReady::new())?; + Ok(()) + }) } } @@ -262,7 +268,7 @@ async fn handle_sync_request_event( ) -> Result<()> { info!("Sync request 
event received"); let (event, ctx) = event.into_components(); - info!("Waiting for peers to have been contacted..."); + info!("Checking for AllPeersDialed..."); if wait_for_event { await_event( &net_events, @@ -278,7 +284,7 @@ async fn handle_sync_request_event( ) .await?; } - info!("handle_sync_request_event: All peers have been dialed."); + info!("handle_sync_request_event: AllPeersDialed"); let mut all_events: Vec> = Vec::new(); let mut latest_timestamp: u128 = 0; diff --git a/crates/sync/src/sync.rs b/crates/sync/src/sync.rs index c896dcbc88..487b681cff 100644 --- a/crates/sync/src/sync.rs +++ b/crates/sync/src/sync.rs @@ -6,13 +6,13 @@ use crate::SyncRepositoryFactory; use actix::{Message, Recipient}; -use anyhow::Result; +use anyhow::{bail, Result}; use e3_data::Repositories; use e3_events::{ AggregateConfig, AggregateId, BusHandle, CorrelationId, E3id, EffectsEnabled, EnclaveEvent, EnclaveEventData, Event, EventContextAccessors, EventPublisher, EventStoreQueryBy, - EventStoreQueryResponse, EvmEventConfig, EvmEventConfigChain, HistoricalEvmEventsReceived, - HistoricalEvmSyncStart, HistoricalNetEventsReceived, HistoricalNetSyncStart, SeqAgg, SyncEnded, + EventStoreQueryResponse, EventSubscriber, EventType, EvmEventConfig, EvmEventConfigChain, + HistoricalEvmEventsReceived, HistoricalEvmSyncStart, HistoricalNetSyncStart, SeqAgg, SyncEnded, Unsequenced, }; use e3_utils::actix::channel as actix_toolbox; @@ -39,6 +39,9 @@ pub async fn sync( aggregate_config: &AggregateConfig, eventstore: &Recipient>, ) -> Result<()> { + // 0. start listening early for net ready + let net_ready = bus.wait_for(EventType::NetReady); + // 1. Load snapsshot metadata info!("Loading snapshot metadata..."); let snapshot = @@ -94,10 +97,19 @@ pub async fn sync( ); let net_config = find_net_hlc(&historical_evm_events); // 6. 
Load the historical libp2p events to memory + info!("Waiting until NetReady..."); + net_ready.await?; + info!("NetReady!"); info!("Loading historical libp2p events..."); - let (addr, rx) = actix_toolbox::oneshot::(); - bus.publish_without_context(HistoricalNetSyncStart::new(addr, net_config.clone()))?; - let historical_net_events = rx.await?.events; + // let (addr, rx) = actix_toolbox::oneshot::(); + let events_received = bus.wait_for(EventType::HistoricalNetSyncEventsReceived); + bus.publish_without_context(HistoricalNetSyncStart::new(net_config.clone()))?; + let EnclaveEventData::HistoricalNetSyncEventsReceived(event) = + events_received.await?.into_data() + else { + bail!("failed to get HistoricalNetSyncEventsReceived"); + }; + let historical_net_events = event.events; info!( "{} historical libp2p events loaded.", historical_net_events.len() diff --git a/crates/utils/src/retry.rs b/crates/utils/src/retry.rs index 6eb82724e8..f8bcd1c046 100644 --- a/crates/utils/src/retry.rs +++ b/crates/utils/src/retry.rs @@ -19,7 +19,7 @@ pub fn to_retry(e: impl Into) -> RetryError { RetryError::Retry(e.into()) } -pub const BACKOFF_DELAY: u64 = 500; +pub const BACKOFF_DELAY: u64 = 3000; pub const BACKOFF_MAX_RETRIES: u32 = 10; /// Retries an async operation with exponential backoff From 6dad25efa102195380640cc39fded7528ab29f26 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 01:39:56 +0000 Subject: [PATCH 25/50] remove generic --- crates/net/src/dialer.rs | 4 +-- crates/net/src/direct_responder.rs | 48 +++++++++++++++++++----------- crates/net/src/events.rs | 22 ++++++-------- crates/net/src/net_interface.rs | 13 +++++--- 4 files changed, 51 insertions(+), 36 deletions(-) diff --git a/crates/net/src/dialer.rs b/crates/net/src/dialer.rs index 3201622323..d28ae911ec 100644 --- a/crates/net/src/dialer.rs +++ b/crates/net/src/dialer.rs @@ -19,7 +19,7 @@ use tracing::trace; use tracing::warn; use crate::events::{NetCommand, NetEvent}; -use e3_utils::{retry_with_backoff, 
to_retry, RetryError}; +use e3_utils::{retry_with_backoff, to_retry, OnceTake, RetryError}; const DIAL_DELAY: u64 = 3000; const DIAL_RETRIES: u32 = 10; @@ -82,7 +82,7 @@ async fn attempt_connection( dial_connection ); cmd_tx - .send(NetCommand::Dial(opts)) + .send(NetCommand::Dial(OnceTake::new(opts))) .await .map_err(to_retry)?; wait_for_connection(&mut event_rx, dial_connection).await diff --git a/crates/net/src/direct_responder.rs b/crates/net/src/direct_responder.rs index 227e901e59..801b44fe7a 100644 --- a/crates/net/src/direct_responder.rs +++ b/crates/net/src/direct_responder.rs @@ -7,7 +7,7 @@ use crate::events::{IncomingResponse, NetCommand, ProtocolResponse, ProtocolResponseChannel}; use anyhow::{anyhow, Context, Result}; use e3_utils::OnceTake; -use libp2p::request_response::InboundRequestId; +use libp2p::request_response::{InboundRequestId, ResponseChannel}; use tokio::sync::mpsc; /// Helper trait to extract id from libp2p things like InboundRequestId @@ -31,6 +31,13 @@ impl IntoId for InboundRequestId { .expect("Failed to extract u64 from InboundRequestId") } } + +#[derive(Debug)] +pub enum ChannelType { + Test(String), // For testing + Channel(ResponseChannel), // actual libp2p response channel +} + #[derive(Debug)] /// DirectResponder is used to respond to incoming libp2p requests. /// @@ -63,14 +70,14 @@ impl IntoId for InboundRequestId { /// # Ok(()) /// # } /// ``` -pub struct DirectResponder { +pub struct DirectResponder { id: u64, request: Vec, response: Option, - channel: OnceTake, - net_cmds: mpsc::Sender>, + channel: OnceTake, + net_cmds: mpsc::Sender, } -impl Clone for DirectResponder { +impl Clone for DirectResponder { fn clone(&self) -> Self { Self { id: self.id.clone(), @@ -82,13 +89,13 @@ impl Clone for DirectResponder { } } -impl DirectResponder { +impl DirectResponder { /// Creates a new responder for an incoming request. /// /// * `id` - is the request identifier used for debugging (e.g., `InboundRequestId` or `u64`). 
/// * `channel` - is usually the response channel provided by libp2p but can be anything that is passed along with the response /// * `net_cmds` - sender is used to send the response back to the net interface. - pub fn new(id: impl IntoId, channel: C, net_cmds: &mpsc::Sender>) -> Self { + pub fn new(id: impl IntoId, channel: ChannelType, net_cmds: &mpsc::Sender) -> Self { Self { id: id.into_id(), request: Vec::new(), @@ -124,7 +131,7 @@ impl DirectResponder { } /// Extract the payload information to send to swarm - pub fn to_response(mut self) -> Result<(C, ProtocolResponse)> { + pub fn to_response(mut self) -> Result<(ChannelType, ProtocolResponse)> { let channel = self.channel.try_take()?; let response = self .response @@ -138,10 +145,10 @@ impl DirectResponder { let response = value; self.response = Some(response); let cmds = self.net_cmds.clone(); - let incoming = IncomingResponse::::new(self); + let incoming = IncomingResponse::new(self); Ok(cmds .clone() - .try_send(NetCommand::::IncomingResponse(incoming)) + .try_send(NetCommand::IncomingResponse(incoming)) .map_err(|e| anyhow!("Failed to send response command {:?}", e))?) 
} @@ -167,20 +174,27 @@ impl DirectResponder { #[cfg(test)] mod tests { use super::*; + use anyhow::bail; use tokio::sync::mpsc; - fn make_responder() -> (DirectResponder, mpsc::Receiver>) { - let (tx, rx) = mpsc::channel::>(16); - let responder = DirectResponder::new(42u64, "test_channel".to_string(), &tx); + fn make_responder() -> (DirectResponder, mpsc::Receiver) { + let (tx, rx) = mpsc::channel::(16); + let responder = + DirectResponder::new(42u64, ChannelType::Test("test_channel".to_string()), &tx); (responder, rx) } - fn extract_response( - rx: &mut mpsc::Receiver>, - ) -> Result<(String, ProtocolResponse)> { + fn extract_response(rx: &mut mpsc::Receiver) -> Result<(String, ProtocolResponse)> { let cmd = rx.try_recv().unwrap(); match cmd { - NetCommand::IncomingResponse(incoming) => incoming.responder.to_response(), + NetCommand::IncomingResponse(incoming) => { + let (channel, response) = incoming.responder.to_response().unwrap(); + let ChannelType::Test(channel) = channel else { + bail!("bad channel"); + }; + Ok((channel, response)) + } + other => panic!("Expected IncomingResponse, got {:?}", other), } } diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index 9d73fc3019..d1f9be5a91 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -11,7 +11,7 @@ use e3_events::{ CorrelationId, DocumentMeta, EnclaveEvent, EventContextAccessors, EventSource, Sequenced, Unsequenced, }; -use e3_utils::ArcBytes; +use e3_utils::{ArcBytes, OnceTake}; use libp2p::{ gossipsub::{MessageId, PublishError, TopicHash}, kad::{store, GetRecordError, PutRecordError}, @@ -93,12 +93,12 @@ pub struct IncomingRequest { #[derive(Clone, Debug)] /// We are responding to a remote request -pub struct IncomingResponse>> { - pub responder: DirectResponder, +pub struct IncomingResponse { + pub responder: DirectResponder, } -impl IncomingResponse { - pub fn new(responder: DirectResponder) -> Self { +impl IncomingResponse { + pub fn new(responder: DirectResponder) -> 
Self { Self { responder } } } @@ -153,12 +153,8 @@ pub struct OutgoingRequestFailed { pub struct AllPeersDialed; /// NetInterface Commands are sent to the network peer over a mspc channel -#[derive(Debug)] -// The generics here aid testing allowing us to avoid constructing complex types -// This is probably not an issue aside from complex types that are actively hidden from -// clone such as passing around a response channel which we don't control. -// Basically this helps us test and I don't expect this list to grow much. -pub enum NetCommand { +#[derive(Debug, Clone)] +pub enum NetCommand { /// Publish message to gossipsub GossipPublish { topic: String, @@ -166,7 +162,7 @@ pub enum NetCommand { correlation_id: CorrelationId, }, /// Dial peer - Dial(DialOpts), + Dial(OnceTake), /// Command to PublishDocument to Kademlia DhtPutRecord { correlation_id: CorrelationId, @@ -187,7 +183,7 @@ pub enum NetCommand { Shutdown, /// Send a request to a peer and await response OutgoingRequest(OutgoingRequest), - IncomingResponse(IncomingResponse), + IncomingResponse(IncomingResponse), } impl NetCommand { diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 899732e6f9..cc4c04f0fb 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -6,10 +6,10 @@ use crate::{ correlator::Correlator, - direct_responder::DirectResponder, + direct_responder::{ChannelType, DirectResponder}, events::{IncomingResponse, OutgoingRequest, ProtocolResponse}, }; -use anyhow::{Context, Result}; +use anyhow::{bail, Context, Result}; use e3_events::CorrelationId; use e3_utils::ArcBytes; use libp2p::{ @@ -434,7 +434,8 @@ async fn process_swarm_event( )) => { debug!("Incoming request received (id={})", request_id); let responder = - DirectResponder::new(request_id, channel, &cmd_tx).with_request(request); + DirectResponder::new(request_id, ChannelType::Channel(channel), &cmd_tx) + .with_request(request); // received a request for events 
event_tx.send(NetEvent::IncomingRequest(IncomingRequest { responder }))?; @@ -522,7 +523,8 @@ async fn process_swarm_command( handle_gossip_publish(swarm, event_tx, data, topic, correlation_id)?; Ok(()) } - NetCommand::Dial(multi) => { + NetCommand::Dial(env) => { + let multi = env.take().context("Dial received without payload")?; handle_dial(swarm, event_tx, multi)?; Ok(()) } @@ -792,6 +794,9 @@ fn handle_outgoing_request( fn handle_response(swarm: &mut Swarm, responder: DirectResponder) -> Result<()> { debug!("Sending response to {}", responder.id()); let (channel, response) = responder.to_response()?; + let ChannelType::Channel(channel) = channel else { + bail!("responder did not return the correct type of channel"); + }; swarm .behaviour_mut() .request_response From 3712e3175d087851337bac5d043c5f5a305127b6 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 10:04:24 +0000 Subject: [PATCH 26/50] refactor to allow simuating libp2p at the net_interface level --- Cargo.lock | 2 + crates/ciphernode-builder/Cargo.toml | 1 + crates/ciphernode-builder/src/ciphernode.rs | 28 ++- .../src/ciphernode_builder.rs | 39 ++-- crates/entrypoint/src/helpers/shutdown.rs | 51 +---- crates/net/src/bin/p2p_test.rs | 12 +- crates/net/src/document_publisher.rs | 18 +- crates/net/src/events.rs | 4 +- crates/net/src/lib.rs | 56 ++++-- crates/net/src/net_event_translator.rs | 4 +- crates/net/src/net_interface.rs | 55 ++++-- crates/net/src/net_interface_handle.rs | 187 ++++++++++++++++++ crates/net/src/net_sync_manager.rs | 2 +- crates/test-helpers/Cargo.toml | 1 + crates/test-helpers/src/ciphernode_system.rs | 7 +- crates/test-helpers/src/lib.rs | 127 ++++++++++-- crates/test-helpers/src/libp2p_mock.rs | 121 ++++++++++++ 17 files changed, 572 insertions(+), 143 deletions(-) create mode 100644 crates/net/src/net_interface_handle.rs create mode 100644 crates/test-helpers/src/libp2p_mock.rs diff --git a/Cargo.lock b/Cargo.lock index b4168fa91f..2af7b3383f 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -3105,6 +3105,7 @@ dependencies = [ "e3-trbfv", "e3-utils", "e3-zk-prover", + "libp2p", "once_cell", "rayon", "tempfile", @@ -3703,6 +3704,7 @@ dependencies = [ "fhe", "fhe-traits", "hex", + "libp2p", "rand 0.8.5", "rand_chacha 0.3.1", "tokio", diff --git a/crates/ciphernode-builder/Cargo.toml b/crates/ciphernode-builder/Cargo.toml index 0e86c91666..8e19b71d3d 100644 --- a/crates/ciphernode-builder/Cargo.toml +++ b/crates/ciphernode-builder/Cargo.toml @@ -29,6 +29,7 @@ e3-sortition.workspace = true e3-sync.workspace = true e3-trbfv.workspace = true e3-utils.workspace = true +libp2p.workspace = true rayon.workspace = true tempfile.workspace = true tokio.workspace = true diff --git a/crates/ciphernode-builder/src/ciphernode.rs b/crates/ciphernode-builder/src/ciphernode.rs index c4af8deaca..77dcdba75a 100644 --- a/crates/ciphernode-builder/src/ciphernode.rs +++ b/crates/ciphernode-builder/src/ciphernode.rs @@ -8,7 +8,8 @@ use actix::Addr; use anyhow::Result; use e3_data::{DataStore, InMemStore, StoreAddr}; use e3_events::{BusHandle, EnclaveEvent, HistoryCollector}; -use tokio::task::JoinHandle; +use e3_net::NetInterfaceInvertedHandle; +use libp2p::PeerId; /// A Sharable handle to a Ciphernode. NOTE: clones are available for use in the CiphernodeSystem /// but they cannot await the task. 
@@ -19,10 +20,18 @@ pub struct CiphernodeHandle { pub bus: BusHandle, pub history: Option>>, pub errors: Option>>, - pub peer_id: String, - pub join_handle: JoinHandle>, + pub peer_id: PeerId, + pub net_simulate_adaptor: Option, } +impl PartialEq for CiphernodeHandle { + fn eq(&self, other: &Self) -> bool { + self.address == other.address && self.peer_id == other.peer_id + } +} + +impl Eq for CiphernodeHandle {} + impl CiphernodeHandle { pub fn new( address: String, @@ -30,8 +39,8 @@ impl CiphernodeHandle { bus: BusHandle, history: Option>>, errors: Option>>, - peer_id: String, - join_handle: JoinHandle>, + peer_id: PeerId, + net_simulate_adaptor: Option, ) -> Self { Self { address, @@ -40,7 +49,7 @@ impl CiphernodeHandle { history, errors, peer_id, - join_handle, + net_simulate_adaptor, } } @@ -64,8 +73,11 @@ impl CiphernodeHandle { &self.store } - pub fn split(self) -> (BusHandle, JoinHandle>) { - (self.bus, self.join_handle) + pub fn get_test_interface(&self) -> Result { + Ok(self + .net_simulate_adaptor + .clone() + .ok_or(anyhow::anyhow!("No interface exists"))?) 
} pub fn in_mem_store(&self) -> Option<&Addr> { diff --git a/crates/ciphernode-builder/src/ciphernode_builder.rs b/crates/ciphernode-builder/src/ciphernode_builder.rs index b44edddbbf..e88359ac5a 100644 --- a/crates/ciphernode-builder/src/ciphernode_builder.rs +++ b/crates/ciphernode-builder/src/ciphernode_builder.rs @@ -23,7 +23,10 @@ use e3_fhe::ext::FheExtension; use e3_fhe_params::BfvPreset; use e3_keyshare::ext::ThresholdKeyshareExtension; use e3_multithread::{Multithread, MultithreadReport, TaskPool}; -use e3_net::{setup_net, NetRepositoryFactory}; +use e3_net::{ + create_test_net_interface, setup_libp2p_keypair, setup_net, setup_net_interface, NetInterface, + NetInterfaceInverted, NetRepositoryFactory, TestNetInterface, +}; use e3_request::E3Router; use e3_sortition::{ CiphernodeSelector, CiphernodeSelectorFactory, FinalizedCommitteesRepositoryFactory, @@ -32,6 +35,7 @@ use e3_sortition::{ use e3_sync::sync; use e3_utils::SharedRng; use e3_zk_prover::{setup_zk_actors, ZkBackend}; +use libp2p::PeerId; use std::time::Duration; use std::{collections::HashMap, path::PathBuf, sync::Arc}; use tracing::{error, info}; @@ -470,27 +474,26 @@ impl CiphernodeBuilder { )) } - info!("building..."); - + info!("E3Router building..."); e3_builder.build().await?; - let (join_handle, peer_id) = if let Some(net_config) = self.net_config { + let topic = "enclave-gossip"; + let out = if let Some(net_config) = self.net_config { let repositories = store.repositories(); - setup_net( - bus.clone(), - net_config.peers, - &self.cipher, - net_config.quic_port, - repositories.libp2p_keypair(), - eventstore_ts, - ) - .await? 
+ let keypair = setup_libp2p_keypair(repositories.libp2p_keypair(), &self.cipher).await?; + let peer_id = keypair.peer_id(); + let handle = + setup_net_interface(topic, keypair, net_config.peers, net_config.quic_port)?; + (peer_id, handle, None) } else { - ( - tokio::spawn(std::future::ready(Ok(()))), - "-not set-".to_string(), - ) + let (handle, inverted) = create_test_net_interface(); + let peer_id = PeerId::random(); + let test_handle = Some(inverted); + (peer_id, handle, test_handle) }; + let (peer_id, handle, test_handle) = out; + + setup_net(topic, bus.clone(), eventstore_ts, handle)?; // Run the sync routine sync( @@ -509,7 +512,7 @@ impl CiphernodeBuilder { history, errors, peer_id, - join_handle, + test_handle, )) } diff --git a/crates/entrypoint/src/helpers/shutdown.rs b/crates/entrypoint/src/helpers/shutdown.rs index 353ced3c75..b81f126092 100644 --- a/crates/entrypoint/src/helpers/shutdown.rs +++ b/crates/entrypoint/src/helpers/shutdown.rs @@ -7,53 +7,20 @@ use e3_ciphernode_builder::CiphernodeHandle; use e3_events::{prelude::*, Shutdown}; use std::time::Duration; -use tokio::{ - select, - signal::unix::{signal, SignalKind}, -}; +use tokio::signal::unix::{signal, SignalKind}; use tracing::{error, info}; pub async fn listen_for_shutdown(node: CiphernodeHandle) { - let (bus, mut handle) = node.split(); + let bus = node.bus; let mut sigterm = signal(SignalKind::terminate()).expect("Failed to create SIGTERM signal stream"); - select! { - _ = sigterm.recv() => { - info!("SIGTERM received, initiating graceful shutdown..."); + sigterm.recv().await; + info!("SIGTERM received, initiating graceful shutdown..."); - // Stop the actor system - match bus.publish_without_context(Shutdown){ - Ok(_) => (), - Err(e) => error!("Shutdown failed to publish! 
{e}") - } - - // Wait for all events to propagate - tokio::time::sleep(Duration::from_secs(2)).await; - - // Abort the spawned task - handle.abort(); - - // Wait for all actor processes to disconnect - tokio::time::sleep(Duration::from_secs(2)).await; - - // Wait for the task to finish - let _ = handle.await; - - info!("Graceful shutdown complete"); - - } - result = &mut handle => { - match result { - Ok(Ok(_)) => { - info!("Completed"); - } - Ok(Err(e)) => { - error!("Failed: {}", e); - } - Err(e) => { - error!("Panicked: {}", e); - } - } - } + if let Err(e) = bus.publish_without_context(Shutdown) { + error!("Shutdown failed to publish! {e}"); } + + tokio::time::sleep(Duration::from_secs(2)).await; + info!("Graceful shutdown complete"); } diff --git a/crates/net/src/bin/p2p_test.rs b/crates/net/src/bin/p2p_test.rs index 6621795184..3b8d0b6247 100644 --- a/crates/net/src/bin/p2p_test.rs +++ b/crates/net/src/bin/p2p_test.rs @@ -7,7 +7,7 @@ use anyhow::{Context, Result}; use e3_events::CorrelationId; use e3_net::events::{GossipData, NetCommand, NetEvent}; -use e3_net::{ContentHash, NetInterface}; +use e3_net::{ContentHash, Libp2pKeypair, Libp2pNetInterface, NetInterface}; use e3_utils::ArcBytes; use libp2p::gossipsub::IdentTopic; use std::sync::atomic::{AtomicU8, Ordering}; @@ -221,11 +221,11 @@ impl TestPeer { let topic = IdentTopic::new("test"); let peers: Vec = dial_to.iter().cloned().collect(); - let id = libp2p::identity::Keypair::generate_ed25519(); - - let mut peer = NetInterface::new(&id, peers, udp_port, &topic.to_string())?; - let tx = peer.tx(); - let mut rx = peer.rx(); + let keypair = Libp2pKeypair::generate(); + let mut peer = Libp2pNetInterface::new(keypair, peers, udp_port, &topic.to_string())?; + let handle = peer.handle(); + let tx = handle.tx(); + let mut rx = handle.rx(); tokio::spawn({ let name = name.clone(); diff --git a/crates/net/src/document_publisher.rs b/crates/net/src/document_publisher.rs index b2ce0d5451..3780f3a585 100644 --- 
a/crates/net/src/document_publisher.rs +++ b/crates/net/src/document_publisher.rs @@ -40,7 +40,7 @@ use tracing::{debug, info}; const KADEMLIA_PUT_TIMEOUT: Duration = Duration::from_secs(30); const KADEMLIA_GET_TIMEOUT: Duration = Duration::from_secs(30); const KADEMLIA_BROADCAST_TIMEOUT: Duration = Duration::from_secs(30); -/// DocumentPublisher is an actor that monitors events from both the NetInterface and the Enclave +/// DocumentPublisher is an actor that monitors events from both the Libp2pNetInterface and the Enclave /// EventBus in order to manage document publishing interactions. In particular this involves the /// interactions of publishing a document and listening for notifications, determining if the node /// is interested in a specific document and fetching the document to broadcast on the local event @@ -48,9 +48,9 @@ const KADEMLIA_BROADCAST_TIMEOUT: Duration = Duration::from_secs(30); pub struct DocumentPublisher { /// Enclave EventBus bus: BusHandle, - /// NetCommand sender to forward commands to the NetInterface + /// NetCommand sender to forward commands to the Libp2pNetInterface tx: mpsc::Sender, - /// NetEvent receiver to resubscribe for events from the NetInterface. This is in an Arc so + /// NetEvent receiver to resubscribe for events from the Libp2pNetInterface. This is in an Arc so /// that we do not do excessive resubscribes without actually listening for events. 
rx: Arc>, /// The gossipsub broadcast topic @@ -333,7 +333,7 @@ pub async fn handle_document_published_notification( Ok(()) } -/// Call DhtPutRecord Command on the NetInterface and handle the results +/// Call DhtPutRecord Command on the Libp2pNetInterface and handle the results async fn put_record( net_cmds: mpsc::Sender, net_events: Arc>, @@ -363,7 +363,7 @@ async fn put_record( .await } -/// Call DhtPutRecord Command on the NetInterface and handle the results +/// Call DhtPutRecord Command on the Libp2pNetInterface and handle the results async fn get_record( net_cmds: mpsc::Sender, net_events: Arc>, @@ -389,7 +389,7 @@ async fn get_record( .await } -/// Broadcasts document published notification on NetInterface +/// Broadcasts document published notification on Libp2pNetInterface async fn broadcast_document_published_notification( net_cmds: mpsc::Sender, net_events: Arc>, @@ -745,7 +745,7 @@ mod tests { value: value.clone(), })?; - // 2. Document publisher should have asked the NetInterface to put the doc on Kademlia + // 2. Document publisher should have asked the Libp2pNetInterface to put the doc on Kademlia let Some(NetCommand::DhtPutRecord { correlation_id, expires, @@ -934,7 +934,7 @@ mod tests { ..CiphernodeSelected::default() })?; - // 2. Dispatch a NetEvent from the NetInterface signaling that a document was published + // 2. Dispatch a NetEvent from the Libp2pNetInterface signaling that a document was published net_evt_tx.send(NetEvent::GossipData( GossipData::DocumentPublishedNotification(DocumentPublishedNotification { key: ContentHash::from_content(&b"wrong document".to_vec()), @@ -952,7 +952,7 @@ mod tests { let result = timeout(Duration::from_secs(1), net_cmd_rx.recv()).await; assert!(result.is_err(), "Expected timeout but received a message"); - // 4. Dispatch a NetEvent from the NetInterface signaling that a document we ARE interested + // 4. 
Dispatch a NetEvent from the Libp2pNetInterface signaling that a document we ARE interested // in was published net_evt_tx.send(NetEvent::GossipData( GossipData::DocumentPublishedNotification(DocumentPublishedNotification { diff --git a/crates/net/src/events.rs b/crates/net/src/events.rs index d1f9be5a91..4083c2b0d4 100644 --- a/crates/net/src/events.rs +++ b/crates/net/src/events.rs @@ -152,7 +152,7 @@ pub struct OutgoingRequestFailed { #[rtype("()")] pub struct AllPeersDialed; -/// NetInterface Commands are sent to the network peer over a mspc channel +/// Libp2pNetInterface Commands are sent to the network peer over a mspc channel #[derive(Debug, Clone)] pub enum NetCommand { /// Publish message to gossipsub @@ -334,7 +334,7 @@ where // We don't have access to this later and we cannot clone command let debug_cmd = format!("{:?}", command); - // Send the command to NetInterface + // Send the command to Libp2pNetInterface trace!( "call_and_await_response: sending command {:?} with timeout {:?}", command, diff --git a/crates/net/src/lib.rs b/crates/net/src/lib.rs index 1f97f2270d..ed496bc56a 100644 --- a/crates/net/src/lib.rs +++ b/crates/net/src/lib.rs @@ -15,6 +15,7 @@ mod net_event_batch; mod net_event_buffer; mod net_event_translator; mod net_interface; +mod net_interface_handle; mod net_sync_manager; mod repo; @@ -22,31 +23,25 @@ use std::sync::Arc; use actix::Recipient; use anyhow::bail; +use anyhow::Result; pub use cid::ContentHash; pub use document_publisher::*; use e3_crypto::Cipher; use e3_data::Repository; use e3_events::{run_once, BusHandle, EffectsEnabled, EventStoreQueryBy, EventSubscriber, TsAgg}; -use libp2p::identity::ed25519; use net_event_buffer::NetEventBuffer; pub use net_event_translator::*; pub use net_interface::*; +pub use net_interface_handle::*; use net_sync_manager::NetSyncManager; pub use repo::*; +use tracing::error; use tracing::{info, instrument}; -/// Spawn a Libp2p interface and hook it up to this actor -#[instrument(name = 
"libp2p", skip_all)] -pub async fn setup_net( - bus: BusHandle, - peers: Vec, - cipher: &Arc, - quic_port: u16, +pub async fn setup_libp2p_keypair( repository: Repository>, - eventstore: impl Into>>, -) -> anyhow::Result<(tokio::task::JoinHandle>, String)> { - let topic = "enclave-gossip"; - + cipher: &Arc, +) -> Result { // Get existing keypair or generate a new one let mut bytes = match repository.read().await? { Some(bytes) => { @@ -55,14 +50,36 @@ pub async fn setup_net( } None => bail!("No network keypair found in repository, please generate a new one using `enclave net generate-key`"), }; + Libp2pKeypair::try_from_bytes(&mut bytes) +} + +pub fn setup_net_interface( + topic: &str, + keypair: Libp2pKeypair, + peers: Vec, + quic_port: u16, +) -> Result { + let mut interface = Libp2pNetInterface::new(keypair, peers, Some(quic_port), topic)?; + + let handle = interface.handle(); - // Create peer from keypair - let keypair: libp2p::identity::Keypair = - ed25519::Keypair::try_from_bytes(&mut bytes)?.try_into()?; + actix::spawn(async move { + if let Err(e) = interface.start().await { + error!("{e}"); + } + }); - // Generate a new interface to read and write peer events to - let mut interface = NetInterface::new(&keypair, peers, Some(quic_port), topic)?; + Ok(handle) +} +/// Spawn a Libp2p interface and hook it up to this actor +#[instrument(name = "libp2p", skip_all)] +pub fn setup_net( + topic: &str, + bus: BusHandle, + eventstore: impl Into>>, + interface: impl NetInterface, +) -> Result<()> { // NOTE: Pass the unbuffered rx to SyncManager as it must operate before live events are // processed let _net_sync = NetSyncManager::setup( @@ -90,8 +107,5 @@ pub async fn setup_net( bus.subscribe(e3_events::EventType::EffectsEnabled, runner.recipient()); - // TODO: actix::spawn might avoid all the cleanup code - let handle = tokio::spawn(async move { Ok(interface.start().await?) 
}); - - Ok((handle, keypair.public().to_peer_id().to_string())) + Ok(()) } diff --git a/crates/net/src/net_event_translator.rs b/crates/net/src/net_event_translator.rs index 4747f63723..cb08ad5662 100644 --- a/crates/net/src/net_event_translator.rs +++ b/crates/net/src/net_event_translator.rs @@ -23,7 +23,7 @@ use tracing::{info, trace, warn}; // should do this as this functionality is not global and ramifications should stay local to here /// NetEventTranslator Actor converts between EventBus events and Libp2p events forwarding them to a -/// NetInterface for propagation over the p2p network +/// Libp2pNetInterface for propagation over the p2p network pub struct NetEventTranslator { bus: BusHandle, tx: mpsc::Sender, @@ -38,7 +38,7 @@ impl Actor for NetEventTranslator { } } -/// Libp2pEvent is used to send data to the NetInterface from the NetEventTranslator +/// Libp2pEvent is used to send data to the Libp2pNetInterface from the NetEventTranslator #[derive(Message, Clone, Debug, PartialEq, Eq)] #[rtype(result = "()")] struct LibP2pEvent(pub GossipData); diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index cc4c04f0fb..59d3d55d7e 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -8,6 +8,7 @@ use crate::{ correlator::Correlator, direct_responder::{ChannelType, DirectResponder}, events::{IncomingResponse, OutgoingRequest, ProtocolResponse}, + net_interface_handle::NetInterfaceHandle, }; use anyhow::{bail, Context, Result}; use e3_events::CorrelationId; @@ -17,7 +18,7 @@ use libp2p::{ futures::StreamExt, gossipsub, identify::{Behaviour as IdentifyBehaviour, Config as IdentifyConfig}, - identity::Keypair, + identity::{ed25519, Keypair}, kad::{ self, store::{MemoryStore, MemoryStoreConfig, RecordStore}, @@ -38,7 +39,10 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use tokio::{select, sync::broadcast, sync::mpsc}; +use tokio::{ + select, + sync::{broadcast, mpsc}, +}; use tracing::{debug, error, 
info, trace, warn}; const PROTOCOL_NAME: StreamProtocol = StreamProtocol::new("/enclave/kad/1.0.0"); @@ -68,7 +72,7 @@ pub struct NodeBehaviour { /// Manage the peer to peer connection. This struct wraps a libp2p Swarm and enables communication /// with it using channels. -pub struct NetInterface { +pub struct Libp2pNetInterface { /// The Libp2p Swarm instance swarm: Swarm, /// A list of peers to automatically dial @@ -79,15 +83,15 @@ pub struct NetInterface { topic: gossipsub::IdentTopic, /// Broadcast channel to report NetEvents to listeners event_tx: broadcast::Sender, - /// Transmission channel to send NetCommands to the NetInterface + /// Transmission channel to send NetCommands to the Libp2pNetInterface cmd_tx: mpsc::Sender, /// Local receiver to process NetCommands from cmd_rx: mpsc::Receiver, } -impl NetInterface { +impl Libp2pNetInterface { pub fn new( - id: &Keypair, + id: Libp2pKeypair, peers: Vec, udp_port: Option, topic: &str, @@ -95,7 +99,7 @@ impl NetInterface { let (event_tx, _) = broadcast::channel(1000); // TODO : tune this param let (cmd_tx, cmd_rx) = mpsc::channel(1000); // TODO : tune this param - let swarm = libp2p::SwarmBuilder::with_existing_identity(id.clone()) + let swarm = libp2p::SwarmBuilder::with_existing_identity(id.into_keypair()) .with_tokio() .with_quic() .with_dns() @@ -117,12 +121,8 @@ impl NetInterface { }) } - pub fn rx(&mut self) -> broadcast::Receiver { - self.event_tx.subscribe() - } - - pub fn tx(&self) -> mpsc::Sender { - self.cmd_tx.clone() + pub fn handle(&self) -> NetInterfaceHandle { + NetInterfaceHandle::new(self.cmd_tx.clone(), self.event_tx.subscribe()) } pub async fn start(&mut self) -> Result<()> { @@ -190,6 +190,33 @@ impl NetInterface { } } +pub struct Libp2pKeypair { + keypair: libp2p::identity::Keypair, +} + +impl Libp2pKeypair { + pub fn new(keypair: libp2p::identity::Keypair) -> Self { + Self { keypair } + } + + pub fn generate() -> Self { + let id = libp2p::identity::Keypair::generate_ed25519(); + 
Self::new(id) + } + + pub fn try_from_bytes(bytes: &mut [u8]) -> Result { + let keypair: libp2p::identity::Keypair = + ed25519::Keypair::try_from_bytes(bytes)?.try_into()?; + Ok(Self { keypair }) + } + + pub fn into_keypair(self) -> libp2p::identity::Keypair { + self.keypair + } + pub fn peer_id(&self) -> PeerId { + self.keypair.public().to_peer_id() + } +} /// Create the libp2p behaviour fn create_behaviour( key: &Keypair, @@ -569,7 +596,7 @@ async fn process_swarm_command( Ok(()) } NetCommand::Shutdown => { - unreachable!("shutdown command must be handled in NetInterface::start") + unreachable!("shutdown command must be handled in Libp2pNetInterface::start") } } } diff --git a/crates/net/src/net_interface_handle.rs b/crates/net/src/net_interface_handle.rs new file mode 100644 index 0000000000..b708a3b96c --- /dev/null +++ b/crates/net/src/net_interface_handle.rs @@ -0,0 +1,187 @@ +use std::time::Duration; + +use tokio::{ + sync::{broadcast, mpsc}, + time::sleep, +}; +use tracing::error; + +use crate::events::{NetCommand, NetEvent}; + +#[derive(Debug)] +pub struct NetInterfaceHandle { + tx: mpsc::Sender, + rx: broadcast::Receiver, +} +impl NetInterfaceHandle { + pub fn new(tx: mpsc::Sender, rx: broadcast::Receiver) -> Self { + Self { tx, rx } + } +} + +pub trait NetInterface: Sized { + fn tx(&self) -> mpsc::Sender; + fn rx(&self) -> broadcast::Receiver; + fn handle(&self) -> NetInterfaceHandle { + NetInterfaceHandle::from(self) + } +} + +#[derive(Debug, Clone)] +pub struct NetInterfaceInvertedHandle { + cmd_tx: broadcast::Sender, + tx: mpsc::Sender, + event_tx: broadcast::Sender, +} + +impl NetInterfaceHandle { + pub fn from(interface: &impl NetInterface) -> Self { + Self { + tx: interface.tx(), + rx: interface.rx(), + } + } +} +impl NetInterface for NetInterfaceHandle { + fn rx(&self) -> broadcast::Receiver { + self.rx.resubscribe() + } + + fn tx(&self) -> mpsc::Sender { + self.tx.clone() + } +} +pub fn create_test_net_interface() -> (NetInterfaceHandle, 
NetInterfaceInvertedHandle) { + let (m_cmd_tx, mut m_cmd_rx) = mpsc::channel::(1000); + let (b_evt_tx, _) = broadcast::channel(1000); + let (b_cmd_tx, _) = broadcast::channel(1000); + + let tx = b_cmd_tx.clone(); + let startup_event_tx = b_evt_tx.clone(); + let keep_alive = b_cmd_tx.subscribe(); + + tokio::spawn(async move { + let _rx_guard = keep_alive; + sleep(Duration::from_millis(100)).await; + let _ = startup_event_tx.send(NetEvent::AllPeersDialed); + while let Some(cmd) = m_cmd_rx.recv().await { + let _ = tx.send(cmd); + } + }); + + let handle = NetInterfaceHandle { + tx: m_cmd_tx.clone(), + rx: b_evt_tx.subscribe(), + }; + + let inverted = NetInterfaceInvertedHandle { + tx: m_cmd_tx, + cmd_tx: b_cmd_tx, + event_tx: b_evt_tx, + }; + + (handle, inverted) +} + +#[derive(Clone)] +pub struct TestNetInterface { + m_cmd_tx: mpsc::Sender, + b_cmd_tx: broadcast::Sender, + b_evt_tx: broadcast::Sender, +} + +impl TestNetInterface { + pub fn new() -> Self { + let (m_cmd_tx, mut m_cmd_rx) = mpsc::channel::(1000); + let (b_evt_tx, _) = broadcast::channel(1000); + let (b_cmd_tx, _) = broadcast::channel(1000); + + // Bridge mpsc commands to broadcast so the mock can subscribe + let tx = b_cmd_tx.clone(); + let startup_event_tx = b_evt_tx.clone(); + tokio::spawn(async move { + // Simulate dial-in delay like TestNetInterface + sleep(Duration::from_millis(100)).await; + let _ = startup_event_tx.send(NetEvent::AllPeersDialed); + + while let Some(cmd) = m_cmd_rx.recv().await { + if let Err(e) = tx.send(cmd.clone()) { + error!("Error sending on channel. cmd={cmd:?} with error={e}"); + } + } + println!("***** ERROR CLOSING CHANNEL!!!! 
****"); + }); + + Self { + m_cmd_tx, + b_evt_tx, + b_cmd_tx, + } + } +} + +impl NetInterface for TestNetInterface { + fn tx(&self) -> mpsc::Sender { + self.m_cmd_tx.clone() + } + + fn rx(&self) -> broadcast::Receiver { + self.b_evt_tx.subscribe() + } +} + +impl NetInterfaceInverted for TestNetInterface { + fn tx(&self) -> mpsc::Sender { + self.m_cmd_tx.clone() + } + fn cmd_tx(&self) -> broadcast::Sender { + self.b_cmd_tx.clone() + } + + fn cmd_rx(&self) -> broadcast::Receiver { + self.b_cmd_tx.subscribe() + } + + fn event_tx(&self) -> broadcast::Sender { + self.b_evt_tx.clone() + } + + fn event_rx(&self) -> broadcast::Receiver { + self.b_evt_tx.subscribe() + } +} + +pub trait NetInterfaceInverted: Sized { + fn tx(&self) -> mpsc::Sender; + fn event_tx(&self) -> broadcast::Sender; + fn event_rx(&self) -> broadcast::Receiver; + fn cmd_tx(&self) -> broadcast::Sender; + fn cmd_rx(&self) -> broadcast::Receiver; + + fn into_handle_inverted(self) -> NetInterfaceInvertedHandle { + NetInterfaceInvertedHandle { + tx: self.tx(), + event_tx: self.event_tx(), + cmd_tx: self.cmd_tx(), + } + } +} + +impl NetInterfaceInverted for NetInterfaceInvertedHandle { + fn tx(&self) -> mpsc::Sender { + self.tx.clone() + } + + fn cmd_rx(&self) -> broadcast::Receiver { + self.cmd_tx.subscribe() + } + fn event_tx(&self) -> broadcast::Sender { + self.event_tx.clone() + } + fn cmd_tx(&self) -> broadcast::Sender { + self.cmd_tx.clone() + } + fn event_rx(&self) -> broadcast::Receiver { + self.event_tx.subscribe() + } +} diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 70df7cc002..030713934b 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -55,7 +55,7 @@ pub struct SyncRequestSucceeded { pub struct NetSyncManager { /// Enclave EventBus bus: BusHandle, - /// NetCommand sender to forward commands to the NetInterface + /// NetCommand sender to forward commands to the Libp2pNetInterface tx: mpsc::Sender, /// NetEvents 
receiver to receive events rx: Arc>, diff --git a/crates/test-helpers/Cargo.toml b/crates/test-helpers/Cargo.toml index 6cf492e796..8d8c85b31f 100644 --- a/crates/test-helpers/Cargo.toml +++ b/crates/test-helpers/Cargo.toml @@ -31,6 +31,7 @@ e3-sortition = { workspace = true } fhe = { workspace = true } fhe-traits = { workspace = true } hex = { workspace = true } +libp2p = { workspace = true } rand = { workspace = true } rand_chacha = { workspace = true } tokio = { workspace = true } diff --git a/crates/test-helpers/src/ciphernode_system.rs b/crates/test-helpers/src/ciphernode_system.rs index 042a34da09..26ea767588 100644 --- a/crates/test-helpers/src/ciphernode_system.rs +++ b/crates/test-helpers/src/ciphernode_system.rs @@ -199,7 +199,7 @@ mod tests { use e3_ciphernode_builder::EventSystem; use e3_data::InMemStore; use e3_events::{EventBus, EventBusConfig}; - use tokio::task::JoinHandle; + use libp2p::PeerId; async fn mock_setup_node(address: String) -> Result { // Create mock actors for the test @@ -212,7 +212,6 @@ mod tests { .with_event_bus(bus) .handle()? 
.enable("test"); - let handle: JoinHandle> = tokio::spawn(async { Ok(()) }); Ok(CiphernodeHandle { address, @@ -220,8 +219,8 @@ mod tests { bus, history: Some(history), errors: Some(errors), - join_handle: handle, - peer_id: "-unknown peer id-".to_string(), + peer_id: PeerId::random(), + net_simulate_adaptor: None, }) } diff --git a/crates/test-helpers/src/lib.rs b/crates/test-helpers/src/lib.rs index a90588bada..cae16da185 100644 --- a/crates/test-helpers/src/lib.rs +++ b/crates/test-helpers/src/lib.rs @@ -6,35 +6,49 @@ pub mod application; pub mod ciphernode_system; +pub mod libp2p_mock; mod plaintext_writer; mod public_key_writer; pub mod usecase_helpers; mod utils; use actix::prelude::*; use alloy::primitives::Address; -use anyhow::*; +use anyhow::Result; use e3_ciphernode_builder::{CiphernodeHandle, EventSystem}; use e3_events::{ BusHandle, CiphernodeAdded, Enabled, EnclaveEvent, EnclaveEventData, EventBus, EventBusConfig, - EventContextAccessors, EventPublisher, EventSubscriber, EventType, HistoryCollector, Seed, - Sequenced, Subscribe, + EventContextAccessors, EventPublisher, EventType, HistoryCollector, Seed, Sequenced, Subscribe, }; use e3_fhe_params::BfvParamSet; use e3_fhe_params::DEFAULT_BFV_PRESET; use e3_fhe_params::{build_bfv_params_arc, create_deterministic_crp_from_default_seed}; -use e3_net::{DocumentPublisher, NetEventTranslator}; +use e3_net::events::NetCommand; +use e3_net::events::NetEvent; +use e3_net::ContentHash; +use e3_net::NetInterfaceInvertedHandle; +use e3_net::{DocumentPublisher, NetEventTranslator, NetInterfaceInverted}; +use e3_utils::ArcBytes; use e3_utils::SharedRng; use fhe::bfv::{BfvParameters, Ciphertext, Encoding, Plaintext, PublicKey}; use fhe::mbfv::CommonRandomPoly; use fhe_traits::Serialize; use fhe_traits::{FheEncoder, FheEncrypter}; +use libp2p::gossipsub::MessageId; +use libp2p::kad::GetRecordError; +use libp2p::kad::RecordKey; +use libp2p_mock::Libp2pMock; pub use plaintext_writer::*; pub use public_key_writer::*; use 
rand::Rng; use rand_chacha::rand_core::SeedableRng; use rand_chacha::ChaCha20Rng; +use std::collections::HashMap; use std::sync::Arc; +use tokio::sync::broadcast; +use tracing::error; +use tracing::info; use tracing::trace; +use tracing::warn; pub use utils::*; pub fn create_shared_rng_from_u64(value: u64) -> Arc> { @@ -153,21 +167,102 @@ impl Handler> for SimulatedNetPipe { /// │ FIL │───────────────┘ /// └─────┘ /// ``` -pub fn simulate_libp2p_net(nodes: &[CiphernodeHandle]) { +pub async fn simulate_libp2p_net(nodes: &[CiphernodeHandle]) { + let mock = Libp2pMock::new(); for node in nodes.iter() { - let source = node.bus(); - for (_, node) in nodes.iter().enumerate() { - let dest = node.bus(); - if source != dest { - let pipe = SimulatedNetPipe { dest: dest.clone() }.start(); - source.subscribe(EventType::All, pipe.into()); - } else { - trace!("Source = Dest! Not piping bus to itself"); - } - } + let interface = node + .net_simulate_adaptor + .clone() + .expect("net_simulate_adaptor must be set for simulated nodes"); + mock.add_node(node.peer_id, interface).await; } } +// fn pipe(src: NetInterfaceInvertedHandle, dest: NetInterfaceInvertedHandle) { +// let src_event_tx = src.event_tx(); +// let dest_event_tx = dest.event_tx(); +// let mut src_cmd_rx = src.cmd_rx(); +// +// tokio::spawn(async move { +// let mut store: HashMap = HashMap::new(); +// +// loop { +// match src_cmd_rx.recv().await { +// Ok(NetCommand::GossipPublish { +// data, +// correlation_id, +// .. +// }) => { +// if let Err(e) = dest_event_tx.send(NetEvent::GossipData(data)) { +// error!("pipe: failed to forward GossipData to dest: {e}"); +// } +// +// let message_id = MessageId::new(&format!("{correlation_id:?}").into_bytes()); +// if let Err(e) = src_event_tx.send(NetEvent::GossipPublished { +// correlation_id, +// message_id, +// }) { +// error!("pipe: failed to send GossipPublished to src: {e}"); +// } +// } +// Ok(NetCommand::DhtPutRecord { +// correlation_id, +// key, +// value, +// .. 
+// }) => { +// store.insert(key.clone(), value.clone()); +// +// if let Err(e) = dest_event_tx.send(NetEvent::DhtGetRecordSucceeded { +// key: key.clone(), +// correlation_id, +// value, +// }) { +// error!("pipe: failed to forward DhtGetRecordSucceeded to dest: {e}"); +// } +// +// if let Err(e) = src_event_tx.send(NetEvent::DhtPutRecordSucceeded { +// key, +// correlation_id, +// }) { +// error!("pipe: failed to send DhtPutRecordSucceeded to src: {e}"); +// } +// } +// Ok(NetCommand::DhtGetRecord { +// correlation_id, +// key, +// }) => { +// if let Some(value) = store.get(&key).cloned() { +// if let Err(e) = src_event_tx.send(NetEvent::DhtGetRecordSucceeded { +// key, +// correlation_id, +// value, +// }) { +// error!("pipe: failed to send DhtGetRecordSucceeded to src: {e}"); +// } +// } else { +// if let Err(e) = src_event_tx.send(NetEvent::DhtGetRecordError { +// correlation_id, +// error: GetRecordError::NotFound { +// key: RecordKey::new(&key.into_inner()), +// closest_peers: vec![], +// }, +// }) { +// error!("pipe: failed to send DhtGetRecordError to src: {e}"); +// } +// } +// } +// Err(broadcast::error::RecvError::Lagged(n)) => { +// warn!("pipe: src cmd receiver lagged by {n} messages"); +// continue; +// } +// Err(_) => break, +// _ => continue, +// } +// } +// }); +// } + /// Creates test eth addresses /// NOTE: THESE ARE NOT ACTUAL ADDRESSES JUST RANDOM DATA pub fn create_random_eth_addrs(how_many: u32) -> Vec { @@ -224,7 +319,7 @@ pub fn encrypt_ciphertext( .map(|pt| { pubkey .try_encrypt(&pt, &mut rng) - .map_err(|e| anyhow!("{e}")) + .map_err(|e| anyhow::anyhow!("{e}")) }) .collect::>>()?; Ok((ciphertext, plaintext)) diff --git a/crates/test-helpers/src/libp2p_mock.rs b/crates/test-helpers/src/libp2p_mock.rs new file mode 100644 index 0000000000..7042c5ddc0 --- /dev/null +++ b/crates/test-helpers/src/libp2p_mock.rs @@ -0,0 +1,121 @@ +use std::{collections::HashMap, sync::Arc}; + +use e3_net::{ + events::{NetCommand, NetEvent}, + ContentHash, 
NetInterfaceInverted, NetInterfaceInvertedHandle, +}; +use e3_utils::ArcBytes; +use libp2p::{gossipsub::MessageId, kad::GetRecordError, PeerId}; +use tokio::sync::{broadcast, RwLock}; +use tracing::{error, warn}; + +#[derive(Debug, Clone)] +pub struct Libp2pMock { + store: Arc>>, + nodes: Arc>>, +} + +impl Libp2pMock { + pub fn new() -> Self { + Self { + store: Arc::new(RwLock::new(HashMap::new())), + nodes: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub async fn add_node(&self, peer_id: PeerId, handle: NetInterfaceInvertedHandle) { + self.nodes.write().await.insert(peer_id, handle.clone()); + + let src_event_tx = handle.event_tx(); + let mut src_cmd_rx = handle.cmd_rx(); + let store = self.store.clone(); + let nodes = self.nodes.clone(); + let self_peer_id = peer_id; + + tokio::spawn(async move { + loop { + match src_cmd_rx.recv().await { + Ok(NetCommand::GossipPublish { + data, + correlation_id, + .. + }) => { + // Broadcast to all other nodes + let peers = nodes.read().await; + for (id, peer) in peers.iter() { + if *id == self_peer_id { + continue; + } + if let Err(e) = peer.event_tx().send(NetEvent::GossipData(data.clone())) + { + error!("Libp2pMock: failed to forward GossipData to {id}: {e}"); + } + } + + let message_id = + MessageId::new(&format!("{correlation_id:?}").into_bytes()); + if let Err(e) = src_event_tx.send(NetEvent::GossipPublished { + correlation_id, + message_id, + }) { + error!("Libp2pMock: failed to send GossipPublished: {e}"); + } + } + Ok(NetCommand::DhtPutRecord { + correlation_id, + key, + value, + .. 
+ }) => { + store.write().await.insert(key.clone(), value); + + if let Err(e) = src_event_tx.send(NetEvent::DhtPutRecordSucceeded { + key, + correlation_id, + }) { + error!("Libp2pMock: failed to send DhtPutRecordSucceeded: {e}"); + } + } + Ok(NetCommand::DhtGetRecord { + correlation_id, + key, + }) => { + let maybe_value = store.read().await.get(&key).cloned(); + + if let Some(value) = maybe_value { + if let Err(e) = src_event_tx.send(NetEvent::DhtGetRecordSucceeded { + key, + correlation_id, + value, + }) { + error!("Libp2pMock: failed to send DhtGetRecordSucceeded: {e}"); + } + } else { + if let Err(e) = src_event_tx.send(NetEvent::DhtGetRecordError { + correlation_id, + error: GetRecordError::NotFound { + key: libp2p::kad::RecordKey::new(&key.into_inner()), + closest_peers: vec![], + }, + }) { + error!("Libp2pMock: failed to send DhtGetRecordError: {e}"); + } + } + } + Ok(NetCommand::DhtRemoveRecords { keys }) => { + let mut s = store.write().await; + for key in keys { + s.remove(&key); + } + } + Err(broadcast::error::RecvError::Lagged(n)) => { + warn!("Libp2pMock: cmd receiver lagged by {n} messages"); + continue; + } + Err(_) => break, + _ => continue, + } + } + }); + } +} From e4ac1b4fec416462c1742858bfe04e8abf92d0aa Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 10:47:40 +0000 Subject: [PATCH 27/50] fix not awaiting async funnction --- crates/net/src/document_publisher.rs | 2 +- crates/test-helpers/src/ciphernode_system.rs | 2 +- crates/test-helpers/src/lib.rs | 1 + crates/test-helpers/src/libp2p_mock.rs | 4 ++++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/net/src/document_publisher.rs b/crates/net/src/document_publisher.rs index 3780f3a585..6e2180f536 100644 --- a/crates/net/src/document_publisher.rs +++ b/crates/net/src/document_publisher.rs @@ -363,7 +363,7 @@ async fn put_record( .await } -/// Call DhtPutRecord Command on the Libp2pNetInterface and handle the results +/// Call DhtGetRecord Command on the 
Libp2pNetInterface and handle the results async fn get_record( net_cmds: mpsc::Sender, net_events: Arc>, diff --git a/crates/test-helpers/src/ciphernode_system.rs b/crates/test-helpers/src/ciphernode_system.rs index 26ea767588..9b5de5bae5 100644 --- a/crates/test-helpers/src/ciphernode_system.rs +++ b/crates/test-helpers/src/ciphernode_system.rs @@ -81,7 +81,7 @@ impl<'a> CiphernodeSystemBuilder<'a> { } if self.simulate { - simulate_libp2p_net(&nodes); + simulate_libp2p_net(&nodes).await; } for then_fn in self.thens { diff --git a/crates/test-helpers/src/lib.rs b/crates/test-helpers/src/lib.rs index cae16da185..4906fd8ba0 100644 --- a/crates/test-helpers/src/lib.rs +++ b/crates/test-helpers/src/lib.rs @@ -168,6 +168,7 @@ impl Handler> for SimulatedNetPipe { /// └─────┘ /// ``` pub async fn simulate_libp2p_net(nodes: &[CiphernodeHandle]) { + println!("MOCK: simulate_libp2p_net"); let mock = Libp2pMock::new(); for node in nodes.iter() { let interface = node diff --git a/crates/test-helpers/src/libp2p_mock.rs b/crates/test-helpers/src/libp2p_mock.rs index 7042c5ddc0..294f9dfb7e 100644 --- a/crates/test-helpers/src/libp2p_mock.rs +++ b/crates/test-helpers/src/libp2p_mock.rs @@ -33,6 +33,7 @@ impl Libp2pMock { let self_peer_id = peer_id; tokio::spawn(async move { + println!("MOCK: SPAWNING TASK!"); loop { match src_cmd_rx.recv().await { Ok(NetCommand::GossipPublish { @@ -40,12 +41,15 @@ impl Libp2pMock { correlation_id, .. 
}) => { + println!("MOCK: RECEIVED GOSSIP PUBLISH..."); + // Broadcast to all other nodes let peers = nodes.read().await; for (id, peer) in peers.iter() { if *id == self_peer_id { continue; } + println!("MOCK: FORWARDING GOSSIP PUBLISH..."); if let Err(e) = peer.event_tx().send(NetEvent::GossipData(data.clone())) { error!("Libp2pMock: failed to forward GossipData to {id}: {e}"); From 725a45286e7d18a53751666e173c82a42164e499 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 10:49:11 +0000 Subject: [PATCH 28/50] fix pnpm lock --- pnpm-lock.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 75bf516c4d..380cb05f77 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -712,7 +712,7 @@ importers: version: 5.3.0 '@risc0/ethereum': specifier: file:lib/risc0-ethereum - version: file:templates/default/lib/risc0-ethereum + version: risc0-ethereum@file:templates/default/lib/risc0-ethereum '@types/chai': specifier: ^4.2.0 version: 4.3.20 @@ -3068,9 +3068,6 @@ packages: '@reown/appkit@1.7.8': resolution: {integrity: sha512-51kTleozhA618T1UvMghkhKfaPcc9JlKwLJ5uV+riHyvSoWPKPRIa5A6M1Wano5puNyW0s3fwywhyqTHSilkaA==} - '@risc0/ethereum@file:templates/default/lib/risc0-ethereum': - resolution: {directory: templates/default/lib/risc0-ethereum, type: directory} - '@rolldown/pluginutils@1.0.0-beta.27': resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==} @@ -8703,6 +8700,9 @@ packages: resolution: {integrity: sha512-5Di9UC0+8h1L6ZD2d7awM7E/T4uA1fJRlx6zk/NvdCCVEoAnFqvHmCuNeIKoCeIixBX/q8uM+6ycDvF8woqosA==} engines: {node: '>= 0.8'} + risc0-ethereum@file:templates/default/lib/risc0-ethereum: + resolution: {directory: templates/default/lib/risc0-ethereum, type: directory} + robust-predicates@3.0.2: resolution: {integrity: sha512-IXgzBWvWQwE6PrDI05OvmXUIruQTcoMDzRsOd5CDvHCVLcLHMTSYvOK5Cm46kWqlV3yAbuSpBZdJ5oP5OUoStg==} @@ -13127,8 +13127,6 @@ 
snapshots: - utf-8-validate - zod - '@risc0/ethereum@file:templates/default/lib/risc0-ethereum': {} - '@rolldown/pluginutils@1.0.0-beta.27': {} '@rollup/plugin-inject@5.0.5(rollup@4.52.5)': @@ -20768,6 +20766,8 @@ snapshots: hash-base: 3.1.2 inherits: 2.0.4 + risc0-ethereum@file:templates/default/lib/risc0-ethereum: {} + robust-predicates@3.0.2: {} rollup@4.52.5: From 691bfadee502e029269d492920a4b81c178d3390 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 11:09:27 +0000 Subject: [PATCH 29/50] update pnpm lock file --- pnpm-lock.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 380cb05f77..6cf8d8e7c1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -19918,7 +19918,7 @@ snapshots: '@noble/hashes': 1.8.0 '@scure/bip32': 1.7.0 '@scure/bip39': 1.6.0 - abitype: 1.1.1(typescript@5.8.3)(zod@3.25.76) + abitype: 1.1.1(typescript@5.8.3)(zod@3.22.4) eventemitter3: 5.0.1 optionalDependencies: typescript: 5.8.3 @@ -21968,7 +21968,7 @@ snapshots: '@noble/hashes': 1.8.0 '@scure/bip32': 1.7.0 '@scure/bip39': 1.6.0 - abitype: 1.1.0(typescript@5.8.3)(zod@3.25.76) + abitype: 1.1.0(typescript@5.8.3)(zod@3.22.4) isows: 1.0.7(ws@8.18.3(bufferutil@4.0.9)(utf-8-validate@5.0.10)) ox: 0.9.6(typescript@5.8.3) ws: 8.18.3(bufferutil@4.0.9)(utf-8-validate@5.0.10) From ccd379d28aafcb24af9c8d112b56beafd9cf4335 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 11:18:41 +0000 Subject: [PATCH 30/50] fix headers --- crates/net/src/net_interface_handle.rs | 6 ++++++ crates/test-helpers/src/lib.rs | 16 +--------------- crates/test-helpers/src/libp2p_mock.rs | 6 ++++++ 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/crates/net/src/net_interface_handle.rs b/crates/net/src/net_interface_handle.rs index b708a3b96c..24c6985041 100644 --- a/crates/net/src/net_interface_handle.rs +++ b/crates/net/src/net_interface_handle.rs @@ -1,3 +1,9 @@ +// SPDX-License-Identifier: LGPL-3.0-only +// +// This file is 
provided WITHOUT ANY WARRANTY; +// without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. + use std::time::Duration; use tokio::{ diff --git a/crates/test-helpers/src/lib.rs b/crates/test-helpers/src/lib.rs index 4906fd8ba0..000c3361cc 100644 --- a/crates/test-helpers/src/lib.rs +++ b/crates/test-helpers/src/lib.rs @@ -22,33 +22,19 @@ use e3_events::{ use e3_fhe_params::BfvParamSet; use e3_fhe_params::DEFAULT_BFV_PRESET; use e3_fhe_params::{build_bfv_params_arc, create_deterministic_crp_from_default_seed}; -use e3_net::events::NetCommand; -use e3_net::events::NetEvent; -use e3_net::ContentHash; -use e3_net::NetInterfaceInvertedHandle; -use e3_net::{DocumentPublisher, NetEventTranslator, NetInterfaceInverted}; -use e3_utils::ArcBytes; +use e3_net::{DocumentPublisher, NetEventTranslator}; use e3_utils::SharedRng; use fhe::bfv::{BfvParameters, Ciphertext, Encoding, Plaintext, PublicKey}; use fhe::mbfv::CommonRandomPoly; use fhe_traits::Serialize; use fhe_traits::{FheEncoder, FheEncrypter}; -use libp2p::gossipsub::MessageId; -use libp2p::kad::GetRecordError; -use libp2p::kad::RecordKey; use libp2p_mock::Libp2pMock; pub use plaintext_writer::*; pub use public_key_writer::*; use rand::Rng; use rand_chacha::rand_core::SeedableRng; use rand_chacha::ChaCha20Rng; -use std::collections::HashMap; use std::sync::Arc; -use tokio::sync::broadcast; -use tracing::error; -use tracing::info; -use tracing::trace; -use tracing::warn; pub use utils::*; pub fn create_shared_rng_from_u64(value: u64) -> Arc> { diff --git a/crates/test-helpers/src/libp2p_mock.rs b/crates/test-helpers/src/libp2p_mock.rs index 294f9dfb7e..ffb518f689 100644 --- a/crates/test-helpers/src/libp2p_mock.rs +++ b/crates/test-helpers/src/libp2p_mock.rs @@ -1,3 +1,9 @@ +// SPDX-License-Identifier: LGPL-3.0-only +// +// This file is provided WITHOUT ANY WARRANTY; +// without even the implied warranty of MERCHANTABILITY +// or FITNESS FOR A PARTICULAR PURPOSE. 
+ use std::{collections::HashMap, sync::Arc}; use e3_net::{ From db06cee9fb811880ebe0d0e9269d5ab9664baafe Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 11:51:49 +0000 Subject: [PATCH 31/50] fix tests --- crates/net/src/direct_responder.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/net/src/direct_responder.rs b/crates/net/src/direct_responder.rs index 801b44fe7a..f45fe2095a 100644 --- a/crates/net/src/direct_responder.rs +++ b/crates/net/src/direct_responder.rs @@ -4,7 +4,7 @@ // without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. -use crate::events::{IncomingResponse, NetCommand, ProtocolResponse, ProtocolResponseChannel}; +use crate::events::{IncomingResponse, NetCommand, ProtocolResponse}; use anyhow::{anyhow, Context, Result}; use e3_utils::OnceTake; use libp2p::request_response::{InboundRequestId, ResponseChannel}; @@ -46,10 +46,11 @@ pub enum ChannelType { /// ``` /// # use tokio::sync::mpsc; /// use e3_net::direct_responder::DirectResponder; +/// # use e3_net::direct_responder::ChannelType; /// # fn main() -> anyhow::Result<()> { /// # let request_id = 6; -/// # let channel_orig = String::from("channel"); -/// # let channel = channel_orig.clone(); +/// # let channel_orig = ChannelType::Test("channel".to_string()); +/// # let channel = ChannelType::Test("channel".to_string()); /// # let (cmd_tx, _rx) = mpsc::channel(400); /// /// // We create a responder and send it over our event channel From 52e1469e865b194d0b95df4a9dd52ede852da4f1 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 12:19:35 +0000 Subject: [PATCH 32/50] update names --- crates/ciphernode-builder/src/ciphernode.rs | 17 ++-- .../src/ciphernode_builder.rs | 21 ++--- crates/net/src/net_interface_handle.rs | 90 +++---------------- crates/test-helpers/src/lib.rs | 7 +- crates/test-helpers/src/libp2p_mock.rs | 6 +- 5 files changed, 37 insertions(+), 104 deletions(-) diff --git 
a/crates/ciphernode-builder/src/ciphernode.rs b/crates/ciphernode-builder/src/ciphernode.rs index 77dcdba75a..ddf1bfa7f9 100644 --- a/crates/ciphernode-builder/src/ciphernode.rs +++ b/crates/ciphernode-builder/src/ciphernode.rs @@ -8,7 +8,7 @@ use actix::Addr; use anyhow::Result; use e3_data::{DataStore, InMemStore, StoreAddr}; use e3_events::{BusHandle, EnclaveEvent, HistoryCollector}; -use e3_net::NetInterfaceInvertedHandle; +use e3_net::NetChannelBridge; use libp2p::PeerId; /// A Sharable handle to a Ciphernode. NOTE: clones are available for use in the CiphernodeSystem @@ -21,7 +21,7 @@ pub struct CiphernodeHandle { pub history: Option>>, pub errors: Option>>, pub peer_id: PeerId, - pub net_simulate_adaptor: Option, + pub channel_bridge: Option, } impl PartialEq for CiphernodeHandle { @@ -40,7 +40,7 @@ impl CiphernodeHandle { history: Option>>, errors: Option>>, peer_id: PeerId, - net_simulate_adaptor: Option, + channel_bridge: Option, ) -> Self { Self { address, @@ -49,7 +49,7 @@ impl CiphernodeHandle { history, errors, peer_id, - net_simulate_adaptor, + channel_bridge, } } @@ -73,11 +73,10 @@ impl CiphernodeHandle { &self.store } - pub fn get_test_interface(&self) -> Result { - Ok(self - .net_simulate_adaptor - .clone() - .ok_or(anyhow::anyhow!("No interface exists"))?) + pub fn channel_bridge(&self) -> Result { + Ok(self.channel_bridge.clone().ok_or(anyhow::anyhow!( + "No channel bridge exists. We are likely not in test mode" + ))?) 
} pub fn in_mem_store(&self) -> Option<&Addr> { diff --git a/crates/ciphernode-builder/src/ciphernode_builder.rs b/crates/ciphernode-builder/src/ciphernode_builder.rs index 5a8896aa78..50509c1b1f 100644 --- a/crates/ciphernode-builder/src/ciphernode_builder.rs +++ b/crates/ciphernode-builder/src/ciphernode_builder.rs @@ -24,7 +24,7 @@ use e3_fhe_params::BfvPreset; use e3_keyshare::ext::ThresholdKeyshareExtension; use e3_multithread::{Multithread, MultithreadReport, TaskPool}; use e3_net::{ - create_test_net_interface, setup_libp2p_keypair, setup_net, setup_net_interface, NetInterface, + create_channel_bridge, setup_libp2p_keypair, setup_net, setup_net_interface, NetInterface, NetInterfaceInverted, NetRepositoryFactory, TestNetInterface, }; use e3_request::E3Router; @@ -482,22 +482,23 @@ impl CiphernodeBuilder { e3_builder.build().await?; let topic = "enclave-gossip"; - let out = if let Some(net_config) = self.net_config { + let (peer_id, interface, channel_bridge) = if let Some(net_config) = self.net_config { + // Setup real net interface let repositories = store.repositories(); let keypair = setup_libp2p_keypair(repositories.libp2p_keypair(), &self.cipher).await?; let peer_id = keypair.peer_id(); - let handle = + let interface = setup_net_interface(topic, keypair, net_config.peers, net_config.quic_port)?; - (peer_id, handle, None) + (peer_id, interface, None) } else { - let (handle, inverted) = create_test_net_interface(); + // Setup test net interface with random PeerId + let (interface, channel_bridge) = create_channel_bridge(); let peer_id = PeerId::random(); - let test_handle = Some(inverted); - (peer_id, handle, test_handle) + let channel_bridge = Some(channel_bridge); + (peer_id, interface, channel_bridge) }; - let (peer_id, handle, test_handle) = out; - setup_net(topic, bus.clone(), eventstore_ts, handle)?; + setup_net(topic, bus.clone(), eventstore_ts, interface)?; // Run the sync routine sync( @@ -516,7 +517,7 @@ impl CiphernodeBuilder { history, errors, 
peer_id, - test_handle, + channel_bridge, )) } diff --git a/crates/net/src/net_interface_handle.rs b/crates/net/src/net_interface_handle.rs index 24c6985041..f90b44ea4c 100644 --- a/crates/net/src/net_interface_handle.rs +++ b/crates/net/src/net_interface_handle.rs @@ -10,7 +10,6 @@ use tokio::{ sync::{broadcast, mpsc}, time::sleep, }; -use tracing::error; use crate::events::{NetCommand, NetEvent}; @@ -34,7 +33,9 @@ pub trait NetInterface: Sized { } #[derive(Debug, Clone)] -pub struct NetInterfaceInvertedHandle { +/// Allow Net events and commands to be bridged between nodes. This is used for testing purposes to +/// simulate libp2p without running libp2p. +pub struct NetChannelBridge { cmd_tx: broadcast::Sender, tx: mpsc::Sender, event_tx: broadcast::Sender, @@ -57,7 +58,9 @@ impl NetInterface for NetInterfaceHandle { self.tx.clone() } } -pub fn create_test_net_interface() -> (NetInterfaceHandle, NetInterfaceInvertedHandle) { + +/// This creates a channel bridge which allows for network events to be connected between test nodes +pub fn create_channel_bridge() -> (NetInterfaceHandle, NetChannelBridge) { let (m_cmd_tx, mut m_cmd_rx) = mpsc::channel::(1000); let (b_evt_tx, _) = broadcast::channel(1000); let (b_cmd_tx, _) = broadcast::channel(1000); @@ -66,6 +69,7 @@ pub fn create_test_net_interface() -> (NetInterfaceHandle, NetInterfaceInvertedH let startup_event_tx = b_evt_tx.clone(); let keep_alive = b_cmd_tx.subscribe(); + // Bridge from mpsc channel to broadcast channel simulating AllPeersDialed for each node tokio::spawn(async move { let _rx_guard = keep_alive; sleep(Duration::from_millis(100)).await; @@ -80,7 +84,7 @@ pub fn create_test_net_interface() -> (NetInterfaceHandle, NetInterfaceInvertedH rx: b_evt_tx.subscribe(), }; - let inverted = NetInterfaceInvertedHandle { + let inverted = NetChannelBridge { tx: m_cmd_tx, cmd_tx: b_cmd_tx, event_tx: b_evt_tx, @@ -89,83 +93,15 @@ pub fn create_test_net_interface() -> (NetInterfaceHandle, NetInterfaceInvertedH 
(handle, inverted) } -#[derive(Clone)] -pub struct TestNetInterface { - m_cmd_tx: mpsc::Sender, - b_cmd_tx: broadcast::Sender, - b_evt_tx: broadcast::Sender, -} - -impl TestNetInterface { - pub fn new() -> Self { - let (m_cmd_tx, mut m_cmd_rx) = mpsc::channel::(1000); - let (b_evt_tx, _) = broadcast::channel(1000); - let (b_cmd_tx, _) = broadcast::channel(1000); - - // Bridge mpsc commands to broadcast so the mock can subscribe - let tx = b_cmd_tx.clone(); - let startup_event_tx = b_evt_tx.clone(); - tokio::spawn(async move { - // Simulate dial-in delay like TestNetInterface - sleep(Duration::from_millis(100)).await; - let _ = startup_event_tx.send(NetEvent::AllPeersDialed); - - while let Some(cmd) = m_cmd_rx.recv().await { - if let Err(e) = tx.send(cmd.clone()) { - error!("Error sending on channel. cmd={cmd:?} with error={e}"); - } - } - println!("***** ERROR CLOSING CHANNEL!!!! ****"); - }); - - Self { - m_cmd_tx, - b_evt_tx, - b_cmd_tx, - } - } -} - -impl NetInterface for TestNetInterface { - fn tx(&self) -> mpsc::Sender { - self.m_cmd_tx.clone() - } - - fn rx(&self) -> broadcast::Receiver { - self.b_evt_tx.subscribe() - } -} - -impl NetInterfaceInverted for TestNetInterface { - fn tx(&self) -> mpsc::Sender { - self.m_cmd_tx.clone() - } - fn cmd_tx(&self) -> broadcast::Sender { - self.b_cmd_tx.clone() - } - - fn cmd_rx(&self) -> broadcast::Receiver { - self.b_cmd_tx.subscribe() - } - - fn event_tx(&self) -> broadcast::Sender { - self.b_evt_tx.clone() - } - - fn event_rx(&self) -> broadcast::Receiver { - self.b_evt_tx.subscribe() - } -} - pub trait NetInterfaceInverted: Sized { fn tx(&self) -> mpsc::Sender; - fn event_tx(&self) -> broadcast::Sender; + fn event_tx(&self) -> broadcast::Sender; //U fn event_rx(&self) -> broadcast::Receiver; fn cmd_tx(&self) -> broadcast::Sender; - fn cmd_rx(&self) -> broadcast::Receiver; + fn cmd_rx(&self) -> broadcast::Receiver; //U - fn into_handle_inverted(self) -> NetInterfaceInvertedHandle { - NetInterfaceInvertedHandle { + fn 
into_handle_inverted(self) -> NetChannelBridge { + NetChannelBridge { tx: self.tx(), event_tx: self.event_tx(), cmd_tx: self.cmd_tx(), @@ -173,7 +109,7 @@ pub trait NetInterfaceInverted: Sized { } } -impl NetInterfaceInverted for NetInterfaceInvertedHandle { +impl NetInterfaceInverted for NetChannelBridge { fn tx(&self) -> mpsc::Sender { self.tx.clone() } diff --git a/crates/test-helpers/src/lib.rs b/crates/test-helpers/src/lib.rs index 000c3361cc..d5ff5c2e8b 100644 --- a/crates/test-helpers/src/lib.rs +++ b/crates/test-helpers/src/lib.rs @@ -157,15 +157,12 @@ pub async fn simulate_libp2p_net(nodes: &[CiphernodeHandle]) { println!("MOCK: simulate_libp2p_net"); let mock = Libp2pMock::new(); for node in nodes.iter() { - let interface = node - .net_simulate_adaptor - .clone() - .expect("net_simulate_adaptor must be set for simulated nodes"); + let interface = node.channel_bridge().unwrap(); mock.add_node(node.peer_id, interface).await; } } -// fn pipe(src: NetInterfaceInvertedHandle, dest: NetInterfaceInvertedHandle) { +// fn pipe(src: NetChannelBridge, dest: NetChannelBridge) { // let src_event_tx = src.event_tx(); // let dest_event_tx = dest.event_tx(); // let mut src_cmd_rx = src.cmd_rx(); diff --git a/crates/test-helpers/src/libp2p_mock.rs b/crates/test-helpers/src/libp2p_mock.rs index ffb518f689..be98387c27 100644 --- a/crates/test-helpers/src/libp2p_mock.rs +++ b/crates/test-helpers/src/libp2p_mock.rs @@ -8,7 +8,7 @@ use std::{collections::HashMap, sync::Arc}; use e3_net::{ events::{NetCommand, NetEvent}, - ContentHash, NetInterfaceInverted, NetInterfaceInvertedHandle, + ContentHash, NetChannelBridge, NetInterfaceInverted, }; use e3_utils::ArcBytes; use libp2p::{gossipsub::MessageId, kad::GetRecordError, PeerId}; @@ -18,7 +18,7 @@ use tracing::{error, warn}; #[derive(Debug, Clone)] pub struct Libp2pMock { store: Arc>>, - nodes: Arc>>, + nodes: Arc>>, } impl Libp2pMock { @@ -29,7 +29,7 @@ impl Libp2pMock { } } - pub async fn add_node(&self, peer_id: PeerId, 
handle: NetInterfaceInvertedHandle) { + pub async fn add_node(&self, peer_id: PeerId, handle: NetChannelBridge) { self.nodes.write().await.insert(peer_id, handle.clone()); let src_event_tx = handle.event_tx(); From 36fc70701fc96801ac3614ddcb1e7b60131a56d4 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 12:28:06 +0000 Subject: [PATCH 33/50] rename and tidy up --- crates/ciphernode-builder/src/ciphernode_builder.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ciphernode-builder/src/ciphernode_builder.rs b/crates/ciphernode-builder/src/ciphernode_builder.rs index 50509c1b1f..cbd1baf187 100644 --- a/crates/ciphernode-builder/src/ciphernode_builder.rs +++ b/crates/ciphernode-builder/src/ciphernode_builder.rs @@ -24,8 +24,8 @@ use e3_fhe_params::BfvPreset; use e3_keyshare::ext::ThresholdKeyshareExtension; use e3_multithread::{Multithread, MultithreadReport, TaskPool}; use e3_net::{ - create_channel_bridge, setup_libp2p_keypair, setup_net, setup_net_interface, NetInterface, - NetInterfaceInverted, NetRepositoryFactory, TestNetInterface, + create_channel_bridge, setup_libp2p_keypair, setup_net, setup_net_interface, + NetRepositoryFactory, }; use e3_request::E3Router; use e3_sortition::{ From 186d5722520bf135873c9296649002b0e1814db4 Mon Sep 17 00:00:00 2001 From: ryardley Date: Sun, 1 Mar 2026 15:14:22 +0000 Subject: [PATCH 34/50] fix up bad field --- crates/test-helpers/src/ciphernode_system.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/test-helpers/src/ciphernode_system.rs b/crates/test-helpers/src/ciphernode_system.rs index 9b5de5bae5..74a8bb941b 100644 --- a/crates/test-helpers/src/ciphernode_system.rs +++ b/crates/test-helpers/src/ciphernode_system.rs @@ -220,7 +220,7 @@ mod tests { history: Some(history), errors: Some(errors), peer_id: PeerId::random(), - net_simulate_adaptor: None, + channel_bridge: None, }) } From 238d0feabf43fc8d8691acc69bb0b47fae0c719b Mon Sep 17 00:00:00 2001 From: 
ryardley Date: Mon, 2 Mar 2026 07:26:51 +0000 Subject: [PATCH 35/50] --wip-- [skip ci] --- crates/events/src/eventbus.rs | 332 +++++++++++++------ crates/evm/src/evm_chain_gateway.rs | 2 + crates/test-helpers/src/ciphernode_system.rs | 62 +++- crates/tests/tests/integration.rs | 122 ++++--- 4 files changed, 358 insertions(+), 160 deletions(-) diff --git a/crates/events/src/eventbus.rs b/crates/events/src/eventbus.rs index 6760976f8e..3a272efaec 100644 --- a/crates/events/src/eventbus.rs +++ b/crates/events/src/eventbus.rs @@ -9,9 +9,11 @@ use crate::EventType; use actix::prelude::*; use bloom::{BloomFilter, ASMS}; use e3_utils::{colorize, Color, MAILBOX_LIMIT, MAILBOX_LIMIT_LARGE}; -use std::collections::{HashMap, VecDeque}; +use std::collections::HashMap; +use std::fmt; use std::marker::PhantomData; -use tracing::info; +use std::time::Duration; +use tokio::sync::mpsc; ////////////////////////////////////////////////////////////////////////////// // Configuration @@ -254,6 +256,7 @@ impl GetEvents { #[rtype(result = "Vec")] pub struct TakeEvents { amount: usize, + timeout: Duration, _d: PhantomData, } @@ -261,30 +264,24 @@ impl TakeEvents { pub fn new(amount: usize) -> Self { Self { amount, + timeout: Duration::from_secs(1), _d: PhantomData, } } -} -struct PendingTake { - count: usize, - collected: Vec, - responder: tokio::sync::oneshot::Sender>, + pub fn with_per_evt_timeout(amount: usize, timeout: Duration) -> Self { + Self { + amount, + timeout, + _d: PhantomData, + } + } } #[derive(Message)] #[rtype(result = "()")] pub struct ResetHistory; -impl Handler for HistoryCollector { - type Result = (); - - fn handle(&mut self, _: ResetHistory, _: &mut Context) { - self.history.clear(); - self.pending_takes.clear(); - } -} - #[derive(Message)] #[rtype(result = "Vec")] pub struct GetErrors(PhantomData); @@ -298,123 +295,242 @@ impl GetErrors { ////////////////////////////////////////////////////////////////////////////// // History Collector 
////////////////////////////////////////////////////////////////////////////// +// +// /// Actor to subscribe to EventBus to capture all history +// pub struct HistoryCollector { +// history: VecDeque, +// pending_takes: Vec>, +// } +// +// impl HistoryCollector { +// pub fn new() -> Self { +// Self { +// history: VecDeque::new(), +// pending_takes: Vec::new(), +// } +// } +// +// fn try_fulfill_pending_takes(&mut self) { +// let mut completed = Vec::new(); +// +// // For each pending take, try to fulfill it +// for (idx, pending) in self.pending_takes.iter_mut().enumerate() { +// // Fill from history first +// while pending.collected.len() < pending.count && !self.history.is_empty() { +// pending.collected.push(self.history.pop_front().unwrap()); +// } +// +// // If we have enough, mark as complete +// if pending.collected.len() >= pending.count { +// info!( +// "HHH: > [{}] We have received all {} events", +// idx, +// pending.collected.len() +// ); +// completed.push(idx); +// } else { +// info!( +// "HHH: > [{}] We are still waiting on events {}/{}...", +// idx, +// pending.collected.len(), +// pending.count +// ) +// } +// } +// +// // Send responses for completed takes (in reverse order to maintain indices) +// for idx in completed.into_iter().rev() { +// let pending = self.pending_takes.swap_remove(idx); +// let events = pending.collected.into_iter().take(pending.count).collect(); +// let _ = pending.responder.send(events); +// } +// } +// +// fn add_event(&mut self, event: E) { +// info!("HHH: Received event {}", event.event_type()); +// // First try to give to pending takes +// for (idx, pending) in &mut self.pending_takes.iter_mut().enumerate() { +// if pending.collected.len() < pending.count { +// info!( +// "HHH: > [{}] Pushing {} to pending take {}/{}...", +// idx, +// event.event_type(), +// pending.collected.len() + 1, +// pending.count +// ); +// pending.collected.push(event); +// self.try_fulfill_pending_takes(); +// return; +// } +// } +// +// // 
No pending take needed it, add to history +// self.history.push_back(event); +// } +// } +// +// impl Handler> for HistoryCollector { +// type Result = Vec; +// +// fn handle(&mut self, _: GetEvents, _: &mut Context) -> Vec { +// self.history.iter().cloned().collect() +// } +// } +// +// impl Handler> for HistoryCollector { +// type Result = ResponseActFuture>; +// +// fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { +// let count = msg.amount; +// info!("HHH: take() requested for {} events.", msg.amount); +// // If we have enough events in history, return immediately +// if self.history.len() >= count { +// let events: Vec = self.history.drain(..count).collect(); +// info!( +// "HHH: Returning {} events. Completed take() immediately.", +// events.len() +// ); +// return Box::pin(async move { events }.into_actor(self)); +// } +// +// info!( +// "HHH: Requesting {} events but only {} in the buffer. waiting for more...", +// msg.amount, +// self.history.len() +// ); +// +// // Create a tokio oneshot channel for the response +// let (tx, rx) = tokio::sync::oneshot::channel(); +// +// // Collect what we can from history +// let mut collected = Vec::new(); +// while !self.history.is_empty() && collected.len() < count { +// collected.push(self.history.pop_front().unwrap()); +// } +// +// // Store the pending request +// let idx = self.pending_takes.len(); +// self.pending_takes.push(PendingTake { +// count, +// collected, +// responder: tx, +// }); +// info!("HHH: [{}] waiting for take to complete...", idx); +// // Return future that waits for the response +// Box::pin(async move { rx.await.unwrap_or_else(|_| Vec::new()) }.into_actor(self)) +// } +// } +// +// impl Actor for HistoryCollector { +// type Context = Context; +// fn started(&mut self, ctx: &mut Self::Context) { +// ctx.set_mailbox_capacity(MAILBOX_LIMIT) +// } +// } +// +// impl Handler for HistoryCollector { +// type Result = E::Result; +// fn handle(&mut self, msg: E, _ctx: &mut 
Self::Context) -> Self::Result { +// self.add_event(msg); +// } +// } +// -/// Actor to subscribe to EventBus to capture all history -pub struct HistoryCollector { - history: VecDeque, - pending_takes: Vec>, +struct HistoryCollectorWaiter { + rx: Option>, } -impl HistoryCollector { - pub fn new() -> Self { - Self { - history: VecDeque::new(), - pending_takes: Vec::new(), - } - } - - fn try_fulfill_pending_takes(&mut self) { - let mut completed = Vec::new(); - - // For each pending take, try to fulfill it - for (idx, pending) in self.pending_takes.iter_mut().enumerate() { - // Fill from history first - while pending.collected.len() < pending.count && !self.history.is_empty() { - pending.collected.push(self.history.pop_front().unwrap()); - } +impl Actor for HistoryCollectorWaiter { + type Context = Context; +} - // If we have enough, mark as complete - if pending.collected.len() >= pending.count { - completed.push(idx); +impl Handler> for HistoryCollectorWaiter { + type Result = ResponseActFuture>; + fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { + let count = msg.amount; + let timeout = msg.timeout; + let mut rx = self.rx.take().unwrap(); + Box::pin( + async move { + let mut events = Vec::with_capacity(count); + for _ in 0..count { + match tokio::time::timeout(timeout, rx.recv()).await { + Ok(Some(e)) => events.push(e), + _ => break, + } + } + (events, rx) } - } - - // Send responses for completed takes (in reverse order to maintain indices) - for idx in completed.into_iter().rev() { - let pending = self.pending_takes.swap_remove(idx); - let events = pending.collected.into_iter().take(pending.count).collect(); - let _ = pending.responder.send(events); - } + .into_actor(self) + .map(|(events, rx), actor, _| { + actor.rx = Some(rx); + events + }), + ) } +} - fn add_event(&mut self, event: E) { - // First try to give to pending takes - for pending in &mut self.pending_takes { - if pending.collected.len() < pending.count { - info!( - "Received 
event {}. Pushing to pending take {}/{}...", - event.event_type(), - pending.collected.len() + 1, - pending.count - ); - pending.collected.push(event); - self.try_fulfill_pending_takes(); - return; - } +impl Handler for HistoryCollectorWaiter { + type Result = (); + fn handle(&mut self, _: ResetHistory, _: &mut Context) { + if let Some(ref mut rx) = self.rx { + while rx.try_recv().is_ok() {} } - - // No pending take needed it, add to history - self.history.push_back(event); } } -impl Handler> for HistoryCollector { - type Result = Vec; - - fn handle(&mut self, _: GetEvents, _: &mut Context) -> Vec { - self.history.iter().cloned().collect() - } +pub struct HistoryCollector { + history: Vec, + tx: mpsc::UnboundedSender, + waiter: Addr>, } -impl Handler> for HistoryCollector { - type Result = ResponseActFuture>; - - fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { - let count = msg.amount; - - // If we have enough events in history, return immediately - if self.history.len() >= count { - let events: Vec = self.history.drain(..count).collect(); - return Box::pin(async move { events }.into_actor(self)); - } - - info!( - "Requesting {} events but only {} in the buffer. 
waiting for more...", - msg.amount, - self.history.len() - ); - - // Create a tokio oneshot channel for the response - let (tx, rx) = tokio::sync::oneshot::channel(); - - // Collect what we can from history - let mut collected = Vec::new(); - while !self.history.is_empty() && collected.len() < count { - collected.push(self.history.pop_front().unwrap()); +impl HistoryCollector { + pub fn new() -> Self { + let (tx, rx) = mpsc::unbounded_channel(); + let waiter = HistoryCollectorWaiter { rx: Some(rx) }.start(); + Self { + history: Vec::new(), + tx, + waiter, } - - // Store the pending request - self.pending_takes.push(PendingTake { - count, - collected, - responder: tx, - }); - - // Return future that waits for the response - Box::pin(async move { rx.await.unwrap_or_else(|_| Vec::new()) }.into_actor(self)) } } impl Actor for HistoryCollector { type Context = Context; fn started(&mut self, ctx: &mut Self::Context) { - ctx.set_mailbox_capacity(MAILBOX_LIMIT) + ctx.set_mailbox_capacity(MAILBOX_LIMIT); } } impl Handler for HistoryCollector { type Result = E::Result; fn handle(&mut self, msg: E, _ctx: &mut Self::Context) -> Self::Result { - self.add_event(msg); + self.history.push(msg.clone()); + let _ = self.tx.send(msg); + } +} + +impl Handler for HistoryCollector { + type Result = (); + fn handle(&mut self, _: ResetHistory, _: &mut Context) { + self.history.clear(); + self.waiter.do_send(ResetHistory); + } +} + +impl Handler> for HistoryCollector { + type Result = ResponseActFuture>; + fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { + let fut = self.waiter.send(msg); + Box::pin(async move { fut.await.unwrap() }.into_actor(self)) + } +} + +impl Handler> for HistoryCollector { + type Result = Vec; + fn handle(&mut self, _: GetEvents, _: &mut Context) -> Vec { + self.history.clone() } } diff --git a/crates/evm/src/evm_chain_gateway.rs b/crates/evm/src/evm_chain_gateway.rs index 65c7677fc4..46cda8b5dd 100644 --- a/crates/evm/src/evm_chain_gateway.rs 
+++ b/crates/evm/src/evm_chain_gateway.rs @@ -246,6 +246,8 @@ impl Handler for EvmChainGateway { #[cfg(test)] mod tests { + use std::time::Duration; + use crate::EvmEvent; use super::*; diff --git a/crates/test-helpers/src/ciphernode_system.rs b/crates/test-helpers/src/ciphernode_system.rs index 74a8bb941b..c8d15e0bf7 100644 --- a/crates/test-helpers/src/ciphernode_system.rs +++ b/crates/test-helpers/src/ciphernode_system.rs @@ -116,11 +116,50 @@ impl CiphernodeSystem { .await } + pub async fn expect_events(&self, expected: &[&str]) -> Result { + let h = self + .take_history_with_timeout_impl( + 0, + expected.len(), + Duration::from_secs(1000), + Duration::from_secs(30), + ) + .await?; + println!(">> {:?} == {:?}", h.event_types(), expected.to_vec()); + h.expect(expected.to_vec()); + Ok(h) + } + + pub async fn expect_events_with_timeout( + &self, + expected: &[&str], + total_to: Duration, + ) -> Result { + let h = self + .take_history_with_timeout(0, expected.len(), total_to) + .await?; + println!(">> {:?} == {:?}", h.event_types(), expected.to_vec()); + + h.expect(expected.to_vec()); + Ok(h) + } + pub async fn take_history_with_timeout( &self, index: usize, count: usize, - tout: Duration, + total_to: Duration, + ) -> Result { + self.take_history_with_timeout_impl(index, count, total_to, Duration::from_millis(1000)) + .await + } + + pub async fn take_history_with_timeout_impl( + &self, + index: usize, + count: usize, + total_to: Duration, + event_to: Duration, ) -> Result { let Some(node) = self.0.get(index) else { bail!("No node found"); @@ -130,12 +169,15 @@ impl CiphernodeSystem { return Ok(CiphernodeHistory(vec![])); }; - let history = timeout(tout, history.send(TakeEvents::new(count))) - .await - .context(format!( - "Could not take {} events from node {}", - count, index - ))??; + let history = timeout( + total_to, + history.send(TakeEvents::with_per_evt_timeout(count, event_to)), + ) + .await + .context(format!( + "Could not take {} events from node {}", + 
count, index + ))??; Ok(CiphernodeHistory(history)) } @@ -146,7 +188,7 @@ impl CiphernodeSystem { break; }; loop { - let nhs = history.send(TakeEvents::new(1)); + let nhs = history.send(TakeEvents::with_per_evt_timeout(1, Duration::from_secs(1))); let tr = timeout(Duration::from_millis(millis), nhs).await; if !tr.is_ok() { break; @@ -182,6 +224,10 @@ impl CiphernodeHistory { pub fn event_types(&self) -> Vec { self.0.iter().map(|e| e.event_type()).collect() } + + pub fn expect(&self, event_types: Vec<&str>) { + assert_eq!(self.event_types(), event_types); + } } impl Deref for CiphernodeHistory { diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index eeb2f43cfe..93ca3a9e52 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -28,8 +28,8 @@ use e3_test_helpers::{ create_seed_from_u64, create_shared_rng_from_u64, with_tracing, AddToCommittee, }; use e3_trbfv::helpers::calculate_error_size; -use e3_utils::rand_eth_addr; use e3_utils::utility_types::ArcBytes; +use e3_utils::{colorize, rand_eth_addr, Color}; use e3_zk_prover::test_utils::get_tempdir; use e3_zk_prover::ZkBackend; use fhe::bfv::PublicKey; @@ -335,7 +335,7 @@ async fn setup_score_sortition_environment( Ok(()) } -fn serialize_report(report: &[(&str, Duration)]) -> String { +fn serialize_report(report: &[(String, Duration)]) -> String { let max_key_len = report.iter().map(|(k, _)| k.len()).max().unwrap_or(0); report @@ -352,14 +352,49 @@ fn serialize_report(report: &[(&str, Duration)]) -> String { .join("\n") } +#[derive(Default)] +struct Report { + inner: Vec<(String, Duration)>, +} + +fn repeat(ch: char, num: usize) -> String { + let mut s = String::new(); + while s.len() < num { + s.push(ch); + } + s +} + +impl Report { + pub fn push(&mut self, repo: (&str, Duration)) { + let (label, dur) = repo; + self.show(label); + self.inner.push((label.to_owned(), dur)); + } + + pub fn show(&self, label: &str) { + println!( + "\n\n {}\n {}{}{}\n 
{}\n", + colorize(repeat('#', label.len() + 6), Color::Yellow), + colorize("## ", Color::Yellow), + colorize(label.to_uppercase(), Color::White), + colorize(" ##", Color::Yellow), + colorize(repeat('#', label.len() + 6), Color::Yellow), + ); + } + + pub fn serialize(&self) -> String { + serialize_report(&self.inner) + } +} + /// Test trbfv #[actix::test] #[serial_test::serial] async fn test_trbfv_actor() -> Result<()> { - println!("Running test_trbfv_actor..."); - let mut report: Vec<(&str, Duration)> = vec![]; + let mut report = Report::default(); + report.push(("Starting trbfv actor test", Duration::from_secs(0))); let whole_test = Instant::now(); - let _guard = with_tracing("info"); // NOTE: Here we are trying to make it as clear as possible as to what is going on so attempting to @@ -466,7 +501,7 @@ async fn test_trbfv_actor() -> Result<()> { .build() .await?; - report.push(("Setup", setup.elapsed())); + report.push(("Setup completed", setup.elapsed())); let committee_setup = Instant::now(); let chain_id = 1u64; @@ -491,8 +526,9 @@ async fn test_trbfv_actor() -> Result<()> { setup_score_sortition_environment(&bus, ð_addrs, chain_id).await?; // Flush all events - nodes.flush_all_history(100).await?; - report.push(("Committee Setup", committee_setup.elapsed())); + nodes.flush_all_history(10000).await?; + + report.push(("Committee Setup Completed", committee_setup.elapsed())); /////////////////////////////////////////////////////////////////////////////////// // 2. 
Trigger E3Requested @@ -532,16 +568,13 @@ async fn test_trbfv_actor() -> Result<()> { &collector_addr, )?; - println!( + report.show(&format!( "Committee selected: {} nodes, {} buffer nodes", committee.len(), buffer_nodes.len() - ); + )); - let expected = vec!["E3Requested"]; - let _ = nodes - .take_history_with_timeout(0, expected.len(), Duration::from_secs(1000)) - .await?; + nodes.expect_events(&["E3Requested"]).await?; bus.publish_without_context(CommitteeFinalized { e3_id: e3_id.clone(), @@ -551,13 +584,10 @@ async fn test_trbfv_actor() -> Result<()> { let committee_finalized_timer = Instant::now(); - let expected = vec!["CommitteeFinalized"]; - let _ = nodes - .take_history_with_timeout(0, expected.len(), Duration::from_secs(1000)) - .await?; + nodes.expect_events(&["CommitteeFinalized"]).await?; report.push(( - "Committee Finalization", + "Committee Finalization Complete", committee_finalized_timer.elapsed(), )); @@ -566,16 +596,17 @@ async fn test_trbfv_actor() -> Result<()> { // - EncryptionKeyCreated × 5 (one per party, passes is_document_publisher_event filter) // Internal events (EncryptionKeyPending, ComputeRequest/Response) stay on committee nodes' local buses. let encryption_keys_timer = Instant::now(); - let expected = vec![ - "EncryptionKeyCreated", - "EncryptionKeyCreated", - "EncryptionKeyCreated", - "EncryptionKeyCreated", - "EncryptionKeyCreated", - ]; - let _ = nodes - .take_history_with_timeout(0, expected.len(), Duration::from_secs(1000)) + + nodes + .expect_events(&[ + "EncryptionKeyCreated", + "EncryptionKeyCreated", + "EncryptionKeyCreated", + "EncryptionKeyCreated", + "EncryptionKeyCreated", + ]) .await?; + report.push(( "All EncryptionKeyCreated events", encryption_keys_timer.elapsed(), @@ -587,10 +618,14 @@ async fn test_trbfv_actor() -> Result<()> { // Internal events (ComputeRequest/Response for GenPk, GenEsi, ZK proofs, ThresholdSharePending, // PkGenerationProofSigned, DkgProofSigned) stay on committee nodes' local buses. 
let shares_timer = Instant::now(); - let expected: Vec<&str> = (0..25).map(|_| "ThresholdShareCreated").collect(); - let _ = nodes - .take_history_with_timeout(0, expected.len(), Duration::from_secs(3000)) + + nodes + .expect_events_with_timeout( + &(0..25).map(|_| "ThresholdShareCreated").collect::>(), + Duration::from_secs(3000), + ) .await?; + report.push(("All ThresholdShareCreated events", shares_timer.elapsed())); // Wait for DecryptionKeyShared (Exchange #3) events @@ -598,10 +633,10 @@ async fn test_trbfv_actor() -> Result<()> { // Each committee node publishes DecryptionKeyShared after computing its decryption key // and generating C4 (share decryption) proofs. let decryption_key_shared_timer = Instant::now(); - let expected: Vec<&str> = (0..5).map(|_| "DecryptionKeyShared").collect(); - let _ = nodes - .take_history_with_timeout(0, expected.len(), Duration::from_secs(1000)) + nodes + .expect_events(&(0..5).map(|_| "DecryptionKeyShared").collect::>()) .await?; + report.push(( "All DecryptionKeyShared events", decryption_key_shared_timer.elapsed(), @@ -613,16 +648,15 @@ async fn test_trbfv_actor() -> Result<()> { // After DecryptionKeySharedCollector collects all shares and C4 proofs are verified, // each party publishes KeyshareCreated. 
let shares_to_pubkey_agg_timer = Instant::now(); - let expected = vec![ - "KeyshareCreated", - "KeyshareCreated", - "KeyshareCreated", - "KeyshareCreated", - "KeyshareCreated", - "PublicKeyAggregated", - ]; let h = nodes - .take_history_with_timeout(0, expected.len(), Duration::from_secs(1000)) + .expect_events(&[ + "KeyshareCreated", + "KeyshareCreated", + "KeyshareCreated", + "KeyshareCreated", + "KeyshareCreated", + "PublicKeyAggregated", + ]) .await?; report.push(( @@ -751,7 +785,7 @@ async fn test_trbfv_actor() -> Result<()> { println!("{}", mt_report); report.push(("Entire Test", whole_test.elapsed())); - println!("{}", serialize_report(&report)); + println!("{}", report.serialize()); Ok(()) } From e4b3fded5840b539f969dcc378579a4946c7b4cd Mon Sep 17 00:00:00 2001 From: ryardley Date: Mon, 2 Mar 2026 12:21:17 +0000 Subject: [PATCH 36/50] tidy up libp2p simulation to actually use net components --- .../src/enclave_event/publickey_aggregated.rs | 5 +- crates/events/src/eventbus.rs | 30 +++-- crates/events/src/sequencer.rs | 1 + crates/evm/src/evm_chain_gateway.rs | 3 +- crates/net/src/document_publisher.rs | 6 +- crates/sync/src/sync.rs | 2 + crates/test-helpers/src/ciphernode_system.rs | 86 ++++++++++--- crates/test-helpers/src/lib.rs | 114 +---------------- crates/test-helpers/src/libp2p_mock.rs | 4 - crates/tests/tests/integration.rs | 115 ++++++------------ 10 files changed, 138 insertions(+), 228 deletions(-) diff --git a/crates/events/src/enclave_event/publickey_aggregated.rs b/crates/events/src/enclave_event/publickey_aggregated.rs index 9190fb6573..0a084b6bac 100644 --- a/crates/events/src/enclave_event/publickey_aggregated.rs +++ b/crates/events/src/enclave_event/publickey_aggregated.rs @@ -15,8 +15,9 @@ use std::fmt::{self, Display}; #[rtype(result = "()")] pub struct PublicKeyAggregated { #[derivative(Debug(format_with = "e3_utils::formatters::hexf"))] - pub pubkey: Vec, - pub public_key_hash: [u8; 32], + pub pubkey: Vec, // TODO: ArcBytes ? 
+ #[derivative(Debug(format_with = "e3_utils::formatters::hexf"))] + pub public_key_hash: [u8; 32], // TODO: ArcBytes32 ? pub e3_id: E3id, pub nodes: OrderedSet, } diff --git a/crates/events/src/eventbus.rs b/crates/events/src/eventbus.rs index 3a272efaec..9578c8b358 100644 --- a/crates/events/src/eventbus.rs +++ b/crates/events/src/eventbus.rs @@ -253,13 +253,19 @@ impl GetEvents { } #[derive(Message)] -#[rtype(result = "Vec")] +#[rtype(result = "TakeEventsResult")] pub struct TakeEvents { amount: usize, timeout: Duration, _d: PhantomData, } +#[derive(Debug)] +pub struct TakeEventsResult { + pub events: Vec, + pub timed_out: bool, +} + impl TakeEvents { pub fn new(amount: usize) -> Self { Self { @@ -445,7 +451,7 @@ impl Actor for HistoryCollectorWaiter { } impl Handler> for HistoryCollectorWaiter { - type Result = ResponseActFuture>; + type Result = ResponseActFuture>; fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { let count = msg.amount; let timeout = msg.timeout; @@ -453,18 +459,23 @@ impl Handler> for HistoryCollectorWaiter Box::pin( async move { let mut events = Vec::with_capacity(count); + let mut timed_out = false; for _ in 0..count { match tokio::time::timeout(timeout, rx.recv()).await { Ok(Some(e)) => events.push(e), - _ => break, + Ok(None) => break, + Err(_) => { + timed_out = true; + break; + } } } - (events, rx) + (TakeEventsResult { events, timed_out }, rx) } .into_actor(self) - .map(|(events, rx), actor, _| { + .map(|(result, rx), actor, _| { actor.rx = Some(rx); - events + result }), ) } @@ -473,6 +484,7 @@ impl Handler> for HistoryCollectorWaiter impl Handler for HistoryCollectorWaiter { type Result = (); fn handle(&mut self, _: ResetHistory, _: &mut Context) { + println!("HHH: WAITER RESETING HISTORY"); if let Some(ref mut rx) = self.rx { while rx.try_recv().is_ok() {} } @@ -507,6 +519,7 @@ impl Actor for HistoryCollector { impl Handler for HistoryCollector { type Result = E::Result; fn handle(&mut self, msg: E, _ctx: 
&mut Self::Context) -> Self::Result { + println!("HHH: EVENT: {}", msg.event_type()); self.history.push(msg.clone()); let _ = self.tx.send(msg); } @@ -515,14 +528,16 @@ impl Handler for HistoryCollector { impl Handler for HistoryCollector { type Result = (); fn handle(&mut self, _: ResetHistory, _: &mut Context) { + println!("HHH: RESET"); self.history.clear(); self.waiter.do_send(ResetHistory); } } impl Handler> for HistoryCollector { - type Result = ResponseActFuture>; + type Result = ResponseActFuture>; fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { + println!("HHH: TAKE EVENTS"); let fut = self.waiter.send(msg); Box::pin(async move { fut.await.unwrap() }.into_actor(self)) } @@ -531,6 +546,7 @@ impl Handler> for HistoryCollector { impl Handler> for HistoryCollector { type Result = Vec; fn handle(&mut self, _: GetEvents, _: &mut Context) -> Vec { + println!("HHH: GET EVENTS"); self.history.clone() } } diff --git a/crates/events/src/sequencer.rs b/crates/events/src/sequencer.rs index 73e33314da..6e8b6cb40b 100644 --- a/crates/events/src/sequencer.rs +++ b/crates/events/src/sequencer.rs @@ -81,6 +81,7 @@ mod tests { assert_eq!( events + .events .iter() .map(EnclaveEvent::strip_ts) .collect::>(), diff --git a/crates/evm/src/evm_chain_gateway.rs b/crates/evm/src/evm_chain_gateway.rs index 46cda8b5dd..e192883a29 100644 --- a/crates/evm/src/evm_chain_gateway.rs +++ b/crates/evm/src/evm_chain_gateway.rs @@ -351,6 +351,7 @@ mod tests { let full = history_collector.send(TakeEvents::new(5)).await?; let test_events: Vec = full + .events .iter() .filter_map(|e| { if let EnclaveEventData::TestEvent(TestEvent { msg, .. 
}) = e.get_data() { @@ -366,7 +367,7 @@ mod tests { vec!["Before Complete", "Before SyncEnded", "After SyncEnded"] ); - let event_types: Vec = full.iter().map(|e| e.event_type()).collect(); + let event_types: Vec = full.events.iter().map(|e| e.event_type()).collect(); assert_eq!( event_types, diff --git a/crates/net/src/document_publisher.rs b/crates/net/src/document_publisher.rs index 6e2180f536..7f848f7d98 100644 --- a/crates/net/src/document_publisher.rs +++ b/crates/net/src/document_publisher.rs @@ -507,6 +507,7 @@ impl EventConverter { Ok(()) } + fn handle_encryption_key_created(&self, msg: TypedEvent) -> Result<()> { let (msg, ctx) = msg.into_components(); if msg.external { @@ -535,6 +536,7 @@ impl EventConverter { Ok(()) } + // TODO: Split this off to a separate module/actor to make each component unidirectional /// Convert received document to internal events. /// Note: Filtering already happened in DocumentPublisher before DHT fetch. fn handle_document_received(&self, msg: TypedEvent) -> Result<()> { @@ -850,7 +852,7 @@ mod tests { // wait for events to settle let errors = errors.send(TakeEvents::new(1)).await?; - let error: EnclaveError = errors.first().unwrap().try_into()?; + let error: EnclaveError = errors.events.first().unwrap().try_into()?; assert_eq!( error.message, "Operation failed after 4 attempts. Last error: DHT get record failed: Timeout { key: Key(b\"\\xda-\\xe1\\xc0T\\x11$X\\x05\\xd1\\xd4\\xa6C\\x86\\x96\\xb7e\\xd9j\\x96\\x1bD\\xc8P#\\x0f\\\"\\xea A@b\") }" @@ -907,7 +909,7 @@ mod tests { // Expect error to exist let errors = errors.send(TakeEvents::new(1)).await?; - let error: EnclaveError = errors.first().unwrap().try_into()?; + let error: EnclaveError = errors.events.first().unwrap().try_into()?; assert_eq!( error.message, "Operation failed after 4 attempts. 
Last error: DHT put record failed: PutRecordError(QuorumFailed { key: Key(b\"I got the secret\"), success: [], quorum: 1 })" diff --git a/crates/sync/src/sync.rs b/crates/sync/src/sync.rs index 487b681cff..e7e8297fc0 100644 --- a/crates/sync/src/sync.rs +++ b/crates/sync/src/sync.rs @@ -393,6 +393,7 @@ mod tests { let received = history.send(TakeEvents::new(2)).await?; let event_types: Vec<&'static str> = received + .events .iter() .map(|e| match e.get_data() { EnclaveEventData::TestEvent(_) => "TestEvent", @@ -406,6 +407,7 @@ mod tests { assert_eq!(event_types, vec!["TestEvent", "TestEvent"]); let msgs: Vec = received + .events .iter() .filter_map(|e| { if let EnclaveEventData::TestEvent(t) = e.get_data() { diff --git a/crates/test-helpers/src/ciphernode_system.rs b/crates/test-helpers/src/ciphernode_system.rs index c8d15e0bf7..67e8d31440 100644 --- a/crates/test-helpers/src/ciphernode_system.rs +++ b/crates/test-helpers/src/ciphernode_system.rs @@ -5,10 +5,13 @@ // or FITNESS FOR A PARTICULAR PURPOSE. 
use crate::simulate_libp2p_net; -use anyhow::*; +use anyhow::bail; +use anyhow::Context; +use anyhow::Result; use e3_ciphernode_builder::CiphernodeHandle; use e3_events::Event; use e3_events::{EnclaveEvent, GetEvents, ResetHistory, TakeEvents}; +use std::u64; use std::{future::Future, ops::Deref, pin::Pin, time::Duration}; use tokio::time::timeout; @@ -116,28 +119,47 @@ impl CiphernodeSystem { .await } + /// expect events to fire with the default timeout 1000sec per event pub async fn expect_events(&self, expected: &[&str]) -> Result { let h = self .take_history_with_timeout_impl( 0, expected.len(), - Duration::from_secs(1000), - Duration::from_secs(30), + Some(Duration::from_secs(1000)), + Some(Duration::from_secs(1000)), ) - .await?; + .await + .map_err(|e| anyhow::anyhow!("FAILURE: {expected:?} : {e}"))?; + println!(">> {:?} == {:?}", h.event_types(), expected.to_vec()); h.expect(expected.to_vec()); Ok(h) } - pub async fn expect_events_with_timeout( + pub async fn expect_events_without_timeout( &self, expected: &[&str], - total_to: Duration, ) -> Result { let h = self - .take_history_with_timeout(0, expected.len(), total_to) - .await?; + .take_history_with_timeout_impl(0, expected.len(), None, None) + .await + .map_err(|e| anyhow::anyhow!("FAILURE: {expected:?} : {e}"))?; + + println!(">> {:?} == {:?}", h.event_types(), expected.to_vec()); + h.expect(expected.to_vec()); + Ok(h) + } + + pub async fn expect_events_with_timeouts( + &self, + expected: &[&str], + total_to: Duration, // total + per_evt_to: Duration, // per event + ) -> Result { + let h = self + .take_history_with_timeout_impl(0, expected.len(), Some(total_to), Some(per_evt_to)) + .await + .map_err(|e| anyhow::anyhow!("FAILURE: {expected:?} : {e}"))?; println!(">> {:?} == {:?}", h.event_types(), expected.to_vec()); h.expect(expected.to_vec()); @@ -150,16 +172,21 @@ impl CiphernodeSystem { count: usize, total_to: Duration, ) -> Result { - self.take_history_with_timeout_impl(index, count, total_to, 
Duration::from_millis(1000)) - .await + self.take_history_with_timeout_impl( + index, + count, + Some(total_to), + Some(Duration::from_millis(1000)), + ) + .await } pub async fn take_history_with_timeout_impl( &self, index: usize, count: usize, - total_to: Duration, - event_to: Duration, + total_to: Option, + event_to: Option, ) -> Result { let Some(node) = self.0.get(index) else { bail!("No node found"); @@ -170,8 +197,11 @@ impl CiphernodeSystem { }; let history = timeout( - total_to, - history.send(TakeEvents::with_per_evt_timeout(count, event_to)), + total_to.unwrap_or(Duration::from_secs(u64::MAX)), // No timeout + history.send(TakeEvents::with_per_evt_timeout( + count, + event_to.unwrap_or(Duration::from_secs(u64::MAX)), + )), ) .await .context(format!( @@ -179,7 +209,15 @@ impl CiphernodeSystem { count, index ))??; - Ok(CiphernodeHistory(history)) + if history.timed_out { + bail!( + "Take History timed out was trying to take {} events. Returned {:?}", + count, + history + ); + }; + + Ok(CiphernodeHistory(history.events)) } pub async fn flush_all_history(&self, millis: u64) -> Result<()> { let nodes = &self.0; @@ -188,15 +226,25 @@ impl CiphernodeSystem { break; }; loop { - let nhs = history.send(TakeEvents::with_per_evt_timeout(1, Duration::from_secs(1))); + println!("IN FLUSH LOOP..."); + let nhs = history.send(TakeEvents::new(1)); let tr = timeout(Duration::from_millis(millis), nhs).await; - if !tr.is_ok() { - break; + match tr { + Ok(Ok(result)) if result.timed_out => { + println!("PER-EVENT TIMEOUT, BREAKING LOOP..."); + break; + } + Err(_) => { + println!("OUTER TIMEOUT, BREAKING LOOP..."); + break; + } + _ => { + // Got events, keep draining + } } } history.send(ResetHistory).await?; } - Ok(()) } } diff --git a/crates/test-helpers/src/lib.rs b/crates/test-helpers/src/lib.rs index d5ff5c2e8b..0b932e79fc 100644 --- a/crates/test-helpers/src/lib.rs +++ b/crates/test-helpers/src/lib.rs @@ -127,34 +127,9 @@ impl Handler> for SimulatedNetPipe { } } -/// 
Simulate libp2p by taking output events on each local bus and filter for !is_local_only() and forward remaining events back to the event bus -/// deduplication will remove previously seen events. -/// This sets up a set of cyphernodes without libp2p. -/// The way it works is that it feeds back all events from -/// all nodes filteres by whether they are broadcastible or not -/// ```txt -/// -/// ┌─────┐ -/// │ BUS │ -/// └─────┘ -/// │ -/// ┌────────────┼────────────┐ -/// │ │ │ -/// ▼ ▼ ▼ -/// ┌────┐ ┌────┐ ┌────┐ -/// │ B1 │ │ B2 │ │ B3 │◀──┐ -/// └────┘ └────┘ └────┘ │ -/// │ │ │ │ -/// │ │ │ │ -/// └────────────┼────────────┘ │ -/// │ │ -/// ▼ │ -/// ┌─────┐ │ -/// │ FIL │───────────────┘ -/// └─────┘ -/// ``` +/// Simulate libp2p by taking output net commands and converting them to net events sending them to +/// the other nodes pub async fn simulate_libp2p_net(nodes: &[CiphernodeHandle]) { - println!("MOCK: simulate_libp2p_net"); let mock = Libp2pMock::new(); for node in nodes.iter() { let interface = node.channel_bridge().unwrap(); @@ -162,91 +137,6 @@ pub async fn simulate_libp2p_net(nodes: &[CiphernodeHandle]) { } } -// fn pipe(src: NetChannelBridge, dest: NetChannelBridge) { -// let src_event_tx = src.event_tx(); -// let dest_event_tx = dest.event_tx(); -// let mut src_cmd_rx = src.cmd_rx(); -// -// tokio::spawn(async move { -// let mut store: HashMap = HashMap::new(); -// -// loop { -// match src_cmd_rx.recv().await { -// Ok(NetCommand::GossipPublish { -// data, -// correlation_id, -// .. 
-// }) => { -// if let Err(e) = dest_event_tx.send(NetEvent::GossipData(data)) { -// error!("pipe: failed to forward GossipData to dest: {e}"); -// } -// -// let message_id = MessageId::new(&format!("{correlation_id:?}").into_bytes()); -// if let Err(e) = src_event_tx.send(NetEvent::GossipPublished { -// correlation_id, -// message_id, -// }) { -// error!("pipe: failed to send GossipPublished to src: {e}"); -// } -// } -// Ok(NetCommand::DhtPutRecord { -// correlation_id, -// key, -// value, -// .. -// }) => { -// store.insert(key.clone(), value.clone()); -// -// if let Err(e) = dest_event_tx.send(NetEvent::DhtGetRecordSucceeded { -// key: key.clone(), -// correlation_id, -// value, -// }) { -// error!("pipe: failed to forward DhtGetRecordSucceeded to dest: {e}"); -// } -// -// if let Err(e) = src_event_tx.send(NetEvent::DhtPutRecordSucceeded { -// key, -// correlation_id, -// }) { -// error!("pipe: failed to send DhtPutRecordSucceeded to src: {e}"); -// } -// } -// Ok(NetCommand::DhtGetRecord { -// correlation_id, -// key, -// }) => { -// if let Some(value) = store.get(&key).cloned() { -// if let Err(e) = src_event_tx.send(NetEvent::DhtGetRecordSucceeded { -// key, -// correlation_id, -// value, -// }) { -// error!("pipe: failed to send DhtGetRecordSucceeded to src: {e}"); -// } -// } else { -// if let Err(e) = src_event_tx.send(NetEvent::DhtGetRecordError { -// correlation_id, -// error: GetRecordError::NotFound { -// key: RecordKey::new(&key.into_inner()), -// closest_peers: vec![], -// }, -// }) { -// error!("pipe: failed to send DhtGetRecordError to src: {e}"); -// } -// } -// } -// Err(broadcast::error::RecvError::Lagged(n)) => { -// warn!("pipe: src cmd receiver lagged by {n} messages"); -// continue; -// } -// Err(_) => break, -// _ => continue, -// } -// } -// }); -// } - /// Creates test eth addresses /// NOTE: THESE ARE NOT ACTUAL ADDRESSES JUST RANDOM DATA pub fn create_random_eth_addrs(how_many: u32) -> Vec { diff --git 
a/crates/test-helpers/src/libp2p_mock.rs b/crates/test-helpers/src/libp2p_mock.rs index be98387c27..92442b60b9 100644 --- a/crates/test-helpers/src/libp2p_mock.rs +++ b/crates/test-helpers/src/libp2p_mock.rs @@ -39,7 +39,6 @@ impl Libp2pMock { let self_peer_id = peer_id; tokio::spawn(async move { - println!("MOCK: SPAWNING TASK!"); loop { match src_cmd_rx.recv().await { Ok(NetCommand::GossipPublish { @@ -47,15 +46,12 @@ impl Libp2pMock { correlation_id, .. }) => { - println!("MOCK: RECEIVED GOSSIP PUBLISH..."); - // Broadcast to all other nodes let peers = nodes.read().await; for (id, peer) in peers.iter() { if *id == self_peer_id { continue; } - println!("MOCK: FORWARDING GOSSIP PUBLISH..."); if let Err(e) = peer.event_tx().send(NetEvent::GossipData(data.clone())) { error!("Libp2pMock: failed to forward GossipData to {id}: {e}"); diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index 93ca3a9e52..bb9dcf4faa 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -335,23 +335,6 @@ async fn setup_score_sortition_environment( Ok(()) } -fn serialize_report(report: &[(String, Duration)]) -> String { - let max_key_len = report.iter().map(|(k, _)| k.len()).max().unwrap_or(0); - - report - .iter() - .map(|(key, duration)| { - format!( - "{:width$}: {:.3}s", - key, - duration.as_secs_f64(), - width = max_key_len - ) - }) - .collect::>() - .join("\n") -} - #[derive(Default)] struct Report { inner: Vec<(String, Duration)>, @@ -384,7 +367,20 @@ impl Report { } pub fn serialize(&self) -> String { - serialize_report(&self.inner) + let max_key_len = self.inner.iter().map(|(k, _)| k.len()).max().unwrap_or(0); + + self.inner + .iter() + .map(|(key, duration)| { + format!( + "{:width$}: {:.3}s", + key, + duration.as_secs_f64(), + width = max_key_len + ) + }) + .collect::>() + .join("\n") } } @@ -591,57 +587,6 @@ async fn test_trbfv_actor() -> Result<()> { committee_finalized_timer.elapsed(), )); - // First, wait for 
all EncryptionKeyCreated events (BFV key exchange) - // The collector (node 0) only sees events forwarded by simulate_libp2p: - // - EncryptionKeyCreated × 5 (one per party, passes is_document_publisher_event filter) - // Internal events (EncryptionKeyPending, ComputeRequest/Response) stay on committee nodes' local buses. - let encryption_keys_timer = Instant::now(); - - nodes - .expect_events(&[ - "EncryptionKeyCreated", - "EncryptionKeyCreated", - "EncryptionKeyCreated", - "EncryptionKeyCreated", - "EncryptionKeyCreated", - ]) - .await?; - - report.push(( - "All EncryptionKeyCreated events", - encryption_keys_timer.elapsed(), - )); - - // Then wait for all ThresholdShareCreated events - // Each of the 5 parties publishes 5 events (one per target party) = 25 total - // Only ThresholdShareCreated passes the simulate_libp2p filter (is_document_publisher_event). - // Internal events (ComputeRequest/Response for GenPk, GenEsi, ZK proofs, ThresholdSharePending, - // PkGenerationProofSigned, DkgProofSigned) stay on committee nodes' local buses. - let shares_timer = Instant::now(); - - nodes - .expect_events_with_timeout( - &(0..25).map(|_| "ThresholdShareCreated").collect::>(), - Duration::from_secs(3000), - ) - .await?; - - report.push(("All ThresholdShareCreated events", shares_timer.elapsed())); - - // Wait for DecryptionKeyShared (Exchange #3) events - // - DecryptionKeyShared × 5 (passes is_document_publisher_event filter) - // Each committee node publishes DecryptionKeyShared after computing its decryption key - // and generating C4 (share decryption) proofs. 
- let decryption_key_shared_timer = Instant::now(); - nodes - .expect_events(&(0..5).map(|_| "DecryptionKeyShared").collect::>()) - .await?; - - report.push(( - "All DecryptionKeyShared events", - decryption_key_shared_timer.elapsed(), - )); - // Wait for KeyshareCreated + PublicKeyAggregated // - KeyshareCreated × 5 (passes is_forwardable_event filter) // - PublicKeyAggregated × 1 (passes is_forwardable_event filter) @@ -649,14 +594,18 @@ async fn test_trbfv_actor() -> Result<()> { // each party publishes KeyshareCreated. let shares_to_pubkey_agg_timer = Instant::now(); let h = nodes - .expect_events(&[ - "KeyshareCreated", - "KeyshareCreated", - "KeyshareCreated", - "KeyshareCreated", - "KeyshareCreated", - "PublicKeyAggregated", - ]) + .expect_events_with_timeouts( + &[ + "KeyshareCreated", + "KeyshareCreated", + "KeyshareCreated", + "KeyshareCreated", + "KeyshareCreated", + "PublicKeyAggregated", + ], + Duration::from_secs(5000), + Duration::from_secs(600), + ) .await?; report.push(( @@ -876,6 +825,7 @@ async fn test_p2p_actor_forwards_events_to_network() -> Result<()> { assert_eq!( history + .events .into_iter() .map(|e| e.into_data()) .collect::>(), @@ -922,6 +872,7 @@ async fn test_p2p_actor_forwards_events_to_bus() -> Result<()> { assert_eq!( history + .events .into_iter() .map(|e| e.into_data()) .collect::>(), @@ -994,7 +945,7 @@ async fn test_stopped_keyshares_retain_state() -> Result<()> { let tuple = setup_local_ciphernode(&bus, &rng, true, addr, None, cipher).await?; result.push(tuple); } - simulate_libp2p_net(&result); + simulate_libp2p_net(&result).await; Ok(result) } @@ -1084,12 +1035,13 @@ async fn test_stopped_keyshares_retain_state() -> Result<()> { ) .await?; let history_collector = cn1.history().unwrap(); - simulate_libp2p_net(&[cn1, cn2]); + simulate_libp2p_net(&[cn1, cn2]).await; println!("getting collector from cn1.6"); // get the public key from history. 
let pubkey: PublicKey = history + .events .iter() .filter_map(|evt| match evt.get_data() { EnclaveEventData::KeyshareCreated(data) => { @@ -1116,6 +1068,7 @@ async fn test_stopped_keyshares_retain_state() -> Result<()> { .await?; let actual = history + .events .into_iter() .filter_map(|e| match e.into_data() { EnclaveEventData::PlaintextAggregated(data) => Some(data), @@ -1282,7 +1235,7 @@ async fn test_duplicate_e3_id_with_different_chain_id() -> Result<()> { .await?; assert_eq!( - history.last().cloned().unwrap().into_data(), + history.events.last().cloned().unwrap().into_data(), PublicKeyAggregated { pubkey: test_pubkey.to_bytes(), public_key_hash, @@ -1324,7 +1277,7 @@ async fn test_duplicate_e3_id_with_different_chain_id() -> Result<()> { .await?; assert_eq!( - history.last().cloned().unwrap().into_data(), + history.events.last().cloned().unwrap().into_data(), PublicKeyAggregated { pubkey: test_pubkey.to_bytes(), public_key_hash, From 73eaadaf90eeef58cf2f9d493399b97591ab9a58 Mon Sep 17 00:00:00 2001 From: ryardley Date: Mon, 2 Mar 2026 12:23:43 +0000 Subject: [PATCH 37/50] add todo comment for tomorrow --- crates/tests/tests/integration.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index bb9dcf4faa..9b827bd39b 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -888,6 +888,7 @@ async fn test_p2p_actor_forwards_events_to_bus() -> Result<()> { /// Test that stopped keyshares retain their state after restart. /// This test needs to be ported to the new trBFV system once Sync is completed. +// XXX: ENABLE THIS!! 
#[actix::test] #[ignore = "Needs to be ported to trBFV system after Sync is completed"] async fn test_stopped_keyshares_retain_state() -> Result<()> { From f103b2aa3fcb8f7610f0ce90f3940da1774a6d01 Mon Sep 17 00:00:00 2001 From: ryardley Date: Mon, 2 Mar 2026 12:59:41 +0000 Subject: [PATCH 38/50] make all tests pass locally --- crates/zk-prover/tests/backend_tests.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/zk-prover/tests/backend_tests.rs b/crates/zk-prover/tests/backend_tests.rs index b3cfc3b2df..e0f7dea600 100644 --- a/crates/zk-prover/tests/backend_tests.rs +++ b/crates/zk-prover/tests/backend_tests.rs @@ -6,6 +6,8 @@ mod common; +use std::env; + use common::test_backend; use e3_zk_prover::{test_utils::get_tempdir, ZkConfig, ZkProver}; use tokio::fs; @@ -84,6 +86,11 @@ async fn test_work_dir_path_traversal_protection() { #[test] fn test_prover_requires_bb() { + if env::var("E3_CUSTOM_BB").is_ok() { + // Cannot run this test when E3_CUSTOM_BB is set + return; + } + let temp = get_tempdir().unwrap(); let backend = test_backend(temp.path(), ZkConfig::default()); let prover = ZkProver::new(&backend); From 470d088189950d5dfb06e0b1aba3df741fcc9712 Mon Sep 17 00:00:00 2001 From: ryardley Date: Mon, 2 Mar 2026 13:02:39 +0000 Subject: [PATCH 39/50] remove old history collector --- crates/events/src/eventbus.rs | 145 ---------------------------------- 1 file changed, 145 deletions(-) diff --git a/crates/events/src/eventbus.rs b/crates/events/src/eventbus.rs index 9578c8b358..14eb21939e 100644 --- a/crates/events/src/eventbus.rs +++ b/crates/events/src/eventbus.rs @@ -301,146 +301,6 @@ impl GetErrors { ////////////////////////////////////////////////////////////////////////////// // History Collector ////////////////////////////////////////////////////////////////////////////// -// -// /// Actor to subscribe to EventBus to capture all history -// pub struct HistoryCollector { -// history: VecDeque, -// pending_takes: Vec>, -// } -// -// impl 
HistoryCollector { -// pub fn new() -> Self { -// Self { -// history: VecDeque::new(), -// pending_takes: Vec::new(), -// } -// } -// -// fn try_fulfill_pending_takes(&mut self) { -// let mut completed = Vec::new(); -// -// // For each pending take, try to fulfill it -// for (idx, pending) in self.pending_takes.iter_mut().enumerate() { -// // Fill from history first -// while pending.collected.len() < pending.count && !self.history.is_empty() { -// pending.collected.push(self.history.pop_front().unwrap()); -// } -// -// // If we have enough, mark as complete -// if pending.collected.len() >= pending.count { -// info!( -// "HHH: > [{}] We have received all {} events", -// idx, -// pending.collected.len() -// ); -// completed.push(idx); -// } else { -// info!( -// "HHH: > [{}] We are still waiting on events {}/{}...", -// idx, -// pending.collected.len(), -// pending.count -// ) -// } -// } -// -// // Send responses for completed takes (in reverse order to maintain indices) -// for idx in completed.into_iter().rev() { -// let pending = self.pending_takes.swap_remove(idx); -// let events = pending.collected.into_iter().take(pending.count).collect(); -// let _ = pending.responder.send(events); -// } -// } -// -// fn add_event(&mut self, event: E) { -// info!("HHH: Received event {}", event.event_type()); -// // First try to give to pending takes -// for (idx, pending) in &mut self.pending_takes.iter_mut().enumerate() { -// if pending.collected.len() < pending.count { -// info!( -// "HHH: > [{}] Pushing {} to pending take {}/{}...", -// idx, -// event.event_type(), -// pending.collected.len() + 1, -// pending.count -// ); -// pending.collected.push(event); -// self.try_fulfill_pending_takes(); -// return; -// } -// } -// -// // No pending take needed it, add to history -// self.history.push_back(event); -// } -// } -// -// impl Handler> for HistoryCollector { -// type Result = Vec; -// -// fn handle(&mut self, _: GetEvents, _: &mut Context) -> Vec { -// 
self.history.iter().cloned().collect() -// } -// } -// -// impl Handler> for HistoryCollector { -// type Result = ResponseActFuture>; -// -// fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { -// let count = msg.amount; -// info!("HHH: take() requested for {} events.", msg.amount); -// // If we have enough events in history, return immediately -// if self.history.len() >= count { -// let events: Vec = self.history.drain(..count).collect(); -// info!( -// "HHH: Returning {} events. Completed take() immediately.", -// events.len() -// ); -// return Box::pin(async move { events }.into_actor(self)); -// } -// -// info!( -// "HHH: Requesting {} events but only {} in the buffer. waiting for more...", -// msg.amount, -// self.history.len() -// ); -// -// // Create a tokio oneshot channel for the response -// let (tx, rx) = tokio::sync::oneshot::channel(); -// -// // Collect what we can from history -// let mut collected = Vec::new(); -// while !self.history.is_empty() && collected.len() < count { -// collected.push(self.history.pop_front().unwrap()); -// } -// -// // Store the pending request -// let idx = self.pending_takes.len(); -// self.pending_takes.push(PendingTake { -// count, -// collected, -// responder: tx, -// }); -// info!("HHH: [{}] waiting for take to complete...", idx); -// // Return future that waits for the response -// Box::pin(async move { rx.await.unwrap_or_else(|_| Vec::new()) }.into_actor(self)) -// } -// } -// -// impl Actor for HistoryCollector { -// type Context = Context; -// fn started(&mut self, ctx: &mut Self::Context) { -// ctx.set_mailbox_capacity(MAILBOX_LIMIT) -// } -// } -// -// impl Handler for HistoryCollector { -// type Result = E::Result; -// fn handle(&mut self, msg: E, _ctx: &mut Self::Context) -> Self::Result { -// self.add_event(msg); -// } -// } -// struct HistoryCollectorWaiter { rx: Option>, @@ -484,7 +344,6 @@ impl Handler> for HistoryCollectorWaiter impl Handler for HistoryCollectorWaiter { type Result 
= (); fn handle(&mut self, _: ResetHistory, _: &mut Context) { - println!("HHH: WAITER RESETING HISTORY"); if let Some(ref mut rx) = self.rx { while rx.try_recv().is_ok() {} } @@ -519,7 +378,6 @@ impl Actor for HistoryCollector { impl Handler for HistoryCollector { type Result = E::Result; fn handle(&mut self, msg: E, _ctx: &mut Self::Context) -> Self::Result { - println!("HHH: EVENT: {}", msg.event_type()); self.history.push(msg.clone()); let _ = self.tx.send(msg); } @@ -528,7 +386,6 @@ impl Handler for HistoryCollector { impl Handler for HistoryCollector { type Result = (); fn handle(&mut self, _: ResetHistory, _: &mut Context) { - println!("HHH: RESET"); self.history.clear(); self.waiter.do_send(ResetHistory); } @@ -537,7 +394,6 @@ impl Handler for HistoryCollector { impl Handler> for HistoryCollector { type Result = ResponseActFuture>; fn handle(&mut self, msg: TakeEvents, _: &mut Context) -> Self::Result { - println!("HHH: TAKE EVENTS"); let fut = self.waiter.send(msg); Box::pin(async move { fut.await.unwrap() }.into_actor(self)) } @@ -546,7 +402,6 @@ impl Handler> for HistoryCollector { impl Handler> for HistoryCollector { type Result = Vec; fn handle(&mut self, _: GetEvents, _: &mut Context) -> Vec { - println!("HHH: GET EVENTS"); self.history.clone() } } From 281466d51bd273c76b9cf93fe92bdab6655d06dc Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 3 Mar 2026 00:48:47 +0000 Subject: [PATCH 40/50] ensure timeouts are long enough so that tests pass --- crates/test-helpers/src/ciphernode_system.rs | 10 +++++----- crates/tests/tests/integration.rs | 7 ++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/crates/test-helpers/src/ciphernode_system.rs b/crates/test-helpers/src/ciphernode_system.rs index 67e8d31440..3a5a079323 100644 --- a/crates/test-helpers/src/ciphernode_system.rs +++ b/crates/test-helpers/src/ciphernode_system.rs @@ -122,7 +122,7 @@ impl CiphernodeSystem { /// expect events to fire with the default timeout 1000sec per event 
pub async fn expect_events(&self, expected: &[&str]) -> Result { let h = self - .take_history_with_timeout_impl( + .take_history_with_timeouts( 0, expected.len(), Some(Duration::from_secs(1000)), @@ -141,7 +141,7 @@ impl CiphernodeSystem { expected: &[&str], ) -> Result { let h = self - .take_history_with_timeout_impl(0, expected.len(), None, None) + .take_history_with_timeouts(0, expected.len(), None, None) .await .map_err(|e| anyhow::anyhow!("FAILURE: {expected:?} : {e}"))?; @@ -157,7 +157,7 @@ impl CiphernodeSystem { per_evt_to: Duration, // per event ) -> Result { let h = self - .take_history_with_timeout_impl(0, expected.len(), Some(total_to), Some(per_evt_to)) + .take_history_with_timeouts(0, expected.len(), Some(total_to), Some(per_evt_to)) .await .map_err(|e| anyhow::anyhow!("FAILURE: {expected:?} : {e}"))?; println!(">> {:?} == {:?}", h.event_types(), expected.to_vec()); @@ -172,7 +172,7 @@ impl CiphernodeSystem { count: usize, total_to: Duration, ) -> Result { - self.take_history_with_timeout_impl( + self.take_history_with_timeouts( index, count, Some(total_to), @@ -181,7 +181,7 @@ impl CiphernodeSystem { .await } - pub async fn take_history_with_timeout_impl( + pub async fn take_history_with_timeouts( &self, index: usize, count: usize, diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index 9b827bd39b..b0d631cd19 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -687,7 +687,12 @@ async fn test_trbfv_actor() -> Result<()> { let expected_count = 1 + 5 + 1 + 1 + 1; let h = nodes - .take_history_with_timeout(0, expected_count, Duration::from_secs(1000)) + .take_history_with_timeouts( + 0, + expected_count, + Some(Duration::from_secs(1000)), + Some(Duration::from_secs(1000)), + ) .await?; report.push(( From b1ed650d59f8ae8720dbec504a4c68a4f084d9dd Mon Sep 17 00:00:00 2001 From: ryardley Date: Tue, 3 Mar 2026 02:04:12 +0000 Subject: [PATCH 41/50] unsubscribe on wait_for --- 
crates/events/src/bus_handle.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/events/src/bus_handle.rs b/crates/events/src/bus_handle.rs index e08a1cf1c5..87ce6d08da 100644 --- a/crates/events/src/bus_handle.rs +++ b/crates/events/src/bus_handle.rs @@ -294,7 +294,12 @@ impl EventSubscriber> for BusHandle { ) -> Pin>> + Send>> { let (addr, rx) = oneshot::>(); self.subscribe(event_type, addr.clone()); - Box::pin(async move { Ok(rx.await?) }) + let bus = self.event_bus.clone(); + Box::pin(async move { + let r = rx.await?; + bus.do_send(Unsubscribe::new(event_type, addr)); + Ok(r) + }) } } From a4ccf0811ec59b591648a151691d6f8d731ea1ce Mon Sep 17 00:00:00 2001 From: ryardley Date: Wed, 4 Mar 2026 05:55:50 +0000 Subject: [PATCH 42/50] fix bad merge --- crates/tests/tests/integration.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index 3b9f08f48a..f94b794bc8 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -612,6 +612,7 @@ async fn test_trbfv_actor() -> Result<()> { // - ComputeRequest (C5 PkAggregation proof dispatched by PublicKeyAggregator) // - ComputeResponse (C5 PkAggregation proof result) // - PublicKeyAggregated × 1 + let shares_to_pubkey_agg_timer = Instant::now(); let h = nodes .expect_events_with_timeouts( &[ From 23fcb7657cd9185e8722a3c2cf4b07f85e9c496f Mon Sep 17 00:00:00 2001 From: ryardley Date: Wed, 4 Mar 2026 06:23:20 +0000 Subject: [PATCH 43/50] fix up unlikely error condition --- crates/events/src/bus_handle.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/events/src/bus_handle.rs b/crates/events/src/bus_handle.rs index 87ce6d08da..df424f062b 100644 --- a/crates/events/src/bus_handle.rs +++ b/crates/events/src/bus_handle.rs @@ -292,13 +292,13 @@ impl EventSubscriber> for BusHandle { &self, event_type: EventType, ) -> Pin>> + Send>> { - let (addr, rx) = oneshot::>(); - 
self.subscribe(event_type, addr.clone()); let bus = self.event_bus.clone(); Box::pin(async move { - let r = rx.await?; + let (addr, rx) = oneshot::>(); + bus.do_send(Subscribe::new(event_type, addr.clone())); + let received = rx.await; bus.do_send(Unsubscribe::new(event_type, addr)); - Ok(r) + Ok(received?) }) } } From cbfae201b6d572656c47959f348645a277f46ce7 Mon Sep 17 00:00:00 2001 From: ryardley Date: Wed, 4 Mar 2026 06:28:40 +0000 Subject: [PATCH 44/50] fix up bugs in requester --- crates/net/src/direct_requester.rs | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/net/src/direct_requester.rs b/crates/net/src/direct_requester.rs index 4248700596..2c099659f1 100644 --- a/crates/net/src/direct_requester.rs +++ b/crates/net/src/direct_requester.rs @@ -346,12 +346,14 @@ impl DirectRequesterTester { }); // Reverse expectations so we can pop from the back in order. self.expectations.reverse(); + self.responses.reverse(); tokio::spawn(async move { let mut remaining = num_requests; while remaining > 0 { if let Some(cmd) = self.net_cmds_rx.recv().await { if let NetCommand::OutgoingRequest(req) = cmd { + remaining -= 1; let response = if let Some(expectation) = self.expectations.pop() { assert_eq!( req.payload, expectation.expected_request, @@ -392,7 +394,6 @@ impl DirectRequesterTester { }; let _ = self.net_events_tx.send(response); } - remaining -= 1; } else { break; } @@ -535,4 +536,34 @@ mod tests { .to_string() .contains("connection refused")); } + + #[tokio::test] + async fn test_respond_with_each() { + let (net_cmds_tx, net_cmds_rx) = mpsc::channel::(16); + let (net_events_tx, net_events_rx) = broadcast::channel::(16); + let net_events = Arc::new(net_events_rx); + + let requester = DirectRequester::builder(net_cmds_tx, net_events).build(); + + let handle = DirectRequesterTester::new(net_cmds_rx, net_events_tx) + .respond_with_each(vec![ + b"first_response".to_vec(), + b"second_response".to_vec(), + 
b"third_response".to_vec(), + ]) + .num_requests(3) + .spawn(); + + let peer = requester.to(PeerTarget::Random); + + let r1: Vec = peer.request(b"req1".to_vec()).await.unwrap(); + let r2: Vec = peer.request(b"req2".to_vec()).await.unwrap(); + let r3: Vec = peer.request(b"req3".to_vec()).await.unwrap(); + + handle.await.unwrap(); + + assert_eq!(r1, b"first_response"); + assert_eq!(r2, b"second_response"); + assert_eq!(r3, b"third_response"); + } } From 19bd161e029890717ce4fca4ddbd01bec6651a25 Mon Sep 17 00:00:00 2001 From: ryardley Date: Wed, 4 Mar 2026 07:00:21 +0000 Subject: [PATCH 45/50] Revert "fix up unlikely error condition" This reverts commit 23fcb7657cd9185e8722a3c2cf4b07f85e9c496f. --- crates/events/src/bus_handle.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/events/src/bus_handle.rs b/crates/events/src/bus_handle.rs index df424f062b..87ce6d08da 100644 --- a/crates/events/src/bus_handle.rs +++ b/crates/events/src/bus_handle.rs @@ -292,13 +292,13 @@ impl EventSubscriber> for BusHandle { &self, event_type: EventType, ) -> Pin>> + Send>> { + let (addr, rx) = oneshot::>(); + self.subscribe(event_type, addr.clone()); let bus = self.event_bus.clone(); Box::pin(async move { - let (addr, rx) = oneshot::>(); - bus.do_send(Subscribe::new(event_type, addr.clone())); - let received = rx.await; + let r = rx.await?; bus.do_send(Unsubscribe::new(event_type, addr)); - Ok(received?) 
+ Ok(r) }) } } From dfecba971db238fadf86b14abc51ce6ed9380f0f Mon Sep 17 00:00:00 2001 From: ryardley Date: Wed, 4 Mar 2026 07:28:39 +0000 Subject: [PATCH 46/50] add other code fixes --- crates/net/src/net_interface.rs | 9 ++++++++- crates/net/src/net_sync_manager.rs | 6 +++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 59d3d55d7e..c0f39d516c 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -588,7 +588,14 @@ async fn process_swarm_command( payload, target, }) => { - handle_outgoing_request(swarm, correlator, correlation_id, payload, target)?; + if let Err(e) = + handle_outgoing_request(swarm, correlator, correlation_id, payload, target) + { + event_tx.send(NetEvent::OutgoingRequestFailed(OutgoingRequestFailed { + correlation_id, + error: e.to_string(), + }))?; + }; Ok(()) } NetCommand::IncomingResponse(IncomingResponse { responder }) => { diff --git a/crates/net/src/net_sync_manager.rs b/crates/net/src/net_sync_manager.rs index 030713934b..483b13507e 100644 --- a/crates/net/src/net_sync_manager.rs +++ b/crates/net/src/net_sync_manager.rs @@ -216,6 +216,10 @@ impl Handler for NetSyncManager { let fetch_request: FetchEventsSince = responder.try_request_into()?; let limit = fetch_request.limit(); + if limit == 0 { + responder.bad_request("limit must be greater than 0")?; + return Ok(()); + } let aggregate_id = fetch_request.aggregate_id(); let events: Vec> = msg .into_events() @@ -226,7 +230,7 @@ impl Handler for NetSyncManager { .collect(); let next = if events.len() == limit { - let last_event_ts = events.get(limit - 1).map(|e| e.ts()).unwrap_or(0); + let last_event_ts = events.last().map(|e| e.ts()).unwrap_or(0); BatchCursor::Next(last_event_ts) } else { BatchCursor::Done From 5a741459790776ddc95e126e02255e769a7babdb Mon Sep 17 00:00:00 2001 From: ryardley Date: Fri, 6 Mar 2026 05:20:09 +0000 Subject: [PATCH 47/50] add comma --- 
crates/events/src/enclave_event/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/events/src/enclave_event/mod.rs b/crates/events/src/enclave_event/mod.rs index 31d5358c0d..3ae7093106 100644 --- a/crates/events/src/enclave_event/mod.rs +++ b/crates/events/src/enclave_event/mod.rs @@ -613,7 +613,7 @@ impl_event_types!( SyncEffect, SyncEnded, EffectsEnabled, - NetReady + NetReady, DecryptionShareProofSigned, ShareDecryptionProofPending, PkAggregationProofPending, From accc8ea2443acdaa40b3736f3d074db35e13416f Mon Sep 17 00:00:00 2001 From: ryardley Date: Fri, 6 Mar 2026 05:59:54 +0000 Subject: [PATCH 48/50] fix up output and use arcbytes for large keys --- crates/aggregator/src/publickey_aggregator.rs | 9 +++++---- .../src/enclave_event/pk_aggregation_proof_pending.rs | 3 ++- crates/events/src/enclave_event/publickey_aggregated.rs | 3 ++- crates/evm/src/ciphernode_registry_sol.rs | 6 +++--- crates/tests/tests/integration.rs | 6 +++--- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/crates/aggregator/src/publickey_aggregator.rs b/crates/aggregator/src/publickey_aggregator.rs index f4dad40569..2a3d6d33f2 100644 --- a/crates/aggregator/src/publickey_aggregator.rs +++ b/crates/aggregator/src/publickey_aggregator.rs @@ -42,13 +42,13 @@ pub enum PublicKeyAggregatorState { no_proof_parties: Vec, }, GeneratingC5Proof { - public_key: Vec, + public_key: ArcBytes, public_key_hash: [u8; 32], keyshare_bytes: Vec, nodes: OrderedSet, }, Complete { - public_key: Vec, + public_key: ArcBytes, keyshares: OrderedSet, nodes: OrderedSet, }, @@ -293,12 +293,13 @@ impl PublicKeyAggregator { let committee_h = honest_keyshares.len(); info!("Publishing PkAggregationProofPending for C5 proof generation..."); + let pubkey = ArcBytes::from_bytes(&pubkey); self.bus.publish( PkAggregationProofPending { e3_id: self.e3_id.clone(), proof_request: PkAggregationProofRequest { keyshare_bytes: honest_keyshares.clone(), - aggregated_pk_bytes: 
ArcBytes::from_bytes(&pubkey), + aggregated_pk_bytes: pubkey.clone(), params_preset: self.params_preset.clone(), // this field is not really used in the circuit, we only use H committee_n: committee_h, @@ -316,7 +317,7 @@ impl PublicKeyAggregator { // Transition to GeneratingC5Proof self.state.try_mutate(&ec, |_| { Ok(PublicKeyAggregatorState::GeneratingC5Proof { - public_key: pubkey, + public_key: pubkey.clone(), public_key_hash, keyshare_bytes: honest_keyshares, nodes: honest_nodes_set, diff --git a/crates/events/src/enclave_event/pk_aggregation_proof_pending.rs b/crates/events/src/enclave_event/pk_aggregation_proof_pending.rs index 2ee5b1afe8..52dd677868 100644 --- a/crates/events/src/enclave_event/pk_aggregation_proof_pending.rs +++ b/crates/events/src/enclave_event/pk_aggregation_proof_pending.rs @@ -12,6 +12,7 @@ //! `PkAggregationProofSigned`. use crate::{E3id, OrderedSet, PkAggregationProofRequest}; +use e3_utils::ArcBytes; use serde::{Deserialize, Serialize}; /// PublicKeyAggregator -> ProofRequestActor: generate and sign C5 proof. 
@@ -19,7 +20,7 @@ use serde::{Deserialize, Serialize}; pub struct PkAggregationProofPending { pub e3_id: E3id, pub proof_request: PkAggregationProofRequest, - pub public_key: Vec, + pub public_key: ArcBytes, pub public_key_hash: [u8; 32], pub nodes: OrderedSet, } diff --git a/crates/events/src/enclave_event/publickey_aggregated.rs b/crates/events/src/enclave_event/publickey_aggregated.rs index f0f13b814d..bcf4563890 100644 --- a/crates/events/src/enclave_event/publickey_aggregated.rs +++ b/crates/events/src/enclave_event/publickey_aggregated.rs @@ -7,6 +7,7 @@ use crate::{E3id, OrderedSet, Proof}; use actix::Message; use derivative::Derivative; +use e3_utils::ArcBytes; use serde::{Deserialize, Serialize}; use std::fmt::{self, Display}; @@ -15,7 +16,7 @@ use std::fmt::{self, Display}; #[rtype(result = "()")] pub struct PublicKeyAggregated { #[derivative(Debug(format_with = "e3_utils::formatters::hexf"))] - pub pubkey: Vec, // TODO: ArcBytes ? + pub pubkey: ArcBytes, #[derivative(Debug(format_with = "e3_utils::formatters::hexf"))] pub public_key_hash: [u8; 32], // TODO: ArcBytes32 ? 
pub e3_id: E3id, diff --git a/crates/evm/src/ciphernode_registry_sol.rs b/crates/evm/src/ciphernode_registry_sol.rs index d56a9e6c4f..624c7aed39 100644 --- a/crates/evm/src/ciphernode_registry_sol.rs +++ b/crates/evm/src/ciphernode_registry_sol.rs @@ -23,7 +23,7 @@ use e3_events::{ EffectsEnabled, EnclaveEvent, EnclaveEventData, EventSubscriber, EventType, OrderedSet, PublicKeyAggregated, Seed, Shutdown, TicketGenerated, TicketId, }; -use e3_utils::{NotifySync, MAILBOX_LIMIT}; +use e3_utils::{ArcBytes, NotifySync, MAILBOX_LIMIT}; use tracing::{error, info, trace}; sol!( @@ -501,11 +501,11 @@ pub async fn publish_committee_to_registry, - public_key: Vec, + public_key: ArcBytes, public_key_hash: [u8; 32], ) -> Result { let e3_id_u256: U256 = e3_id.try_into()?; - let public_key_bytes = Bytes::from(public_key); + let public_key_bytes = Bytes::from(public_key.extract_bytes()); let public_key_hash_fixed = FixedBytes::from(public_key_hash); let nodes_vec: Vec
= nodes .into_iter() diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index 0e08149340..ac3779ef40 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -661,6 +661,7 @@ async fn test_trbfv_actor() -> Result<()> { "KeyshareCreated", "KeyshareCreated", "KeyshareCreated", + "ShareVerificationDispatched", "ComputeRequest", "ComputeResponse", "ShareVerificationComplete", @@ -668,7 +669,6 @@ async fn test_trbfv_actor() -> Result<()> { "ComputeRequest", "ComputeResponse", "PkAggregationProofSigned", - "PublicKeyAggregated", ], Duration::from_secs(5000), Duration::from_secs(600), @@ -1344,7 +1344,7 @@ async fn test_duplicate_e3_id_with_different_chain_id() -> Result<()> { assert_eq!( history.events.last().cloned().unwrap().into_data(), PublicKeyAggregated { - pubkey: test_pubkey.to_bytes(), + pubkey: ArcBytes::from_bytes(&test_pubkey.to_bytes()), public_key_hash, e3_id: E3id::new("1234", 1), nodes: OrderedSet::from(eth_addrs.clone()), @@ -1387,7 +1387,7 @@ async fn test_duplicate_e3_id_with_different_chain_id() -> Result<()> { assert_eq!( history.events.last().cloned().unwrap().into_data(), PublicKeyAggregated { - pubkey: test_pubkey.to_bytes(), + pubkey: ArcBytes::from_bytes(&test_pubkey.to_bytes()), public_key_hash, e3_id: E3id::new("1234", 2), nodes: OrderedSet::from(eth_addrs.clone()), From 6179f2efef609e6ba7b51763ab10cea9c3e4afc0 Mon Sep 17 00:00:00 2001 From: ryardley Date: Fri, 6 Mar 2026 06:02:58 +0000 Subject: [PATCH 49/50] add public key aggregated event --- crates/tests/tests/integration.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/tests/tests/integration.rs b/crates/tests/tests/integration.rs index ac3779ef40..e123893ec9 100644 --- a/crates/tests/tests/integration.rs +++ b/crates/tests/tests/integration.rs @@ -669,6 +669,7 @@ async fn test_trbfv_actor() -> Result<()> { "ComputeRequest", "ComputeResponse", "PkAggregationProofSigned", + "PublicKeyAggregated", 
], Duration::from_secs(5000), Duration::from_secs(600), @@ -1260,7 +1261,7 @@ async fn test_duplicate_e3_id_with_different_chain_id() -> Result<()> { .await?; result.push(tuple); } - simulate_libp2p_net(&result); + simulate_libp2p_net(&result).await; Ok(result) } From c5deb8355764f77470a7aa9c87024252f603e817 Mon Sep 17 00:00:00 2001 From: ryardley Date: Mon, 9 Mar 2026 03:58:44 +0000 Subject: [PATCH 50/50] use iter() over to_vec().into_iter() --- crates/sync/src/sync.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/sync/src/sync.rs b/crates/sync/src/sync.rs index e7e8297fc0..b5f20e0b56 100644 --- a/crates/sync/src/sync.rs +++ b/crates/sync/src/sync.rs @@ -200,8 +200,7 @@ fn find_net_hlc(events: &[EnclaveEvent]) -> BTreeMap