diff --git a/CMakeLists.txt b/CMakeLists.txt index cee1aa7..c94c7a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,6 +139,7 @@ add_library(moqx_core STATIC src/relay/PropertyRanking.cpp src/relay/CrossExecFilter.cpp src/relay/CrossExecForwarderCallback.cpp + src/relay/WeakRelayForwarderCallback.cpp src/relay/PublisherCrossExecFilter.cpp src/relay/SubscriberCrossExecFilter.cpp ) diff --git a/deps/moxygen b/deps/moxygen index e282ba6..8850292 160000 --- a/deps/moxygen +++ b/deps/moxygen @@ -1 +1 @@ -Subproject commit e282ba6c871fed0fd0ebc90dc98d4893ae04f9df +Subproject commit 8850292d49db9ca70a7bf627b2ba3d5539e9a2d0 diff --git a/scripts/perf-test.sh b/scripts/perf-test.sh index ca3691a..efc9d88 100755 --- a/scripts/perf-test.sh +++ b/scripts/perf-test.sh @@ -15,6 +15,8 @@ # --delivery-timeout N Delivery timeout in ms (default: 500) # --transport TYPE quic or webtransport (default: quic) # --no-relay-thread Disable relay exec thread (use_relay_thread: false) +# --local-forwarders Enable per-subscriber-thread local forwarders +# (use_local_forwarders: true; requires relay thread) # --io-threads N Number of relay IO threads (default: 1) # --threads N Number of perf client threads (default: 2) # --relay-log SPEC folly XLOG config passed as --logging=SPEC to relay @@ -34,6 +36,9 @@ # locally. The binary is expected at # /tmp/moqperf_test_client on the remote host. # HOST may include a user prefix (user@host). +# --udp-socket-buffer-bytes N +# UDP socket send/recv buffer size in bytes for the +# relay listener (defaults to net.core.wmem_max). # # Linux-only options (not supported on macOS): # --metrics, --perf-duration, --perf-events, --perf-stat, --jemalloc, @@ -52,6 +57,7 @@ DURATION=30 DELIVERY_TIMEOUT=500 TRANSPORT="quic" USE_RELAY_THREAD="true" +USE_LOCAL_FORWARDERS="false" IO_THREADS=1 CLIENT_THREADS=2 RELAY_LOG_SPEC="" @@ -62,6 +68,7 @@ PERF_DURATION=0 PERF_EVENTS="" RUN_PERF_STAT=false REMOTE_CLIENT_HOST="" +UDP_SOCKET_BUFFER_BYTES=$(cat /proc/sys/net/core/wmem_max 2>/dev/null || echo 1048576) TRACE_SCRIPT="" CLIENT_EXTRA_ARGS=() @@ -80,6 +87,7 @@ while [[ $# -gt 0 ]]; do --delivery-timeout) DELIVERY_TIMEOUT="$2"; shift 2 ;; --transport) TRANSPORT="$2"; shift 2 ;; --no-relay-thread) USE_RELAY_THREAD="false"; shift ;; + --local-forwarders) USE_LOCAL_FORWARDERS="true"; shift ;; --io-threads) IO_THREADS="$2"; shift 2 ;; --threads) CLIENT_THREADS="$2"; shift 2 ;; --relay-log) RELAY_LOG_SPEC="$2"; shift 2 ;; @@ -93,6 +101,7 @@ while [[ $# -gt 0 ]]; do --trace-script) TRACE_SCRIPT="$2"; shift 2 ;; --client-args) read -ra CLIENT_EXTRA_ARGS <<< "$2"; shift 2 ;; --remote-client) REMOTE_CLIENT_HOST="$2"; shift 2 ;; + --udp-socket-buffer-bytes) UDP_SOCKET_BUFFER_BYTES="$2"; shift 2 ;; *) echo "Unknown option: $1" >&2; exit 1 ;; esac done @@ -206,6 +215,7 @@ cat >"$RELAY_CFG" </dev/null || true -echo "Starting relay (use_relay_thread=$USE_RELAY_THREAD, io_threads=$IO_THREADS, transport=$TRANSPORT, mvfst_bpf_steering=$BPF_STEERING)..." +echo "Starting relay (use_relay_thread=$USE_RELAY_THREAD, local_forwarders=$USE_LOCAL_FORWARDERS, io_threads=$IO_THREADS, transport=$TRANSPORT, mvfst_bpf_steering=$BPF_STEERING)..." RELAY_LOGGING_ARG=() [[ -n "$RELAY_LOG_SPEC" ]] && RELAY_LOGGING_ARG=("--logging=$RELAY_LOG_SPEC") if [[ -n "$JEMALLOC" ]]; then diff --git a/src/MoqxCache.cpp b/src/MoqxCache.cpp index ca37d07..6eac3aa 100644 --- a/src/MoqxCache.cpp +++ b/src/MoqxCache.cpp @@ -7,6 +7,7 @@ */ #include "MoqxCache.h" +#include "relay/NullConsumers.h" #include #include @@ -1292,6 +1293,10 @@ class MoqxCache::FetchWriteback : public FetchConsumer { } }; +std::shared_ptr MoqxCache::makePassiveConsumer(const FullTrackName& ftn) { + return getSubscribeWriteback(ftn, std::make_shared()); +} + std::shared_ptr MoqxCache::getSubscribeWriteback( const FullTrackName& ftn, std::shared_ptr consumer diff --git a/src/MoqxCache.h b/src/MoqxCache.h index 19951a7..2412512 100644 --- a/src/MoqxCache.h +++ b/src/MoqxCache.h @@ -48,6 +48,11 @@ class MoqxCache { std::shared_ptr consumer ); + // Returns a TrackConsumer suitable for use as a passive forwarder subscriber. + // Objects delivered to it are written to the cache; nothing is forwarded + // downstream (the inner consumer is a NullTrackConsumer). + std::shared_ptr makePassiveConsumer(const moxygen::FullTrackName& ftn); + // Serves objects from the cache to the consumer. If objects in the range are // not in cache, issue one-or-more FETCH'es upstream. Objects fetched from // upstream are written back to the cache and passed to the consumer. diff --git a/src/MoqxRelay.cpp b/src/MoqxRelay.cpp index dbe2271..d6a926f 100644 --- a/src/MoqxRelay.cpp +++ b/src/MoqxRelay.cpp @@ -7,6 +7,13 @@ */ #include "MoqxRelay.h" +#include "relay/CrossExecFilter.h" +#include "relay/CrossExecForwarderCallback.h" +#include "relay/LocalForwarderCallback.h" +#include "relay/NullConsumers.h" +#include "relay/PublisherCrossExecFilter.h" +#include "relay/SubscriberCrossExecFilter.h" +#include "relay/WeakRelayForwarderCallback.h" #include #include #include @@ -28,9 +35,11 @@ class MoqxRelayNamespaceHandle : public Publisher::NamespacePublishHandle { MoqxRelayNamespaceHandle( std::weak_ptr relay, std::shared_ptr session, - std::string peerID = {} + std::string peerID = {}, + folly::Executor* relayExec = nullptr ) - : relay_(std::move(relay)), session_(std::move(session)), peerID_(std::move(peerID)) {} + : relay_(std::move(relay)), session_(std::move(session)), peerID_(std::move(peerID)), + relayExec_(relayExec) {} ~MoqxRelayNamespaceHandle() { auto relay = relay_.lock(); @@ -38,52 +47,68 @@ class MoqxRelayNamespaceHandle : public Publisher::NamespacePublishHandle { return; } for (const auto& ns : activeNamespaces_) { - relay->doPublishNamespaceDone(ns, session_); + runOnExec(relayExec_, [relay, ns, session = session_]() mutable { + relay->doPublishNamespaceDone(ns, session); + }); } } void namespaceMsg(const TrackNamespace& suffix) override { - auto relay = relay_.lock(); - if (!relay || !session_) { - return; - } activeNamespaces_.insert(suffix); PublishNamespace pubNs; pubNs.trackNamespace = suffix; - relay->doPublishNamespace(std::move(pubNs), session_, nullptr, peerID_); + runOnExec( + relayExec_, + [relay = relay_, pubNs = std::move(pubNs), session = session_, peerID = peerID_]() mutable { + if (auto r = relay.lock()) { + r->doPublishNamespace(std::move(pubNs), session, nullptr, peerID); + } + } + ); } void namespaceDoneMsg(const TrackNamespace& suffix) override { - auto relay = relay_.lock(); - if (!relay || !session_) { - return; - } activeNamespaces_.erase(suffix); - relay->doPublishNamespaceDone(suffix, session_); + runOnExec(relayExec_, [relay = relay_, suffix, session = session_]() mutable { + if (auto r = relay.lock()) { + r->doPublishNamespaceDone(suffix, session); + } + }); } private: std::weak_ptr relay_; std::shared_ptr session_; std::string peerID_; + folly::Executor* relayExec_; folly::F14FastSet activeNamespaces_; }; std::shared_ptr makeNamespaceBridgeHandle( std::weak_ptr relay, std::shared_ptr session, - std::string peerID + std::string peerID, + folly::Executor* relayExec ) { return std::make_shared( std::move(relay), std::move(session), - std::move(peerID) + std::move(peerID), + relayExec ); } folly::coro::Task MoqxRelay::onUpstreamConnect(std::shared_ptr session) { - auto nsHandle = makeNamespaceBridgeHandle(weak_from_this(), session); - auto result = co_await session->subscribeNamespace(makePeerSubNs(relayID_), nsHandle); + co_return co_await onUpstreamConnectImpl(std::move(session)); +} + +folly::coro::Task MoqxRelay::onUpstreamConnectImpl(std::shared_ptr session) { + auto nsHandle = makeNamespaceBridgeHandle(weak_from_this(), session, {}, relayExec_); + // subscribeNamespace must run on the upstream session's executor + auto result = co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(session->getExecutor()), + session->subscribeNamespace(makePeerSubNs(relayID_), nsHandle) + ); if (result.hasValue()) { upstreamSubNsHandle_ = std::move(result.value()); } else { @@ -168,7 +193,7 @@ std::shared_ptr MoqxRelay::doPublishNamespac if (outSession != session && (info.options == SubscribeNamespaceOptions::NAMESPACE || info.options == SubscribeNamespaceOptions::BOTH)) { if (info.namespacePublishHandle) { - // Draft 16+: send NAMESPACE message on the bidi stream + // Draft 16+: send NAMESPACE message on the bidi stream. TrackNamespace suffix(std::vector( pubNs.trackNamespace.trackNamespace.begin() + info.trackNamespacePrefix.size(), pubNs.trackNamespace.trackNamespace.end() @@ -187,6 +212,13 @@ std::shared_ptr MoqxRelay::doPublishNamespac folly::coro::Task MoqxRelay::publishNamespace( PublishNamespace pubNs, std::shared_ptr callback +) { + return publishNamespaceImpl(std::move(pubNs), std::move(callback)); +} + +folly::coro::Task MoqxRelay::publishNamespaceImpl( + PublishNamespace pubNs, + std::shared_ptr callback ) { // TODO: store auth for forwarding on future SubscribeNamespace? auto session = MoQSession::getRequestSession(); @@ -274,20 +306,69 @@ Subscriber::PublishResult MoqxRelay::publish(PublishRequest pub, std::shared_ptr handle) { XLOG(DBG1) << __func__ << " ftn=" << pub.fullTrackName; XCHECK(handle) << "Publish handle cannot be null"; + // Validate before touching relay state (safe to do on calling thread). if (!pub.fullTrackName.trackNamespace.startsWith(allowedNamespacePrefix_)) { return folly::makeUnexpected( PublishError{pub.requestID, PublishErrorCode::UNINTERESTED, "bad namespace"} ); } - if (pub.fullTrackName.trackNamespace.empty()) { return folly::makeUnexpected( PublishError({pub.requestID, PublishErrorCode::INTERNAL_ERROR, "namespace required"}) ); } - + // When relayExec_ is set, SubscriberCrossExecFilter (wired at session + // registration) has already dispatched to relayExec_ before calling this + // method, so getRequestSession() is valid and publishWithSession() runs on + // the correct thread. auto session = MoQSession::getRequestSession(); + if (relayExec_ && useLocalForwarders_) { + // Forwarder lives on publisherExec; relay chain attached as a channel sub + // (null downstream until wired to topNFilter below). All work is inline + // since SubscriberCrossExecFilter already dispatched to relayExec_. + auto ftn = pub.fullTrackName; // save before pub is moved + auto forwarder = std::make_shared(ftn, pub.largest); + forwarder->setExtensions(pub.extensions); + auto relayChainFilter = std::make_shared(relayExec_, nullptr); + // TODO: update forward=true when observers register with top-N (and think about cache) + forwarder->addChannelSubscriber(relayExec_, /*forward=*/false, relayChainFilter); + auto setup = + publishWithSession(std::move(pub), std::move(handle), std::move(session), forwarder); + if (setup.hasError()) { + return folly::makeUnexpected(setup.error()); + } + auto topNView = registry_.getTopNView(ftn); + if (topNView && topNView->topNFilter) { + relayChainFilter->setDownstream(topNView->topNFilter); + } + return PublishConsumerAndReplyTask{ + forwarder, + folly::coro::makeTask>( + folly::Expected(std::move(setup.value().publishOk)) + ) + }; + } + + maybeSetSessionExec(*session); + auto setup = publishWithSession(std::move(pub), std::move(handle), std::move(session)); + if (setup.hasError()) { + return folly::makeUnexpected(setup.error()); + } + return PublishConsumerAndReplyTask{ + std::move(setup.value().consumer), + folly::coro::makeTask>( + folly::Expected(std::move(setup.value().publishOk)) + ) + }; +} + +MoqxRelay::PublishSetupResult MoqxRelay::publishWithSession( + PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session, + std::shared_ptr forwarder +) { // Handle duplicate publisher at relay level before registering in the tree. // Move the forwarder out and erase the entry BEFORE calling publishDone. // publishDone iterates subscribers via forEachSubscriber; if a subscriber @@ -299,13 +380,17 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptr(pub.fullTrackName, pub.largest); - forwarder->setExtensions(pub.extensions); + if (!forwarder) { + forwarder = std::make_shared(pub.fullTrackName, pub.largest); + forwarder->setExtensions(pub.extensions); + } + auto publisherWrapped = maybeWrapPublisher(relayExec_, session); auto publishEntry = registry_.createFromPublish( pub.fullTrackName, forwarder, session, + std::move(publisherWrapped), pub.requestID, std::move(handle), [&](std::shared_ptr f) { return buildFilterChain(pub.fullTrackName, f); } @@ -348,6 +433,18 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptrsetCallback(std::make_shared(weak_from_this())); + } else if (!relayExec_) { + forwarder->setCallback(shared_from_this()); + } + // useLocalForwarders_ case: the local forwarder lives on publisherExec and + // already had its CrossExecForwarderCallback installed by setupPublisherLocal, + // which dispatches onEmpty to relayExec_. Must not overwrite it here. + uint64_t nSubscribers = 0; bool hasTrackFilterSub = false; for (auto& [outSession, info] : sessions) { @@ -360,20 +457,28 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptrgetExecutor(); - co_withExecutor(exec, publishToSession(outSession, forwarder, info.forward)).start(); + auto* publisherExec = relayExec_ ? session->getExecutor() : nullptr; + if (!addSubscriberAndPublish( + outSession, + forwarder, + info.forward, + /*pinned=*/true, + publisherExec + )) { + XLOG(ERR) << "addSubscriberAndPublish failed for " << forwarder->fullTrackName(); + continue; + } } } - forwarder->setCallback(shared_from_this()); // Forward if there are direct subscribers OR TRACK_FILTER subscribers // (PropertyRanking needs objects to evaluate property values for ranking). // When subscribers join later via subscribeNamespace, forwardChanged() sends REQUEST_UPDATE. bool shouldForward = (nSubscribers > 0) || hasTrackFilterSub; - return PublishConsumerAndReplyTask{ + return PublishSetup{ publishEntry.consumer, - folly::coro::makeTask>(PublishOk{ + PublishOk{ pub.requestID, /*forward=*/shouldForward, kDefaultPriority, @@ -381,53 +486,102 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptr MoqxRelay::publishToSession( - std::shared_ptr session, - std::shared_ptr forwarder, - bool forward, - bool trackFilterSubscriber +namespace { + +folly::coro::Task awaitPublishReply( + std::shared_ptr forwarder, // keeps subscriber's raw ref alive + std::shared_ptr subscriber, + folly::coro::Task> reply ) { - if (session->isClosed()) { - XLOG(WARN) << "publishToSession: session closed, skipping " << forwarder->fullTrackName(); + auto result = co_await co_awaitTry(std::move(reply)); + if (result.hasException()) { + XLOG(ERR) << "Publish reply exception for " << forwarder->fullTrackName() + << " subscriber=" << subscriber.get() << ": " << result.exception().what(); + subscriber->unsubscribe(); co_return; } - auto subscriber = forwarder->addSubscriber(session, forward); - if (!subscriber) { - XLOG(ERR) << "Subscribe failed: addSubscriber returned null for " << forwarder->fullTrackName(); + if (result.value().hasError()) { + XLOG(ERR) << "Publish reply error for " << forwarder->fullTrackName() + << " subscriber=" << subscriber.get() << ": " << result.value().error().reasonPhrase; + subscriber->unsubscribe(); co_return; } - // Direct subscribers are pinned (not evictable by PropertyRanking). - // TRACK_FILTER subscribers are unpinned so onTrackEvicted can remove them. - subscriber->pinned = !trackFilterSubscriber; - XLOG(DBG4) << "added subscriber for ftn=" << forwarder->fullTrackName(); - auto guard = folly::makeGuard([subscriber] { subscriber->unsubscribe(); }); + XLOG(DBG1) << "Received PublishOk for " << forwarder->fullTrackName() + << " subscriber=" << subscriber.get(); + subscriber->onPublishOk(result.value().value()); +} - auto pubInitial = session->publish(subscriber->getPublishRequest(), subscriber); - if (pubInitial.hasError()) { - XLOG(ERR) << "Publish failed err=" << pubInitial.error().reasonPhrase; - co_return; +} // namespace + +std::optional MoqxRelay::startPublish( + std::shared_ptr session, + std::shared_ptr forwarder, + bool forward, + bool pinned, + folly::Executor* subscriberExec +) { + auto subscriber = forwarder->addSubscriber(session, forward); + if (!subscriber) { + XLOG(ERR) << "startPublish: addSubscriber null for " << forwarder->fullTrackName(); + return std::nullopt; } - subscriber->trackConsumer = std::move(pubInitial->consumer); - auto pubResult = co_await co_awaitTry(std::move(pubInitial->reply)); - if (pubResult.hasException()) { - XLOG(ERR) << "Publish failed err=" << pubResult.exception().what(); - co_return; + subscriber->pinned = pinned; + Subscriber::PublishResult pub; + if (subscriberExec) { + SubscriberCrossExecFilter wrapped(subscriberExec, session); + pub = wrapped.publish(subscriber->getPublishRequest(), subscriber); + } else { + pub = session->publish(subscriber->getPublishRequest(), subscriber); } - if (pubResult.value().hasError()) { - XLOG(ERR) << "Publish failed err=" << pubResult.value().error().reasonPhrase; - co_return; + if (pub.hasError()) { + XLOG(ERR) << "startPublish: publish failed: " << pub.error().reasonPhrase; + subscriber->unsubscribe(); + return std::nullopt; } - guard.dismiss(); - XLOG(DBG1) << "Publish OK sess=" << session.get(); - auto& pubOk = pubResult.value().value(); + subscriber->trackConsumer = std::move(pub->consumer); + return PreparedPublish{std::move(subscriber), std::move(pub->reply)}; +} - // Process the PUBLISH_OK response - updates range, forward flag, and - // handles NEW_GROUP_REQUEST forwarding via callback - subscriber->onPublishOk(pubOk); +bool MoqxRelay::addSubscriberAndPublish( + std::shared_ptr subscriberSession, + std::shared_ptr forwarder, + bool forward, + bool pinned, + folly::Executor* publisherExec +) { + if (relayExec_ && useLocalForwarders_) { + XCHECK(publisherExec + ) << "addSubscriberAndPublish: publisherExec required in local-forwarder mode"; + co_withExecutor( + subscriberSession->getExecutor(), + publishViaLocalForwarder(subscriberSession, forwarder, publisherExec, forward, pinned) + ) + .start(); + return true; + } + auto p = startPublish( + subscriberSession, + forwarder, + forward, + pinned, + relayExec_ ? subscriberSession->getExecutor() : nullptr + ); + if (!p) { + return false; + } + // Run awaitPublishReply on relayExec_ so onPublishOk and detach() (from + // publishDone) are always on the same thread and cannot race. For + // single-thread (relayExec_ == nullptr) this is the subscriber's exec. + co_withExecutor( + relayExec_ ? static_cast(relayExec_) : subscriberSession->getExecutor(), + awaitPublishReply(forwarder, std::move(p->subscriber), std::move(p->reply)) + ) + .start(); + return true; } class MoqxRelay::NamespaceSubscription : public Publisher::SubscribeNamespaceHandle { @@ -505,16 +659,42 @@ std::shared_ptr MoqxRelay::getSubscribeWriteback( SubscriptionRegistry::FilterChainResult MoqxRelay::buildFilterChain(const FullTrackName& ftn, std::shared_ptr forwarder) { - // Build chain: TopNFilter → TerminationFilter → (cache?) → Forwarder - // This ensures property values are observed in both PUBLISH and SUBSCRIBE paths. - auto baseConsumer = cache_ ? cache_->getSubscribeWriteback(ftn, forwarder) - : std::static_pointer_cast(forwarder); - auto terminationFilter = - std::make_shared(shared_from_this(), ftn, std::move(baseConsumer)); + if (relayExec_ && useLocalForwarders_) { + // Multi-iothread with local forwarders: publisher writes directly to forwarder on primaryExec. + // relayChainFilter (added by publish()) fans off to topNFilter/termination/cache. + std::shared_ptr chainEnd = + cache_ ? cache_->makePassiveConsumer(ftn) : std::make_shared(); + auto terminationFilter = + std::make_shared(shared_from_this(), ftn, std::move(chainEnd)); + auto topNFilter = std::make_shared( + ftn, + std::static_pointer_cast(terminationFilter) + ); + topNFilter->setActivityThreshold(activityThreshold_); + return SubscriptionRegistry::FilterChainResult{ + .consumer = std::static_pointer_cast(forwarder), + .topNFilter = topNFilter + }; + } + + // Single-threaded: chain wraps forwarder directly (no cross-exec needed). + // Cache attaches as a passive subscriber of the forwarder. + if (cache_) { + forwarder->addSubscriber( + /*session=*/nullptr, + /*forward=*/true, + cache_->makePassiveConsumer(ftn), + /*passive=*/true + ); + } + auto terminationFilter = std::make_shared( + shared_from_this(), + ftn, + std::static_pointer_cast(forwarder) + ); auto topNFilter = std::make_shared(ftn, std::static_pointer_cast(terminationFilter)); topNFilter->setActivityThreshold(activityThreshold_); - return SubscriptionRegistry::FilterChainResult{ .consumer = std::static_pointer_cast(topNFilter), .topNFilter = topNFilter @@ -524,6 +704,13 @@ MoqxRelay::buildFilterChain(const FullTrackName& ftn, std::shared_ptr MoqxRelay::subscribeNamespace( SubscribeNamespace subNs, std::shared_ptr namespacePublishHandle +) { + return subscribeNamespaceImpl(std::move(subNs), std::move(namespacePublishHandle)); +} + +folly::coro::Task MoqxRelay::subscribeNamespaceImpl( + SubscribeNamespace subNs, + std::shared_ptr namespacePublishHandle ) { XLOG(DBG1) << __func__ << " nsp=" << subNs.trackNamespacePrefix; @@ -539,10 +726,11 @@ folly::coro::Task MoqxRelay::subscribeNames << ", reciprocating peer subNs"; // Tag with the peer's relay ID so we suppress echoing these namespaces // back to that peer on reconnect. - auto handle = makeNamespaceBridgeHandle(weak_from_this(), session, incomingPeerID); - auto recipResult = co_await session->subscribeNamespace( - makePeerSubNs(), - handle + auto handle = makeNamespaceBridgeHandle(weak_from_this(), session, incomingPeerID, relayExec_); + // subscribeNamespace must run on the peer session's executor. + auto recipResult = co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(session->getExecutor()), + session->subscribeNamespace(makePeerSubNs(), handle) ); // no token: reciprocal, prevents loop if (recipResult.hasError()) { XLOG(ERR) << "Reciprocal peer subNs failed: " << recipResult.error().reasonPhrase; @@ -593,7 +781,7 @@ folly::coro::Task MoqxRelay::subscribeNames // If TRACK_FILTER is present, enroll session in PropertyRanking for top-N selection. // NOTE: onSelected callbacks fire synchronously within addSessionToTopNGroup() for - // tracks already in top-N, triggering publishToSession() before this call returns. + // tracks already in top-N, triggering onTrackSelected() before this call returns. if (trackFilter) { auto ranking = getOrCreateRanking(nodePtr, trackFilter->propertyType, subNs.trackNamespacePrefix); @@ -648,7 +836,17 @@ folly::coro::Task MoqxRelay::subscribeNames (subNs.options == SubscribeNamespaceOptions::BOTH || subNs.options == SubscribeNamespaceOptions::PUBLISH)) { if (publishSession != session) { - co_withExecutor(exec, publishToSession(session, forwarder, subNs.forward)).start(); + auto* publisherExec = relayExec_ ? publishSession->getExecutor() : nullptr; + if (!addSubscriberAndPublish( + session, + forwarder, + subNs.forward, + /*pinned=*/true, + publisherExec + )) { + XLOG(ERR) << "addSubscriberAndPublish failed for " << ftn; + return; + } } } }); @@ -695,9 +893,892 @@ MoqxRelay::PublishState MoqxRelay::findPublishState(const FullTrackName& ftn) { return state; } +// === LocalSubscribeFilter, createPublisherFilter, and createSubscriberFilter === + +// Installed as the publish handler for client sessions when useLocalForwarders_ +// is true. Overrides subscribe() to call subscribeFromSubscriberThread() directly +// on the subscriber's executor instead of hopping to relayExec_ first. +// All other Publisher methods fall through to PublisherCrossExecFilter which +// still dispatches to relayExec_. +class MoqxRelay::LocalSubscribeFilter final : public PublisherCrossExecFilter { +public: + LocalSubscribeFilter(folly::Executor* relayExec, std::shared_ptr relay) + : PublisherCrossExecFilter(relayExec, relay), relay_(std::move(relay)) {} + + folly::coro::Task subscribe( + moxygen::SubscribeRequest subReq, + std::shared_ptr consumer + ) override { + if (subReq.fullTrackName.trackNamespace.empty()) { + co_return folly::makeUnexpected(moxygen::SubscribeError{ + subReq.requestID, + moxygen::SubscribeErrorCode::TRACK_NOT_EXIST, + "namespace required" + }); + } + auto session = moxygen::MoQSession::getRequestSession(); + auto* subscriberExec = session->getExecutor(); + // No executor hop: subscribeFromSubscriberThread starts on subscriberExec. + co_return co_await relay_->subscribeFromSubscriberThread( + std::move(subReq), + std::move(consumer), + std::move(session), + subscriberExec + ); + } + +private: + std::shared_ptr relay_; +}; + +std::shared_ptr MoqxRelay::createPublisherFilter() { + if (relayExec_ && useLocalForwarders_) { + return std::make_shared(relayExec_, shared_from_this()); + } + if (relayExec_) { + return std::make_shared(relayExec_, shared_from_this()); + } + return shared_from_this(); +} + +// Installed as the subscribe handler for client sessions when useLocalForwarders_ +// is true. Overrides publish() to create a local publisher forwarder on the +// publisher's executor, avoiding the cross-exec hop to relayExec_ for data. +// All other Subscriber methods (publishNamespace, goaway) fall through to +// SubscriberCrossExecFilter which still dispatches to relayExec_. +class MoqxRelay::LocalPublishFilter final : public SubscriberCrossExecFilter { +public: + LocalPublishFilter(folly::Executor* relayExec, std::shared_ptr relay) + : SubscriberCrossExecFilter(relayExec, relay), relay_(std::move(relay)) {} + + PublishResult publish( + moxygen::PublishRequest pub, + std::shared_ptr handle + ) override { + auto session = moxygen::MoQSession::getRequestSession(); + return relay_ + ->publishFromPublisherThread(std::move(pub), std::move(handle), std::move(session)); + } + +private: + std::shared_ptr relay_; +}; + +std::shared_ptr MoqxRelay::createSubscriberFilter() { + if (relayExec_ && useLocalForwarders_) { + return std::make_shared(relayExec_, shared_from_this()); + } + if (relayExec_) { + return std::make_shared(relayExec_, shared_from_this()); + } + return shared_from_this(); +} + +Subscriber::PublishResult MoqxRelay::publishFromPublisherThread( + PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session +) { + if (!pub.fullTrackName.trackNamespace.startsWith(allowedNamespacePrefix_)) { + return folly::makeUnexpected( + PublishError{pub.requestID, PublishErrorCode::UNINTERESTED, "bad namespace"} + ); + } + if (pub.fullTrackName.trackNamespace.empty()) { + return folly::makeUnexpected( + PublishError{pub.requestID, PublishErrorCode::INTERNAL_ERROR, "namespace required"} + ); + } + + auto ftn = pub.fullTrackName; + // The publisher's local forwarder IS the primary forwarder — it lives on + // publisherExec and is registered in the registry by setupPublisherPrimary. + auto localPubFwd = std::make_shared(ftn, pub.largest); + localPubFwd->setExtensions(pub.extensions); + + // Cache the publisher's local forwarder in this thread's registry so a + // subscriber on the same iothread reuses it instead of building a second + // forwarder. The registry holds a strong ref; the forwarder's onPublishDone + // callback removes the entry to release it (identity-checked). + if (!tlForwarders_.get()) { + tlForwarders_.reset(new LocalForwarderRegistry()); + } + + // Set the forwarder callback here on publisherExec (where localPubFwd lives), + // before the reply task hops to relayExec_. publishWithSession is called from + // relayExec_ via setupPublisherPrimary and must not touch the callback there. + // + // The LocalForwarderCallback wrapper removes localPubFwd from tlForwarders_ on + // its own thread when the publisher terminates (onPublishDone). removeOnEmpty + // is false: the publisher's forwarder must survive subscriber churn — it is + // removed only when the source ends, not when its last subscriber leaves. + { + auto relayAdapter = std::make_shared( + std::weak_ptr(shared_from_this()) + ); + auto crossExec = std::make_shared( + relayExec_, + localPubFwd, + std::move(relayAdapter) + ); + localPubFwd->setCallback(std::make_shared( + tlForwarders_.get(), + ftn, + std::move(crossExec), + /*removeOnEmpty=*/false + )); + } + + // The publisher's forwarder is authoritative — claim the slot, displacing any + // stale subscribe-path local forwarder so same-thread subscribers reuse THIS + // forwarder via the fast path. + tlForwarders_->set(ftn, localPubFwd); + + // Channel sub for the relay chain (topNFilter → terminationFilter → cache). + // FIFO on relayExec_ guarantees setupPublisherPrimary wires the downstream + // before any object dispatches from localPubFwd arrive on relayExec_. + auto relayChainFilter = std::make_shared(relayExec_, nullptr); + // forward=true + passive=true: internal relay chain observes all objects but + // does not count as a real forwarding subscriber (see completeFirstSubscriber). + localPubFwd + ->addChannelSubscriber(relayExec_, /*forward=*/true, relayChainFilter, /*passive=*/true); + + auto reply = folly::coro::co_invoke( + [exec = relayExec_, + relay = shared_from_this(), + pub = std::move(pub), + handle = std::move(handle), + session = std::move(session), + localPubFwd, + relayChainFilter]() mutable -> folly::coro::Task> { + co_return co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(exec), + relay->setupPublisherPrimary( + std::move(pub), + std::move(handle), + std::move(session), + std::move(localPubFwd), + std::move(relayChainFilter) + ) + ); + } + ); + + // The publisher writes directly to its local forwarder. When the publisher + // terminates, localPubFwd->publishDone fires onPublishDone on its callback + // (LocalForwarderCallback), which removes the tlForwarders_ entry on this + // thread — no separate termination filter needed. + auto consumer = std::static_pointer_cast(std::move(localPubFwd)); + return PublishConsumerAndReplyTask{std::move(consumer), std::move(reply)}; +} + +folly::coro::Task> MoqxRelay::setupPublisherPrimary( + PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session, + std::shared_ptr primaryFwd, + std::shared_ptr relayChainFilter +) { + // Running on relayExec_. Register the publisher's local forwarder as the + // primary — no second forwarder here; it lives on publisherExec. + auto ftn = pub.fullTrackName; + auto setup = + publishWithSession(std::move(pub), std::move(handle), std::move(session), primaryFwd); + if (setup.hasError()) { + co_return folly::makeUnexpected(setup.error()); + } + + auto topNView = registry_.getTopNView(ftn); + XCHECK(topNView && topNView->topNFilter) + << "setupPublisherPrimary: topNFilter always present in MT mode"; + relayChainFilter->setDownstream(topNView->topNFilter); + + co_return setup.value().publishOk; +} + +// === Multi-iothread subscribe helpers === + +folly::coro::Task +MoqxRelay::subscribeStatefulWork(SubscribeRequest subReq) { + const auto& ftn = subReq.fullTrackName; + + if (!registry_.exists(ftn) && upstream_ && + !namespaceTree_.findPublisherSession(ftn.trackNamespace)) { + co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); + } + + auto firstOrSubsequent = registry_.getOrCreateFromSubscribe( + ftn, + shared_from_this(), + [this, &ftn](std::shared_ptr f) { return buildFilterChain(ftn, std::move(f)); } + ); + + if (auto* first = std::get_if(&firstOrSubsequent)) { + auto upstreamSession = namespaceTree_.findPublisherSession(ftn.trackNamespace); + if (!upstreamSession) { + co_return StatefulSubscribeResult{ + nullptr, + nullptr, + folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::TRACK_NOT_EXIST, + "no such namespace or track" + }), + std::nullopt + }; + } + + const auto clientRequestID = subReq.requestID; + SubscribeRequest upstreamSubReq = subReq; + upstreamSubReq.priority = kDefaultUpstreamPriority; + upstreamSubReq.groupOrder = GroupOrder::Default; + upstreamSubReq.locType = LocationType::LargestObject; + upstreamSubReq.forward = false; // updated to real value in completeFirstSubscriber + upstreamSubReq.requestID = upstreamSession->peekNextRequestID(); + + // first->consumer is the primary forwarder (lives on primaryExec == upstreamSession's + // executor). No cross-exec wrapping — upstream delivers on that executor directly. + auto upstreamConsumer = first->consumer; + StatefulSubscribeResult + result{first->forwarder, upstreamSession->getExecutor(), std::nullopt, std::nullopt}; + result.firstSetup.emplace(StatefulSubscribeResult::FirstSubscriberSetup{ + upstreamSession, + std::move(upstreamSubReq), + std::move(upstreamConsumer), + std::move(first->pending), + clientRequestID + }); + co_return result; + + } else { + auto sub = co_await std::get>( + std::move(firstOrSubsequent) + ); + auto upstreamView = registry_.getUpstreamView(ftn); + auto* primaryExec = upstreamView ? upstreamView->publisherExec : nullptr; + co_return StatefulSubscribeResult{sub.forwarder, primaryExec, std::nullopt, std::nullopt}; + } +} + +namespace { + +// Free coroutine helpers for ChannelForwarderCallback — avoids lambda-coroutine +// capture lifetime issues (lambda lives on caller stack; coroutine frame outlives it). +folly::coro::Task channelSubscribeUpdate( + std::shared_ptr handle, + bool forward +) { + auto res = co_await handle->requestUpdate(RequestUpdate{ + RequestID(0), + handle->subscribeOk().requestID, + kLocationMin, + kLocationMax.group, + kDefaultPriority, + forward + }); + if (res.hasError()) { + XLOG(ERR) << "requestUpdate failed: " << res.error().reasonPhrase; + } +} + +folly::coro::Task channelNewGroupRequestUpdate( + std::shared_ptr handle, + uint64_t group +) { + RequestUpdate update; + update.requestID = RequestID(0); + update.existingRequestID = handle->subscribeOk().requestID; + update.params.insertParam( + Parameter(folly::to_underlying(TrackRequestParamKey::NEW_GROUP_REQUEST), group) + ); + auto res = co_await handle->requestUpdate(std::move(update)); + if (res.hasError()) { + XLOG(ERR) << "NEW_GROUP_REQUEST update failed: " << res.error().reasonPhrase; + } +} + +// === MoQForwarder::Callback chain overview === +// +// MoQForwarder fires three callbacks: onEmpty (last subscriber left), +// forwardChanged (forwarding subscriber count crossed zero), and +// newGroupRequested (subscriber issued a NEW_GROUP_REQUEST). +// +// Single-threaded mode: +// forwarder.callback = MoqxRelay (direct, no hop) +// +// Multi-threaded — primary forwarder (lives on primaryExec): +// primaryFwd.callback = +// CrossExecForwarderCallback(relayExec_, primaryFwd, +// WeakRelayForwarderCallback(relay)) +// +// [primaryExec] CrossExecForwarderCallback: captures ftn by value, +// dispatches to relayExec_ fire-and-forget +// ↓ +// [relayExec_] WeakRelayForwarderCallback: recovers relay via weak_ptr, +// calls onEmptyImpl / forwardChangedImpl / newGroupRequestedImpl +// +// Multi-threaded — local forwarder (lives on subscriberExec, subscribe path): +// During setup window: +// localFwd.callback = PendingForwarderCallback +// captures events; replayed onto finalCallback after setup +// +// After setup: +// localFwd.callback = +// LocalForwarderCallback(localReg, ftn, +// CrossExecForwarderCallback(primaryExec, localFwd, +// ChannelForwarderCallback(primaryFwd, subscriberExec, primaryExec))) +// +// [subscriberExec] LocalForwarderCallback: removes from localReg on onEmpty, +// passes through forwardChanged / newGroupRequested +// ↓ (CrossExecForwarderCallback dispatches to primaryExec) +// [primaryExec] ChannelForwarderCallback: +// onEmpty → primaryFwd->removeChannelSubscriberByExec(subscriberExec) +// (may cascade into primaryFwd's own callback chain above) +// forwardChanged / newGroupRequested → launch background coro +// → requestUpdate(handle_) +// +// Weak-ptr discipline: +// WeakRelayForwarderCallback holds weak_ptr to break the cycle +// registry → forwarder → callback → relay → registry. +// CrossExecForwarderCallback holds weak_ptr to avoid a permanent +// ownership cycle; it locks eagerly on the calling thread (where the forwarder +// is alive) and moves the shared_ptr into the lambda to keep it alive across +// the executor hop. + +// Captures forwardChanged/newGroupRequested/onEmpty during setup (getOrCreate→setCallback window) +// so they can be replayed once the real callback is installed. +class PendingForwarderCallback : public moxygen::MoQForwarder::Callback { +public: + PendingForwarderCallback( + openmoq::moqx::LocalForwarderRegistry* localReg, + moxygen::FullTrackName ftn + ) + : localReg_(localReg), ftn_(std::move(ftn)) {} + + void forwardChanged(moxygen::MoQForwarder*, bool f) override { lastForward_ = f; } + void newGroupRequested(moxygen::MoQForwarder*, uint64_t g) override { + maxGroup_ = std::max(maxGroup_.value_or(0), g); + } + void onEmpty(moxygen::MoQForwarder* forwarder) override { + localReg_->remove(ftn_, forwarder); + sawOnEmpty_ = true; + } + + openmoq::moqx::LocalForwarderRegistry* localReg_; + moxygen::FullTrackName ftn_; + std::optional lastForward_; + std::optional maxGroup_; + bool sawOnEmpty_{false}; +}; + +// Runs on the primary forwarder's executor (publisher's iothread). Propagates +// channel-subscriber lifecycle events to the primary and upstream handle. +class ChannelForwarderCallback : public moxygen::MoQForwarder::Callback { +public: + ChannelForwarderCallback( + std::weak_ptr weakPrimary, + folly::Executor* subscriberExec, + folly::Executor* primaryExec + ) + : weakPrimary_(std::move(weakPrimary)), subscriberExec_(subscriberExec), + primaryExec_(primaryExec) {} + + // Called on primaryExec immediately after addChannelSubscriber returns. + void setHandle(std::shared_ptr h) { + handle_ = std::move(h); + } + + // Called on primaryExec after addChannelSubscriber. Holds the last shared_ptr + // so that onEmpty can defer destruction to subscriberExec_, ensuring all + // in-flight this-capturing lambdas on subscriberExec_ run before the filter + // is destroyed. + void setFilter(std::shared_ptr filter) { crossExecFilter_ = std::move(filter); } + + void onEmpty(moxygen::MoQForwarder* /*localFwd*/) override { + auto primary = weakPrimary_.lock(); + if (primary) { + primary->removeChannelSubscriberByExec(subscriberExec_); + } + // Break the reference cycle that keeps the local-forwarding chain alive: + // localFwd → callback (LocalForwarderCallback → CrossExecForwarderCallback + // → this ChannelForwarderCallback) → handle_ (channel Subscriber) + // → trackConsumer (CrossExecFilter) → localFwd. + // removeChannelSubscriberByExec drops the primary's strong ref to the channel + // Subscriber, but handle_ still pins it (and thus the whole chain). When the + // primary was already destroyed (replace scenario), removeChannelSubscriberByExec + // never ran, so handle_ is the only remaining root. Reset it unconditionally. + handle_.reset(); + // Post filter destruction to subscriberExec_ so FIFO ordering guarantees + // all previously-enqueued this-capturing lambdas run before the destructor. + if (crossExecFilter_) { + subscriberExec_->add([f = std::move(crossExecFilter_)]() {}); + } + } + + void forwardChanged(moxygen::MoQForwarder*, bool forward) override { + if (!handle_) { + return; + } + folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec_), + channelSubscribeUpdate(handle_, forward) + ) + .start(); + } + + void newGroupRequested(moxygen::MoQForwarder*, uint64_t group) override { + if (!handle_) { + return; + } + folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec_), + channelNewGroupRequestUpdate(handle_, group) + ) + .start(); + } + +private: + std::weak_ptr weakPrimary_; + folly::Executor* subscriberExec_; + folly::Executor* primaryExec_; + std::shared_ptr handle_; + std::shared_ptr crossExecFilter_; +}; + +// Builds the local→primary callback chain, hops to primaryExec to install the +// channel subscriber, then returns. The caller resumes on its own pinned executor +// via co_withExecutor's natural unwind (relayExec_ or subscriberExec depending on +// which path is calling). +folly::coro::Task> wireLocalToPrimary( + openmoq::moqx::LocalForwarderRegistry* localReg, + moxygen::FullTrackName ftn, + std::shared_ptr localFwd, + std::shared_ptr primaryFwd, + folly::Executor* primaryExec, + folly::Executor* subscriberExec, + std::shared_ptr crossExecFilter, + bool forward +) { + auto channelCb = + std::make_shared(primaryFwd, subscriberExec, primaryExec); + auto crossExecCb = std::make_shared(primaryExec, localFwd, channelCb); + auto finalCallback = + std::make_shared(localReg, std::move(ftn), std::move(crossExecCb)); + co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec), + [&]() -> folly::coro::Task { + auto chanHandle = + primaryFwd->addChannelSubscriber(subscriberExec, forward, crossExecFilter); + if (chanHandle) { + channelCb->setHandle(chanHandle); + } + channelCb->setFilter(crossExecFilter); + co_return; + }() + ); + co_return finalCallback; +} + +// Installs finalCallback on localFwd and replays any forwardChanged/newGroupRequested +// events captured during setup. Must run on subscriberExec. +void replayPendingEvents( + moxygen::MoQForwarder* localFwd, + const std::shared_ptr& finalCallback, + const PendingForwarderCallback& pendingCb, + bool forward +) { + localFwd->setCallback(finalCallback); + if (pendingCb.lastForward_ && *pendingCb.lastForward_ != forward) { + finalCallback->forwardChanged(localFwd, *pendingCb.lastForward_); + } + if (pendingCb.maxGroup_) { + finalCallback->newGroupRequested(localFwd, *pendingCb.maxGroup_); + } +} +} // namespace + +folly::coro::Task MoqxRelay::publishViaLocalForwarder( + std::shared_ptr subscriberSession, + std::shared_ptr primaryFwd, + folly::Executor* primaryExec, + bool forward, + bool pinned +) { + auto* subscriberExec = subscriberSession->getExecutor(); + const auto& ftn = primaryFwd->fullTrackName(); + + // Fast path: local forwarder already exists on this thread. + if (auto* localReg = tlForwarders_.get()) { + if (auto localFwd = localReg->get(ftn)) { + auto p = startPublish(subscriberSession, localFwd, forward, pinned, nullptr); + if (p) { + co_await awaitPublishReply(localFwd, std::move(p->subscriber), std::move(p->reply)); + } + co_return; + } + } + + if (!tlForwarders_.get()) { + tlForwarders_.reset(new LocalForwarderRegistry()); + } + auto* localReg = tlForwarders_.get(); + auto [localFwd, isNew] = localReg->getOrCreate(ftn, [&] { + auto fwd = std::make_shared(ftn, primaryFwd->largest()); + fwd->setExtensions(primaryFwd->extensions()); + return fwd; + }); + + auto p = startPublish(subscriberSession, localFwd, forward, pinned, nullptr); + if (!p) { + if (isNew) { + localReg->remove(ftn, localFwd.get()); + } + co_return; + } + + if (!isNew) { + co_await awaitPublishReply(localFwd, std::move(p->subscriber), std::move(p->reply)); + co_return; + } + + // isNew=true: wire localFwd into primaryFwd as a channel subscriber. + auto pendingCb = std::make_shared(localReg, ftn); + localFwd->setCallback(pendingCb); + // deepCopyPayload=true (default): each subscriber thread owns its IOBuf chain, + // avoiding cross-thread contention on the shared atomic refcount. + auto crossExecFilter = std::make_shared(subscriberExec, localFwd); + bool fwd = (localFwd->numForwardingSubscribers() > 0); + + auto finalCallback = co_await wireLocalToPrimary( + localReg, + ftn, + localFwd, + primaryFwd, + primaryExec, + subscriberExec, + crossExecFilter, + fwd + ); + + // Natural unwind: back on subscriberExec. + + if (pendingCb->sawOnEmpty_) { + folly::via(primaryExec, [pf = primaryFwd, ex = subscriberExec]() noexcept { + pf->removeChannelSubscriberByExec(ex); + }); + localFwd->publishDone(PublishDone{ + RequestID(0), + PublishDoneStatusCode::INTERNAL_ERROR, + 0, + "all subscribers cancelled during setup" + }); + co_return; + } + + replayPendingEvents(localFwd.get(), finalCallback, *pendingCb, fwd); + co_await awaitPublishReply(localFwd, std::move(p->subscriber), std::move(p->reply)); +} + +folly::coro::Task MoqxRelay::completeFirstSubscriber( + StatefulSubscribeResult::FirstSubscriberSetup& setup, + std::shared_ptr primaryFwd, + bool forward +) { + // Add relay-exec channel sub (topNFilter/terminationFilter/cache path). + // setDownstream wired on relayExec_ before pending.complete(). + auto relayChainFilter = std::make_shared(relayExec_, nullptr); + // forward=true + passive=true: the relay's own chain (top-N/termination/cache) + // must observe every object (forward=true) so top-N and cache see the full + // stream, but it must not count as a real forwarding subscriber — passive=true + // keeps it out of forwardingSubscribers_ (so it never toggles upstream + // forwardChanged) and out of the onEmpty quorum (so the primary's onEmpty + // still fires once the last real cross-exec subscriber leaves, letting the + // primary and its upstream subscription be pruned). + primaryFwd + ->addChannelSubscriber(relayExec_, /*forward=*/true, relayChainFilter, /*passive=*/true); + setup.upstreamSubReq.forward = forward; + auto subRes = co_await setup.upstreamSession->subscribe( + setup.upstreamSubReq, + std::move(setup.upstreamConsumer) + ); + if (subRes.hasError()) { + co_return FirstSubscriberResult{ + std::move(relayChainFilter), + folly::makeUnexpected(SubscribeError{ + setup.clientRequestID, + subRes.error().errorCode, + folly::to("upstream subscribe failed: ", subRes.error().reasonPhrase) + }) + }; + } + // Process subscribeOk on primaryExec (forwarder's executor). + const auto& ok = subRes.value()->subscribeOk(); + auto reqID = ok.requestID; + auto exts = ok.extensions; // copy before move of subRes + auto largest = ok.largest; // copy before move of subRes + if (largest) { + primaryFwd->updateLargest(largest->group, largest->object); + } + primaryFwd->setExtensions(exts); + primaryFwd->tryProcessNewGroupRequest(setup.upstreamSubReq.params, /*fire=*/false); + co_return FirstSubscriberResult{ + std::move(relayChainFilter), + UpstreamOk{std::move(subRes.value()), reqID, std::move(exts), std::move(largest)} + }; +} + +folly::coro::Task MoqxRelay::subscribeFromSubscriberThread( + SubscribeRequest subReq, + std::shared_ptr consumer, + std::shared_ptr session, + folly::Executor* subscriberExec +) { + const auto& ftn = subReq.fullTrackName; + + // Fast path: local forwarder already exists on this thread. + if (auto* localReg = tlForwarders_.get()) { + if (auto localFwd = localReg->get(ftn)) { + if (localFwd->largest() && subReq.locType == LocationType::AbsoluteRange && + subReq.endGroup < localFwd->largest()->group) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INVALID_RANGE, + "Range in the past, use FETCH" + }); + } + auto sub = localFwd->addSubscriber(session, subReq, std::move(consumer)); + if (!sub) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "failed to add subscriber" + }); + } + localFwd->tryProcessNewGroupRequest(subReq.params); + co_return sub; + } + } + + // Ensure thread-local registry. + if (!tlForwarders_.get()) { + tlForwarders_.reset(new LocalForwarderRegistry()); + } + auto* localReg = tlForwarders_.get(); + + // getOrCreate before the relay hop: serializes same-iothread races. + // isNew=true means this thread owns setup; isNew=false means another setup is in + // progress (or complete) on this thread and we can just attach. + auto [localFwd, isNew] = + localReg->getOrCreate(ftn, [&] { return std::make_shared(ftn); }); + + if (!isNew) { + if (localFwd->largest() && subReq.locType == LocationType::AbsoluteRange && + subReq.endGroup < localFwd->largest()->group) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INVALID_RANGE, + "Range in the past, use FETCH" + }); + } + auto sub = localFwd->addSubscriber(session, subReq, std::move(consumer)); + if (!sub) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "failed to add subscriber" + }); + } + localFwd->tryProcessNewGroupRequest(subReq.params); + co_return sub; + } + + // isNew=true: this thread owns setup. Install PendingForwarderCallback first so + // forwardChanged/newGroupRequested/onEmpty events during setup are captured for replay. + auto pendingCb = std::make_shared(localReg, ftn); + localFwd->setCallback(pendingCb); + + // deepCopyPayload=true (default): each subscriber thread owns its IOBuf chain, + // avoiding cross-thread contention on the shared atomic refcount. + auto crossExecFilter = std::make_shared(subscriberExec, localFwd); + + // addSubscriber before the relay hop: numForwardingSubscribers() must be correct + // when addChannelSubscriber runs on primaryExec, so forward flag is right from the start. + auto sub = localFwd->addSubscriber(session, subReq, std::move(consumer)); + if (!sub) { + localReg->remove(ftn, localFwd.get()); + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "failed to add subscriber" + }); + } + bool forward = (localFwd->numForwardingSubscribers() > 0); + + // Out-vars set by the relay/primary chain, read after unwind on subscriberExec. + std::optional upstreamError; + std::optional upstreamOk; + std::shared_ptr finalCallback; + std::shared_ptr primaryFwd; + folly::Executor* primaryExec{nullptr}; + // Set on firstSetup path; wired to topNFilter on relayExec_ before pending.complete(). + std::shared_ptr relayChainFilter; + + // Single hop to relayExec_, with a nested hop to primaryExec so that the relay-exec + // cleanup (cache + pending.complete) runs on the natural unwind rather than as a + // separate explicit hop. + co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(relayExec_), + [&]() -> folly::coro::Task { + auto sr = co_await subscribeStatefulWork(subReq); + if (sr.error) { + upstreamError = std::move(*sr.error); // SubscribeResult (Expected with error) + co_return; + } + + primaryFwd = sr.primaryForwarder; + primaryExec = sr.primaryExec; + + if (primaryFwd && primaryExec) { + finalCallback = co_await wireLocalToPrimary( + localReg, + ftn, + localFwd, + primaryFwd, + primaryExec, + subscriberExec, + crossExecFilter, + forward + ); + // Natural unwind: back on relayExec_. + + if (sr.firstSetup) { + auto r = co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec), + completeFirstSubscriber(*sr.firstSetup, primaryFwd, forward) + ); + relayChainFilter = std::move(r.relayChainFilter); + if (r.result.hasError()) { + upstreamError = folly::makeUnexpected(std::move(r.result.error())); + } else { + upstreamOk = std::move(r.result.value()); + } + } + } + + // Natural unwind: back on relayExec_. + if (upstreamError) { + // Channel subscriber(s) installed before subscribe failed; clean up. + if (primaryFwd && primaryExec) { + auto re = relayExec_; + auto rcf = relayChainFilter; + folly::via( + primaryExec, + [pf = primaryFwd, ex = subscriberExec, re, rcf = std::move(rcf)]() noexcept { + pf->removeChannelSubscriberByExec(ex); + if (rcf) { + pf->removeChannelSubscriberByExec(re); + } + } + ); + } + // pending destructor fires when sr is destroyed, cleaning registry entry. + co_return; + } + if (!sr.firstSetup) { + co_return; + } + // Wire relay chain before pending.complete() so buffered objects see topNFilter. + if (relayChainFilter) { + auto topNView = registry_.getTopNView(ftn); + if (topNView && topNView->topNFilter) { + relayChainFilter->setDownstream(topNView->topNFilter); + } + } + if (upstreamOk) { + if (cache_) { + cache_->setTrackExtensions(ftn, upstreamOk->extensions); + } + auto& setup = *sr.firstSetup; + if (!setup.pending.complete( + std::move(upstreamOk->handle), + upstreamOk->requestID, + setup.upstreamSession, + maybeWrapPublisher(relayExec_, setup.upstreamSession) + )) { + upstreamError = folly::makeUnexpected(SubscribeError{ + setup.clientRequestID, + SubscribeErrorCode::INTERNAL_ERROR, + "publisher reconnected during subscribe" + }); + } + } + }() + ); + + // Natural unwind: back on subscriberExec. + + if (pendingCb->sawOnEmpty_) { + // All subscribers left during setup; localReg entry already removed in + // PendingForwarderCallback::onEmpty. + // TODO: "everyone left during setup" and "everyone left + new subscriber joined" + // are edge cases that need further scrutiny and tests. For now do a best-effort + // cleanup of the channel subscriber if it was installed, and return an error. + // The relay registry entry (and upstream subscription if pending.complete ran) + // remains, so a subsequent subscribe on any thread will hit the SubsequentSubscriber + // path and work correctly. + if (primaryFwd && primaryExec) { + auto re = relayExec_; + auto rcf = relayChainFilter; + folly::via( + primaryExec, + [pf = primaryFwd, ex = subscriberExec, re, rcf = std::move(rcf)]() noexcept { + pf->removeChannelSubscriberByExec(ex); + if (rcf) { + pf->removeChannelSubscriberByExec(re); + } + } + ); + } + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "all subscribers cancelled during setup" + }); + } + + if (upstreamError) { + localFwd->publishDone(PublishDone{ + RequestID(0), + PublishDoneStatusCode::INTERNAL_ERROR, + 0, + upstreamError->error().reasonPhrase + }); + localReg->remove(ftn, localFwd.get()); + co_return std::move(*upstreamError); + } + + if (upstreamOk) { + localFwd->setExtensions(upstreamOk->extensions); + if (upstreamOk->largest) { + localFwd->updateLargest(upstreamOk->largest->group, upstreamOk->largest->object); + } + } + replayPendingEvents(localFwd.get(), finalCallback, *pendingCb, forward); + localFwd->tryProcessNewGroupRequest(subReq.params); + co_return sub; +} + +// === End multi-iothread subscribe helpers === + folly::coro::Task MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr consumer) { + return subscribeImpl(std::move(subReq), std::move(consumer)); +} + +folly::coro::Task +MoqxRelay::subscribeImpl(SubscribeRequest subReq, std::shared_ptr consumer) { auto session = MoQSession::getRequestSession(); + maybeSetSessionExec(*session); const auto& ftn = subReq.fullTrackName; if (ftn.trackNamespace.empty()) { @@ -709,8 +1790,7 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con // TOCTOU fix: if we might be the first subscriber, wait for the upstream // connection before branching. A concurrent coroutine may emplace the entry // while we are suspended, so we re-check inside getOrCreateFromSubscribe. - if (!registry_.exists(ftn) && upstream_ && - !namespaceTree_.findPublisherSession(ftn.trackNamespace)) { + if (!registry_.exists(ftn) && upstream_ && !findUpstreamPublisher(ftn.trackNamespace)) { co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); } @@ -727,6 +1807,7 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con {subReq.requestID, SubscribeErrorCode::TRACK_NOT_EXIST, "no such namespace or track"} )); } // pending destructor fires on early return above + auto upstreamPublisher = maybeWrapPublisher(relayExec_, upstreamSession); // Add subscriber first (with the client's original request) in case objects // arrive before subscribe OK. @@ -751,9 +1832,8 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con subReq.locType = LocationType::LargestObject; // Per the spec, we're supposed to always forward=1 upstream subReq.forward = first->forwarder->numForwardingSubscribers() > 0; - subReq.requestID = upstreamSession->peekNextRequestID(); - auto subRes = co_await upstreamSession->subscribe(subReq, first->consumer); + auto subRes = co_await upstreamPublisher->subscribe(subReq, first->consumer); if (subRes.hasError()) { co_return folly::makeUnexpected(SubscribeError( {clientRequestID, @@ -777,7 +1857,8 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con first->forwarder->tryProcessNewGroupRequest(subReq.params, /*fire=*/false); auto requestID = subRes.value()->subscribeOk().requestID; - if (!first->pending.complete(std::move(subRes.value()), requestID, upstreamSession)) { + if (!first->pending + .complete(std::move(subRes.value()), requestID, upstreamSession, upstreamPublisher)) { XLOG(ERR) << "Subscription replaced by reconnecting publisher: " << ftn; co_return folly::makeUnexpected(SubscribeError{ clientRequestID, @@ -818,6 +1899,11 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con folly::coro::Task MoqxRelay::fetch(Fetch fetch, std::shared_ptr consumer) { + return fetchImpl(std::move(fetch), std::move(consumer)); +} + +folly::coro::Task +MoqxRelay::fetchImpl(Fetch fetch, std::shared_ptr consumer) { auto session = MoQSession::getRequestSession(); // check auth @@ -844,33 +1930,30 @@ MoqxRelay::fetch(Fetch fetch, std::shared_ptr consumer) { fetch.args = StandaloneFetch(res.value().start, res.value().end); joining = nullptr; } else { - // Upstream is resolving the subscribe, forward joining fetch - joining->joiningRequestID = fetchView->requestID; + // Upstream is resolving the subscribe; let MoQSession resolve the + // request ID by track name to avoid a cross-executor data race. + joining->joiningRequestID = kAutoRequestID; } } - auto upstreamSession = namespaceTree_.findPublisherSession(fetch.fullTrackName.trackNamespace); - if (!upstreamSession && upstream_) { + auto upstreamPublisher = findUpstreamPublisher(fetch.fullTrackName.trackNamespace); + if (!upstreamPublisher && upstream_) { co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); - upstreamSession = namespaceTree_.findPublisherSession(fetch.fullTrackName.trackNamespace); + upstreamPublisher = findUpstreamPublisher(fetch.fullTrackName.trackNamespace); } - if (!upstreamSession) { + if (!upstreamPublisher) { // Attempt to find matching upstream subscription (from publish) if (auto fetchView = registry_.getFetchView(fetch.fullTrackName)) { - upstreamSession = fetchView->upstream; + upstreamPublisher = fetchView->publisher; } - if (!upstreamSession) { + if (!upstreamPublisher) { co_return folly::makeUnexpected( FetchError({fetch.requestID, FetchErrorCode::TRACK_NOT_EXIST, "no upstream for fetch"}) ); } } - if (session.get() == upstreamSession.get()) { - co_return folly::makeUnexpected( - FetchError({fetch.requestID, FetchErrorCode::INTERNAL_ERROR, "self fetch"}) - ); - } fetch.priority = kDefaultUpstreamPriority; + if (!cache_ || joining) { // We can't use the cache on an unresolved joining fetch - we don't know // which objects are being requested. However, once we have that resolved, @@ -879,12 +1962,18 @@ MoqxRelay::fetch(Fetch fetch, std::shared_ptr consumer) { XLOG(DBG1) << "Upstream fetch {" << standalone->start.group << "," << standalone->start.object << "}.." << standalone->end.group << "," << standalone->end.object << "}"; } - co_return co_await upstreamSession->fetch(fetch, std::move(consumer)); + co_return co_await upstreamPublisher->fetch(std::move(fetch), std::move(consumer)); } - co_return co_await cache_->fetch(fetch, std::move(consumer), std::move(upstreamSession)); + co_return co_await cache_ + ->fetch(std::move(fetch), std::move(consumer), std::move(upstreamPublisher)); } folly::coro::Task MoqxRelay::trackStatus(TrackStatus trackStatus) { + return trackStatusImpl(std::move(trackStatus)); +} + +folly::coro::Task MoqxRelay::trackStatusImpl(TrackStatus trackStatus +) { XLOG(DBG1) << __func__ << " ftn=" << trackStatus.fullTrackName; if (trackStatus.fullTrackName.trackNamespace.empty()) { @@ -922,25 +2011,26 @@ folly::coro::Task MoqxRelay::trackStatus(TrackStat << " statusCode=" << (uint32_t)statusCode; co_return trackStatusOk; } else { - // No subscription - forward to upstream - auto upstreamSession = - namespaceTree_.findPublisherSession(trackStatus.fullTrackName.trackNamespace); - if (!upstreamSession && upstream_) { - co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); - upstreamSession = - namespaceTree_.findPublisherSession(trackStatus.fullTrackName.trackNamespace); + // No active subscription — try registry publisher first, then namespace tree + std::shared_ptr upstreamPublisher; + if (upstreamView) { + upstreamPublisher = upstreamView->publisher; + } else { + upstreamPublisher = findUpstreamPublisher(trackStatus.fullTrackName.trackNamespace); + if (!upstreamPublisher && upstream_) { + co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); + upstreamPublisher = findUpstreamPublisher(trackStatus.fullTrackName.trackNamespace); + } } - if (!upstreamSession) { - XLOG(DBG1) << "No upstream session for track: " << trackStatus.fullTrackName; + if (!upstreamPublisher) { + XLOG(DBG1) << "No upstream for track: " << trackStatus.fullTrackName; co_return folly::makeUnexpected(TrackStatusError{ trackStatus.requestID, TrackStatusErrorCode::TRACK_NOT_EXIST, "no such namespace or track" }); } - - // Forward the trackStatus request to the upstream publisher session - auto result = co_await upstreamSession->trackStatus(trackStatus); + auto result = co_await upstreamPublisher->trackStatus(std::move(trackStatus)); if (result.hasError()) { XLOG(DBG1) << "Upstream trackStatus failed: " << result.error().reasonPhrase; @@ -952,7 +2042,10 @@ folly::coro::Task MoqxRelay::trackStatus(TrackStat } void MoqxRelay::onEmpty(MoQForwarder* forwarder) { - const auto& ftn = forwarder->fullTrackName(); + onEmptyImpl(forwarder->fullTrackName()); +} + +void MoqxRelay::onEmptyImpl(const FullTrackName& ftn) { auto upstreamView = registry_.getUpstreamView(ftn); if (!upstreamView) { return; @@ -970,8 +2063,12 @@ void MoqxRelay::onEmpty(MoQForwarder* forwarder) { if (upstreamView->isPublish) { // if it's publish, don't unsubscribe, just subscribeUpdate forward=false XLOG(DBG1) << "Updating upstream subscription forward=false"; - auto exec = upstreamView->upstream->getExecutor(); - co_withExecutor(exec, doSubscribeUpdate(upstreamView->handle, /*forward=*/false)).start(); + auto exec = relayExec(); + co_withExecutor( + folly::getKeepAliveToken(exec), + doSubscribeUpdate(upstreamView->handle, /*forward=*/false) + ) + .start(); } else { upstreamView->handle->unsubscribe(); XLOG(DBG4) << "Erasing subscription to " << ftn; @@ -980,7 +2077,10 @@ void MoqxRelay::onEmpty(MoQForwarder* forwarder) { } void MoqxRelay::forwardChanged(MoQForwarder* forwarder, bool forward) { - const auto& ftn = forwarder->fullTrackName(); + forwardChangedImpl(forwarder->fullTrackName(), forward); +} + +void MoqxRelay::forwardChangedImpl(const FullTrackName& ftn, bool forward) { auto upstreamView = registry_.getUpstreamView(ftn); if (!upstreamView) { return; @@ -996,12 +2096,16 @@ void MoqxRelay::forwardChanged(MoQForwarder* forwarder, bool forward) { } XLOG(INFO) << "Updating forward for " << ftn << " forward=" << forward; - auto exec = upstreamView->upstream->getExecutor(); - co_withExecutor(exec, doSubscribeUpdate(upstreamView->handle, forward)).start(); + auto exec = relayExec(); + co_withExecutor(folly::getKeepAliveToken(exec), doSubscribeUpdate(upstreamView->handle, forward)) + .start(); } void MoqxRelay::newGroupRequested(MoQForwarder* forwarder, uint64_t group) { - const auto& ftn = forwarder->fullTrackName(); + newGroupRequestedImpl(forwarder->fullTrackName(), group); +} + +void MoqxRelay::newGroupRequestedImpl(const FullTrackName& ftn, uint64_t group) { auto upstreamView = registry_.getUpstreamView(ftn); // Check if handle is still valid (publisher may have terminated) if (!upstreamView || !upstreamView->handle) { @@ -1010,9 +2114,10 @@ void MoqxRelay::newGroupRequested(MoQForwarder* forwarder, uint64_t group) { } XLOG(INFO) << "New group request detected for " << ftn; - auto exec = upstreamView->upstream->getExecutor(); + auto exec = relayExec(); auto handle = upstreamView->handle; - co_withExecutor(exec, doNewGroupRequestUpdate(std::move(handle), group)).start(); + co_withExecutor(folly::getKeepAliveToken(exec), doNewGroupRequestUpdate(std::move(handle), group)) + .start(); } // TRACK_FILTER support @@ -1127,8 +2232,8 @@ void MoqxRelay::onTrackSelected( XLOG(DBG4) << "[MoqxRelay] Track selected: " << ftn << " session=" << session.get() << " forward=" << forward; - if (!session || session->isClosed()) { - XLOG(ERR) << "onTrackSelected: session null or closed, skipping " << ftn; + if (!session) { + XLOG(ERR) << "onTrackSelected: null session for " << ftn; return; } @@ -1138,23 +2243,19 @@ void MoqxRelay::onTrackSelected( return; } - auto exec = session->getExecutor(); - XCHECK(exec) << "onTrackSelected: null executor for session " << session.get(); - - // TODO: Consider batching multiple publishToSession calls on the same executor - // when multiple tracks are selected for the same session in a single ranking update. - co_withExecutor( - exec, - publishToSession(session, trackForwarder, forward, /*trackFilterSubscriber=*/true) - ) - .start(); + auto upstreamView = registry_.getUpstreamView(ftn); + XCHECK(!relayExec_ || (upstreamView && upstreamView->publisherExec)) + << "onTrackSelected: relayExec set but no publisherExec for " << ftn; + auto* publisherExec = relayExec_ ? upstreamView->publisherExec : nullptr; + // TRACK_FILTER subscribers are unpinned so onTrackEvicted can remove them. + addSubscriberAndPublish(session, trackForwarder, forward, /*pinned=*/false, publisherExec); } void MoqxRelay::onTrackEvicted(const FullTrackName& ftn, std::shared_ptr session) { XLOG(DBG4) << "[MoqxRelay] Track evicted: " << ftn << " session=" << session.get(); - if (!session || session->isClosed()) { - XLOG(WARN) << "onTrackEvicted: session null or closed, skipping " << ftn; + if (!session) { + XLOG(WARN) << "onTrackEvicted: null session for " << ftn; return; } diff --git a/src/MoqxRelay.h b/src/MoqxRelay.h index e2ffe85..7895d99 100644 --- a/src/MoqxRelay.h +++ b/src/MoqxRelay.h @@ -13,10 +13,16 @@ #include "SubscriptionRegistry.h" #include "UpstreamProvider.h" #include "config/Config.h" +#include "relay/CrossExecFilter.h" +#include "relay/CrossExecForwarderCallback.h" +#include "relay/LocalForwarderRegistry.h" #include "relay/PropertyRanking.h" +#include "relay/RelayExecUtil.h" #include #include +#include +#include #include #include #include @@ -24,6 +30,8 @@ #include #include +namespace openmoq::moqx {} // namespace openmoq::moqx + namespace openmoq::moqx { // Visitor interface for relay state inspection. @@ -115,6 +123,36 @@ class MoqxRelay : public moxygen::Publisher, } } + // Optionally isolate relay state on a dedicated executor thread. + // When set, all public entry points switch to relayExec before touching + // relay state, and consumer callbacks to/from sessions are wrapped with + // cross-executor filters. relayExec must outlive this relay. + // If not set (default), all operations run on the calling thread. + void setRelayExec(folly::Executor* relayExec) { relayExec_ = relayExec; } + + // Takes ownership of exec and uses it as the relay executor. + void setRelayExec(std::shared_ptr exec) { + ownedRelayExec_ = std::move(exec); + relayExec_ = ownedRelayExec_.get(); + } + + folly::Executor* getRelayExec() const { return relayExec_; } + + // When true, uses per-thread local forwarders to minimize cross-thread + // hops in the data plane (one cross-exec hop per callback per thread). + // Defaults to false (one cross-exec hop per plane callback per subscriber + // plus one for the publisher). + void setUseLocalForwarders(bool enable) { useLocalForwarders_ = enable; } + + // Returns the appropriate Publisher filter for a client session. + // Uses LocalSubscribeFilter when relayExec_ + useLocalForwarders_ are set, + // PublisherCrossExecFilter when only relayExec_ is set, or this directly. + std::shared_ptr createPublisherFilter(); + + // Returns the appropriate Subscriber filter for a client session. + // Uses SubscriberCrossExecFilter when relayExec_ is set, or this directly. + std::shared_ptr createSubscriberFilter(); + void setAllowedNamespacePrefix(moxygen::TrackNamespace allowed) { allowedNamespacePrefix_ = std::move(allowed); } @@ -231,6 +269,8 @@ class MoqxRelay : public moxygen::Publisher, private: class NamespaceSubscription; class TerminationFilter; + class LocalSubscribeFilter; + class LocalPublishFilter; void unsubscribeNamespace( const moxygen::TrackNamespace& prefix, @@ -247,17 +287,77 @@ class MoqxRelay : public moxygen::Publisher, void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; + // FTN-keyed impl variants — called by the MoQForwarder::Callback overrides + // above (single-thread) or by WeakRelayForwarderCallback on relay exec. + void onEmptyImpl(const moxygen::FullTrackName& ftn); + void forwardChangedImpl(const moxygen::FullTrackName& ftn, bool forward); + void newGroupRequestedImpl(const moxygen::FullTrackName& ftn, uint64_t group); + folly::coro::Task publishNamespaceToSession( std::shared_ptr session, moxygen::PublishNamespace pubNs, std::shared_ptr nodePtr ); - folly::coro::Task publishToSession( + // Sync setup: addSubscriber → set pinned → session->publish (optionally via + // SubscriberCrossExecFilter when subscriberExec is non-null) → set trackConsumer. + // Returns nullopt and cleans up on any synchronous failure. + struct PreparedPublish { + std::shared_ptr subscriber; + folly::coro::Task> reply; + }; + std::optional startPublish( std::shared_ptr session, std::shared_ptr forwarder, bool forward, - bool trackFilterSubscriber = false + bool pinned, + folly::Executor* subscriberExec + ); + + // Runs on publisherExec. Calls startPublish and co_awaits the reply. + // subscriberExec non-null → cross-exec publish dispatch. + // Runs on subscriberExec. Gets or creates the thread-local forwarder for ftn, + // wires it to primaryFwd as a channel subscriber (isNew path), and awaits the + // publish reply. Shared by addSubscriberAndPublish and any future callers that + // need to attach a subscriber via a local forwarder. + folly::coro::Task publishViaLocalForwarder( + std::shared_ptr subscriberSession, + std::shared_ptr primaryFwd, + folly::Executor* primaryExec, + bool forward, + bool pinned + ); + + // In relay+local-forwarder mode: dispatches publishViaLocalForwarder to + // subscriberExec. Otherwise: calls startPublish sync and fires reply async. + // Returns false on synchronous failure. + bool addSubscriberAndPublish( + std::shared_ptr subscriberSession, + std::shared_ptr forwarder, + bool forward, + bool pinned, + folly::Executor* publisherExec = nullptr + ); + + // Called from LocalPublishFilter::publish() on publisherExec. Creates a local + // publisher forwarder as the session consumer and asynchronously sets up the + // primary forwarder on relayExec_ via setupPublisherPrimary(). + moxygen::Subscriber::PublishResult publishFromPublisherThread( + moxygen::PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session + ); + + // Runs on relayExec_. Registers primaryFwd (the publisher's local forwarder, + // created on publisherExec) in the registry and wires relayChainFilter to the + // topNFilter so the relay chain receives objects. + folly::coro::Task> + setupPublisherPrimary( + moxygen::PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session, + std::shared_ptr primaryFwd, + std::shared_ptr relayChainFilter ); folly::coro::Task @@ -319,6 +419,124 @@ class MoqxRelay : public moxygen::Publisher, const moxygen::FullTrackName& ftn, std::shared_ptr consumer ); + + // Result of subscribeStatefulWork (Phase 1, runs on relay exec). + struct StatefulSubscribeResult { + std::shared_ptr primaryForwarder; + folly::Executor* primaryExec{nullptr}; // owning executor of primaryForwarder + std::optional error; // set on failure + + // Set only for the FirstSubscriber path. Must be moved into + // completeFirstSubscriber on relayExec_ after the channel subscriber is + // installed on primaryExec. Pending destructor fires on abandoned move. + struct FirstSubscriberSetup { + std::shared_ptr upstreamSession; + moxygen::SubscribeRequest upstreamSubReq; + std::shared_ptr upstreamConsumer; + SubscriptionRegistry::UpstreamSubscribePending pending; + moxygen::RequestID clientRequestID; + }; + std::optional firstSetup; + }; + + // Phase 1: registry lookup + FirstSubscriber setup. Does NOT subscribe + // upstream; returns firstSetup for the caller to complete after installing + // the channel subscriber. + folly::coro::Task subscribeStatefulWork(moxygen::SubscribeRequest subReq + ); + + struct UpstreamOk { + std::shared_ptr handle; + moxygen::RequestID requestID; + moxygen::Extensions extensions; + std::optional largest; + }; + struct FirstSubscriberResult { + std::shared_ptr relayChainFilter; // always set; needed for cleanup on error + folly::Expected result; + }; + + // Runs on primaryExec. Adds the relay-exec channel subscriber, issues the + // upstream subscribe, and processes SubscribeOk on the primary forwarder. + folly::coro::Task completeFirstSubscriber( + StatefulSubscribeResult::FirstSubscriberSetup& setup, + std::shared_ptr primaryFwd, + bool forward + ); + + // Multi-iothread subscribe: subscriber-thread orchestrator. + // Dispatches subscribeStatefulWork to relayExec_, then on the subscriber thread + // creates a local forwarder, wires a channel subscriber, and returns. + folly::coro::Task subscribeFromSubscriberThread( + moxygen::SubscribeRequest subReq, + std::shared_ptr consumer, + std::shared_ptr session, + folly::Executor* subscriberExec + ); + + // Impl methods — run on relayExec_ when set, or inline when relayExec_==nullptr. + folly::coro::Task + subscribeImpl(moxygen::SubscribeRequest subReq, std::shared_ptr consumer); + folly::coro::Task + fetchImpl(moxygen::Fetch fetch, std::shared_ptr consumer); + folly::coro::Task subscribeNamespaceImpl( + moxygen::SubscribeNamespace subNs, + std::shared_ptr namespacePublishHandle + ); + folly::coro::Task publishNamespaceImpl( + moxygen::PublishNamespace pubNs, + std::shared_ptr callback + ); + folly::coro::Task trackStatusImpl(moxygen::TrackStatus req + ); + folly::coro::Task onUpstreamConnectImpl(std::shared_ptr session); + + // Synchronous result of publishWithSession: the consumer the publisher writes + // to and the PublishOk to return to the publisher. Returned synchronously so + // the reply coro (coPublish) can co_return the PublishOk immediately after + // setup without waiting for any downstream peer handshake. + struct PublishSetup { + std::shared_ptr consumer; + moxygen::PublishOk publishOk; + }; + using PublishSetupResult = folly::Expected; + + // Contains all the inline publish() logic, taking session explicitly so it + // can be called from either the I/O thread (relayExec_==nullptr) or from + // coPublish on relay exec (where getRequestSession() would return null). + // If forwarder is non-null it is used as-is (pre-created by publish()); otherwise + // a new forwarder is created from pub (single-threaded or subscribeNamespace path). + PublishSetupResult publishWithSession( + moxygen::PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session, + std::shared_ptr forwarder = nullptr + ); + + std::shared_ptr ownedRelayExec_; + folly::Executor* relayExec_{nullptr}; + // Only set in single-threaded mode (relayExec_ == null); used as the + // coroutine start executor for fire-and-forget tasks like doSubscribeUpdate. + folly::Executor* sessionExec_{nullptr}; + + void maybeSetSessionExec(moxygen::MoQSession& session) { + if (!relayExec_ && !sessionExec_) { + sessionExec_ = session.getExecutor(); + } + } + + folly::Executor* relayExec() const { return relayExec_ ? relayExec_ : sessionExec_; } + + std::shared_ptr findUpstreamPublisher(const moxygen::TrackNamespace& ns) { + auto session = namespaceTree_.findPublisherSession(ns); + if (!session) { + return nullptr; + } + return maybeWrapPublisher(relayExec_, std::move(session)); + } + + bool useLocalForwarders_{false}; + folly::ThreadLocalPtr tlForwarders_; std::unique_ptr cache_; uint64_t maxDeselected_{kDefaultMaxDeselected}; @@ -329,12 +547,15 @@ class MoqxRelay : public moxygen::Publisher, }; // Creates a NamespacePublishHandle that bridges NAMESPACE/NAMESPACE_DONE -// messages from a peer relay into relay->doPublishNamespace() synchronously. -// Used for both the initiating (UpstreamProvider) and reciprocal (MoqxRelay) paths. +// messages from a peer relay into relay->doPublishNamespace(). When relayExec +// is non-null, callbacks are dispatched to it so relay state is only mutated +// on the relay executor thread. Used for both the initiating (UpstreamProvider) +// and reciprocal (MoqxRelay) paths. std::shared_ptr makeNamespaceBridgeHandle( std::weak_ptr relay, std::shared_ptr session, - std::string peerID = {} + std::string peerID = {}, + folly::Executor* relayExec = nullptr ); } // namespace openmoq::moqx diff --git a/src/MoqxRelayContext.cpp b/src/MoqxRelayContext.cpp index d1d9336..aa44520 100644 --- a/src/MoqxRelayContext.cpp +++ b/src/MoqxRelayContext.cpp @@ -5,11 +5,15 @@ */ #include "MoqxRelayContext.h" +#include "relay/PublisherCrossExecFilter.h" +#include "relay/RelayExecUtil.h" +#include "relay/SubscriberCrossExecFilter.h" #include "stats/MoQStatsCollector.h" #include #include #include +#include #include using namespace moxygen; @@ -18,11 +22,29 @@ namespace openmoq::moqx { MoqxRelayContext::MoqxRelayContext( const folly::F14FastMap& services, - const std::string& relayID + const std::string& relayID, + bool useRelayThread, + bool useLocalForwarders ) : serviceMatcher_(services), relayID_(relayID) { - for (const auto& [name, svc] : services) { - services_.emplace(name, ServiceEntry{svc, std::make_shared(svc.cache, relayID)}); + if (useRelayThread && !services.empty()) { + relayThreadPool_ = std::make_shared( + services.size(), + std::make_shared("moqx-relay") + ); + auto evbs = relayThreadPool_->getAllEventBases(); + XCHECK_EQ(evbs.size(), services.size()); + size_t i = 0; + for (const auto& [name, svc] : services) { + auto relay = std::make_shared(svc.cache, relayID); + relay->setRelayExec(std::make_shared(evbs[i++].get())); + relay->setUseLocalForwarders(useLocalForwarders); + services_.emplace(name, ServiceEntry{svc, std::move(relay)}); + } + } else { + for (const auto& [name, svc] : services) { + services_.emplace(name, ServiceEntry{svc, std::make_shared(svc.cache, relayID)}); + } } } @@ -53,10 +75,7 @@ void MoqxRelayContext::initUpstreams(folly::EventBase* workerEvb) { CHECK(workerEvb) << "initUpstreams: workerEvb must not be null"; workerEvb_ = workerEvb; - // Use the provided worker EVB for all upstream connections. - // Per-EVB providers (one per worker thread) are a follow-up. - auto exec = std::make_shared(workerEvb); - + auto workerExec = std::make_shared(workerEvb); for (auto& [name, entry] : services_) { if (!entry.config.upstream) { continue; @@ -64,15 +83,31 @@ void MoqxRelayContext::initUpstreams(folly::EventBase* workerEvb) { const auto& cfg = *entry.config.upstream; auto verifier = makeUpstreamVerifier(cfg.tls); auto relay = entry.relay; - auto onConnect = [relay](std::shared_ptr session) -> folly::coro::Task { - co_await relay->onUpstreamConnect(session); + auto* relayExec = relay->getRelayExec(); + auto onConnect = [relay, + relayExec](std::shared_ptr session) -> folly::coro::Task { + if (relayExec) { + co_return co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(relayExec), + relay->onUpstreamConnect(session) + ); + } + co_return co_await relay->onUpstreamConnect(session); + }; + auto onDisconnect = [relay, relayExec]() { + runOnExec(relayExec, [relay]() { relay->onUpstreamDisconnect(); }); }; - auto onDisconnect = [relay]() { relay->onUpstreamDisconnect(); }; + std::shared_ptr pubHandler = relay; + std::shared_ptr subHandler = relay; + if (relayExec) { + pubHandler = std::make_shared(relayExec, relay); + subHandler = std::make_shared(relayExec, relay); + } auto provider = std::make_shared( - exec, + workerExec, proxygen::URL(cfg.url), - /*publishHandler=*/entry.relay, - /*subscribeHandler=*/entry.relay, + /*publishHandler=*/pubHandler, + /*subscribeHandler=*/subHandler, verifier, std::move(onConnect), std::move(onDisconnect), @@ -84,7 +119,7 @@ void MoqxRelayContext::initUpstreams(folly::EventBase* workerEvb) { // Eagerly connect so the peering handshake fires before any subscribers // arrive. The connection is lazy in UpstreamProvider but we kick it off // now so the upstream namespace tree is ready. - co_withExecutor(workerEvb, provider->start()).start(); + co_withExecutor(workerExec.get(), provider->start()).start(); } } @@ -158,11 +193,12 @@ folly::Expected MoqxRelayContext::validateAu return folly::makeUnexpected(SessionCloseErrorCode::INVALID_AUTHORITY); } - // Route: set per-service relay as handler + // Route: set per-service relay as handler, wrapping in cross-exec filters if needed auto it = services_.find(*matchedName); CHECK(it != services_.end()) << "Service '" << *matchedName << "' matched but no entry found"; - session->setPublishHandler(it->second.relay); - session->setSubscribeHandler(it->second.relay); + auto& relay = it->second.relay; + session->setPublishHandler(relay->createPublisherFilter()); + session->setSubscribeHandler(relay->createSubscriberFilter()); return folly::unit; } diff --git a/src/MoqxRelayContext.h b/src/MoqxRelayContext.h index 2fec04b..de1c7e6 100644 --- a/src/MoqxRelayContext.h +++ b/src/MoqxRelayContext.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -54,7 +55,9 @@ class MoqxRelayContext { MoqxRelayContext( const folly::F14FastMap& services, - const std::string& relayID + const std::string& relayID, + bool useRelayThread = true, + bool useLocalForwarders = false ); void setStatsRegistry(std::shared_ptr registry); @@ -108,6 +111,11 @@ class MoqxRelayContext { ); private: + // When use_relay_thread=true: one dedicated thread per service, each with its + // own executor, isolating relay state from I/O threads. Null when disabled. + // Each relay owns its MoQFollyExecutorImpl; the pool just keeps threads alive. + std::shared_ptr relayThreadPool_; + folly::F14FastMap services_; ServiceMatcher serviceMatcher_; std::string relayID_; diff --git a/src/MoqxRelayServer.cpp b/src/MoqxRelayServer.cpp index aedc404..210e5c4 100644 --- a/src/MoqxRelayServer.cpp +++ b/src/MoqxRelayServer.cpp @@ -133,7 +133,11 @@ MoqxRelayServer::MoqxRelayServer( : MoQServer( buildFizzContext(listenerCfg), listenerCfg.endpoint, - buildTransportSettings(listenerCfg.quic, listenerCfg.mvfst) + MoQServer::Options{ + .transportSettings = buildTransportSettings(listenerCfg.quic, listenerCfg.mvfst), + .udpSendBufferBytes = listenerCfg.mvfst.udpSocketBufferBytes, + .udpRecvBufferBytes = listenerCfg.mvfst.udpSocketBufferBytes, + } ), listenerCfg_(listenerCfg), context_(std::move(context)), ioExecutor_(ioExecutor) {} diff --git a/src/SubscriptionRegistry.cpp b/src/SubscriptionRegistry.cpp index 41acd6a..3b1e999 100644 --- a/src/SubscriptionRegistry.cpp +++ b/src/SubscriptionRegistry.cpp @@ -13,7 +13,8 @@ namespace openmoq::moqx { bool SubscriptionRegistry::UpstreamSubscribePending::complete( std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ) { active_ = false; return registry_->completeSubscription( @@ -21,7 +22,8 @@ bool SubscriptionRegistry::UpstreamSubscribePending::complete( weakForwarder_, std::move(handle), requestID, - std::move(upstreamSession) + std::move(upstreamSession), + std::move(publisher) ); } @@ -85,7 +87,8 @@ bool SubscriptionRegistry::completeSubscription( std::weak_ptr weakForwarder, std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ) { auto it = subscriptions_.find(ftn); if (it == subscriptions_.end() || it->second.forwarder != weakForwarder.lock()) { @@ -95,6 +98,7 @@ bool SubscriptionRegistry::completeSubscription( rsub.handle = std::move(handle); rsub.requestID = requestID; rsub.upstream = std::move(upstreamSession); + rsub.publisher = std::move(publisher); rsub.promise.setValue(folly::unit); return true; } @@ -114,6 +118,7 @@ SubscriptionRegistry::PublishEntry SubscriptionRegistry::createFromPublish( const moxygen::FullTrackName& ftn, std::shared_ptr forwarder, std::shared_ptr session, + std::shared_ptr publisher, moxygen::RequestID requestID, std::shared_ptr handle, folly::FunctionRef)> chainBuilder @@ -137,6 +142,7 @@ SubscriptionRegistry::PublishEntry SubscriptionRegistry::createFromPublish( rsub.promise.setValue(folly::unit); rsub.requestID = requestID; rsub.handle = std::move(handle); + rsub.publisher = std::move(publisher); rsub.isPublish = true; auto [consumer, topNFilter] = chainBuilder(forwarder); @@ -174,9 +180,10 @@ SubscriptionRegistry::getUpstreamView(const moxygen::FullTrackName& ftn) const { const auto& rsub = it->second; return UpstreamView{ rsub.forwarder, - rsub.upstream, + rsub.publisher, rsub.handle, rsub.requestID, + rsub.upstream ? rsub.upstream->getExecutor() : nullptr, rsub.isPublish, rsub.promise.isFulfilled() }; @@ -189,7 +196,7 @@ SubscriptionRegistry::getFetchView(const moxygen::FullTrackName& ftn) const { return std::nullopt; } const auto& rsub = it->second; - return FetchView{rsub.forwarder, rsub.upstream, rsub.requestID, rsub.promise.isFulfilled()}; + return FetchView{rsub.forwarder, rsub.publisher, rsub.requestID, rsub.promise.isFulfilled()}; } std::shared_ptr @@ -201,6 +208,7 @@ SubscriptionRegistry::onPublisherTerminated(const moxygen::FullTrackName& ftn) { auto& rsub = it->second; rsub.handle.reset(); rsub.upstream.reset(); + rsub.publisher.reset(); if (rsub.forwarder->empty()) { subscriptions_.erase(it); return nullptr; diff --git a/src/SubscriptionRegistry.h b/src/SubscriptionRegistry.h index 4f5630d..22b42c9 100644 --- a/src/SubscriptionRegistry.h +++ b/src/SubscriptionRegistry.h @@ -53,12 +53,13 @@ class SubscriptionRegistry { // Identity-checked success path. Re-finds by ftn; checks forwarder identity // to detect a reconnecting publisher that replaced the entry during the - // caller's co_await suspension. Sets handle, requestID, upstreamSession; - // fulfills promise. Returns false if entry is gone or replaced. + // caller's co_await suspension. Sets handle, requestID, upstreamSession, + // publisher; fulfills promise. Returns false if entry is gone or replaced. bool complete( std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ); ~UpstreamSubscribePending(); @@ -109,6 +110,7 @@ class SubscriptionRegistry { const moxygen::FullTrackName& ftn, std::shared_ptr forwarder, std::shared_ptr session, + std::shared_ptr publisher, moxygen::RequestID requestID, std::shared_ptr handle, folly::FunctionRef)> chainBuilder @@ -126,12 +128,13 @@ class SubscriptionRegistry { }; std::optional getTopNView(const moxygen::FullTrackName& ftn) const; - // For onEmpty / forwardChanged / newGroupRequested + // For onEmpty / forwardChanged / newGroupRequested / trackStatus struct UpstreamView { std::shared_ptr forwarder; - std::shared_ptr upstream; + std::shared_ptr publisher; std::shared_ptr handle; moxygen::RequestID requestID; + folly::Executor* publisherExec{nullptr}; // executor the primary forwarder lives on bool isPublish; bool isReady; // promise fulfilled }; @@ -140,7 +143,7 @@ class SubscriptionRegistry { // For fetch() struct FetchView { std::shared_ptr forwarder; - std::shared_ptr upstream; + std::shared_ptr publisher; moxygen::RequestID requestID; bool isReady; }; @@ -182,6 +185,7 @@ class SubscriptionRegistry { std::shared_ptr forwarder; std::shared_ptr upstream; + std::shared_ptr publisher; moxygen::RequestID requestID{0}; std::shared_ptr handle; folly::coro::SharedPromise promise; @@ -196,7 +200,8 @@ class SubscriptionRegistry { std::weak_ptr weakForwarder, std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ); // Called by UpstreamSubscribePending destructor on failure. diff --git a/src/config/Config.h b/src/config/Config.h index 1f45b18..9969f2d 100644 --- a/src/config/Config.h +++ b/src/config/Config.h @@ -128,6 +128,10 @@ struct MvfstConfig { float ceTarget{0.0f}; // 0 = disabled }; + // UDP socket send/receive buffer size in bytes. 0 = use MoQServer default (1 MB). + // Increase when high fan-out causes EAGAIN bursts under load. + uint64_t udpSocketBufferBytes{0}; + BBR bbr; BBR2 bbr2; Cubic cubic; @@ -196,6 +200,8 @@ struct Config { std::optional admin; std::string relayID; // always set: from config or randomly generated uint32_t threads{1}; + bool useRelayThread{true}; + bool useLocalForwarders{false}; bool mvfstBpfSteering{true}; }; diff --git a/src/config/ConfigResolver.cpp b/src/config/ConfigResolver.cpp index 33d7a8e..b073f2c 100644 --- a/src/config/ConfigResolver.cpp +++ b/src/config/ConfigResolver.cpp @@ -508,6 +508,8 @@ void applyMvfstOverride(MvfstConfig& base, const ParsedMvfstConfig& overlay) { if (auto v = ls->ce_target.value()) base.l4s.ceTarget = *v; } + if (auto v = overlay.udp_socket_buffer_bytes.value()) + base.udpSocketBufferBytes = *v; } MvfstConfig mergeMvfstConfig( @@ -822,8 +824,16 @@ folly::Expected resolveConfig(const ParsedConfig& c const uint32_t threads = config.threads.value().value_or(1); if (threads == 0) { errors.push_back("threads must be >= 1"); - } else if (threads > 1) { - errors.push_back("threads > 1 is not yet supported"); + } + + const bool useRelayThread = config.use_relay_thread.value().value_or(true); + if (threads > 1 && !useRelayThread) { + errors.push_back("use_relay_thread must be true when threads > 1"); + } + + const bool useLocalForwarders = config.use_local_forwarders.value().value_or(false); + if (useLocalForwarders && !useRelayThread) { + errors.push_back("use_local_forwarders requires use_relay_thread to be true"); } const bool mvfstBpfSteering = config.mvfst_bpf_steering.value().value_or(true); @@ -877,6 +887,8 @@ folly::Expected resolveConfig(const ParsedConfig& c .admin = std::move(adminConfig), .relayID = std::move(relayID), .threads = threads, + .useRelayThread = useRelayThread, + .useLocalForwarders = useLocalForwarders, .mvfstBpfSteering = mvfstBpfSteering, }, .warnings = std::move(warnings), diff --git a/src/config/loader/ParsedConfig.h b/src/config/loader/ParsedConfig.h index 178d6a2..e3a5c16 100644 --- a/src/config/loader/ParsedConfig.h +++ b/src/config/loader/ParsedConfig.h @@ -225,6 +225,11 @@ struct ParsedMvfstConfig { "L4S tunables (ECN CE target). Usable alongside any ECN-aware CC algorithm.", std::optional> l4s; + rfl::Description< + "UDP socket send and receive buffer size in bytes. Default: 0 (use MoQServer built-in " + "default of 1 MB). Increase to reduce EAGAIN errors under high fan-out burst load.", + std::optional> + udp_socket_buffer_bytes; }; struct ParsedListenerConfig { @@ -387,6 +392,17 @@ struct ParsedConfig { std::optional> listener_defaults; rfl::Description<"Number of IO worker threads (default: 1)", std::optional> threads; + rfl::Description< + "Dedicate one relay thread per service for relay state isolation (default: true). " + "Disable for baseline performance comparison.", + std::optional> + use_relay_thread; + rfl::Description< + "Use per-subscriber-thread local forwarders to minimize cross-thread hops on the " + "data path (requires use_relay_thread; default: false). Disable to run all subscribes " + "on the relay thread via subscribeImpl.", + std::optional> + use_local_forwarders; rfl::Description< "Attach a classic BPF reuseport filter to steer QUIC packets to the correct mvfst worker " "based on the connection ID's workerId field (Linux only, mvfst stack only, default: true). " diff --git a/src/main.cpp b/src/main.cpp index ae0cebe..545ccc9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -110,7 +110,12 @@ int main(int argc, char* argv[]) { // === 6. Initialize services === // Construct and configure the application's own services // (MoqxRelayContext, MoqxRelayServer, etc.) - auto context = std::make_shared(config.services, config.relayID); + auto context = std::make_shared( + config.services, + config.relayID, + config.useRelayThread, + config.useLocalForwarders + ); // === 6a. Stats registry === auto statsRegistry = std::make_shared(); diff --git a/src/relay/CrossExecForwarderCallback.cpp b/src/relay/CrossExecForwarderCallback.cpp index 14968bc..7765c53 100644 --- a/src/relay/CrossExecForwarderCallback.cpp +++ b/src/relay/CrossExecForwarderCallback.cpp @@ -18,6 +18,16 @@ void CrossExecForwarderCallback::onEmpty(moxygen::MoQForwarder* /*forwarder*/) { }); } +void CrossExecForwarderCallback::onPublishDone(moxygen::MoQForwarder* /*forwarder*/) { + auto f = forwarder_.lock(); + if (!f) { + return; + } + targetExec_->add([f = std::move(f), downstream = downstream_]() mutable { + downstream->onPublishDone(f.get()); + }); +} + void CrossExecForwarderCallback::forwardChanged( moxygen::MoQForwarder* /*forwarder*/, bool forward diff --git a/src/relay/CrossExecForwarderCallback.h b/src/relay/CrossExecForwarderCallback.h index 33e6add..5f359d9 100644 --- a/src/relay/CrossExecForwarderCallback.h +++ b/src/relay/CrossExecForwarderCallback.h @@ -27,6 +27,7 @@ class CrossExecForwarderCallback final : public moxygen::MoQForwarder::Callback downstream_(std::move(downstream)) {} void onEmpty(moxygen::MoQForwarder* forwarder) override; + void onPublishDone(moxygen::MoQForwarder* forwarder) override; void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; diff --git a/src/relay/LocalForwarderCallback.h b/src/relay/LocalForwarderCallback.h new file mode 100644 index 0000000..f393dc0 --- /dev/null +++ b/src/relay/LocalForwarderCallback.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "relay/LocalForwarderRegistry.h" + +#include +#include + +namespace openmoq::moqx { + +// Runs on the local forwarder's executor. Delegates lifecycle events to +// downstream (a CrossExecForwarderCallback targeting the primary's executor) +// and owns the forwarder's removal from tlForwarders_. +// +// Removal is identity-checked (only vacates the slot if it still holds THIS +// forwarder), making teardown order-independent: a terminated forwarder can +// never clobber a newer one that has claimed the same track name. +// +// removeOnEmpty distinguishes the two roles: +// - subscribe-path local forwarder (removeOnEmpty=true): when its last +// subscriber leaves, its channel sub is pulled from the primary and it is +// dead — remove on onEmpty as well as onPublishDone. +// - publisher's primary forwarder (removeOnEmpty=false): it must survive +// subscriber churn (new subscribers may arrive while the publisher is +// live), so it is removed ONLY when the source terminates (onPublishDone). +class LocalForwarderCallback : public moxygen::MoQForwarder::Callback { +public: + LocalForwarderCallback( + LocalForwarderRegistry* localReg, + moxygen::FullTrackName ftn, + std::shared_ptr downstream, + bool removeOnEmpty = true + ) + : localReg_(localReg), ftn_(std::move(ftn)), downstream_(std::move(downstream)), + removeOnEmpty_(removeOnEmpty) {} + + void onEmpty(moxygen::MoQForwarder* forwarder) override { + downstream_->onEmpty(forwarder); + if (removeOnEmpty_) { + localReg_->remove(ftn_, forwarder); + } + } + + void onPublishDone(moxygen::MoQForwarder* forwarder) override { + downstream_->onPublishDone(forwarder); + // Source-driven teardown (publisher/upstream terminated). Identity-checked. + localReg_->remove(ftn_, forwarder); + } + + void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override { + downstream_->forwardChanged(forwarder, forward); + } + + void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override { + downstream_->newGroupRequested(forwarder, group); + } + +private: + LocalForwarderRegistry* localReg_; + moxygen::FullTrackName ftn_; + std::shared_ptr downstream_; + bool removeOnEmpty_; +}; + +} // namespace openmoq::moqx diff --git a/src/relay/LocalForwarderRegistry.h b/src/relay/LocalForwarderRegistry.h new file mode 100644 index 0000000..8598664 --- /dev/null +++ b/src/relay/LocalForwarderRegistry.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include + +namespace openmoq::moqx { + +// Thread-local registry mapping FullTrackName → local MoQForwarder on one +// iothread. All methods must be called on the owning thread. One instance per +// iothread, stored in MoqxRelay::tlForwarders_. +class LocalForwarderRegistry { +public: + struct GetOrCreateResult { + std::shared_ptr localForwarder; + // true only on first creation; caller must cross-exec to publisher thread + // and add a CrossExecFilter subscriber to the primary forwarder. + bool isNew; + }; + + // Returns the existing local forwarder for ftn, or calls factory() to create + // one. factory() is called at most once per ftn per thread lifetime. + GetOrCreateResult getOrCreate( + const moxygen::FullTrackName& ftn, + folly::FunctionRef()> factory + ) { + auto it = forwarders_.find(ftn); + if (it != forwarders_.end()) { + return {it->second, /*isNew=*/false}; + } + auto forwarder = factory(); + forwarders_.emplace(ftn, forwarder); + return {std::move(forwarder), /*isNew=*/true}; + } + + // Claim the slot for ftn unconditionally, returning the previous occupant (if + // any). The publisher's forwarder is authoritative for a track on its thread: + // a stale subscribe-path local forwarder under the same name is displaced + // here, and drains itself via the source-termination cascade (its identity- + // checked removal then no-ops, since this forwarder now owns the slot). + std::shared_ptr + set(const moxygen::FullTrackName& ftn, std::shared_ptr forwarder) { + auto prev = std::move(forwarders_[ftn]); + forwarders_[ftn] = std::move(forwarder); + return prev; + } + + std::shared_ptr get(const moxygen::FullTrackName& ftn) const { + auto it = forwarders_.find(ftn); + return it != forwarders_.end() ? it->second : nullptr; + } + + // Identity-checked removal: erase the entry for ftn only if it still points + // at `expected`. Called from a forwarder's onPublishDone, which fires on this + // thread. The identity check makes teardown order-independent: a terminated + // forwarder can only vacate its own slot, so it never clobbers a newer + // forwarder that has since claimed the same track name. + void remove(const moxygen::FullTrackName& ftn, const moxygen::MoQForwarder* expected) { + auto it = forwarders_.find(ftn); + if (it == forwarders_.end() || it->second.get() != expected) { + return; + } + forwarders_.erase(it); + } + +private: + folly::F14FastMap< + moxygen::FullTrackName, + std::shared_ptr, + moxygen::FullTrackName::hash> + forwarders_; +}; + +} // namespace openmoq::moqx diff --git a/src/relay/NullConsumers.h b/src/relay/NullConsumers.h new file mode 100644 index 0000000..1527ef4 --- /dev/null +++ b/src/relay/NullConsumers.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "moxygen/MoQConsumers.h" + +#include + +namespace moxygen { + +// SubgroupConsumer that discards all calls. Used as the terminal sink when +// a structural consumer (e.g. SubscribeWriteback) is required but no +// downstream delivery is needed. +class NullSubgroupConsumer : public SubgroupConsumer { +public: + folly::Expected + object(uint64_t, Payload, Extensions, bool) override { + return folly::unit; + } + + folly::Expected + beginObject(uint64_t, uint64_t, Payload, Extensions) override { + return folly::unit; + } + + folly::Expected objectPayload(Payload, bool) override { + return ObjectPublishStatus::IN_PROGRESS; + } + + folly::Expected endOfGroup(uint64_t) override { + return folly::unit; + } + + folly::Expected endOfTrackAndGroup(uint64_t) override { + return folly::unit; + } + + folly::Expected endOfSubgroup() override { return folly::unit; } + + void reset(ResetStreamErrorCode) override {} +}; + +// TrackConsumer that discards all calls. beginSubgroup() returns a +// NullSubgroupConsumer. +class NullTrackConsumer : public TrackConsumer { +public: + folly::Expected setTrackAlias(TrackAlias) override { + return folly::unit; + } + + folly::Expected, MoQPublishError> + beginSubgroup(uint64_t, uint64_t, Priority, bool) override { + return std::make_shared(); + } + + folly::Expected, MoQPublishError> awaitStreamCredit() override { + return folly::makeSemiFuture(); + } + + folly::Expected + objectStream(const ObjectHeader&, Payload, bool) override { + return folly::unit; + } + + folly::Expected + datagram(const ObjectHeader&, Payload, bool) override { + return folly::unit; + } + + folly::Expected publishDone(PublishDone) override { + return folly::unit; + } +}; + +} // namespace moxygen diff --git a/src/relay/PublisherCrossExecFilter.h b/src/relay/PublisherCrossExecFilter.h index c3c7499..0ad2f57 100644 --- a/src/relay/PublisherCrossExecFilter.h +++ b/src/relay/PublisherCrossExecFilter.h @@ -17,7 +17,7 @@ namespace openmoq::moqx { // the result to the caller. goaway() is fire-and-forget. // // Requires targetExec_ to be a FIFO executor if call ordering matters. -class PublisherCrossExecFilter final : public moxygen::Publisher { +class PublisherCrossExecFilter : public moxygen::Publisher { public: PublisherCrossExecFilter(folly::Executor* targetExec, std::shared_ptr inner) : targetExec_(targetExec), inner_(std::move(inner)) {} diff --git a/src/relay/RelayExecUtil.h b/src/relay/RelayExecUtil.h new file mode 100644 index 0000000..ae40628 --- /dev/null +++ b/src/relay/RelayExecUtil.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "CrossExecFilter.h" +#include "PublisherCrossExecFilter.h" +#include +#include +#include +#include + +namespace openmoq::moqx { + +// Wraps c in a CrossExecFilter targeting exec, or returns c if exec is null. +inline std::shared_ptr +maybeCrossExec(folly::Executor* exec, std::shared_ptr c) { + if (!exec) { + return c; + } + return std::make_shared(exec, std::move(c), /*deepCopyPayload=*/false); +} + +// Wraps c in a FetchCrossExecFilter targeting exec, or returns c if exec is null. +inline std::shared_ptr +maybeCrossExec(folly::Executor* exec, std::shared_ptr c) { + if (!exec) { + return c; + } + return FetchCrossExecFilter::create(exec, std::move(c), /*deepCopyPayload=*/false); +} + +// Wraps p in a PublisherCrossExecFilter targeting exec, or returns p if exec is null. +inline std::shared_ptr +maybeCrossExec(folly::Executor* exec, std::shared_ptr p) { + if (!exec) { + return p; + } + return std::make_shared(exec, std::move(p)); +} + +// Wraps session as a Publisher, targeting its executor when relayExec is set. +inline std::shared_ptr +maybeWrapPublisher(folly::Executor* relayExec, std::shared_ptr session) { + // Evaluate getExecutor() before std::move(session) to avoid unspecified + // argument evaluation order leaving session moved-from. + auto* exec = relayExec ? session->getExecutor() : nullptr; + return maybeCrossExec(exec, std::shared_ptr(std::move(session))); +} + +// Runs fn on exec (fire-and-forget) if exec is non-null; otherwise runs it +// inline on the calling thread. +template void runOnExec(folly::Executor* exec, Fn&& fn) { + if (exec) { + exec->add(std::forward(fn)); + } else { + std::forward(fn)(); + } +} + +// When relayExec is set, dispatches fn to sessionExec (fire-and-forget). +// Otherwise runs fn inline (caller is already on the correct thread). +// Use this when relay state changes need to notify a specific session's executor. +template +void runOnSessionExec(folly::Executor* relayExec, folly::Executor* sessionExec, Fn&& fn) { + runOnExec(relayExec ? sessionExec : nullptr, std::forward(fn)); +} + +} // namespace openmoq::moqx diff --git a/src/relay/SubscriberCrossExecFilter.h b/src/relay/SubscriberCrossExecFilter.h index 1f12cf9..d9866b5 100644 --- a/src/relay/SubscriberCrossExecFilter.h +++ b/src/relay/SubscriberCrossExecFilter.h @@ -24,7 +24,7 @@ class CrossExecFilter; // goaway() is fire-and-forget. // // Requires targetExec_ to be a FIFO executor if call ordering matters. -class SubscriberCrossExecFilter final : public moxygen::Subscriber { +class SubscriberCrossExecFilter : public moxygen::Subscriber { public: SubscriberCrossExecFilter(folly::Executor* targetExec, std::shared_ptr inner) : targetExec_(targetExec), inner_(std::move(inner)) {} diff --git a/src/relay/WeakRelayForwarderCallback.cpp b/src/relay/WeakRelayForwarderCallback.cpp new file mode 100644 index 0000000..b37babf --- /dev/null +++ b/src/relay/WeakRelayForwarderCallback.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "WeakRelayForwarderCallback.h" +namespace openmoq::moqx { + +void WeakRelayForwarderCallback::onEmpty(moxygen::MoQForwarder* forwarder) { + if (auto r = relay_.lock()) { + r->onEmpty(forwarder); + } +} + +void WeakRelayForwarderCallback::forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) { + if (auto r = relay_.lock()) { + r->forwardChanged(forwarder, forward); + } +} + +void WeakRelayForwarderCallback::newGroupRequested( + moxygen::MoQForwarder* forwarder, + uint64_t group +) { + if (auto r = relay_.lock()) { + r->newGroupRequested(forwarder, group); + } +} + +} // namespace openmoq::moqx diff --git a/src/relay/WeakRelayForwarderCallback.h b/src/relay/WeakRelayForwarderCallback.h new file mode 100644 index 0000000..61749ec --- /dev/null +++ b/src/relay/WeakRelayForwarderCallback.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace openmoq::moqx { + +// Relay-side MoQForwarder::Callback target. Holds a weak_ptr to the relay +// (as its MoQForwarder::Callback base) to avoid a reference cycle +// (registry → forwarder → callback → relay). Intended to be used as the +// downstream of CrossExecForwarderCallback, which handles cross-exec +// dispatch and forwarder pointer recovery. +class WeakRelayForwarderCallback final : public moxygen::MoQForwarder::Callback { +public: + explicit WeakRelayForwarderCallback(std::weak_ptr relay) + : relay_(std::move(relay)) {} + + void onEmpty(moxygen::MoQForwarder* forwarder) override; + void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; + void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; + +private: + std::weak_ptr relay_; +}; + +} // namespace openmoq::moqx diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bc86edc..dd48015 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -39,6 +39,7 @@ add_executable(moqx_relay_test MoqxRelayTrackStatusTests.cpp MoqxRelayNGRTests.cpp MoqxRelayPeerTests.cpp + MoqxRelayTestModes.cpp ) target_link_libraries(moqx_relay_test PRIVATE moqx_test_fixture diff --git a/test/MoqxRelayDataPlaneTests.cpp b/test/MoqxRelayDataPlaneTests.cpp index 2aa0606..001a095 100644 --- a/test/MoqxRelayDataPlaneTests.cpp +++ b/test/MoqxRelayDataPlaneTests.cpp @@ -14,7 +14,7 @@ namespace moxygen::test { // new ones. // Sequence: publish, 2 subscribers, beginSubgroup, beginSubgroup again -> // first consumers get reset, both subscribers get new consumers. -TEST_F(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { +TEST_P(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { auto publisherSession = createMockSession(); auto sub1 = createMockSession(); auto sub2 = createMockSession(); @@ -57,25 +57,32 @@ TEST_F(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { auto sgForwarder1 = publishConsumer->beginSubgroup(0, 0, 0); EXPECT_TRUE(sgForwarder1.hasValue()); + driveIfMultiThread( + ); // flush so beginSubgroup wires downstream_ and calls mockConsumer beginSubgroup // Duplicate beginSubgroup - should reset v1 consumers and return new - // forwarder + // forwarder. Simulate publisher resetting the old stream. auto sgForwarder2 = publishConsumer->beginSubgroup(0, 0, 0); + driveIfMultiThread(); // flush duplicate so v1 consumers are reset and v2 consumers are created + (*sgForwarder1)->reset(moxygen::ResetStreamErrorCode::CANCELLED); + driveIfMultiThread(); // flush publisher reset of old stream EXPECT_TRUE(sgForwarder2.hasValue()); EXPECT_NE(sgForwarder1.value(), sgForwarder2.value()); // Close the new subgroup cleanly before teardown to avoid reset during // cleanup EXPECT_TRUE(sgForwarder2.value()->endOfSubgroup().hasValue()); + driveIfMultiThread(); // flush endOfSubgroup removeSession(publisherSession); removeSession(sub1); removeSession(sub2); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Test: Duplicate beginSubgroup after all subscribers have stop_sending'd // returns CANCELLED to propagate the signal back to the publisher. -TEST_F(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { +TEST_P(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { auto publisherSession = createMockSession(); auto subscriber = createMockSession(); @@ -99,22 +106,45 @@ TEST_F(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { auto sgRes = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(sgRes.hasValue()); auto sg = sgRes.value(); + driveIfMultiThread(); // flush so beginSubgroup wires sg.downstream_ before object() enqueues // Trigger stop_sending tombstone via CANCELLED error from object() + if (relayEvb_) { + // MT: enqueue an extra object() after the beginSubgroup lambda + sg->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber + } sg->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber - // Duplicate beginSubgroup - all consumers tombstoned, should return CANCELLED + // Duplicate beginSubgroup - all consumers tombstoned, should return CANCELLED. auto dupRes = publishConsumer->beginSubgroup(0, 0, 0); - EXPECT_TRUE(dupRes.hasError()); - EXPECT_EQ(dupRes.error().code, MoQPublishError::CANCELLED); + if (relayMode() == RelayMode::MultiThread) { + // MT mode: a CrossExecFilter sits between publisher and relay, so it always + // returns a subFilter and the error is deferred until the next operation. + ASSERT_TRUE(dupRes.hasValue()); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber + auto probeRes = dupRes.value()->endOfSubgroup(); + EXPECT_TRUE(probeRes.hasError()); + if (probeRes.hasError()) { + EXPECT_EQ(probeRes.error().code, MoQPublishError::CANCELLED); + } + } else { + // ST and LocalForwarderMT: the publisher writes directly to the (local) + // forwarder with no cross-exec hop, so CANCELLED is returned synchronously. + EXPECT_TRUE(dupRes.hasError()); + EXPECT_EQ(dupRes.error().code, MoQPublishError::CANCELLED); + } removeSession(publisherSession); removeSession(subscriber); + sg->reset(ResetStreamErrorCode::CANCELLED); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Test: Duplicate beginSubgroup with partial stop_sending - active subscriber // gets reset and new consumer; tombstoned subscriber is skipped. -TEST_F(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { +TEST_P(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { auto publisherSession = createMockSession(); auto subA = createMockSession(); auto subB = createMockSession(); @@ -143,6 +173,11 @@ TEST_F(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { // CANCELLED to simulate stop_sending EXPECT_CALL(*sgAv1, object(_, _, _, _)) .WillOnce(Return(folly::makeExpected(folly::unit))); + if (relayEvb_) { + EXPECT_CALL(*sgAv1, object(_, _, _, _)) + .WillOnce(Return(folly::makeExpected(folly::unit))) + .RetiresOnSaturation(); + } EXPECT_CALL(*sgBv1, object(_, _, _, _)) .WillOnce( Return(folly::makeUnexpected(MoQPublishError(MoQPublishError::CANCELLED, "stop sending"))) @@ -160,22 +195,34 @@ TEST_F(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { auto sgForwarder1 = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(sgForwarder1.hasValue()); + driveIfMultiThread(); // flush so beginSubgroup wires downstream_ before object() enqueues + if (relayEvb_) { + // MT: enqueue an extra object() after the beginSubgroup lambda + sgForwarder1.value()->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber + } // Trigger tombstone for sub B via CANCELLED from object() sgForwarder1.value()->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones sub B // Duplicate beginSubgroup: sub A gets reset+new, sub B is skipped - // (tombstoned) + // (tombstoned). Simulate publisher resetting the old stream. auto sgForwarder2 = publishConsumer->beginSubgroup(0, 0, 0); + driveIfMultiThread(); // flush duplicate so sgAv1 is reset and sgAv2 is created + (*sgForwarder1)->reset(moxygen::ResetStreamErrorCode::CANCELLED); + driveIfMultiThread(); // flush publisher reset of old stream EXPECT_TRUE(sgForwarder2.hasValue()); EXPECT_NE(sgForwarder1.value(), sgForwarder2.value()); // Close the new subgroup cleanly before teardown EXPECT_TRUE(sgForwarder2.value()->endOfSubgroup().hasValue()); + driveIfMultiThread(); // flush endOfSubgroup removeSession(publisherSession); removeSession(subA); removeSession(subB); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } } // namespace moxygen::test diff --git a/test/MoqxRelayFetchTests.cpp b/test/MoqxRelayFetchTests.cpp index b7f488c..948bbf6 100644 --- a/test/MoqxRelayFetchTests.cpp +++ b/test/MoqxRelayFetchTests.cpp @@ -14,7 +14,7 @@ namespace moxygen::test { // crash. When findPublishNamespaceSession returns null (no publishNamespace), // fetch falls back to subscriptions_. After onPublishDone, upstream is null // but the subscription entry remains if the forwarder has subscribers. -TEST_F(MoQRelayTest, FetchAfterPublisherTermination) { +TEST_P(MoQRelayTest, FetchAfterPublisherTermination) { auto publisherSession = createMockSession(); auto subSession = createMockSession(); auto fetchSession = createMockSession(); diff --git a/test/MoqxRelayNGRTests.cpp b/test/MoqxRelayNGRTests.cpp index 67a45b5..88a7693 100644 --- a/test/MoqxRelayNGRTests.cpp +++ b/test/MoqxRelayNGRTests.cpp @@ -7,12 +7,13 @@ */ #include "MoqxRelayTestFixture.h" +#include namespace moxygen::test { // Test: relay PUBLISH path – dynamic groups from PublishRequest extensions // is stored in the forwarder and forwarded to every downstream subscriber -TEST_F(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { +TEST_P(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -40,12 +41,13 @@ TEST_F(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { removeSession(subscriberSession); exec_->drive(); removeSession(publisherSession); + driveIfMultiThread(); } // Test: relay SUBSCRIBE path – dynamic groups from the upstream SubscribeOk is // stored in the forwarder and forwarded to both the first and late-joining // downstream subscribers -TEST_F(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { +TEST_P(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -88,11 +90,12 @@ TEST_F(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); } // Relay test: When a late-joining subscriber sends NEW_GROUP_REQUEST in its // SUBSCRIBE, the relay forwards it upstream via REQUEST_UPDATE -TEST_F(MoQRelayTest, RelaySubscribeLateJoinerNGRForwardedUpstream) { +TEST_P(MoQRelayTest, RelaySubscribeLateJoinerNGRForwardedUpstream) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -163,7 +166,7 @@ TEST_F(MoQRelayTest, RelaySubscribeLateJoinerNGRForwardedUpstream) { // Relay test: A downstream subscriber sending REQUEST_UPDATE with // NEW_GROUP_REQUEST causes the relay to cascade the NGR upstream -TEST_F(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { +TEST_P(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -214,11 +217,12 @@ TEST_F(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { removeSession(publisherSession); removeSession(subscriberSession); + driveIfMultiThread(); } // Relay test: downstream subscriber returns PublishOk carrying NEW_GROUP_REQUEST; // relay cascades NGR to the publisher handle upstream via REQUEST_UPDATE -TEST_F(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { +TEST_P(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -248,10 +252,13 @@ TEST_F(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { doSubscribeNamespace(subscriberSession, kTestNamespace); auto publishHandle = makePublishHandle(); + std::atomic updates{0}; { testing::InSequence seq; - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).Times(1); // forward=true update - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).WillOnce([](const RequestUpdate& update) { + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)) // forward=true update + .WillOnce([&](const RequestUpdate&) { ++updates; }); + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).WillOnce([&](const RequestUpdate& update) { + ++updates; auto ngrValue = getFirstIntParam(update.params, TrackRequestParamKey::NEW_GROUP_REQUEST); ASSERT_TRUE(ngrValue.has_value()); EXPECT_EQ(*ngrValue, 8u); @@ -272,14 +279,27 @@ TEST_F(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { }); exec_->drive(); + // Wait for the async cascade (forward=true + NGR=8) to actually land rather + // than driving a fixed number of times, then lock in the assertion before + // teardown so the trailing forwardChanged(false) → requestUpdate at + // removeSession can't over-saturate the expectation. + EXPECT_TRUE(driveUntil([&] { return updates.load() >= 2; })) + << "NGR cascade incomplete: " << updates.load() << "/2 requestUpdates"; + + // Lock in the assertion before teardown: in LocalForwarderMT the trailing + // forwardChanged(false) → requestUpdate at subscriber removal lands + // asynchronously and would otherwise over-saturate the expectation. + ASSERT_TRUE(testing::Mock::VerifyAndClearExpectations(publishHandle.get())); + removeSession(publisherSession); removeSession(subscriberSession); + driveIfMultiThread(); } // Relay test: a second subscriber returning the same NEW_GROUP_REQUEST value in // its PublishOk is deduplicated; the upstream handle receives exactly one // REQUEST_UPDATE -TEST_F(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { +TEST_P(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -320,10 +340,13 @@ TEST_F(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { doSubscribeNamespace(subscriber2, kTestNamespace); auto publishHandle = makePublishHandle(); + std::atomic updates{0}; { testing::InSequence seq; - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).Times(1); // forward=true update - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).Times(1); // NGR update (deduplicated) + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)) // forward=true update + .WillOnce([&](const RequestUpdate&) { ++updates; }); + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)) // NGR update (deduplicated) + .WillOnce([&](const RequestUpdate&) { ++updates; }); } PublishRequest pub; @@ -340,9 +363,22 @@ TEST_F(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { }); exec_->drive(); + // Wait for the deduplicated cascade (forward=true + one NGR) to land, then + // lock in the assertion before teardown; the trailing forwardChanged(false) → + // requestUpdate at subscriber teardown is not asserted here and would + // otherwise race in and over-saturate the expectation. + EXPECT_TRUE(driveUntil([&] { return updates.load() >= 2; })) + << "NGR cascade incomplete: " << updates.load() << "/2 requestUpdates"; + + // Lock in the assertion before teardown: in LocalForwarderMT the trailing + // forwardChanged(false) → requestUpdate at subscriber removal lands + // asynchronously and would otherwise over-saturate the expectation. + ASSERT_TRUE(testing::Mock::VerifyAndClearExpectations(publishHandle.get())); + removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); } } // namespace moxygen::test diff --git a/test/MoqxRelayPeerTests.cpp b/test/MoqxRelayPeerTests.cpp index dbbc576..16ca38d 100644 --- a/test/MoqxRelayPeerTests.cpp +++ b/test/MoqxRelayPeerTests.cpp @@ -12,7 +12,7 @@ namespace moxygen::test { // Test: makeNamespaceBridgeHandle routes namespaceMsg to doPublishNamespace -TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsNamespaceMsg) { +TEST_P(MoQRelayTest, NamespaceBridgeHandleForwardsNamespaceMsg) { auto peerSession = createMockSession(); // Bridge handle routes NAMESPACE messages from peerSession into the relay. @@ -32,7 +32,7 @@ TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsNamespaceMsg) { } // Test: makeNamespaceBridgeHandle routes namespaceDoneMsg to doPublishNamespaceDone -TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsDoneMsg) { +TEST_P(MoQRelayTest, NamespaceBridgeHandleForwardsDoneMsg) { auto peerSession = createMockSession(); auto handle = makeNamespaceBridgeHandle(relay_, peerSession); @@ -65,7 +65,7 @@ TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsDoneMsg) { // // Production relays negotiate draft-16 (empty prefix allowed). The delivery // path for draft-16 is synchronous: namespacePublishHandle->namespaceMsg(). -TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBackOnReconnect) { +TEST_P(MoQRelayTest, PeerNamespaceNotEchoedBackOnReconnect) { // Relay must have a relayID for peer detection to activate. resetRelay(std::make_shared(config::CacheConfig{.maxCachedTracks = 0}, "sg-sin-2-1")); relay_->setAllowedNamespacePrefix(kAllowedPrefix); @@ -107,7 +107,7 @@ TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBackOnReconnect) { // Complement: namespaces from LOCAL publishers (not from the peer) must still // be delivered when that peer subscribes. -TEST_F(MoQRelayTest, LocalNamespaceDeliveredToPeerOnReconnect) { +TEST_P(MoQRelayTest, LocalNamespaceDeliveredToPeerOnReconnect) { resetRelay(std::make_shared(config::CacheConfig{.maxCachedTracks = 0}, "sg-sin-2-1")); relay_->setAllowedNamespacePrefix(kAllowedPrefix); @@ -134,6 +134,7 @@ TEST_F(MoQRelayTest, LocalNamespaceDeliveredToPeerOnReconnect) { removeSession(localPublisher); removeSession(peerSession); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // A mock session that simulates a peer announcing peerNs when the relay @@ -183,7 +184,7 @@ class PeerAnnounceSession : public NiceMock { // directly via makeNamespaceBridgeHandle), this test goes through the full // publisherInterface()->subscribeNamespace() production path so the bug in the call-site is // exercised. -TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBack_FullProductionPath) { +TEST_P(MoQRelayTest, PeerNamespaceNotEchoedBack_FullProductionPath) { resetRelay(std::make_shared(config::CacheConfig{.maxCachedTracks = 0}, "sg-sin-2-1")); relay_->setAllowedNamespacePrefix(kAllowedPrefix); @@ -237,7 +238,7 @@ TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBack_FullProductionPath) { // without graceful namespaceDoneMsg calls, tree entries it created must be // cleaned up so stale sourceSession shared_ptrs don't keep dead session objects // alive and downstream subscribers receive NAMESPACE_DONE. -TEST_F(MoQRelayTest, BridgeHandleDestructorCleansUpNamespaces) { +TEST_P(MoQRelayTest, BridgeHandleDestructorCleansUpNamespaces) { auto upstreamSession = createMockSession(); // Simulate the bridge path: create a handle and announce a namespace through @@ -264,7 +265,7 @@ TEST_F(MoQRelayTest, BridgeHandleDestructorCleansUpNamespaces) { // Verify that when a new publisher takes over a namespace before the old // bridge handle is destroyed, the stale handle's destructor does NOT evict // the new publisher's entry (doPublishNamespaceDone guards on sourceSession). -TEST_F(MoQRelayTest, BridgeHandleDestructorDoesNotEvictNewPublisher) { +TEST_P(MoQRelayTest, BridgeHandleDestructorDoesNotEvictNewPublisher) { auto session1 = createMockSession(); auto session2 = createMockSession(); diff --git a/test/MoqxRelayPublishTests.cpp b/test/MoqxRelayPublishTests.cpp index a93c13a..50c1cbe 100644 --- a/test/MoqxRelayPublishTests.cpp +++ b/test/MoqxRelayPublishTests.cpp @@ -7,11 +7,12 @@ */ #include "MoqxRelayTestFixture.h" +#include namespace moxygen::test { // Test: Verify allowed namespace prefix is set correctly -TEST_F(MoQRelayTest, AllowedNamespacePrefix) { +TEST_P(MoQRelayTest, AllowedNamespacePrefix) { // This just verifies the relay can be constructed with a namespace prefix // More detailed testing requires full session setup auto relay2 = std::make_shared(config::CacheConfig{ @@ -23,7 +24,7 @@ TEST_F(MoQRelayTest, AllowedNamespacePrefix) { } // Test: Publish a track through the relay -TEST_F(MoQRelayTest, PublishSuccess) { +TEST_P(MoQRelayTest, PublishSuccess) { auto publisherSession = createMockSession(); // Publish the namespace @@ -48,17 +49,20 @@ TEST_F(MoQRelayTest, PublishSuccess) { // Test: Extensions from publish are forwarded to subscribers via // subscribeNamespace -TEST_F(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { +TEST_P(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { auto publisherSession = createMockSession(); auto subscriber = createMockSession(); // Subscribe to namespace first auto mockConsumer = createMockConsumer(); Extensions receivedExtensions; + std::atomic published{false}; EXPECT_CALL(*subscriber, publish(testing::_, testing::_)) .WillOnce([&mockConsumer, - &receivedExtensions](const PublishRequest& pubReq, auto /*subHandle*/) { + &receivedExtensions, + &published](const PublishRequest& pubReq, auto /*subHandle*/) { receivedExtensions = pubReq.extensions; + published.store(true); return Subscriber::PublishResult(Subscriber::PublishConsumerAndReplyTask{ mockConsumer, []() -> folly::coro::Task> { @@ -94,12 +98,19 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { }); exec_->drive(); + // Wait until the relay has actually forwarded the publish to the subscriber + // (the mock sets `published`), rather than reading receivedExtensions after a + // fixed drive — under parallel load the forwarding may not have completed yet. + ASSERT_TRUE(driveUntil([&] { return published.load(); })) + << "publish was not forwarded to the subscriber"; + // Verify extensions were forwarded EXPECT_EQ(receivedExtensions.getIntExtension(kDeliveryTimeoutExtensionType), 5000); EXPECT_EQ(receivedExtensions.getIntExtension(0xBEEF'0000), 42); removeSession(publisherSession); removeSession(subscriber); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // ============================================================ @@ -107,7 +118,7 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { // ============================================================ // Test: Extensions from publish are forwarded to late-joining subscribers -TEST_F(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { +TEST_P(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -153,29 +164,38 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { // Late-joining subscriber 2 should also get extensions Extensions receivedExtensions; + std::atomic published2{false}; auto mockConsumer2 = createMockConsumer(); EXPECT_CALL(*subscriber2, publish(testing::_, testing::_)) - .WillOnce([&mockConsumer2, &receivedExtensions](const PublishRequest& pubReq, auto) { - receivedExtensions = pubReq.extensions; - return Subscriber::PublishResult(Subscriber::PublishConsumerAndReplyTask{ - mockConsumer2, - []() -> folly::coro::Task> { - co_return PublishOk{ - RequestID(2), - true, - 0, - GroupOrder::OldestFirst, - LocationType::LargestObject, - std::nullopt, - std::nullopt - }; - }() - }); - }); + .WillOnce( + [&mockConsumer2, &receivedExtensions, &published2](const PublishRequest& pubReq, auto) { + receivedExtensions = pubReq.extensions; + published2.store(true); + return Subscriber::PublishResult(Subscriber::PublishConsumerAndReplyTask{ + mockConsumer2, + []() -> folly::coro::Task> { + co_return PublishOk{ + RequestID(2), + true, + 0, + GroupOrder::OldestFirst, + LocationType::LargestObject, + std::nullopt, + std::nullopt + }; + }() + }); + } + ); doSubscribeNamespace(subscriber2, kTestNamespace); exec_->drive(); + // Wait until the relay forwards the publish to the late joiner before reading + // the extensions (async under parallel load). + ASSERT_TRUE(driveUntil([&] { return published2.load(); })) + << "publish was not forwarded to the late-joining subscriber"; + // Verify late-joiner received extensions EXPECT_EQ(receivedExtensions.getIntExtension(kDeliveryTimeoutExtensionType), 3000); EXPECT_EQ(receivedExtensions.getIntExtension(0xCAFE'0000), 99); @@ -183,6 +203,8 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Regression test: publisher reconnect after disconnect with active subscriber @@ -199,7 +221,7 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { // 4. Session A reconnects and re-publishes the same track. The multipublisher // check finds the surviving entry and calls it->second.handle->unsubscribe() // — null-pointer dereference, SIGSEGV. -TEST_F(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { +TEST_P(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -226,6 +248,8 @@ TEST_F(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { withSessionContext(publisherSession, [&]() { auto sgRes = consumer->beginSubgroup(/*groupID=*/0, /*subgroupID=*/0, /*priority=*/0, false); ASSERT_TRUE(sgRes.hasValue()) << "beginSubgroup should succeed"; + // Simulate QUIC resetting the open stream on connection drop. + (*sgRes)->reset(moxygen::ResetStreamErrorCode::INTERNAL_ERROR); }); // Step 3: session A's connection drops WITHOUT closing the subgroup. @@ -260,7 +284,7 @@ TEST_F(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { // old forwarder's subscribers must receive publishDone, and the new // publish-path subscription must be fully functional (accepting data from the // new publisher). -TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { +TEST_P(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -283,9 +307,9 @@ TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { // Subscribe to the track (creates subscribe-path subscription) auto oldConsumer = createMockConsumer(); - bool publishDoneReceived = false; + std::atomic publishDoneReceived{false}; EXPECT_CALL(*oldConsumer, publishDone(_)).WillOnce([&publishDoneReceived](const PublishDone&) { - publishDoneReceived = true; + publishDoneReceived.store(true); return folly::makeExpected(folly::unit); }); auto handle = subscribeToTrack( @@ -301,8 +325,11 @@ TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { auto publishConsumer = doPublish(publisherSession, kTestTrackName); ASSERT_NE(publishConsumer, nullptr); - // Old subscriber must have been drained - EXPECT_TRUE(publishDoneReceived) << "Old subscribe-path subscriber should receive publishDone"; + // Old subscriber must have been drained. publishDone crosses executors to the + // old subscribe-path consumer, so wait for it rather than asserting after a + // fixed drive. + EXPECT_TRUE(driveUntil([&] { return publishDoneReceived.load(); })) + << "Old subscribe-path subscriber should receive publishDone"; // New publish-path subscription should be functional: subscribe a new // downstream consumer and verify it receives data from the publisher @@ -319,9 +346,11 @@ TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { auto sgRes = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(sgRes.hasValue()); EXPECT_TRUE(sgRes.value()->endOfSubgroup().hasValue()); + driveIfMultiThread(); // flush relayExec_ so beginSubgroup/endOfSubgroup reach subscribers removeSession(publisherSession); removeSession(subscriberSession); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Regression test: publisher reconnects while a subscribe coroutine is @@ -334,7 +363,7 @@ TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { // ScopeGuardImplBase::terminate() → std::terminate (exit code 139). // // Without the fix: crashes. With the fix: subscribe returns an error cleanly. -TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeScopeGuardCrash) { +TEST_P(MoQRelayTest, PublishReconnectDuringSubscribeScopeGuardCrash) { auto publisherSession1 = createMockSession(); auto publisherSession2 = createMockSession(); auto subscriberSession = createMockSession(); @@ -423,7 +452,7 @@ TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeScopeGuardCrash) { // entry (promise already satisfied), and rsub.promise.setValue() throws // PromiseAlreadySatisfied, which propagates as an unhandled coroutine exception. // With the fix: subscribe returns SUBSCRIBE_ERROR "publisher reconnected". -TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeSuccessPathCrash) { +TEST_P(MoQRelayTest, PublishReconnectDuringSubscribeSuccessPathCrash) { auto publisherSession1 = createMockSession(); auto publisherSession2 = createMockSession(); auto subscriberSession = createMockSession(); @@ -498,7 +527,7 @@ TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeSuccessPathCrash) { // Regression: after publishDone the namespace-tree node must be pruned when // the track was the only remaining content. (Was a bug before unpublishTrack // gained a NodeMutationGuard; kept as a regression guard.) -TEST_F(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { +TEST_P(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { auto publisher = createMockSession(); doPublishNamespace(publisher, kTestNamespace); @@ -538,10 +567,11 @@ TEST_F(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { exec_->drive(); removeSession(publisher); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Empty namespace: publishNamespace with an empty TrackNamespace must not crash. -TEST_F(MoQRelayTest, EmptyNamespacePublishNamespaceDone) { +TEST_P(MoQRelayTest, EmptyNamespacePublishNamespaceDone) { auto publisher = createMockSession(); TrackNamespace emptyNs{{}}; diff --git a/test/MoqxRelaySubNsTests.cpp b/test/MoqxRelaySubNsTests.cpp index bf63e11..bee9b08 100644 --- a/test/MoqxRelaySubNsTests.cpp +++ b/test/MoqxRelaySubNsTests.cpp @@ -10,7 +10,7 @@ namespace moxygen::test { -TEST_F(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { +TEST_P(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -87,9 +87,10 @@ TEST_F(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } -TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { +TEST_P(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { // Default session uses kVersionDraftCurrent (draft-14, which is < 16) auto session = createMockSession(); @@ -109,7 +110,7 @@ TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { removeSession(session); } -TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixAllowedV16) { +TEST_P(MoQRelayTest, SubscribeNamespaceEmptyPrefixAllowedV16) { auto session = createMockSession(); // Override the negotiated version to draft-16 ON_CALL(*session, getNegotiatedVersion()) @@ -121,7 +122,7 @@ TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixAllowedV16) { removeSession(session); } -TEST_F(MoQRelayTest, ExactNamespaceSubscriberReceivesPublishNamespace) { +TEST_P(MoQRelayTest, ExactNamespaceSubscriberReceivesPublishNamespace) { auto subscriber = createMockSession(); auto publisher = createMockSession(); @@ -156,7 +157,7 @@ TEST_F(MoQRelayTest, ExactNamespaceSubscriberReceivesPublishNamespace) { // forwarder is empty, the relay fires REQUEST_UPDATE twice — once explicitly // at the if(forwarder->empty()) site and once via forwardChanged() when // addSubscriber() increments numForwardingSubscribers from 0 to 1. -TEST_F(MoQRelayTest, SubscribeNs_ForwardTrue_EmptyForwarder_SingleRequestUpdate) { +TEST_P(MoQRelayTest, SubscribeNs_ForwardTrue_EmptyForwarder_SingleRequestUpdate) { auto pubSession = createMockSession(); doPublishNamespace(pubSession, kTestNamespace); auto mockHandle = makePublishHandle(); @@ -192,7 +193,7 @@ TEST_F(MoQRelayTest, SubscribeNs_ForwardTrue_EmptyForwarder_SingleRequestUpdate) // forwarder is empty, the relay fires a spurious REQUEST_UPDATE(forward=false) // at the if(forwarder->empty()) site — even though the upstream is already at // forward=false (set by publish() which found no subscribers). -TEST_F(MoQRelayTest, SubscribeNs_ForwardFalse_EmptyForwarder_NoRequestUpdate) { +TEST_P(MoQRelayTest, SubscribeNs_ForwardFalse_EmptyForwarder_NoRequestUpdate) { auto pubSession = createMockSession(); doPublishNamespace(pubSession, kTestNamespace); auto mockHandle = makePublishHandle(); diff --git a/test/MoqxRelaySubscribeTests.cpp b/test/MoqxRelaySubscribeTests.cpp index cb2075a..fa5f7e2 100644 --- a/test/MoqxRelaySubscribeTests.cpp +++ b/test/MoqxRelaySubscribeTests.cpp @@ -14,7 +14,7 @@ namespace moxygen::test { // terminated (onPublishDone clears handle/upstream). We trigger forwardChanged // via Subscriber::requestUpdate changing forward from true→false (1→0 // transition). The subscriber survives drain because it has an open subgroup. -TEST_F(MoQRelayTest, ForwardChangedAfterPublisherTermination) { +TEST_P(MoQRelayTest, ForwardChangedAfterPublisherTermination) { auto publisherSession = createMockSession(); auto subSession = createMockSession(); @@ -34,6 +34,8 @@ TEST_F(MoQRelayTest, ForwardChangedAfterPublisherTermination) { }); auto subgroupRes = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(subgroupRes.hasValue()); + driveIfMultiThread( + ); // flush beginSubgroup so relay subgroup forwarder is wired before publishDone // Publisher terminates — onPublishDone clears handle/upstream. // forwarder->publishDone sets draining and calls drainSubscriber, but the @@ -61,6 +63,7 @@ TEST_F(MoQRelayTest, ForwardChangedAfterPublisherTermination) { removeSession(publisherSession); removeSession(subSession); + driveIfMultiThread(); // flush pending lambdas (sg->reset, cleanup) before mocks are destroyed } // Bug: when a second subscriber with forward=true joins an existing PUBLISH-path @@ -68,7 +71,7 @@ TEST_F(MoQRelayTest, ForwardChangedAfterPublisherTermination) { // twice — once via forwardChanged() (which fires synchronously inside addSubscriber // via addForwardingSubscriber) and once via the explicit block at the end of the // subscribe() else-branch. Analogous to the subscribeNamespace bug fixed in this PR. -TEST_F(MoQRelayTest, Subscribe_SecondForwardingSubscriber_SingleRequestUpdate) { +TEST_P(MoQRelayTest, Subscribe_SecondForwardingSubscriber_SingleRequestUpdate) { auto pubSession = createMockSession(); doPublishNamespace(pubSession, kTestNamespace); auto mockHandle = makePublishHandle(); diff --git a/test/MoqxRelayTestFixture.cpp b/test/MoqxRelayTestFixture.cpp index 051d97f..802552e 100644 --- a/test/MoqxRelayTestFixture.cpp +++ b/test/MoqxRelayTestFixture.cpp @@ -16,7 +16,16 @@ void TestMoQExecutor::add(folly::Func func) { MoQFollyExecutorImpl::add(std::move(func)); } void TestMoQExecutor::drive() { - if (auto* evb = getBackingEventBase()) { + auto* evb = getBackingEventBase(); + if (!evb) { + return; + } + evb->loopOnce(); + if (relayEvb_) { + // flush our pending task to relay + relayEvb_->runInEventBaseThreadAndWait([]() {}); + // now flush any tasks created by our task + relayEvb_->runInEventBaseThreadAndWait([]() {}); evb->loopOnce(); } } @@ -30,14 +39,55 @@ void MoQRelayTest::SetUp() { exec_ = std::make_shared(); relay_ = std::make_shared(config::CacheConfig{.maxCachedTracks = 0}); relay_->setAllowedNamespacePrefix(kAllowedPrefix); + if (relayMode() == RelayMode::MultiThread) { + relayThread_ = std::make_unique("relay-test"); + relayEvb_ = relayThread_->getEventBase(); + relay_->setRelayExec(relayEvb_); + relay_->setUseLocalForwarders(false); + exec_->setRelayEvb(relayEvb_); + ASSERT_NE(relay_->getRelayExec(), nullptr); + publisherInterface_ = + std::make_shared(relay_->getRelayExec(), relay_); + subscriberInterface_ = + std::make_shared(relay_->getRelayExec(), relay_); + } else if (relayMode() == RelayMode::LocalForwarderMT) { + relayThread_ = std::make_unique("relay-test"); + relayEvb_ = relayThread_->getEventBase(); + relay_->setRelayExec(relayEvb_); + relay_->setUseLocalForwarders(true); + exec_->setRelayEvb(relayEvb_); + ASSERT_NE(relay_->getRelayExec(), nullptr); + publisherInterface_ = relay_->createPublisherFilter(); + subscriberInterface_ = relay_->createSubscriberFilter(); + } } void MoQRelayTest::resetRelay(std::shared_ptr relay) { relay_ = std::move(relay); + if (relayEvb_) { + relay_->setRelayExec(relayEvb_); + if (relayMode() == RelayMode::LocalForwarderMT) { + publisherInterface_ = relay_->createPublisherFilter(); + subscriberInterface_ = relay_->createSubscriberFilter(); + } else { + publisherInterface_ = std::make_shared(relayEvb_, relay_); + subscriberInterface_ = std::make_shared(relayEvb_, relay_); + } + } } void MoQRelayTest::TearDown() { + // Drain any pending relay exec tasks (e.g., async publishNamespaceDone dispatches + // from cleanup) before destroying relay state to avoid use-after-free on NamespaceTree. + if (relayEvb_) { + relayEvb_->runInEventBaseThreadAndWait([]() {}); + relayEvb_->runInEventBaseThreadAndWait([]() {}); + } + exec_->setRelayEvb(nullptr); + publisherInterface_.reset(); + subscriberInterface_.reset(); relay_.reset(); + relayThread_.reset(); } std::shared_ptr MoQRelayTest::createMockSession() { @@ -195,6 +245,7 @@ std::shared_ptr MoQRelayTest::doPublish( } co_withExecutor(static_cast(exec_.get()), std::move(res->reply)) .start(); + driveIfMultiThread(); // flush reply to relay exec so publish state is ready return res->consumer; } return std::shared_ptr(nullptr); @@ -255,6 +306,7 @@ std::shared_ptr MoQRelayTest::doPublishWithHandle( getOrCreateMockState(session)->publishConsumers.push_back(consumer); co_withExecutor(static_cast(exec_.get()), std::move(res->reply)) .start(); + driveIfMultiThread(); // flush reply to relay exec so publish state is ready return consumer; }); } diff --git a/test/MoqxRelayTestFixture.h b/test/MoqxRelayTestFixture.h index 187c612..90a49cb 100644 --- a/test/MoqxRelayTestFixture.h +++ b/test/MoqxRelayTestFixture.h @@ -12,8 +12,11 @@ #pragma once #include "MoqxRelay.h" +#include "relay/PublisherCrossExecFilter.h" +#include "relay/SubscriberCrossExecFilter.h" #include #include +#include #include #include #include @@ -29,6 +32,27 @@ using namespace openmoq::moqx; namespace moxygen::test { +enum class RelayMode { + SingleThread, + MultiThread, + LocalForwarderMT, + // Future: LocalForwarderMTCrossThread — separate publisherThread_ for pub/sub split +}; + +inline void PrintTo(RelayMode mode, std::ostream* os) { + switch (mode) { + case RelayMode::SingleThread: + *os << "SingleThread"; + return; + case RelayMode::MultiThread: + *os << "MultiThread"; + return; + case RelayMode::LocalForwarderMT: + *os << "LocalForwarderMT"; + return; + } +} + inline const TrackNamespace kTestNamespace{{"test", "namespace"}}; inline const TrackNamespace kAllowedPrefix{{"test"}}; inline const FullTrackName kTestTrackName{kTestNamespace, "track1"}; @@ -43,13 +67,18 @@ class TestMoQExecutor : public MoQFollyExecutorImpl, public folly::DrivableExecu void drive() override; void driveFor(int n); + void setRelayEvb(folly::EventBase* evb) { relayEvb_ = evb; } + private: folly::EventBase evb_; + folly::EventBase* relayEvb_{nullptr}; }; // Test fixture for MoqxRelay and NamespaceTree tests. -class MoQRelayTest : public ::testing::Test { +class MoQRelayTest : public ::testing::TestWithParam { protected: + virtual RelayMode relayMode() const { return GetParam(); } + void SetUp() override; void TearDown() override; @@ -69,7 +98,36 @@ class MoQRelayTest : public ::testing::Test { folly::Optional expectedError = folly::none ); - template void verifyOnRelayExec(Func&& func) { func(); } + template void verifyOnRelayExec(Func&& func) { + if (relayEvb_) { + relayEvb_->runInEventBaseThreadAndWait(std::forward(func)); + } else { + func(); + } + } + + void driveIfMultiThread() { + if (relayEvb_) { + exec_->drive(); + } + } + + // Drive both executors until `done()` is true or maxIters is reached. Each + // exec_->drive() flushes exec_ and (in MT/LocalForwarderMT modes) synchronizes + // with relayEvb_ via runInEventBaseThreadAndWait, so this deterministically + // advances the relay's async cascades (e.g. forwardChanged/NGR requestUpdate) + // instead of guessing a fixed drive count. Returns done() — false means it + // timed out. In SingleThread mode drive() degrades to a single loopOnce. + template bool driveUntil(Pred&& done, int maxIters = 500) { + // SingleThread mode is fully synchronous: there is no relay thread to await, + // so at most one loopOnce can make new progress. Cap iterations at 1 to + // avoid spinning loopOnce maxIters times when a predicate stays false. + int iters = relayEvb_ ? maxIters : 1; + for (int i = 0; i < iters && !done(); ++i) { + exec_->drive(); + } + return done(); + } template auto withSessionContext(std::shared_ptr session, Func&& func) -> decltype(func()) { @@ -152,6 +210,8 @@ class MoQRelayTest : public ::testing::Test { std::shared_ptr exec_; std::shared_ptr relay_; + std::unique_ptr relayThread_; + folly::EventBase* relayEvb_{nullptr}; }; } // namespace moxygen::test diff --git a/test/MoqxRelayTestModes.cpp b/test/MoqxRelayTestModes.cpp new file mode 100644 index 0000000..8e88bfe --- /dev/null +++ b/test/MoqxRelayTestModes.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) OpenMOQ contributors. + */ + +#include "MoqxRelayTestFixture.h" + +namespace moxygen::test { + +INSTANTIATE_TEST_SUITE_P( + AllModes, + MoQRelayTest, + ::testing::Values(RelayMode::SingleThread, RelayMode::MultiThread, RelayMode::LocalForwarderMT), + [](const ::testing::TestParamInfo& info) -> std::string { + switch (info.param) { + case RelayMode::SingleThread: + return "SingleThread"; + case RelayMode::MultiThread: + return "MultiThread"; + case RelayMode::LocalForwarderMT: + return "LocalForwarderMT"; + } + return "Unknown"; + } +); + +} // namespace moxygen::test diff --git a/test/MoqxRelayTrackStatusTests.cpp b/test/MoqxRelayTrackStatusTests.cpp index c4dc751..b222a10 100644 --- a/test/MoqxRelayTrackStatusTests.cpp +++ b/test/MoqxRelayTrackStatusTests.cpp @@ -11,7 +11,7 @@ namespace moxygen::test { // Test: TrackStatus on non-existent track -TEST_F(MoQRelayTest, TrackStatusNonExistentTrack) { +TEST_P(MoQRelayTest, TrackStatusNonExistentTrack) { auto clientSession = createMockSession(); // Request trackStatus for a track that doesn't exist @@ -34,7 +34,7 @@ TEST_F(MoQRelayTest, TrackStatusNonExistentTrack) { // Test: TrackStatus on existing track - returns forwarder state (no upstream // call) -TEST_F(MoQRelayTest, TrackStatusSuccessfulForward) { +TEST_P(MoQRelayTest, TrackStatusSuccessfulForward) { auto publisherSession = createMockSession(); auto clientSession = createMockSession(); @@ -67,7 +67,7 @@ TEST_F(MoQRelayTest, TrackStatusSuccessfulForward) { // Verifies that when there's no exact subscription but a publisher has // published a matching namespace prefix, the relay correctly routes // TRACK_STATUS upstream using prefix matching -TEST_F(MoQRelayTest, TrackStatusViaPrefixMatching) { +TEST_P(MoQRelayTest, TrackStatusViaPrefixMatching) { auto publisher = createMockSession(); auto requester = createMockSession(); diff --git a/test/MoqxTrackFilterTest.cpp b/test/MoqxTrackFilterTest.cpp index 50d6f69..443fae1 100644 --- a/test/MoqxTrackFilterTest.cpp +++ b/test/MoqxTrackFilterTest.cpp @@ -31,6 +31,8 @@ constexpr uint64_t kPropType = 0x100; // audio level property type class MoqxTrackFilterTest : public moxygen::test::MoQRelayTest { protected: + RelayMode relayMode() const override { return RelayMode::SingleThread; } + void SetUp() override { MoQRelayTest::SetUp(); relay_ = std::make_shared( diff --git a/test/SubscriptionRegistryTest.cpp b/test/SubscriptionRegistryTest.cpp index 2db0023..1ad6c20 100644 --- a/test/SubscriptionRegistryTest.cpp +++ b/test/SubscriptionRegistryTest.cpp @@ -44,7 +44,7 @@ TEST(SubscriptionRegistryTest, AwaitSubsequentSucceeds) { registry.getOrCreateFromSubscribe(kFtn, nullptr, subscribeChain) ); - EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr)); + EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr, nullptr)); folly::EventBase evb; auto sub = folly::coro::blockingWait(std::move(task), &evb); @@ -89,7 +89,7 @@ TEST(SubscriptionRegistryTest, PendingCompleteReturnsFalseWhenEntryGone) { registry.remove(kFtn); // simulate publisher replacing the entry mid-subscribe - EXPECT_FALSE(first.pending.complete(nullptr, RequestID(0), nullptr)); + EXPECT_FALSE(first.pending.complete(nullptr, RequestID(0), nullptr, nullptr)); } // Regression: awaitSubsequent must re-find after suspension; erased entry throws. @@ -114,7 +114,7 @@ TEST(SubscriptionRegistryTest, AwaitSubsequentHandlesErasedEntry) { std::move(std::get>(token2)); // Upstream subscribe succeeds — fulfills the promise. - EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr)); + EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr, nullptr)); // Entry is erased before the subsequent coroutine resumes. registry.remove(kFtn); @@ -131,11 +131,18 @@ TEST(SubscriptionRegistryTest, CreateFromPublishEvictsSubscribeEntry) { registry.getOrCreateFromSubscribe(kFtn, nullptr, subscribeChain) ); auto originalForwarder = first.forwarder; - first.pending.complete(nullptr, RequestID(0), nullptr); + first.pending.complete(nullptr, RequestID(0), nullptr, nullptr); auto newForwarder = std::make_shared(kFtn, std::nullopt); - auto entry = - registry.createFromPublish(kFtn, newForwarder, nullptr, RequestID(1), nullptr, publishChain); + auto entry = registry.createFromPublish( + kFtn, + newForwarder, + nullptr, + nullptr, + RequestID(1), + nullptr, + publishChain + ); ASSERT_TRUE(entry.evicted.has_value()); EXPECT_EQ(entry.evicted->forwarder, originalForwarder); @@ -147,7 +154,8 @@ TEST(SubscriptionRegistryTest, CreateFromPublishEvictsSubscribeEntry) { TEST(SubscriptionRegistryTest, OnPublisherTerminatedErasesEmptyEntry) { SubscriptionRegistry registry; auto forwarder = std::make_shared(kFtn, std::nullopt); - registry.createFromPublish(kFtn, forwarder, nullptr, RequestID(0), nullptr, publishChain); + registry + .createFromPublish(kFtn, forwarder, nullptr, nullptr, RequestID(0), nullptr, publishChain); auto result = registry.onPublisherTerminated(kFtn); EXPECT_EQ(result, nullptr); diff --git a/test/config/ConfigResolverTest.cpp b/test/config/ConfigResolverTest.cpp index ac08fc7..37e8d82 100644 --- a/test/config/ConfigResolverTest.cpp +++ b/test/config/ConfigResolverTest.cpp @@ -920,12 +920,29 @@ TEST(ResolveConfig, ThreadsZeroRejected) { EXPECT_THAT(result.error(), HasSubstr("threads must be >= 1")); } -TEST(ResolveConfig, ThreadsGreaterThanOneRejected) { +TEST(ResolveConfig, ThreadsGreaterThanOneAccepted) { auto cfg = makeMinimalInsecureConfig(); cfg.threads = std::optional{2}; auto result = resolveConfig(cfg); + ASSERT_TRUE(result.hasValue()); + EXPECT_EQ(result.value().config.threads, 2u); +} + +TEST(ResolveConfig, UseRelayThreadFalseWithOneThreadAccepted) { + auto cfg = makeMinimalInsecureConfig(); + cfg.use_relay_thread = std::optional{false}; + auto result = resolveConfig(cfg); + ASSERT_TRUE(result.hasValue()); + EXPECT_FALSE(result.value().config.useRelayThread); +} + +TEST(ResolveConfig, UseRelayThreadFalseWithMultipleThreadsRejected) { + auto cfg = makeMinimalInsecureConfig(); + cfg.threads = std::optional{2}; + cfg.use_relay_thread = std::optional{false}; + auto result = resolveConfig(cfg); ASSERT_TRUE(result.hasError()); - EXPECT_THAT(result.error(), HasSubstr("threads > 1 is not yet supported")); + EXPECT_THAT(result.error(), HasSubstr("use_relay_thread must be true when threads > 1")); } // --- multiple listeners tests ---