From 87344a3036849b295dfaef4374ea80177407c21d Mon Sep 17 00:00:00 2001 From: afrind Date: Sun, 31 May 2026 23:15:19 -0400 Subject: [PATCH 1/9] test: parameterize relay tests for SingleThread and MultiThread modes Converts all MoQRelayTest TEST_F cases to TEST_P and instantiates them under AllModes/MoQRelayTest with named variants (SingleThread, MultiThread). MoqxTrackFilterTest overrides relayMode() to always return SingleThread so it stays as TEST_F without GetParam() UB. MoqxRelayTestModes.cpp is added only to moqx_relay_test (not the shared fixture library) to avoid a GTest warning in moqx_track_filter_test. --- test/CMakeLists.txt | 1 + test/MoqxRelayDataPlaneTests.cpp | 6 +++--- test/MoqxRelayFetchTests.cpp | 2 +- test/MoqxRelayNGRTests.cpp | 12 ++++++------ test/MoqxRelayPeerTests.cpp | 14 +++++++------- test/MoqxRelayPublishTests.cpp | 20 ++++++++++---------- test/MoqxRelaySubNsTests.cpp | 12 ++++++------ test/MoqxRelaySubscribeTests.cpp | 4 ++-- test/MoqxRelayTestFixture.h | 16 +++++++++++++++- test/MoqxRelayTestModes.cpp | 22 ++++++++++++++++++++++ test/MoqxRelayTrackStatusTests.cpp | 6 +++--- test/MoqxTrackFilterTest.cpp | 2 ++ 12 files changed, 78 insertions(+), 39 deletions(-) create mode 100644 test/MoqxRelayTestModes.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bc86edca..dd480153 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -39,6 +39,7 @@ add_executable(moqx_relay_test MoqxRelayTrackStatusTests.cpp MoqxRelayNGRTests.cpp MoqxRelayPeerTests.cpp + MoqxRelayTestModes.cpp ) target_link_libraries(moqx_relay_test PRIVATE moqx_test_fixture diff --git a/test/MoqxRelayDataPlaneTests.cpp b/test/MoqxRelayDataPlaneTests.cpp index 2aa06061..e1daf0a7 100644 --- a/test/MoqxRelayDataPlaneTests.cpp +++ b/test/MoqxRelayDataPlaneTests.cpp @@ -14,7 +14,7 @@ namespace moxygen::test { // new ones. // Sequence: publish, 2 subscribers, beginSubgroup, beginSubgroup again -> // first consumers get reset, both subscribers get new consumers. -TEST_F(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { +TEST_P(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { auto publisherSession = createMockSession(); auto sub1 = createMockSession(); auto sub2 = createMockSession(); @@ -75,7 +75,7 @@ TEST_F(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { // Test: Duplicate beginSubgroup after all subscribers have stop_sending'd // returns CANCELLED to propagate the signal back to the publisher. -TEST_F(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { +TEST_P(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { auto publisherSession = createMockSession(); auto subscriber = createMockSession(); @@ -114,7 +114,7 @@ TEST_F(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { // Test: Duplicate beginSubgroup with partial stop_sending - active subscriber // gets reset and new consumer; tombstoned subscriber is skipped. -TEST_F(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { +TEST_P(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { auto publisherSession = createMockSession(); auto subA = createMockSession(); auto subB = createMockSession(); diff --git a/test/MoqxRelayFetchTests.cpp b/test/MoqxRelayFetchTests.cpp index b7f488c4..948bbf64 100644 --- a/test/MoqxRelayFetchTests.cpp +++ b/test/MoqxRelayFetchTests.cpp @@ -14,7 +14,7 @@ namespace moxygen::test { // crash. When findPublishNamespaceSession returns null (no publishNamespace), // fetch falls back to subscriptions_. After onPublishDone, upstream is null // but the subscription entry remains if the forwarder has subscribers. -TEST_F(MoQRelayTest, FetchAfterPublisherTermination) { +TEST_P(MoQRelayTest, FetchAfterPublisherTermination) { auto publisherSession = createMockSession(); auto subSession = createMockSession(); auto fetchSession = createMockSession(); diff --git a/test/MoqxRelayNGRTests.cpp b/test/MoqxRelayNGRTests.cpp index 67a45b5b..827fb86d 100644 --- a/test/MoqxRelayNGRTests.cpp +++ b/test/MoqxRelayNGRTests.cpp @@ -12,7 +12,7 @@ namespace moxygen::test { // Test: relay PUBLISH path – dynamic groups from PublishRequest extensions // is stored in the forwarder and forwarded to every downstream subscriber -TEST_F(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { +TEST_P(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -45,7 +45,7 @@ TEST_F(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { // Test: relay SUBSCRIBE path – dynamic groups from the upstream SubscribeOk is // stored in the forwarder and forwarded to both the first and late-joining // downstream subscribers -TEST_F(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { +TEST_P(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -92,7 +92,7 @@ TEST_F(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { // Relay test: When a late-joining subscriber sends NEW_GROUP_REQUEST in its // SUBSCRIBE, the relay forwards it upstream via REQUEST_UPDATE -TEST_F(MoQRelayTest, RelaySubscribeLateJoinerNGRForwardedUpstream) { +TEST_P(MoQRelayTest, RelaySubscribeLateJoinerNGRForwardedUpstream) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -163,7 +163,7 @@ TEST_F(MoQRelayTest, RelaySubscribeLateJoinerNGRForwardedUpstream) { // Relay test: A downstream subscriber sending REQUEST_UPDATE with // NEW_GROUP_REQUEST causes the relay to cascade the NGR upstream -TEST_F(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { +TEST_P(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -218,7 +218,7 @@ TEST_F(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { // Relay test: downstream subscriber returns PublishOk carrying NEW_GROUP_REQUEST; // relay cascades NGR to the publisher handle upstream via REQUEST_UPDATE -TEST_F(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { +TEST_P(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -279,7 +279,7 @@ TEST_F(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { // Relay test: a second subscriber returning the same NEW_GROUP_REQUEST value in // its PublishOk is deduplicated; the upstream handle receives exactly one // REQUEST_UPDATE -TEST_F(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { +TEST_P(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); diff --git a/test/MoqxRelayPeerTests.cpp b/test/MoqxRelayPeerTests.cpp index dbbc5769..086a0d0d 100644 --- a/test/MoqxRelayPeerTests.cpp +++ b/test/MoqxRelayPeerTests.cpp @@ -12,7 +12,7 @@ namespace moxygen::test { // Test: makeNamespaceBridgeHandle routes namespaceMsg to doPublishNamespace -TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsNamespaceMsg) { +TEST_P(MoQRelayTest, NamespaceBridgeHandleForwardsNamespaceMsg) { auto peerSession = createMockSession(); // Bridge handle routes NAMESPACE messages from peerSession into the relay. @@ -32,7 +32,7 @@ TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsNamespaceMsg) { } // Test: makeNamespaceBridgeHandle routes namespaceDoneMsg to doPublishNamespaceDone -TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsDoneMsg) { +TEST_P(MoQRelayTest, NamespaceBridgeHandleForwardsDoneMsg) { auto peerSession = createMockSession(); auto handle = makeNamespaceBridgeHandle(relay_, peerSession); @@ -65,7 +65,7 @@ TEST_F(MoQRelayTest, NamespaceBridgeHandleForwardsDoneMsg) { // // Production relays negotiate draft-16 (empty prefix allowed). The delivery // path for draft-16 is synchronous: namespacePublishHandle->namespaceMsg(). -TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBackOnReconnect) { +TEST_P(MoQRelayTest, PeerNamespaceNotEchoedBackOnReconnect) { // Relay must have a relayID for peer detection to activate. resetRelay(std::make_shared(config::CacheConfig{.maxCachedTracks = 0}, "sg-sin-2-1")); relay_->setAllowedNamespacePrefix(kAllowedPrefix); @@ -107,7 +107,7 @@ TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBackOnReconnect) { // Complement: namespaces from LOCAL publishers (not from the peer) must still // be delivered when that peer subscribes. -TEST_F(MoQRelayTest, LocalNamespaceDeliveredToPeerOnReconnect) { +TEST_P(MoQRelayTest, LocalNamespaceDeliveredToPeerOnReconnect) { resetRelay(std::make_shared(config::CacheConfig{.maxCachedTracks = 0}, "sg-sin-2-1")); relay_->setAllowedNamespacePrefix(kAllowedPrefix); @@ -183,7 +183,7 @@ class PeerAnnounceSession : public NiceMock { // directly via makeNamespaceBridgeHandle), this test goes through the full // publisherInterface()->subscribeNamespace() production path so the bug in the call-site is // exercised. -TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBack_FullProductionPath) { +TEST_P(MoQRelayTest, PeerNamespaceNotEchoedBack_FullProductionPath) { resetRelay(std::make_shared(config::CacheConfig{.maxCachedTracks = 0}, "sg-sin-2-1")); relay_->setAllowedNamespacePrefix(kAllowedPrefix); @@ -237,7 +237,7 @@ TEST_F(MoQRelayTest, PeerNamespaceNotEchoedBack_FullProductionPath) { // without graceful namespaceDoneMsg calls, tree entries it created must be // cleaned up so stale sourceSession shared_ptrs don't keep dead session objects // alive and downstream subscribers receive NAMESPACE_DONE. -TEST_F(MoQRelayTest, BridgeHandleDestructorCleansUpNamespaces) { +TEST_P(MoQRelayTest, BridgeHandleDestructorCleansUpNamespaces) { auto upstreamSession = createMockSession(); // Simulate the bridge path: create a handle and announce a namespace through @@ -264,7 +264,7 @@ TEST_F(MoQRelayTest, BridgeHandleDestructorCleansUpNamespaces) { // Verify that when a new publisher takes over a namespace before the old // bridge handle is destroyed, the stale handle's destructor does NOT evict // the new publisher's entry (doPublishNamespaceDone guards on sourceSession). -TEST_F(MoQRelayTest, BridgeHandleDestructorDoesNotEvictNewPublisher) { +TEST_P(MoQRelayTest, BridgeHandleDestructorDoesNotEvictNewPublisher) { auto session1 = createMockSession(); auto session2 = createMockSession(); diff --git a/test/MoqxRelayPublishTests.cpp b/test/MoqxRelayPublishTests.cpp index a93c13a8..ef857123 100644 --- a/test/MoqxRelayPublishTests.cpp +++ b/test/MoqxRelayPublishTests.cpp @@ -11,7 +11,7 @@ namespace moxygen::test { // Test: Verify allowed namespace prefix is set correctly -TEST_F(MoQRelayTest, AllowedNamespacePrefix) { +TEST_P(MoQRelayTest, AllowedNamespacePrefix) { // This just verifies the relay can be constructed with a namespace prefix // More detailed testing requires full session setup auto relay2 = std::make_shared(config::CacheConfig{ @@ -23,7 +23,7 @@ TEST_F(MoQRelayTest, AllowedNamespacePrefix) { } // Test: Publish a track through the relay -TEST_F(MoQRelayTest, PublishSuccess) { +TEST_P(MoQRelayTest, PublishSuccess) { auto publisherSession = createMockSession(); // Publish the namespace @@ -48,7 +48,7 @@ TEST_F(MoQRelayTest, PublishSuccess) { // Test: Extensions from publish are forwarded to subscribers via // subscribeNamespace -TEST_F(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { +TEST_P(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { auto publisherSession = createMockSession(); auto subscriber = createMockSession(); @@ -107,7 +107,7 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { // ============================================================ // Test: Extensions from publish are forwarded to late-joining subscribers -TEST_F(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { +TEST_P(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -199,7 +199,7 @@ TEST_F(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { // 4. Session A reconnects and re-publishes the same track. The multipublisher // check finds the surviving entry and calls it->second.handle->unsubscribe() // — null-pointer dereference, SIGSEGV. -TEST_F(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { +TEST_P(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -260,7 +260,7 @@ TEST_F(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { // old forwarder's subscribers must receive publishDone, and the new // publish-path subscription must be fully functional (accepting data from the // new publisher). -TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { +TEST_P(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { auto publisherSession = createMockSession(); auto subscriberSession = createMockSession(); @@ -334,7 +334,7 @@ TEST_F(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { // ScopeGuardImplBase::terminate() → std::terminate (exit code 139). // // Without the fix: crashes. With the fix: subscribe returns an error cleanly. -TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeScopeGuardCrash) { +TEST_P(MoQRelayTest, PublishReconnectDuringSubscribeScopeGuardCrash) { auto publisherSession1 = createMockSession(); auto publisherSession2 = createMockSession(); auto subscriberSession = createMockSession(); @@ -423,7 +423,7 @@ TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeScopeGuardCrash) { // entry (promise already satisfied), and rsub.promise.setValue() throws // PromiseAlreadySatisfied, which propagates as an unhandled coroutine exception. // With the fix: subscribe returns SUBSCRIBE_ERROR "publisher reconnected". -TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeSuccessPathCrash) { +TEST_P(MoQRelayTest, PublishReconnectDuringSubscribeSuccessPathCrash) { auto publisherSession1 = createMockSession(); auto publisherSession2 = createMockSession(); auto subscriberSession = createMockSession(); @@ -498,7 +498,7 @@ TEST_F(MoQRelayTest, PublishReconnectDuringSubscribeSuccessPathCrash) { // Regression: after publishDone the namespace-tree node must be pruned when // the track was the only remaining content. (Was a bug before unpublishTrack // gained a NodeMutationGuard; kept as a regression guard.) -TEST_F(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { +TEST_P(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { auto publisher = createMockSession(); doPublishNamespace(publisher, kTestNamespace); @@ -541,7 +541,7 @@ TEST_F(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { } // Empty namespace: publishNamespace with an empty TrackNamespace must not crash. -TEST_F(MoQRelayTest, EmptyNamespacePublishNamespaceDone) { +TEST_P(MoQRelayTest, EmptyNamespacePublishNamespaceDone) { auto publisher = createMockSession(); TrackNamespace emptyNs{{}}; diff --git a/test/MoqxRelaySubNsTests.cpp b/test/MoqxRelaySubNsTests.cpp index bf63e113..c89d2cf3 100644 --- a/test/MoqxRelaySubNsTests.cpp +++ b/test/MoqxRelaySubNsTests.cpp @@ -10,7 +10,7 @@ namespace moxygen::test { -TEST_F(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { +TEST_P(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { auto publisherSession = createMockSession(); auto subscriber1 = createMockSession(); auto subscriber2 = createMockSession(); @@ -89,7 +89,7 @@ TEST_F(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { removeSession(subscriber2); } -TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { +TEST_P(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { // Default session uses kVersionDraftCurrent (draft-14, which is < 16) auto session = createMockSession(); @@ -109,7 +109,7 @@ TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { removeSession(session); } -TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixAllowedV16) { +TEST_P(MoQRelayTest, SubscribeNamespaceEmptyPrefixAllowedV16) { auto session = createMockSession(); // Override the negotiated version to draft-16 ON_CALL(*session, getNegotiatedVersion()) @@ -121,7 +121,7 @@ TEST_F(MoQRelayTest, SubscribeNamespaceEmptyPrefixAllowedV16) { removeSession(session); } -TEST_F(MoQRelayTest, ExactNamespaceSubscriberReceivesPublishNamespace) { +TEST_P(MoQRelayTest, ExactNamespaceSubscriberReceivesPublishNamespace) { auto subscriber = createMockSession(); auto publisher = createMockSession(); @@ -156,7 +156,7 @@ TEST_F(MoQRelayTest, ExactNamespaceSubscriberReceivesPublishNamespace) { // forwarder is empty, the relay fires REQUEST_UPDATE twice — once explicitly // at the if(forwarder->empty()) site and once via forwardChanged() when // addSubscriber() increments numForwardingSubscribers from 0 to 1. -TEST_F(MoQRelayTest, SubscribeNs_ForwardTrue_EmptyForwarder_SingleRequestUpdate) { +TEST_P(MoQRelayTest, SubscribeNs_ForwardTrue_EmptyForwarder_SingleRequestUpdate) { auto pubSession = createMockSession(); doPublishNamespace(pubSession, kTestNamespace); auto mockHandle = makePublishHandle(); @@ -192,7 +192,7 @@ TEST_F(MoQRelayTest, SubscribeNs_ForwardTrue_EmptyForwarder_SingleRequestUpdate) // forwarder is empty, the relay fires a spurious REQUEST_UPDATE(forward=false) // at the if(forwarder->empty()) site — even though the upstream is already at // forward=false (set by publish() which found no subscribers). -TEST_F(MoQRelayTest, SubscribeNs_ForwardFalse_EmptyForwarder_NoRequestUpdate) { +TEST_P(MoQRelayTest, SubscribeNs_ForwardFalse_EmptyForwarder_NoRequestUpdate) { auto pubSession = createMockSession(); doPublishNamespace(pubSession, kTestNamespace); auto mockHandle = makePublishHandle(); diff --git a/test/MoqxRelaySubscribeTests.cpp b/test/MoqxRelaySubscribeTests.cpp index cb2075a1..816b19c2 100644 --- a/test/MoqxRelaySubscribeTests.cpp +++ b/test/MoqxRelaySubscribeTests.cpp @@ -14,7 +14,7 @@ namespace moxygen::test { // terminated (onPublishDone clears handle/upstream). We trigger forwardChanged // via Subscriber::requestUpdate changing forward from true→false (1→0 // transition). The subscriber survives drain because it has an open subgroup. -TEST_F(MoQRelayTest, ForwardChangedAfterPublisherTermination) { +TEST_P(MoQRelayTest, ForwardChangedAfterPublisherTermination) { auto publisherSession = createMockSession(); auto subSession = createMockSession(); @@ -68,7 +68,7 @@ TEST_F(MoQRelayTest, ForwardChangedAfterPublisherTermination) { // twice — once via forwardChanged() (which fires synchronously inside addSubscriber // via addForwardingSubscriber) and once via the explicit block at the end of the // subscribe() else-branch. Analogous to the subscribeNamespace bug fixed in this PR. -TEST_F(MoQRelayTest, Subscribe_SecondForwardingSubscriber_SingleRequestUpdate) { +TEST_P(MoQRelayTest, Subscribe_SecondForwardingSubscriber_SingleRequestUpdate) { auto pubSession = createMockSession(); doPublishNamespace(pubSession, kTestNamespace); auto mockHandle = makePublishHandle(); diff --git a/test/MoqxRelayTestFixture.h b/test/MoqxRelayTestFixture.h index 187c6124..245a1cbe 100644 --- a/test/MoqxRelayTestFixture.h +++ b/test/MoqxRelayTestFixture.h @@ -29,6 +29,18 @@ using namespace openmoq::moqx; namespace moxygen::test { +enum class RelayMode { + SingleThread, +}; + +inline void PrintTo(RelayMode mode, std::ostream* os) { + switch (mode) { + case RelayMode::SingleThread: + *os << "SingleThread"; + return; + } +} + inline const TrackNamespace kTestNamespace{{"test", "namespace"}}; inline const TrackNamespace kAllowedPrefix{{"test"}}; inline const FullTrackName kTestTrackName{kTestNamespace, "track1"}; @@ -48,8 +60,10 @@ class TestMoQExecutor : public MoQFollyExecutorImpl, public folly::DrivableExecu }; // Test fixture for MoqxRelay and NamespaceTree tests. -class MoQRelayTest : public ::testing::Test { +class MoQRelayTest : public ::testing::TestWithParam { protected: + virtual RelayMode relayMode() const { return GetParam(); } + void SetUp() override; void TearDown() override; diff --git a/test/MoqxRelayTestModes.cpp b/test/MoqxRelayTestModes.cpp new file mode 100644 index 00000000..38faea89 --- /dev/null +++ b/test/MoqxRelayTestModes.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (c) OpenMOQ contributors. + */ + +#include "MoqxRelayTestFixture.h" + +namespace moxygen::test { + +INSTANTIATE_TEST_SUITE_P( + AllModes, + MoQRelayTest, + ::testing::Values(RelayMode::SingleThread), + [](const ::testing::TestParamInfo& info) -> std::string { + switch (info.param) { + case RelayMode::SingleThread: + return "SingleThread"; + } + return "Unknown"; + } +); + +} // namespace moxygen::test diff --git a/test/MoqxRelayTrackStatusTests.cpp b/test/MoqxRelayTrackStatusTests.cpp index c4dc7518..b222a10f 100644 --- a/test/MoqxRelayTrackStatusTests.cpp +++ b/test/MoqxRelayTrackStatusTests.cpp @@ -11,7 +11,7 @@ namespace moxygen::test { // Test: TrackStatus on non-existent track -TEST_F(MoQRelayTest, TrackStatusNonExistentTrack) { +TEST_P(MoQRelayTest, TrackStatusNonExistentTrack) { auto clientSession = createMockSession(); // Request trackStatus for a track that doesn't exist @@ -34,7 +34,7 @@ TEST_F(MoQRelayTest, TrackStatusNonExistentTrack) { // Test: TrackStatus on existing track - returns forwarder state (no upstream // call) -TEST_F(MoQRelayTest, TrackStatusSuccessfulForward) { +TEST_P(MoQRelayTest, TrackStatusSuccessfulForward) { auto publisherSession = createMockSession(); auto clientSession = createMockSession(); @@ -67,7 +67,7 @@ TEST_F(MoQRelayTest, TrackStatusSuccessfulForward) { // Verifies that when there's no exact subscription but a publisher has // published a matching namespace prefix, the relay correctly routes // TRACK_STATUS upstream using prefix matching -TEST_F(MoQRelayTest, TrackStatusViaPrefixMatching) { +TEST_P(MoQRelayTest, TrackStatusViaPrefixMatching) { auto publisher = createMockSession(); auto requester = createMockSession(); diff --git a/test/MoqxTrackFilterTest.cpp b/test/MoqxTrackFilterTest.cpp index 50d6f69c..443fae18 100644 --- a/test/MoqxTrackFilterTest.cpp +++ b/test/MoqxTrackFilterTest.cpp @@ -31,6 +31,8 @@ constexpr uint64_t kPropType = 0x100; // audio level property type class MoqxTrackFilterTest : public moxygen::test::MoQRelayTest { protected: + RelayMode relayMode() const override { return RelayMode::SingleThread; } + void SetUp() override { MoQRelayTest::SetUp(); relay_ = std::make_shared( From 5d0c58c5fbb36a1c0404dc28447c04ec85182c50 Mon Sep 17 00:00:00 2001 From: afrind Date: Sun, 31 May 2026 23:43:03 -0400 Subject: [PATCH 2/9] relay_thread config and allow > 1 thread in config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a use_relay_thread boolean config option (default: true) that controls whether relay state is isolated on a dedicated executor thread. Disabling it is intended for baseline performance comparison only. Also removes the hard error that rejected threads > 1, replacing it with a targeted check: threads > 1 requires use_relay_thread=true. This unlocks the config validation only — threads > 1 with use_relay_thread=true will race on shared relay state until the following commit wires up the dedicated relay executor. --- src/config/Config.h | 1 + src/config/ConfigResolver.cpp | 8 ++++++-- src/config/loader/ParsedConfig.h | 5 +++++ test/config/ConfigResolverTest.cpp | 21 +++++++++++++++++++-- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/config/Config.h b/src/config/Config.h index 1f45b181..8efeccff 100644 --- a/src/config/Config.h +++ b/src/config/Config.h @@ -196,6 +196,7 @@ struct Config { std::optional admin; std::string relayID; // always set: from config or randomly generated uint32_t threads{1}; + bool useRelayThread{true}; bool mvfstBpfSteering{true}; }; diff --git a/src/config/ConfigResolver.cpp b/src/config/ConfigResolver.cpp index 33d7a8eb..c97e2885 100644 --- a/src/config/ConfigResolver.cpp +++ b/src/config/ConfigResolver.cpp @@ -822,8 +822,11 @@ folly::Expected resolveConfig(const ParsedConfig& c const uint32_t threads = config.threads.value().value_or(1); if (threads == 0) { errors.push_back("threads must be >= 1"); - } else if (threads > 1) { - errors.push_back("threads > 1 is not yet supported"); + } + + const bool useRelayThread = config.use_relay_thread.value().value_or(true); + if (threads > 1 && !useRelayThread) { + errors.push_back("use_relay_thread must be true when threads > 1"); } const bool mvfstBpfSteering = config.mvfst_bpf_steering.value().value_or(true); @@ -877,6 +880,7 @@ folly::Expected resolveConfig(const ParsedConfig& c .admin = std::move(adminConfig), .relayID = std::move(relayID), .threads = threads, + .useRelayThread = useRelayThread, .mvfstBpfSteering = mvfstBpfSteering, }, .warnings = std::move(warnings), diff --git a/src/config/loader/ParsedConfig.h b/src/config/loader/ParsedConfig.h index 178d6a2b..b075ff32 100644 --- a/src/config/loader/ParsedConfig.h +++ b/src/config/loader/ParsedConfig.h @@ -387,6 +387,11 @@ struct ParsedConfig { std::optional> listener_defaults; rfl::Description<"Number of IO worker threads (default: 1)", std::optional> threads; + rfl::Description< + "Dedicate one relay thread per service for relay state isolation (default: true). " + "Disable for baseline performance comparison.", + std::optional> + use_relay_thread; rfl::Description< "Attach a classic BPF reuseport filter to steer QUIC packets to the correct mvfst worker " "based on the connection ID's workerId field (Linux only, mvfst stack only, default: true). " diff --git a/test/config/ConfigResolverTest.cpp b/test/config/ConfigResolverTest.cpp index ac08fc73..37e8d82c 100644 --- a/test/config/ConfigResolverTest.cpp +++ b/test/config/ConfigResolverTest.cpp @@ -920,12 +920,29 @@ TEST(ResolveConfig, ThreadsZeroRejected) { EXPECT_THAT(result.error(), HasSubstr("threads must be >= 1")); } -TEST(ResolveConfig, ThreadsGreaterThanOneRejected) { +TEST(ResolveConfig, ThreadsGreaterThanOneAccepted) { auto cfg = makeMinimalInsecureConfig(); cfg.threads = std::optional{2}; auto result = resolveConfig(cfg); + ASSERT_TRUE(result.hasValue()); + EXPECT_EQ(result.value().config.threads, 2u); +} + +TEST(ResolveConfig, UseRelayThreadFalseWithOneThreadAccepted) { + auto cfg = makeMinimalInsecureConfig(); + cfg.use_relay_thread = std::optional{false}; + auto result = resolveConfig(cfg); + ASSERT_TRUE(result.hasValue()); + EXPECT_FALSE(result.value().config.useRelayThread); +} + +TEST(ResolveConfig, UseRelayThreadFalseWithMultipleThreadsRejected) { + auto cfg = makeMinimalInsecureConfig(); + cfg.threads = std::optional{2}; + cfg.use_relay_thread = std::optional{false}; + auto result = resolveConfig(cfg); ASSERT_TRUE(result.hasError()); - EXPECT_THAT(result.error(), HasSubstr("threads > 1 is not yet supported")); + EXPECT_THAT(result.error(), HasSubstr("use_relay_thread must be true when threads > 1")); } // --- multiple listeners tests --- From 0076f92c460ae777684464605e6526668627f566 Mon Sep 17 00:00:00 2001 From: afrind Date: Sun, 31 May 2026 23:43:03 -0400 Subject: [PATCH 3/9] relay: isolate relay state on dedicated executor to support multiple I/O threads --- deps/moxygen | 2 +- src/MoqxRelay.cpp | 356 ++++++++++++++++++++---------- src/MoqxRelay.h | 106 ++++++++- src/MoqxRelayContext.cpp | 77 +++++-- src/MoqxRelayContext.h | 9 +- src/SubscriptionRegistry.cpp | 17 +- src/SubscriptionRegistry.h | 18 +- src/main.cpp | 3 +- src/relay/RelayExecUtil.h | 72 ++++++ test/SubscriptionRegistryTest.cpp | 22 +- 10 files changed, 525 insertions(+), 157 deletions(-) create mode 100644 src/relay/RelayExecUtil.h diff --git a/deps/moxygen b/deps/moxygen index e282ba6c..2dff34e0 160000 --- a/deps/moxygen +++ b/deps/moxygen @@ -1 +1 @@ -Subproject commit e282ba6c871fed0fd0ebc90dc98d4893ae04f9df +Subproject commit 2dff34e0eaf67953c3512c1d6e5be6164770ce38 diff --git a/src/MoqxRelay.cpp b/src/MoqxRelay.cpp index dbe22713..ea0b9371 100644 --- a/src/MoqxRelay.cpp +++ b/src/MoqxRelay.cpp @@ -7,6 +7,9 @@ */ #include "MoqxRelay.h" +#include "relay/CrossExecForwarderCallback.h" +#include "relay/RelayExecUtil.h" +#include "relay/SubscriberCrossExecFilter.h" #include #include #include @@ -28,9 +31,11 @@ class MoqxRelayNamespaceHandle : public Publisher::NamespacePublishHandle { MoqxRelayNamespaceHandle( std::weak_ptr relay, std::shared_ptr session, - std::string peerID = {} + std::string peerID = {}, + folly::Executor* relayExec = nullptr ) - : relay_(std::move(relay)), session_(std::move(session)), peerID_(std::move(peerID)) {} + : relay_(std::move(relay)), session_(std::move(session)), peerID_(std::move(peerID)), + relayExec_(relayExec) {} ~MoqxRelayNamespaceHandle() { auto relay = relay_.lock(); @@ -38,52 +43,68 @@ class MoqxRelayNamespaceHandle : public Publisher::NamespacePublishHandle { return; } for (const auto& ns : activeNamespaces_) { - relay->doPublishNamespaceDone(ns, session_); + runOnExec(relayExec_, [relay, ns, session = session_]() mutable { + relay->doPublishNamespaceDone(ns, session); + }); } } void namespaceMsg(const TrackNamespace& suffix) override { - auto relay = relay_.lock(); - if (!relay || !session_) { - return; - } activeNamespaces_.insert(suffix); PublishNamespace pubNs; pubNs.trackNamespace = suffix; - relay->doPublishNamespace(std::move(pubNs), session_, nullptr, peerID_); + runOnExec( + relayExec_, + [relay = relay_, pubNs = std::move(pubNs), session = session_, peerID = peerID_]() mutable { + if (auto r = relay.lock()) { + r->doPublishNamespace(std::move(pubNs), session, nullptr, peerID); + } + } + ); } void namespaceDoneMsg(const TrackNamespace& suffix) override { - auto relay = relay_.lock(); - if (!relay || !session_) { - return; - } activeNamespaces_.erase(suffix); - relay->doPublishNamespaceDone(suffix, session_); + runOnExec(relayExec_, [relay = relay_, suffix, session = session_]() mutable { + if (auto r = relay.lock()) { + r->doPublishNamespaceDone(suffix, session); + } + }); } private: std::weak_ptr relay_; std::shared_ptr session_; std::string peerID_; + folly::Executor* relayExec_; folly::F14FastSet activeNamespaces_; }; std::shared_ptr makeNamespaceBridgeHandle( std::weak_ptr relay, std::shared_ptr session, - std::string peerID + std::string peerID, + folly::Executor* relayExec ) { return std::make_shared( std::move(relay), std::move(session), - std::move(peerID) + std::move(peerID), + relayExec ); } folly::coro::Task MoqxRelay::onUpstreamConnect(std::shared_ptr session) { - auto nsHandle = makeNamespaceBridgeHandle(weak_from_this(), session); - auto result = co_await session->subscribeNamespace(makePeerSubNs(relayID_), nsHandle); + co_return co_await onUpstreamConnectImpl(std::move(session)); +} + +folly::coro::Task MoqxRelay::onUpstreamConnectImpl(std::shared_ptr session) { + auto nsHandle = makeNamespaceBridgeHandle(weak_from_this(), session, {}, relayExec_); + // subscribeNamespace must run on the upstream session's executor + auto result = co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(session->getExecutor()), + session->subscribeNamespace(makePeerSubNs(relayID_), nsHandle) + ); if (result.hasValue()) { upstreamSubNsHandle_ = std::move(result.value()); } else { @@ -168,7 +189,7 @@ std::shared_ptr MoqxRelay::doPublishNamespac if (outSession != session && (info.options == SubscribeNamespaceOptions::NAMESPACE || info.options == SubscribeNamespaceOptions::BOTH)) { if (info.namespacePublishHandle) { - // Draft 16+: send NAMESPACE message on the bidi stream + // Draft 16+: send NAMESPACE message on the bidi stream. TrackNamespace suffix(std::vector( pubNs.trackNamespace.trackNamespace.begin() + info.trackNamespacePrefix.size(), pubNs.trackNamespace.trackNamespace.end() @@ -187,6 +208,13 @@ std::shared_ptr MoqxRelay::doPublishNamespac folly::coro::Task MoqxRelay::publishNamespace( PublishNamespace pubNs, std::shared_ptr callback +) { + return publishNamespaceImpl(std::move(pubNs), std::move(callback)); +} + +folly::coro::Task MoqxRelay::publishNamespaceImpl( + PublishNamespace pubNs, + std::shared_ptr callback ) { // TODO: store auth for forwarding on future SubscribeNamespace? auto session = MoQSession::getRequestSession(); @@ -274,20 +302,31 @@ Subscriber::PublishResult MoqxRelay::publish(PublishRequest pub, std::shared_ptr handle) { XLOG(DBG1) << __func__ << " ftn=" << pub.fullTrackName; XCHECK(handle) << "Publish handle cannot be null"; + // Validate before touching relay state (safe to do on calling thread). if (!pub.fullTrackName.trackNamespace.startsWith(allowedNamespacePrefix_)) { return folly::makeUnexpected( PublishError{pub.requestID, PublishErrorCode::UNINTERESTED, "bad namespace"} ); } - if (pub.fullTrackName.trackNamespace.empty()) { return folly::makeUnexpected( PublishError({pub.requestID, PublishErrorCode::INTERNAL_ERROR, "namespace required"}) ); } - + // When relayExec_ is set, SubscriberCrossExecFilter (wired at session + // registration) has already dispatched to relayExec_ before calling this + // method, so getRequestSession() is valid and publishWithSession() runs on + // the correct thread. auto session = MoQSession::getRequestSession(); + maybeSetSessionExec(*session); + return publishWithSession(std::move(pub), std::move(handle), std::move(session)); +} +Subscriber::PublishResult MoqxRelay::publishWithSession( + PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session +) { // Handle duplicate publisher at relay level before registering in the tree. // Move the forwarder out and erase the entry BEFORE calling publishDone. // publishDone iterates subscribers via forEachSubscriber; if a subscriber @@ -302,10 +341,12 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptr(pub.fullTrackName, pub.largest); forwarder->setExtensions(pub.extensions); + auto publisherWrapped = maybeWrapPublisher(relayExec_, session); auto publishEntry = registry_.createFromPublish( pub.fullTrackName, forwarder, session, + std::move(publisherWrapped), pub.requestID, std::move(handle), [&](std::shared_ptr f) { return buildFilterChain(pub.fullTrackName, f); } @@ -348,6 +389,14 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptrsetCallback( + std::make_shared(relayExec_, forwarder, shared_from_this()) + ); + } else { + forwarder->setCallback(shared_from_this()); + } + uint64_t nSubscribers = 0; bool hasTrackFilterSub = false; for (auto& [outSession, info] : sessions) { @@ -360,11 +409,12 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptrgetExecutor(); - co_withExecutor(exec, publishToSession(outSession, forwarder, info.forward)).start(); + if (!addSubscriberAndPublish(outSession, forwarder, info.forward, /*pinned=*/true)) { + XLOG(ERR) << "addSubscriberAndPublish failed for " << forwarder->fullTrackName(); + continue; + } } } - forwarder->setCallback(shared_from_this()); // Forward if there are direct subscribers OR TRACK_FILTER subscribers // (PropertyRanking needs objects to evaluate property values for ranking). @@ -385,49 +435,90 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptr MoqxRelay::publishToSession( - std::shared_ptr session, +namespace { + +// Free-function coroutine: awaits the publish reply and calls onPublishOk. +// Holds forwarder alive because subscriber keeps a raw ref into it. +folly::coro::Task awaitPublishReply( std::shared_ptr forwarder, - bool forward, - bool trackFilterSubscriber + std::shared_ptr subscriber, + folly::coro::Task> reply ) { - if (session->isClosed()) { - XLOG(WARN) << "publishToSession: session closed, skipping " << forwarder->fullTrackName(); + auto result = co_await co_awaitTry(std::move(reply)); + if (result.hasException()) { + XLOG(ERR) << "Publish reply exception for " << forwarder->fullTrackName() + << " subscriber=" << subscriber.get() << ": " << result.exception().what(); + subscriber->unsubscribe(); co_return; } - auto subscriber = forwarder->addSubscriber(session, forward); - if (!subscriber) { - XLOG(ERR) << "Subscribe failed: addSubscriber returned null for " << forwarder->fullTrackName(); + if (result.value().hasError()) { + XLOG(ERR) << "Publish reply error for " << forwarder->fullTrackName() + << " subscriber=" << subscriber.get() << ": " << result.value().error().reasonPhrase; + subscriber->unsubscribe(); co_return; } - // Direct subscribers are pinned (not evictable by PropertyRanking). - // TRACK_FILTER subscribers are unpinned so onTrackEvicted can remove them. - subscriber->pinned = !trackFilterSubscriber; - XLOG(DBG4) << "added subscriber for ftn=" << forwarder->fullTrackName(); - auto guard = folly::makeGuard([subscriber] { subscriber->unsubscribe(); }); + XLOG(DBG1) << "Received PublishOk for " << forwarder->fullTrackName() + << " subscriber=" << subscriber.get(); + subscriber->onPublishOk(result.value().value()); +} - auto pubInitial = session->publish(subscriber->getPublishRequest(), subscriber); - if (pubInitial.hasError()) { - XLOG(ERR) << "Publish failed err=" << pubInitial.error().reasonPhrase; - co_return; +} // namespace + +std::optional MoqxRelay::startPublish( + std::shared_ptr session, + std::shared_ptr forwarder, + bool forward, + bool pinned, + folly::Executor* subscriberExec +) { + auto subscriber = forwarder->addSubscriber(session, forward); + if (!subscriber) { + XLOG(ERR) << "startPublish: addSubscriber null for " << forwarder->fullTrackName(); + return std::nullopt; } - subscriber->trackConsumer = std::move(pubInitial->consumer); - auto pubResult = co_await co_awaitTry(std::move(pubInitial->reply)); - if (pubResult.hasException()) { - XLOG(ERR) << "Publish failed err=" << pubResult.exception().what(); - co_return; + subscriber->pinned = pinned; + XLOG(DBG4) << "added subscriber for ftn=" << forwarder->fullTrackName(); + Subscriber::PublishResult pub; + if (subscriberExec) { + SubscriberCrossExecFilter wrapped(subscriberExec, session); + pub = wrapped.publish(subscriber->getPublishRequest(), subscriber); + } else { + pub = session->publish(subscriber->getPublishRequest(), subscriber); } - if (pubResult.value().hasError()) { - XLOG(ERR) << "Publish failed err=" << pubResult.value().error().reasonPhrase; - co_return; + if (pub.hasError()) { + XLOG(ERR) << "startPublish: publish failed: " << pub.error().reasonPhrase; + subscriber->unsubscribe(); + return std::nullopt; } - guard.dismiss(); - XLOG(DBG1) << "Publish OK sess=" << session.get(); - auto& pubOk = pubResult.value().value(); + subscriber->trackConsumer = std::move(pub->consumer); + return PreparedPublish{std::move(subscriber), std::move(pub->reply)}; +} - // Process the PUBLISH_OK response - updates range, forward flag, and - // handles NEW_GROUP_REQUEST forwarding via callback - subscriber->onPublishOk(pubOk); +bool MoqxRelay::addSubscriberAndPublish( + std::shared_ptr session, + std::shared_ptr forwarder, + bool forward, + bool pinned +) { + auto p = startPublish( + session, + forwarder, + forward, + pinned, + relayExec_ ? session->getExecutor() : nullptr + ); + if (!p) { + return false; + } + // Run awaitPublishReply on relayExec_ so onPublishOk and detach() (from + // publishDone) are always on the same thread and cannot race. For + // single-thread (relayExec_ == nullptr) this is the subscriber's exec. + co_withExecutor( + relayExec_ ? static_cast(relayExec_) : session->getExecutor(), + awaitPublishReply(forwarder, std::move(p->subscriber), std::move(p->reply)) + ) + .start(); + return true; } class MoqxRelay::NamespaceSubscription : public Publisher::SubscribeNamespaceHandle { @@ -524,6 +615,13 @@ MoqxRelay::buildFilterChain(const FullTrackName& ftn, std::shared_ptr MoqxRelay::subscribeNamespace( SubscribeNamespace subNs, std::shared_ptr namespacePublishHandle +) { + return subscribeNamespaceImpl(std::move(subNs), std::move(namespacePublishHandle)); +} + +folly::coro::Task MoqxRelay::subscribeNamespaceImpl( + SubscribeNamespace subNs, + std::shared_ptr namespacePublishHandle ) { XLOG(DBG1) << __func__ << " nsp=" << subNs.trackNamespacePrefix; @@ -539,10 +637,11 @@ folly::coro::Task MoqxRelay::subscribeNames << ", reciprocating peer subNs"; // Tag with the peer's relay ID so we suppress echoing these namespaces // back to that peer on reconnect. - auto handle = makeNamespaceBridgeHandle(weak_from_this(), session, incomingPeerID); - auto recipResult = co_await session->subscribeNamespace( - makePeerSubNs(), - handle + auto handle = makeNamespaceBridgeHandle(weak_from_this(), session, incomingPeerID, relayExec_); + // subscribeNamespace must run on the peer session's executor. + auto recipResult = co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(session->getExecutor()), + session->subscribeNamespace(makePeerSubNs(), handle) ); // no token: reciprocal, prevents loop if (recipResult.hasError()) { XLOG(ERR) << "Reciprocal peer subNs failed: " << recipResult.error().reasonPhrase; @@ -593,7 +692,7 @@ folly::coro::Task MoqxRelay::subscribeNames // If TRACK_FILTER is present, enroll session in PropertyRanking for top-N selection. // NOTE: onSelected callbacks fire synchronously within addSessionToTopNGroup() for - // tracks already in top-N, triggering publishToSession() before this call returns. + // tracks already in top-N, triggering onTrackSelected() before this call returns. if (trackFilter) { auto ranking = getOrCreateRanking(nodePtr, trackFilter->propertyType, subNs.trackNamespacePrefix); @@ -648,7 +747,10 @@ folly::coro::Task MoqxRelay::subscribeNames (subNs.options == SubscribeNamespaceOptions::BOTH || subNs.options == SubscribeNamespaceOptions::PUBLISH)) { if (publishSession != session) { - co_withExecutor(exec, publishToSession(session, forwarder, subNs.forward)).start(); + if (!addSubscriberAndPublish(session, forwarder, subNs.forward, /*pinned=*/true)) { + XLOG(ERR) << "addSubscriberAndPublish failed for " << ftn; + return; + } } } }); @@ -697,7 +799,13 @@ MoqxRelay::PublishState MoqxRelay::findPublishState(const FullTrackName& ftn) { folly::coro::Task MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr consumer) { + return subscribeImpl(std::move(subReq), std::move(consumer)); +} + +folly::coro::Task +MoqxRelay::subscribeImpl(SubscribeRequest subReq, std::shared_ptr consumer) { auto session = MoQSession::getRequestSession(); + maybeSetSessionExec(*session); const auto& ftn = subReq.fullTrackName; if (ftn.trackNamespace.empty()) { @@ -709,8 +817,7 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con // TOCTOU fix: if we might be the first subscriber, wait for the upstream // connection before branching. A concurrent coroutine may emplace the entry // while we are suspended, so we re-check inside getOrCreateFromSubscribe. - if (!registry_.exists(ftn) && upstream_ && - !namespaceTree_.findPublisherSession(ftn.trackNamespace)) { + if (!registry_.exists(ftn) && upstream_ && !findUpstreamPublisher(ftn.trackNamespace)) { co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); } @@ -727,6 +834,7 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con {subReq.requestID, SubscribeErrorCode::TRACK_NOT_EXIST, "no such namespace or track"} )); } // pending destructor fires on early return above + auto upstreamPublisher = maybeWrapPublisher(relayExec_, upstreamSession); // Add subscriber first (with the client's original request) in case objects // arrive before subscribe OK. @@ -751,9 +859,8 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con subReq.locType = LocationType::LargestObject; // Per the spec, we're supposed to always forward=1 upstream subReq.forward = first->forwarder->numForwardingSubscribers() > 0; - subReq.requestID = upstreamSession->peekNextRequestID(); - auto subRes = co_await upstreamSession->subscribe(subReq, first->consumer); + auto subRes = co_await upstreamPublisher->subscribe(subReq, first->consumer); if (subRes.hasError()) { co_return folly::makeUnexpected(SubscribeError( {clientRequestID, @@ -777,7 +884,8 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con first->forwarder->tryProcessNewGroupRequest(subReq.params, /*fire=*/false); auto requestID = subRes.value()->subscribeOk().requestID; - if (!first->pending.complete(std::move(subRes.value()), requestID, upstreamSession)) { + if (!first->pending + .complete(std::move(subRes.value()), requestID, upstreamSession, upstreamPublisher)) { XLOG(ERR) << "Subscription replaced by reconnecting publisher: " << ftn; co_return folly::makeUnexpected(SubscribeError{ clientRequestID, @@ -818,6 +926,11 @@ MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr con folly::coro::Task MoqxRelay::fetch(Fetch fetch, std::shared_ptr consumer) { + return fetchImpl(std::move(fetch), std::move(consumer)); +} + +folly::coro::Task +MoqxRelay::fetchImpl(Fetch fetch, std::shared_ptr consumer) { auto session = MoQSession::getRequestSession(); // check auth @@ -844,33 +957,30 @@ MoqxRelay::fetch(Fetch fetch, std::shared_ptr consumer) { fetch.args = StandaloneFetch(res.value().start, res.value().end); joining = nullptr; } else { - // Upstream is resolving the subscribe, forward joining fetch - joining->joiningRequestID = fetchView->requestID; + // Upstream is resolving the subscribe; let MoQSession resolve the + // request ID by track name to avoid a cross-executor data race. + joining->joiningRequestID = kAutoRequestID; } } - auto upstreamSession = namespaceTree_.findPublisherSession(fetch.fullTrackName.trackNamespace); - if (!upstreamSession && upstream_) { + auto upstreamPublisher = findUpstreamPublisher(fetch.fullTrackName.trackNamespace); + if (!upstreamPublisher && upstream_) { co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); - upstreamSession = namespaceTree_.findPublisherSession(fetch.fullTrackName.trackNamespace); + upstreamPublisher = findUpstreamPublisher(fetch.fullTrackName.trackNamespace); } - if (!upstreamSession) { + if (!upstreamPublisher) { // Attempt to find matching upstream subscription (from publish) if (auto fetchView = registry_.getFetchView(fetch.fullTrackName)) { - upstreamSession = fetchView->upstream; + upstreamPublisher = fetchView->publisher; } - if (!upstreamSession) { + if (!upstreamPublisher) { co_return folly::makeUnexpected( FetchError({fetch.requestID, FetchErrorCode::TRACK_NOT_EXIST, "no upstream for fetch"}) ); } } - if (session.get() == upstreamSession.get()) { - co_return folly::makeUnexpected( - FetchError({fetch.requestID, FetchErrorCode::INTERNAL_ERROR, "self fetch"}) - ); - } fetch.priority = kDefaultUpstreamPriority; + if (!cache_ || joining) { // We can't use the cache on an unresolved joining fetch - we don't know // which objects are being requested. However, once we have that resolved, @@ -879,12 +989,18 @@ MoqxRelay::fetch(Fetch fetch, std::shared_ptr consumer) { XLOG(DBG1) << "Upstream fetch {" << standalone->start.group << "," << standalone->start.object << "}.." << standalone->end.group << "," << standalone->end.object << "}"; } - co_return co_await upstreamSession->fetch(fetch, std::move(consumer)); + co_return co_await upstreamPublisher->fetch(std::move(fetch), std::move(consumer)); } - co_return co_await cache_->fetch(fetch, std::move(consumer), std::move(upstreamSession)); + co_return co_await cache_ + ->fetch(std::move(fetch), std::move(consumer), std::move(upstreamPublisher)); } folly::coro::Task MoqxRelay::trackStatus(TrackStatus trackStatus) { + return trackStatusImpl(std::move(trackStatus)); +} + +folly::coro::Task MoqxRelay::trackStatusImpl(TrackStatus trackStatus +) { XLOG(DBG1) << __func__ << " ftn=" << trackStatus.fullTrackName; if (trackStatus.fullTrackName.trackNamespace.empty()) { @@ -922,25 +1038,26 @@ folly::coro::Task MoqxRelay::trackStatus(TrackStat << " statusCode=" << (uint32_t)statusCode; co_return trackStatusOk; } else { - // No subscription - forward to upstream - auto upstreamSession = - namespaceTree_.findPublisherSession(trackStatus.fullTrackName.trackNamespace); - if (!upstreamSession && upstream_) { - co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); - upstreamSession = - namespaceTree_.findPublisherSession(trackStatus.fullTrackName.trackNamespace); + // No active subscription — try registry publisher first, then namespace tree + std::shared_ptr upstreamPublisher; + if (upstreamView) { + upstreamPublisher = upstreamView->publisher; + } else { + upstreamPublisher = findUpstreamPublisher(trackStatus.fullTrackName.trackNamespace); + if (!upstreamPublisher && upstream_) { + co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); + upstreamPublisher = findUpstreamPublisher(trackStatus.fullTrackName.trackNamespace); + } } - if (!upstreamSession) { - XLOG(DBG1) << "No upstream session for track: " << trackStatus.fullTrackName; + if (!upstreamPublisher) { + XLOG(DBG1) << "No upstream for track: " << trackStatus.fullTrackName; co_return folly::makeUnexpected(TrackStatusError{ trackStatus.requestID, TrackStatusErrorCode::TRACK_NOT_EXIST, "no such namespace or track" }); } - - // Forward the trackStatus request to the upstream publisher session - auto result = co_await upstreamSession->trackStatus(trackStatus); + auto result = co_await upstreamPublisher->trackStatus(std::move(trackStatus)); if (result.hasError()) { XLOG(DBG1) << "Upstream trackStatus failed: " << result.error().reasonPhrase; @@ -952,7 +1069,10 @@ folly::coro::Task MoqxRelay::trackStatus(TrackStat } void MoqxRelay::onEmpty(MoQForwarder* forwarder) { - const auto& ftn = forwarder->fullTrackName(); + onEmptyImpl(forwarder->fullTrackName()); +} + +void MoqxRelay::onEmptyImpl(const FullTrackName& ftn) { auto upstreamView = registry_.getUpstreamView(ftn); if (!upstreamView) { return; @@ -970,8 +1090,12 @@ void MoqxRelay::onEmpty(MoQForwarder* forwarder) { if (upstreamView->isPublish) { // if it's publish, don't unsubscribe, just subscribeUpdate forward=false XLOG(DBG1) << "Updating upstream subscription forward=false"; - auto exec = upstreamView->upstream->getExecutor(); - co_withExecutor(exec, doSubscribeUpdate(upstreamView->handle, /*forward=*/false)).start(); + auto exec = relayExec(); + co_withExecutor( + folly::getKeepAliveToken(exec), + doSubscribeUpdate(upstreamView->handle, /*forward=*/false) + ) + .start(); } else { upstreamView->handle->unsubscribe(); XLOG(DBG4) << "Erasing subscription to " << ftn; @@ -980,7 +1104,10 @@ void MoqxRelay::onEmpty(MoQForwarder* forwarder) { } void MoqxRelay::forwardChanged(MoQForwarder* forwarder, bool forward) { - const auto& ftn = forwarder->fullTrackName(); + forwardChangedImpl(forwarder->fullTrackName(), forward); +} + +void MoqxRelay::forwardChangedImpl(const FullTrackName& ftn, bool forward) { auto upstreamView = registry_.getUpstreamView(ftn); if (!upstreamView) { return; @@ -996,12 +1123,16 @@ void MoqxRelay::forwardChanged(MoQForwarder* forwarder, bool forward) { } XLOG(INFO) << "Updating forward for " << ftn << " forward=" << forward; - auto exec = upstreamView->upstream->getExecutor(); - co_withExecutor(exec, doSubscribeUpdate(upstreamView->handle, forward)).start(); + auto exec = relayExec(); + co_withExecutor(folly::getKeepAliveToken(exec), doSubscribeUpdate(upstreamView->handle, forward)) + .start(); } void MoqxRelay::newGroupRequested(MoQForwarder* forwarder, uint64_t group) { - const auto& ftn = forwarder->fullTrackName(); + newGroupRequestedImpl(forwarder->fullTrackName(), group); +} + +void MoqxRelay::newGroupRequestedImpl(const FullTrackName& ftn, uint64_t group) { auto upstreamView = registry_.getUpstreamView(ftn); // Check if handle is still valid (publisher may have terminated) if (!upstreamView || !upstreamView->handle) { @@ -1010,9 +1141,10 @@ void MoqxRelay::newGroupRequested(MoQForwarder* forwarder, uint64_t group) { } XLOG(INFO) << "New group request detected for " << ftn; - auto exec = upstreamView->upstream->getExecutor(); + auto exec = relayExec(); auto handle = upstreamView->handle; - co_withExecutor(exec, doNewGroupRequestUpdate(std::move(handle), group)).start(); + co_withExecutor(folly::getKeepAliveToken(exec), doNewGroupRequestUpdate(std::move(handle), group)) + .start(); } // TRACK_FILTER support @@ -1127,8 +1259,8 @@ void MoqxRelay::onTrackSelected( XLOG(DBG4) << "[MoqxRelay] Track selected: " << ftn << " session=" << session.get() << " forward=" << forward; - if (!session || session->isClosed()) { - XLOG(ERR) << "onTrackSelected: session null or closed, skipping " << ftn; + if (!session) { + XLOG(ERR) << "onTrackSelected: null session for " << ftn; return; } @@ -1141,20 +1273,18 @@ void MoqxRelay::onTrackSelected( auto exec = session->getExecutor(); XCHECK(exec) << "onTrackSelected: null executor for session " << session.get(); - // TODO: Consider batching multiple publishToSession calls on the same executor - // when multiple tracks are selected for the same session in a single ranking update. - co_withExecutor( - exec, - publishToSession(session, trackForwarder, forward, /*trackFilterSubscriber=*/true) - ) - .start(); + // TODO: Consider batching multiple addSubscriberAndPublish calls on the same + // executor when multiple tracks are selected for the same session in a single + // ranking update. + // TRACK_FILTER subscribers are unpinned so onTrackEvicted can remove them. + addSubscriberAndPublish(session, trackForwarder, forward, /*pinned=*/false); } void MoqxRelay::onTrackEvicted(const FullTrackName& ftn, std::shared_ptr session) { XLOG(DBG4) << "[MoqxRelay] Track evicted: " << ftn << " session=" << session.get(); - if (!session || session->isClosed()) { - XLOG(WARN) << "onTrackEvicted: session null or closed, skipping " << ftn; + if (!session) { + XLOG(WARN) << "onTrackEvicted: null session for " << ftn; return; } diff --git a/src/MoqxRelay.h b/src/MoqxRelay.h index e2ffe858..f2f9393b 100644 --- a/src/MoqxRelay.h +++ b/src/MoqxRelay.h @@ -14,9 +14,11 @@ #include "UpstreamProvider.h" #include "config/Config.h" #include "relay/PropertyRanking.h" +#include "relay/RelayExecUtil.h" #include #include +#include #include #include #include @@ -24,6 +26,10 @@ #include #include +namespace openmoq::moqx { +class CrossExecForwarderCallback; +} // namespace openmoq::moqx + namespace openmoq::moqx { // Visitor interface for relay state inspection. @@ -115,6 +121,21 @@ class MoqxRelay : public moxygen::Publisher, } } + // Optionally isolate relay state on a dedicated executor thread. + // When set, all public entry points switch to relayExec before touching + // relay state, and consumer callbacks to/from sessions are wrapped with + // cross-executor filters. relayExec must outlive this relay. + // If not set (default), all operations run on the calling thread. + void setRelayExec(folly::Executor* relayExec) { relayExec_ = relayExec; } + + // Takes ownership of exec and uses it as the relay executor. + void setRelayExec(std::shared_ptr exec) { + ownedRelayExec_ = std::move(exec); + relayExec_ = ownedRelayExec_.get(); + } + + folly::Executor* getRelayExec() const { return relayExec_; } + void setAllowedNamespacePrefix(moxygen::TrackNamespace allowed) { allowedNamespacePrefix_ = std::move(allowed); } @@ -247,17 +268,41 @@ class MoqxRelay : public moxygen::Publisher, void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; + // FTN-keyed impl variants — called by CrossExecForwarderCallback (relay exec) + // or directly from the non-cross-exec callbacks above. + friend class CrossExecForwarderCallback; + void onEmptyImpl(const moxygen::FullTrackName& ftn); + void forwardChangedImpl(const moxygen::FullTrackName& ftn, bool forward); + void newGroupRequestedImpl(const moxygen::FullTrackName& ftn, uint64_t group); + folly::coro::Task publishNamespaceToSession( std::shared_ptr session, moxygen::PublishNamespace pubNs, std::shared_ptr nodePtr ); - folly::coro::Task publishToSession( + // Sync setup: addSubscriber → set pinned → session->publish (via + // SubscriberCrossExecFilter when subscriberExec is non-null) → set + // trackConsumer. Returns nullopt and cleans up on any synchronous failure. + struct PreparedPublish { + std::shared_ptr subscriber; + folly::coro::Task> reply; + }; + std::optional startPublish( std::shared_ptr session, std::shared_ptr forwarder, bool forward, - bool trackFilterSubscriber = false + bool pinned, + folly::Executor* subscriberExec + ); + + // Calls startPublish and fires the reply as a free-running coroutine. + // Returns false and cleans up on any synchronous failure. + bool addSubscriberAndPublish( + std::shared_ptr session, + std::shared_ptr forwarder, + bool forward, + bool pinned ); folly::coro::Task @@ -319,6 +364,54 @@ class MoqxRelay : public moxygen::Publisher, const moxygen::FullTrackName& ftn, std::shared_ptr consumer ); + + // Impl methods — run on relayExec_ when set, or inline when relayExec_==nullptr. + folly::coro::Task + subscribeImpl(moxygen::SubscribeRequest subReq, std::shared_ptr consumer); + folly::coro::Task + fetchImpl(moxygen::Fetch fetch, std::shared_ptr consumer); + folly::coro::Task subscribeNamespaceImpl( + moxygen::SubscribeNamespace subNs, + std::shared_ptr namespacePublishHandle + ); + folly::coro::Task publishNamespaceImpl( + moxygen::PublishNamespace pubNs, + std::shared_ptr callback + ); + folly::coro::Task trackStatusImpl(moxygen::TrackStatus req + ); + folly::coro::Task onUpstreamConnectImpl(std::shared_ptr session); + + // Contains all the inline publish() logic, taking session explicitly so it + // can be called from either the I/O thread (relayExec_==nullptr) or from + // coPublish on relay exec (where getRequestSession() would return null). + PublishResult publishWithSession( + moxygen::PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session + ); + + std::shared_ptr ownedRelayExec_; + folly::Executor* relayExec_{nullptr}; + // Only set in single-threaded mode (relayExec_ == null); used as the + // coroutine start executor for fire-and-forget tasks like doSubscribeUpdate. + folly::Executor* sessionExec_{nullptr}; + + void maybeSetSessionExec(moxygen::MoQSession& session) { + if (!relayExec_ && !sessionExec_) { + sessionExec_ = session.getExecutor(); + } + } + + folly::Executor* relayExec() const { return relayExec_ ? relayExec_ : sessionExec_; } + + std::shared_ptr findUpstreamPublisher(const moxygen::TrackNamespace& ns) { + auto session = namespaceTree_.findPublisherSession(ns); + if (!session) { + return nullptr; + } + return maybeWrapPublisher(relayExec_, std::move(session)); + } std::unique_ptr cache_; uint64_t maxDeselected_{kDefaultMaxDeselected}; @@ -329,12 +422,15 @@ class MoqxRelay : public moxygen::Publisher, }; // Creates a NamespacePublishHandle that bridges NAMESPACE/NAMESPACE_DONE -// messages from a peer relay into relay->doPublishNamespace() synchronously. -// Used for both the initiating (UpstreamProvider) and reciprocal (MoqxRelay) paths. +// messages from a peer relay into relay->doPublishNamespace(). When relayExec +// is non-null, callbacks are dispatched to it so relay state is only mutated +// on the relay executor thread. Used for both the initiating (UpstreamProvider) +// and reciprocal (MoqxRelay) paths. std::shared_ptr makeNamespaceBridgeHandle( std::weak_ptr relay, std::shared_ptr session, - std::string peerID = {} + std::string peerID = {}, + folly::Executor* relayExec = nullptr ); } // namespace openmoq::moqx diff --git a/src/MoqxRelayContext.cpp b/src/MoqxRelayContext.cpp index d1d9336f..e08dea1a 100644 --- a/src/MoqxRelayContext.cpp +++ b/src/MoqxRelayContext.cpp @@ -5,11 +5,15 @@ */ #include "MoqxRelayContext.h" +#include "relay/PublisherCrossExecFilter.h" +#include "relay/RelayExecUtil.h" +#include "relay/SubscriberCrossExecFilter.h" #include "stats/MoQStatsCollector.h" #include #include #include +#include #include using namespace moxygen; @@ -18,11 +22,27 @@ namespace openmoq::moqx { MoqxRelayContext::MoqxRelayContext( const folly::F14FastMap& services, - const std::string& relayID + const std::string& relayID, + bool useRelayThread ) : serviceMatcher_(services), relayID_(relayID) { - for (const auto& [name, svc] : services) { - services_.emplace(name, ServiceEntry{svc, std::make_shared(svc.cache, relayID)}); + if (useRelayThread && !services.empty()) { + relayThreadPool_ = std::make_shared( + services.size(), + std::make_shared("moqx-relay") + ); + auto evbs = relayThreadPool_->getAllEventBases(); + XCHECK_EQ(evbs.size(), services.size()); + size_t i = 0; + for (const auto& [name, svc] : services) { + auto relay = std::make_shared(svc.cache, relayID); + relay->setRelayExec(std::make_shared(evbs[i++].get())); + services_.emplace(name, ServiceEntry{svc, std::move(relay)}); + } + } else { + for (const auto& [name, svc] : services) { + services_.emplace(name, ServiceEntry{svc, std::make_shared(svc.cache, relayID)}); + } } } @@ -53,10 +73,7 @@ void MoqxRelayContext::initUpstreams(folly::EventBase* workerEvb) { CHECK(workerEvb) << "initUpstreams: workerEvb must not be null"; workerEvb_ = workerEvb; - // Use the provided worker EVB for all upstream connections. - // Per-EVB providers (one per worker thread) are a follow-up. - auto exec = std::make_shared(workerEvb); - + auto workerExec = std::make_shared(workerEvb); for (auto& [name, entry] : services_) { if (!entry.config.upstream) { continue; @@ -64,15 +81,31 @@ void MoqxRelayContext::initUpstreams(folly::EventBase* workerEvb) { const auto& cfg = *entry.config.upstream; auto verifier = makeUpstreamVerifier(cfg.tls); auto relay = entry.relay; - auto onConnect = [relay](std::shared_ptr session) -> folly::coro::Task { - co_await relay->onUpstreamConnect(session); + auto* relayExec = relay->getRelayExec(); + auto onConnect = [relay, + relayExec](std::shared_ptr session) -> folly::coro::Task { + if (relayExec) { + co_return co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(relayExec), + relay->onUpstreamConnect(session) + ); + } + co_return co_await relay->onUpstreamConnect(session); + }; + auto onDisconnect = [relay, relayExec]() { + runOnExec(relayExec, [relay]() { relay->onUpstreamDisconnect(); }); }; - auto onDisconnect = [relay]() { relay->onUpstreamDisconnect(); }; + std::shared_ptr pubHandler = relay; + std::shared_ptr subHandler = relay; + if (relayExec) { + pubHandler = std::make_shared(relayExec, relay); + subHandler = std::make_shared(relayExec, relay); + } auto provider = std::make_shared( - exec, + workerExec, proxygen::URL(cfg.url), - /*publishHandler=*/entry.relay, - /*subscribeHandler=*/entry.relay, + /*publishHandler=*/pubHandler, + /*subscribeHandler=*/subHandler, verifier, std::move(onConnect), std::move(onDisconnect), @@ -84,7 +117,7 @@ void MoqxRelayContext::initUpstreams(folly::EventBase* workerEvb) { // Eagerly connect so the peering handshake fires before any subscribers // arrive. The connection is lazy in UpstreamProvider but we kick it off // now so the upstream namespace tree is ready. - co_withExecutor(workerEvb, provider->start()).start(); + co_withExecutor(workerExec.get(), provider->start()).start(); } } @@ -158,11 +191,21 @@ folly::Expected MoqxRelayContext::validateAu return folly::makeUnexpected(SessionCloseErrorCode::INVALID_AUTHORITY); } - // Route: set per-service relay as handler + // Route: set per-service relay as handler, wrapping in cross-exec filters if needed auto it = services_.find(*matchedName); CHECK(it != services_.end()) << "Service '" << *matchedName << "' matched but no entry found"; - session->setPublishHandler(it->second.relay); - session->setSubscribeHandler(it->second.relay); + auto* relayExec = it->second.relay->getRelayExec(); + if (relayExec) { + session->setPublishHandler( + std::make_shared(relayExec, it->second.relay) + ); + session->setSubscribeHandler( + std::make_shared(relayExec, it->second.relay) + ); + } else { + session->setPublishHandler(it->second.relay); + session->setSubscribeHandler(it->second.relay); + } return folly::unit; } diff --git a/src/MoqxRelayContext.h b/src/MoqxRelayContext.h index 2fec04b5..ebbf3e4b 100644 --- a/src/MoqxRelayContext.h +++ b/src/MoqxRelayContext.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -54,7 +55,8 @@ class MoqxRelayContext { MoqxRelayContext( const folly::F14FastMap& services, - const std::string& relayID + const std::string& relayID, + bool useRelayThread = true ); void setStatsRegistry(std::shared_ptr registry); @@ -108,6 +110,11 @@ class MoqxRelayContext { ); private: + // When use_relay_thread=true: one dedicated thread per service, each with its + // own executor, isolating relay state from I/O threads. Null when disabled. + // Each relay owns its MoQFollyExecutorImpl; the pool just keeps threads alive. + std::shared_ptr relayThreadPool_; + folly::F14FastMap services_; ServiceMatcher serviceMatcher_; std::string relayID_; diff --git a/src/SubscriptionRegistry.cpp b/src/SubscriptionRegistry.cpp index 41acd6aa..bfe27e0e 100644 --- a/src/SubscriptionRegistry.cpp +++ b/src/SubscriptionRegistry.cpp @@ -13,7 +13,8 @@ namespace openmoq::moqx { bool SubscriptionRegistry::UpstreamSubscribePending::complete( std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ) { active_ = false; return registry_->completeSubscription( @@ -21,7 +22,8 @@ bool SubscriptionRegistry::UpstreamSubscribePending::complete( weakForwarder_, std::move(handle), requestID, - std::move(upstreamSession) + std::move(upstreamSession), + std::move(publisher) ); } @@ -85,7 +87,8 @@ bool SubscriptionRegistry::completeSubscription( std::weak_ptr weakForwarder, std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ) { auto it = subscriptions_.find(ftn); if (it == subscriptions_.end() || it->second.forwarder != weakForwarder.lock()) { @@ -95,6 +98,7 @@ bool SubscriptionRegistry::completeSubscription( rsub.handle = std::move(handle); rsub.requestID = requestID; rsub.upstream = std::move(upstreamSession); + rsub.publisher = std::move(publisher); rsub.promise.setValue(folly::unit); return true; } @@ -114,6 +118,7 @@ SubscriptionRegistry::PublishEntry SubscriptionRegistry::createFromPublish( const moxygen::FullTrackName& ftn, std::shared_ptr forwarder, std::shared_ptr session, + std::shared_ptr publisher, moxygen::RequestID requestID, std::shared_ptr handle, folly::FunctionRef)> chainBuilder @@ -137,6 +142,7 @@ SubscriptionRegistry::PublishEntry SubscriptionRegistry::createFromPublish( rsub.promise.setValue(folly::unit); rsub.requestID = requestID; rsub.handle = std::move(handle); + rsub.publisher = std::move(publisher); rsub.isPublish = true; auto [consumer, topNFilter] = chainBuilder(forwarder); @@ -174,7 +180,7 @@ SubscriptionRegistry::getUpstreamView(const moxygen::FullTrackName& ftn) const { const auto& rsub = it->second; return UpstreamView{ rsub.forwarder, - rsub.upstream, + rsub.publisher, rsub.handle, rsub.requestID, rsub.isPublish, @@ -189,7 +195,7 @@ SubscriptionRegistry::getFetchView(const moxygen::FullTrackName& ftn) const { return std::nullopt; } const auto& rsub = it->second; - return FetchView{rsub.forwarder, rsub.upstream, rsub.requestID, rsub.promise.isFulfilled()}; + return FetchView{rsub.forwarder, rsub.publisher, rsub.requestID, rsub.promise.isFulfilled()}; } std::shared_ptr @@ -201,6 +207,7 @@ SubscriptionRegistry::onPublisherTerminated(const moxygen::FullTrackName& ftn) { auto& rsub = it->second; rsub.handle.reset(); rsub.upstream.reset(); + rsub.publisher.reset(); if (rsub.forwarder->empty()) { subscriptions_.erase(it); return nullptr; diff --git a/src/SubscriptionRegistry.h b/src/SubscriptionRegistry.h index 4f5630dd..76faf574 100644 --- a/src/SubscriptionRegistry.h +++ b/src/SubscriptionRegistry.h @@ -53,12 +53,13 @@ class SubscriptionRegistry { // Identity-checked success path. Re-finds by ftn; checks forwarder identity // to detect a reconnecting publisher that replaced the entry during the - // caller's co_await suspension. Sets handle, requestID, upstreamSession; - // fulfills promise. Returns false if entry is gone or replaced. + // caller's co_await suspension. Sets handle, requestID, upstreamSession, + // publisher; fulfills promise. Returns false if entry is gone or replaced. bool complete( std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ); ~UpstreamSubscribePending(); @@ -109,6 +110,7 @@ class SubscriptionRegistry { const moxygen::FullTrackName& ftn, std::shared_ptr forwarder, std::shared_ptr session, + std::shared_ptr publisher, moxygen::RequestID requestID, std::shared_ptr handle, folly::FunctionRef)> chainBuilder @@ -126,10 +128,10 @@ class SubscriptionRegistry { }; std::optional getTopNView(const moxygen::FullTrackName& ftn) const; - // For onEmpty / forwardChanged / newGroupRequested + // For onEmpty / forwardChanged / newGroupRequested / trackStatus struct UpstreamView { std::shared_ptr forwarder; - std::shared_ptr upstream; + std::shared_ptr publisher; std::shared_ptr handle; moxygen::RequestID requestID; bool isPublish; @@ -140,7 +142,7 @@ class SubscriptionRegistry { // For fetch() struct FetchView { std::shared_ptr forwarder; - std::shared_ptr upstream; + std::shared_ptr publisher; moxygen::RequestID requestID; bool isReady; }; @@ -182,6 +184,7 @@ class SubscriptionRegistry { std::shared_ptr forwarder; std::shared_ptr upstream; + std::shared_ptr publisher; moxygen::RequestID requestID{0}; std::shared_ptr handle; folly::coro::SharedPromise promise; @@ -196,7 +199,8 @@ class SubscriptionRegistry { std::weak_ptr weakForwarder, std::shared_ptr handle, moxygen::RequestID requestID, - std::shared_ptr upstreamSession + std::shared_ptr upstreamSession, + std::shared_ptr publisher ); // Called by UpstreamSubscribePending destructor on failure. diff --git a/src/main.cpp b/src/main.cpp index ae0cebe7..bb570813 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -110,7 +110,8 @@ int main(int argc, char* argv[]) { // === 6. Initialize services === // Construct and configure the application's own services // (MoqxRelayContext, MoqxRelayServer, etc.) - auto context = std::make_shared(config.services, config.relayID); + auto context = + std::make_shared(config.services, config.relayID, config.useRelayThread); // === 6a. Stats registry === auto statsRegistry = std::make_shared(); diff --git a/src/relay/RelayExecUtil.h b/src/relay/RelayExecUtil.h new file mode 100644 index 00000000..ae406288 --- /dev/null +++ b/src/relay/RelayExecUtil.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "CrossExecFilter.h" +#include "PublisherCrossExecFilter.h" +#include +#include +#include +#include + +namespace openmoq::moqx { + +// Wraps c in a CrossExecFilter targeting exec, or returns c if exec is null. +inline std::shared_ptr +maybeCrossExec(folly::Executor* exec, std::shared_ptr c) { + if (!exec) { + return c; + } + return std::make_shared(exec, std::move(c), /*deepCopyPayload=*/false); +} + +// Wraps c in a FetchCrossExecFilter targeting exec, or returns c if exec is null. +inline std::shared_ptr +maybeCrossExec(folly::Executor* exec, std::shared_ptr c) { + if (!exec) { + return c; + } + return FetchCrossExecFilter::create(exec, std::move(c), /*deepCopyPayload=*/false); +} + +// Wraps p in a PublisherCrossExecFilter targeting exec, or returns p if exec is null. +inline std::shared_ptr +maybeCrossExec(folly::Executor* exec, std::shared_ptr p) { + if (!exec) { + return p; + } + return std::make_shared(exec, std::move(p)); +} + +// Wraps session as a Publisher, targeting its executor when relayExec is set. +inline std::shared_ptr +maybeWrapPublisher(folly::Executor* relayExec, std::shared_ptr session) { + // Evaluate getExecutor() before std::move(session) to avoid unspecified + // argument evaluation order leaving session moved-from. + auto* exec = relayExec ? session->getExecutor() : nullptr; + return maybeCrossExec(exec, std::shared_ptr(std::move(session))); +} + +// Runs fn on exec (fire-and-forget) if exec is non-null; otherwise runs it +// inline on the calling thread. +template void runOnExec(folly::Executor* exec, Fn&& fn) { + if (exec) { + exec->add(std::forward(fn)); + } else { + std::forward(fn)(); + } +} + +// When relayExec is set, dispatches fn to sessionExec (fire-and-forget). +// Otherwise runs fn inline (caller is already on the correct thread). +// Use this when relay state changes need to notify a specific session's executor. +template +void runOnSessionExec(folly::Executor* relayExec, folly::Executor* sessionExec, Fn&& fn) { + runOnExec(relayExec ? sessionExec : nullptr, std::forward(fn)); +} + +} // namespace openmoq::moqx diff --git a/test/SubscriptionRegistryTest.cpp b/test/SubscriptionRegistryTest.cpp index 2db00234..1ad6c20a 100644 --- a/test/SubscriptionRegistryTest.cpp +++ b/test/SubscriptionRegistryTest.cpp @@ -44,7 +44,7 @@ TEST(SubscriptionRegistryTest, AwaitSubsequentSucceeds) { registry.getOrCreateFromSubscribe(kFtn, nullptr, subscribeChain) ); - EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr)); + EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr, nullptr)); folly::EventBase evb; auto sub = folly::coro::blockingWait(std::move(task), &evb); @@ -89,7 +89,7 @@ TEST(SubscriptionRegistryTest, PendingCompleteReturnsFalseWhenEntryGone) { registry.remove(kFtn); // simulate publisher replacing the entry mid-subscribe - EXPECT_FALSE(first.pending.complete(nullptr, RequestID(0), nullptr)); + EXPECT_FALSE(first.pending.complete(nullptr, RequestID(0), nullptr, nullptr)); } // Regression: awaitSubsequent must re-find after suspension; erased entry throws. @@ -114,7 +114,7 @@ TEST(SubscriptionRegistryTest, AwaitSubsequentHandlesErasedEntry) { std::move(std::get>(token2)); // Upstream subscribe succeeds — fulfills the promise. - EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr)); + EXPECT_TRUE(first.pending.complete(nullptr, RequestID(0), nullptr, nullptr)); // Entry is erased before the subsequent coroutine resumes. registry.remove(kFtn); @@ -131,11 +131,18 @@ TEST(SubscriptionRegistryTest, CreateFromPublishEvictsSubscribeEntry) { registry.getOrCreateFromSubscribe(kFtn, nullptr, subscribeChain) ); auto originalForwarder = first.forwarder; - first.pending.complete(nullptr, RequestID(0), nullptr); + first.pending.complete(nullptr, RequestID(0), nullptr, nullptr); auto newForwarder = std::make_shared(kFtn, std::nullopt); - auto entry = - registry.createFromPublish(kFtn, newForwarder, nullptr, RequestID(1), nullptr, publishChain); + auto entry = registry.createFromPublish( + kFtn, + newForwarder, + nullptr, + nullptr, + RequestID(1), + nullptr, + publishChain + ); ASSERT_TRUE(entry.evicted.has_value()); EXPECT_EQ(entry.evicted->forwarder, originalForwarder); @@ -147,7 +154,8 @@ TEST(SubscriptionRegistryTest, CreateFromPublishEvictsSubscribeEntry) { TEST(SubscriptionRegistryTest, OnPublisherTerminatedErasesEmptyEntry) { SubscriptionRegistry registry; auto forwarder = std::make_shared(kFtn, std::nullopt); - registry.createFromPublish(kFtn, forwarder, nullptr, RequestID(0), nullptr, publishChain); + registry + .createFromPublish(kFtn, forwarder, nullptr, nullptr, RequestID(0), nullptr, publishChain); auto result = registry.onPublisherTerminated(kFtn); EXPECT_EQ(result, nullptr); From 44a7046e9ea27d11d42147f491a7506b74aca624 Mon Sep 17 00:00:00 2001 From: afrind Date: Mon, 1 Jun 2026 02:29:07 -0400 Subject: [PATCH 4/9] test: add MultiThread relay test mode Add RelayMode::MultiThread to the MoQRelayTest parameterized suite so the relay is exercised across an executor boundary, alongside the existing SingleThread mode. Fixture (MoqxRelayTestFixture.h/.cpp, MoqxRelayTestModes.cpp): - New RelayMode::MultiThread enum value, instantiated in the AllModes suite. - SetUp() starts a ScopedEventBaseThread, wires the relay onto it via setRelayExec, and wraps it in PublisherCrossExecFilter / SubscriberCrossExecFilter. TearDown() drains pending relay-exec tasks and tears down in order to avoid use-after-free on NamespaceTree. - TestMoQExecutor::drive() does a loopOnce(), then in MT mode round-trips through the relay EVB twice to flush task->relay and relay-created tasks. - Add driveIfMultiThread(), make verifyOnRelayExec() hop to the relay EVB, and add driveUntil(pred) to advance async cascades deterministically instead of using a fixed drive count (capped at one iter in ST mode). Tests (DataPlane, NGR, Peer, Publish, SubNs, Subscribe): - Insert driveIfMultiThread() where work crosses executors. - Replace fixed-drive-then-read with driveUntil() plus std::atomic flags. - DuplicateSubgroupCancelledWhenNoActiveConsumers splits ST/MT expectations: in MT the CrossExecFilter defers the CANCELLED error to the next op, so it probes via endOfSubgroup(). - Add reset() calls to simulate the publisher/QUIC resetting open streams. Test Plan: --- test/MoqxRelayDataPlaneTests.cpp | 57 ++++++++++++++++++++++--- test/MoqxRelayNGRTests.cpp | 34 +++++++++++++-- test/MoqxRelayPeerTests.cpp | 1 + test/MoqxRelayPublishTests.cpp | 73 ++++++++++++++++++++++---------- test/MoqxRelaySubNsTests.cpp | 1 + test/MoqxRelaySubscribeTests.cpp | 3 ++ test/MoqxRelayTestFixture.cpp | 39 ++++++++++++++++- test/MoqxRelayTestFixture.h | 44 ++++++++++++++++++- test/MoqxRelayTestModes.cpp | 4 +- 9 files changed, 222 insertions(+), 34 deletions(-) diff --git a/test/MoqxRelayDataPlaneTests.cpp b/test/MoqxRelayDataPlaneTests.cpp index e1daf0a7..001a095f 100644 --- a/test/MoqxRelayDataPlaneTests.cpp +++ b/test/MoqxRelayDataPlaneTests.cpp @@ -57,20 +57,27 @@ TEST_P(MoQRelayTest, DuplicateSubgroupReplacesActiveConsumers) { auto sgForwarder1 = publishConsumer->beginSubgroup(0, 0, 0); EXPECT_TRUE(sgForwarder1.hasValue()); + driveIfMultiThread( + ); // flush so beginSubgroup wires downstream_ and calls mockConsumer beginSubgroup // Duplicate beginSubgroup - should reset v1 consumers and return new - // forwarder + // forwarder. Simulate publisher resetting the old stream. auto sgForwarder2 = publishConsumer->beginSubgroup(0, 0, 0); + driveIfMultiThread(); // flush duplicate so v1 consumers are reset and v2 consumers are created + (*sgForwarder1)->reset(moxygen::ResetStreamErrorCode::CANCELLED); + driveIfMultiThread(); // flush publisher reset of old stream EXPECT_TRUE(sgForwarder2.hasValue()); EXPECT_NE(sgForwarder1.value(), sgForwarder2.value()); // Close the new subgroup cleanly before teardown to avoid reset during // cleanup EXPECT_TRUE(sgForwarder2.value()->endOfSubgroup().hasValue()); + driveIfMultiThread(); // flush endOfSubgroup removeSession(publisherSession); removeSession(sub1); removeSession(sub2); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Test: Duplicate beginSubgroup after all subscribers have stop_sending'd @@ -99,17 +106,40 @@ TEST_P(MoQRelayTest, DuplicateSubgroupCancelledWhenNoActiveConsumers) { auto sgRes = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(sgRes.hasValue()); auto sg = sgRes.value(); + driveIfMultiThread(); // flush so beginSubgroup wires sg.downstream_ before object() enqueues // Trigger stop_sending tombstone via CANCELLED error from object() + if (relayEvb_) { + // MT: enqueue an extra object() after the beginSubgroup lambda + sg->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber + } sg->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber - // Duplicate beginSubgroup - all consumers tombstoned, should return CANCELLED + // Duplicate beginSubgroup - all consumers tombstoned, should return CANCELLED. auto dupRes = publishConsumer->beginSubgroup(0, 0, 0); - EXPECT_TRUE(dupRes.hasError()); - EXPECT_EQ(dupRes.error().code, MoQPublishError::CANCELLED); + if (relayMode() == RelayMode::MultiThread) { + // MT mode: a CrossExecFilter sits between publisher and relay, so it always + // returns a subFilter and the error is deferred until the next operation. + ASSERT_TRUE(dupRes.hasValue()); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber + auto probeRes = dupRes.value()->endOfSubgroup(); + EXPECT_TRUE(probeRes.hasError()); + if (probeRes.hasError()) { + EXPECT_EQ(probeRes.error().code, MoQPublishError::CANCELLED); + } + } else { + // ST and LocalForwarderMT: the publisher writes directly to the (local) + // forwarder with no cross-exec hop, so CANCELLED is returned synchronously. + EXPECT_TRUE(dupRes.hasError()); + EXPECT_EQ(dupRes.error().code, MoQPublishError::CANCELLED); + } removeSession(publisherSession); removeSession(subscriber); + sg->reset(ResetStreamErrorCode::CANCELLED); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Test: Duplicate beginSubgroup with partial stop_sending - active subscriber @@ -143,6 +173,11 @@ TEST_P(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { // CANCELLED to simulate stop_sending EXPECT_CALL(*sgAv1, object(_, _, _, _)) .WillOnce(Return(folly::makeExpected(folly::unit))); + if (relayEvb_) { + EXPECT_CALL(*sgAv1, object(_, _, _, _)) + .WillOnce(Return(folly::makeExpected(folly::unit))) + .RetiresOnSaturation(); + } EXPECT_CALL(*sgBv1, object(_, _, _, _)) .WillOnce( Return(folly::makeUnexpected(MoQPublishError(MoQPublishError::CANCELLED, "stop sending"))) @@ -160,22 +195,34 @@ TEST_P(MoQRelayTest, DuplicateSubgroupSkipsTombstonedSubscriber) { auto sgForwarder1 = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(sgForwarder1.hasValue()); + driveIfMultiThread(); // flush so beginSubgroup wires downstream_ before object() enqueues + if (relayEvb_) { + // MT: enqueue an extra object() after the beginSubgroup lambda + sgForwarder1.value()->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones the subscriber + } // Trigger tombstone for sub B via CANCELLED from object() sgForwarder1.value()->object(0, nullptr, {}, false); + driveIfMultiThread(); // flush so object() runs and tombstones sub B // Duplicate beginSubgroup: sub A gets reset+new, sub B is skipped - // (tombstoned) + // (tombstoned). Simulate publisher resetting the old stream. auto sgForwarder2 = publishConsumer->beginSubgroup(0, 0, 0); + driveIfMultiThread(); // flush duplicate so sgAv1 is reset and sgAv2 is created + (*sgForwarder1)->reset(moxygen::ResetStreamErrorCode::CANCELLED); + driveIfMultiThread(); // flush publisher reset of old stream EXPECT_TRUE(sgForwarder2.hasValue()); EXPECT_NE(sgForwarder1.value(), sgForwarder2.value()); // Close the new subgroup cleanly before teardown EXPECT_TRUE(sgForwarder2.value()->endOfSubgroup().hasValue()); + driveIfMultiThread(); // flush endOfSubgroup removeSession(publisherSession); removeSession(subA); removeSession(subB); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } } // namespace moxygen::test diff --git a/test/MoqxRelayNGRTests.cpp b/test/MoqxRelayNGRTests.cpp index 827fb86d..3a998f6e 100644 --- a/test/MoqxRelayNGRTests.cpp +++ b/test/MoqxRelayNGRTests.cpp @@ -7,6 +7,7 @@ */ #include "MoqxRelayTestFixture.h" +#include namespace moxygen::test { @@ -40,6 +41,7 @@ TEST_P(MoQRelayTest, RelayPublishPropagatesDynamicGroupsToSubscribers) { removeSession(subscriberSession); exec_->drive(); removeSession(publisherSession); + driveIfMultiThread(); } // Test: relay SUBSCRIBE path – dynamic groups from the upstream SubscribeOk is @@ -88,6 +90,7 @@ TEST_P(MoQRelayTest, RelaySubscribePropagatesDynamicGroupsToAllSubscribers) { removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); } // Relay test: When a late-joining subscriber sends NEW_GROUP_REQUEST in its @@ -214,6 +217,7 @@ TEST_P(MoQRelayTest, RelayRequestUpdateNGRCascadedUpstream) { removeSession(publisherSession); removeSession(subscriberSession); + driveIfMultiThread(); } // Relay test: downstream subscriber returns PublishOk carrying NEW_GROUP_REQUEST; @@ -248,10 +252,13 @@ TEST_P(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { doSubscribeNamespace(subscriberSession, kTestNamespace); auto publishHandle = makePublishHandle(); + std::atomic updates{0}; { testing::InSequence seq; - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).Times(1); // forward=true update - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).WillOnce([](const RequestUpdate& update) { + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)) // forward=true update + .WillOnce([&](const RequestUpdate&) { ++updates; }); + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).WillOnce([&](const RequestUpdate& update) { + ++updates; auto ngrValue = getFirstIntParam(update.params, TrackRequestParamKey::NEW_GROUP_REQUEST); ASSERT_TRUE(ngrValue.has_value()); EXPECT_EQ(*ngrValue, 8u); @@ -272,8 +279,16 @@ TEST_P(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { }); exec_->drive(); + // Wait for the async cascade (forward=true + NGR=8) to actually land rather + // than driving a fixed number of times, then lock in the assertion before + // teardown so the trailing forwardChanged(false) → requestUpdate at + // removeSession can't over-saturate the expectation. + EXPECT_TRUE(driveUntil([&] { return updates.load() >= 2; })) + << "NGR cascade incomplete: " << updates.load() << "/2 requestUpdates"; + removeSession(publisherSession); removeSession(subscriberSession); + driveIfMultiThread(); } // Relay test: a second subscriber returning the same NEW_GROUP_REQUEST value in @@ -320,10 +335,13 @@ TEST_P(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { doSubscribeNamespace(subscriber2, kTestNamespace); auto publishHandle = makePublishHandle(); + std::atomic updates{0}; { testing::InSequence seq; - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).Times(1); // forward=true update - EXPECT_CALL(*publishHandle, requestUpdateCalled(_)).Times(1); // NGR update (deduplicated) + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)) // forward=true update + .WillOnce([&](const RequestUpdate&) { ++updates; }); + EXPECT_CALL(*publishHandle, requestUpdateCalled(_)) // NGR update (deduplicated) + .WillOnce([&](const RequestUpdate&) { ++updates; }); } PublishRequest pub; @@ -340,9 +358,17 @@ TEST_P(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { }); exec_->drive(); + // Wait for the deduplicated cascade (forward=true + one NGR) to land, then + // lock in the assertion before teardown; the trailing forwardChanged(false) → + // requestUpdate at subscriber teardown is not asserted here and would + // otherwise race in and over-saturate the expectation. + EXPECT_TRUE(driveUntil([&] { return updates.load() >= 2; })) + << "NGR cascade incomplete: " << updates.load() << "/2 requestUpdates"; + removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); } } // namespace moxygen::test diff --git a/test/MoqxRelayPeerTests.cpp b/test/MoqxRelayPeerTests.cpp index 086a0d0d..16ca38d7 100644 --- a/test/MoqxRelayPeerTests.cpp +++ b/test/MoqxRelayPeerTests.cpp @@ -134,6 +134,7 @@ TEST_P(MoQRelayTest, LocalNamespaceDeliveredToPeerOnReconnect) { removeSession(localPublisher); removeSession(peerSession); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // A mock session that simulates a peer announcing peerNs when the relay diff --git a/test/MoqxRelayPublishTests.cpp b/test/MoqxRelayPublishTests.cpp index ef857123..0419fa71 100644 --- a/test/MoqxRelayPublishTests.cpp +++ b/test/MoqxRelayPublishTests.cpp @@ -7,6 +7,7 @@ */ #include "MoqxRelayTestFixture.h" +#include namespace moxygen::test { @@ -55,10 +56,13 @@ TEST_P(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { // Subscribe to namespace first auto mockConsumer = createMockConsumer(); Extensions receivedExtensions; + std::atomic published{false}; EXPECT_CALL(*subscriber, publish(testing::_, testing::_)) .WillOnce([&mockConsumer, - &receivedExtensions](const PublishRequest& pubReq, auto /*subHandle*/) { + &receivedExtensions, + &published](const PublishRequest& pubReq, auto /*subHandle*/) { receivedExtensions = pubReq.extensions; + published.store(true); return Subscriber::PublishResult(Subscriber::PublishConsumerAndReplyTask{ mockConsumer, []() -> folly::coro::Task> { @@ -94,12 +98,19 @@ TEST_P(MoQRelayTest, PublishExtensionsForwardedToSubscribers) { }); exec_->drive(); + // Wait until the relay has actually forwarded the publish to the subscriber + // (the mock sets `published`), rather than reading receivedExtensions after a + // fixed drive — under parallel load the forwarding may not have completed yet. + ASSERT_TRUE(driveUntil([&] { return published.load(); })) + << "publish was not forwarded to the subscriber"; + // Verify extensions were forwarded EXPECT_EQ(receivedExtensions.getIntExtension(kDeliveryTimeoutExtensionType), 5000); EXPECT_EQ(receivedExtensions.getIntExtension(0xBEEF'0000), 42); removeSession(publisherSession); removeSession(subscriber); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // ============================================================ @@ -153,29 +164,38 @@ TEST_P(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { // Late-joining subscriber 2 should also get extensions Extensions receivedExtensions; + std::atomic published2{false}; auto mockConsumer2 = createMockConsumer(); EXPECT_CALL(*subscriber2, publish(testing::_, testing::_)) - .WillOnce([&mockConsumer2, &receivedExtensions](const PublishRequest& pubReq, auto) { - receivedExtensions = pubReq.extensions; - return Subscriber::PublishResult(Subscriber::PublishConsumerAndReplyTask{ - mockConsumer2, - []() -> folly::coro::Task> { - co_return PublishOk{ - RequestID(2), - true, - 0, - GroupOrder::OldestFirst, - LocationType::LargestObject, - std::nullopt, - std::nullopt - }; - }() - }); - }); + .WillOnce( + [&mockConsumer2, &receivedExtensions, &published2](const PublishRequest& pubReq, auto) { + receivedExtensions = pubReq.extensions; + published2.store(true); + return Subscriber::PublishResult(Subscriber::PublishConsumerAndReplyTask{ + mockConsumer2, + []() -> folly::coro::Task> { + co_return PublishOk{ + RequestID(2), + true, + 0, + GroupOrder::OldestFirst, + LocationType::LargestObject, + std::nullopt, + std::nullopt + }; + }() + }); + } + ); doSubscribeNamespace(subscriber2, kTestNamespace); exec_->drive(); + // Wait until the relay forwards the publish to the late joiner before reading + // the extensions (async under parallel load). + ASSERT_TRUE(driveUntil([&] { return published2.load(); })) + << "publish was not forwarded to the late-joining subscriber"; + // Verify late-joiner received extensions EXPECT_EQ(receivedExtensions.getIntExtension(kDeliveryTimeoutExtensionType), 3000); EXPECT_EQ(receivedExtensions.getIntExtension(0xCAFE'0000), 99); @@ -183,6 +203,7 @@ TEST_P(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Regression test: publisher reconnect after disconnect with active subscriber @@ -226,6 +247,8 @@ TEST_P(MoQRelayTest, PublisherReconnectWithOpenSubgroupNoSegfault) { withSessionContext(publisherSession, [&]() { auto sgRes = consumer->beginSubgroup(/*groupID=*/0, /*subgroupID=*/0, /*priority=*/0, false); ASSERT_TRUE(sgRes.hasValue()) << "beginSubgroup should succeed"; + // Simulate QUIC resetting the open stream on connection drop. + (*sgRes)->reset(moxygen::ResetStreamErrorCode::INTERNAL_ERROR); }); // Step 3: session A's connection drops WITHOUT closing the subgroup. @@ -283,9 +306,9 @@ TEST_P(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { // Subscribe to the track (creates subscribe-path subscription) auto oldConsumer = createMockConsumer(); - bool publishDoneReceived = false; + std::atomic publishDoneReceived{false}; EXPECT_CALL(*oldConsumer, publishDone(_)).WillOnce([&publishDoneReceived](const PublishDone&) { - publishDoneReceived = true; + publishDoneReceived.store(true); return folly::makeExpected(folly::unit); }); auto handle = subscribeToTrack( @@ -301,8 +324,11 @@ TEST_P(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { auto publishConsumer = doPublish(publisherSession, kTestTrackName); ASSERT_NE(publishConsumer, nullptr); - // Old subscriber must have been drained - EXPECT_TRUE(publishDoneReceived) << "Old subscribe-path subscriber should receive publishDone"; + // Old subscriber must have been drained. publishDone crosses executors to the + // old subscribe-path consumer, so wait for it rather than asserting after a + // fixed drive. + EXPECT_TRUE(driveUntil([&] { return publishDoneReceived.load(); })) + << "Old subscribe-path subscriber should receive publishDone"; // New publish-path subscription should be functional: subscribe a new // downstream consumer and verify it receives data from the publisher @@ -319,9 +345,11 @@ TEST_P(MoQRelayTest, PublishReplacesSubscribeDrainsOldAndServesNew) { auto sgRes = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(sgRes.hasValue()); EXPECT_TRUE(sgRes.value()->endOfSubgroup().hasValue()); + driveIfMultiThread(); // flush relayExec_ so beginSubgroup/endOfSubgroup reach subscribers removeSession(publisherSession); removeSession(subscriberSession); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Regression test: publisher reconnects while a subscribe coroutine is @@ -538,6 +566,7 @@ TEST_P(MoQRelayTest, PublishDonePrunesNamespaceTreeNode) { exec_->drive(); removeSession(publisher); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Empty namespace: publishNamespace with an empty TrackNamespace must not crash. diff --git a/test/MoqxRelaySubNsTests.cpp b/test/MoqxRelaySubNsTests.cpp index c89d2cf3..bee9b08f 100644 --- a/test/MoqxRelaySubNsTests.cpp +++ b/test/MoqxRelaySubNsTests.cpp @@ -87,6 +87,7 @@ TEST_P(MoQRelayTest, SubscribeNamespaceDoesntAddDrainingPublish) { removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } TEST_P(MoQRelayTest, SubscribeNamespaceEmptyPrefixRejectedPreV16) { diff --git a/test/MoqxRelaySubscribeTests.cpp b/test/MoqxRelaySubscribeTests.cpp index 816b19c2..fa5f7e26 100644 --- a/test/MoqxRelaySubscribeTests.cpp +++ b/test/MoqxRelaySubscribeTests.cpp @@ -34,6 +34,8 @@ TEST_P(MoQRelayTest, ForwardChangedAfterPublisherTermination) { }); auto subgroupRes = publishConsumer->beginSubgroup(0, 0, 0); ASSERT_TRUE(subgroupRes.hasValue()); + driveIfMultiThread( + ); // flush beginSubgroup so relay subgroup forwarder is wired before publishDone // Publisher terminates — onPublishDone clears handle/upstream. // forwarder->publishDone sets draining and calls drainSubscriber, but the @@ -61,6 +63,7 @@ TEST_P(MoQRelayTest, ForwardChangedAfterPublisherTermination) { removeSession(publisherSession); removeSession(subSession); + driveIfMultiThread(); // flush pending lambdas (sg->reset, cleanup) before mocks are destroyed } // Bug: when a second subscriber with forward=true joins an existing PUBLISH-path diff --git a/test/MoqxRelayTestFixture.cpp b/test/MoqxRelayTestFixture.cpp index 051d97ff..75ec9e53 100644 --- a/test/MoqxRelayTestFixture.cpp +++ b/test/MoqxRelayTestFixture.cpp @@ -16,7 +16,16 @@ void TestMoQExecutor::add(folly::Func func) { MoQFollyExecutorImpl::add(std::move(func)); } void TestMoQExecutor::drive() { - if (auto* evb = getBackingEventBase()) { + auto* evb = getBackingEventBase(); + if (!evb) { + return; + } + evb->loopOnce(); + if (relayEvb_) { + // flush our pending task to relay + relayEvb_->runInEventBaseThreadAndWait([]() {}); + // now flush any tasks created by our task + relayEvb_->runInEventBaseThreadAndWait([]() {}); evb->loopOnce(); } } @@ -30,14 +39,40 @@ void MoQRelayTest::SetUp() { exec_ = std::make_shared(); relay_ = std::make_shared(config::CacheConfig{.maxCachedTracks = 0}); relay_->setAllowedNamespacePrefix(kAllowedPrefix); + if (relayMode() == RelayMode::MultiThread) { + relayThread_ = std::make_unique("relay-test"); + relayEvb_ = relayThread_->getEventBase(); + relay_->setRelayExec(relayEvb_); + exec_->setRelayEvb(relayEvb_); + ASSERT_NE(relay_->getRelayExec(), nullptr); + publisherInterface_ = + std::make_shared(relay_->getRelayExec(), relay_); + subscriberInterface_ = + std::make_shared(relay_->getRelayExec(), relay_); + } } void MoQRelayTest::resetRelay(std::shared_ptr relay) { relay_ = std::move(relay); + if (relayEvb_) { + relay_->setRelayExec(relayEvb_); + publisherInterface_ = std::make_shared(relayEvb_, relay_); + subscriberInterface_ = std::make_shared(relayEvb_, relay_); + } } void MoQRelayTest::TearDown() { + // Drain any pending relay exec tasks (e.g., async publishNamespaceDone dispatches + // from cleanup) before destroying relay state to avoid use-after-free on NamespaceTree. + if (relayEvb_) { + relayEvb_->runInEventBaseThreadAndWait([]() {}); + relayEvb_->runInEventBaseThreadAndWait([]() {}); + } + exec_->setRelayEvb(nullptr); + publisherInterface_.reset(); + subscriberInterface_.reset(); relay_.reset(); + relayThread_.reset(); } std::shared_ptr MoQRelayTest::createMockSession() { @@ -195,6 +230,7 @@ std::shared_ptr MoQRelayTest::doPublish( } co_withExecutor(static_cast(exec_.get()), std::move(res->reply)) .start(); + driveIfMultiThread(); // flush reply to relay exec so publish state is ready return res->consumer; } return std::shared_ptr(nullptr); @@ -255,6 +291,7 @@ std::shared_ptr MoQRelayTest::doPublishWithHandle( getOrCreateMockState(session)->publishConsumers.push_back(consumer); co_withExecutor(static_cast(exec_.get()), std::move(res->reply)) .start(); + driveIfMultiThread(); // flush reply to relay exec so publish state is ready return consumer; }); } diff --git a/test/MoqxRelayTestFixture.h b/test/MoqxRelayTestFixture.h index 245a1cbe..460b6c66 100644 --- a/test/MoqxRelayTestFixture.h +++ b/test/MoqxRelayTestFixture.h @@ -12,8 +12,11 @@ #pragma once #include "MoqxRelay.h" +#include "relay/PublisherCrossExecFilter.h" +#include "relay/SubscriberCrossExecFilter.h" #include #include +#include #include #include #include @@ -31,6 +34,8 @@ namespace moxygen::test { enum class RelayMode { SingleThread, + MultiThread, + // Future: Mode3 — add here, then add a branch in SetUp() and INSTANTIATE entry }; inline void PrintTo(RelayMode mode, std::ostream* os) { @@ -38,6 +43,9 @@ inline void PrintTo(RelayMode mode, std::ostream* os) { case RelayMode::SingleThread: *os << "SingleThread"; return; + case RelayMode::MultiThread: + *os << "MultiThread"; + return; } } @@ -55,8 +63,11 @@ class TestMoQExecutor : public MoQFollyExecutorImpl, public folly::DrivableExecu void drive() override; void driveFor(int n); + void setRelayEvb(folly::EventBase* evb) { relayEvb_ = evb; } + private: folly::EventBase evb_; + folly::EventBase* relayEvb_{nullptr}; }; // Test fixture for MoqxRelay and NamespaceTree tests. @@ -83,7 +94,36 @@ class MoQRelayTest : public ::testing::TestWithParam { folly::Optional expectedError = folly::none ); - template void verifyOnRelayExec(Func&& func) { func(); } + template void verifyOnRelayExec(Func&& func) { + if (relayEvb_) { + relayEvb_->runInEventBaseThreadAndWait(std::forward(func)); + } else { + func(); + } + } + + void driveIfMultiThread() { + if (relayEvb_) { + exec_->drive(); + } + } + + // Drive both executors until `done()` is true or maxIters is reached. Each + // exec_->drive() flushes exec_ and (in MT/LocalForwarderMT modes) synchronizes + // with relayEvb_ via runInEventBaseThreadAndWait, so this deterministically + // advances the relay's async cascades (e.g. forwardChanged/NGR requestUpdate) + // instead of guessing a fixed drive count. Returns done() — false means it + // timed out. In SingleThread mode drive() degrades to a single loopOnce. + template bool driveUntil(Pred&& done, int maxIters = 500) { + // SingleThread mode is fully synchronous: there is no relay thread to await, + // so at most one loopOnce can make new progress. Cap iterations at 1 to + // avoid spinning loopOnce maxIters times when a predicate stays false. + int iters = relayEvb_ ? maxIters : 1; + for (int i = 0; i < iters && !done(); ++i) { + exec_->drive(); + } + return done(); + } template auto withSessionContext(std::shared_ptr session, Func&& func) -> decltype(func()) { @@ -166,6 +206,8 @@ class MoQRelayTest : public ::testing::TestWithParam { std::shared_ptr exec_; std::shared_ptr relay_; + std::unique_ptr relayThread_; + folly::EventBase* relayEvb_{nullptr}; }; } // namespace moxygen::test diff --git a/test/MoqxRelayTestModes.cpp b/test/MoqxRelayTestModes.cpp index 38faea89..bc8d5427 100644 --- a/test/MoqxRelayTestModes.cpp +++ b/test/MoqxRelayTestModes.cpp @@ -9,11 +9,13 @@ namespace moxygen::test { INSTANTIATE_TEST_SUITE_P( AllModes, MoQRelayTest, - ::testing::Values(RelayMode::SingleThread), + ::testing::Values(RelayMode::SingleThread, RelayMode::MultiThread), [](const ::testing::TestParamInfo& info) -> std::string { switch (info.param) { case RelayMode::SingleThread: return "SingleThread"; + case RelayMode::MultiThread: + return "MultiThread"; } return "Unknown"; } From d2e27183f3a67e991f68472800dbf1a8d7f1395d Mon Sep 17 00:00:00 2001 From: afrind Date: Mon, 1 Jun 2026 02:33:17 -0400 Subject: [PATCH 5/9] relay: cache as passive subscriber of primary forwarder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the cache from the inline filter chain and attach it as a passive subscriber of the primary MoQForwarder instead. Previous chain: TopNFilter → TerminationFilter → SubscribeWriteback(cache) → Forwarder New chain: TopNFilter → TerminationFilter → Forwarder + forwarder.addSubscriber(passive, NullTrackConsumer via SubscribeWriteback) MoqxCache::makePassiveConsumer(): returns a SubscribeWriteback wrapping NullTrackConsumer, suitable for use as a passive forwarder subscriber. Behavior is identical to before — cache writes remain synchronous on the publisher's thread and the full MoqxRelayTest suite passes unchanged. The passive subscriber does not affect forwardingSubscribers_, so removing all real subscribers still fires onEmpty and tears down the upstream subscription correctly. Bumps moxygen submodule to the step-1 commit (NullConsumers + passive flag). Co-Authored-By: Claude Sonnet 4.6 --- src/MoqxCache.cpp | 5 +++ src/MoqxCache.h | 5 +++ src/MoqxRelay.cpp | 23 +++++++++--- src/relay/NullConsumers.h | 79 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 src/relay/NullConsumers.h diff --git a/src/MoqxCache.cpp b/src/MoqxCache.cpp index ca37d07e..6eac3aa9 100644 --- a/src/MoqxCache.cpp +++ b/src/MoqxCache.cpp @@ -7,6 +7,7 @@ */ #include "MoqxCache.h" +#include "relay/NullConsumers.h" #include #include @@ -1292,6 +1293,10 @@ class MoqxCache::FetchWriteback : public FetchConsumer { } }; +std::shared_ptr MoqxCache::makePassiveConsumer(const FullTrackName& ftn) { + return getSubscribeWriteback(ftn, std::make_shared()); +} + std::shared_ptr MoqxCache::getSubscribeWriteback( const FullTrackName& ftn, std::shared_ptr consumer diff --git a/src/MoqxCache.h b/src/MoqxCache.h index 19951a71..24125126 100644 --- a/src/MoqxCache.h +++ b/src/MoqxCache.h @@ -48,6 +48,11 @@ class MoqxCache { std::shared_ptr consumer ); + // Returns a TrackConsumer suitable for use as a passive forwarder subscriber. + // Objects delivered to it are written to the cache; nothing is forwarded + // downstream (the inner consumer is a NullTrackConsumer). + std::shared_ptr makePassiveConsumer(const moxygen::FullTrackName& ftn); + // Serves objects from the cache to the consumer. If objects in the range are // not in cache, issue one-or-more FETCH'es upstream. Objects fetched from // upstream are written back to the cache and passed to the consumer. diff --git a/src/MoqxRelay.cpp b/src/MoqxRelay.cpp index ea0b9371..591247e7 100644 --- a/src/MoqxRelay.cpp +++ b/src/MoqxRelay.cpp @@ -596,12 +596,23 @@ std::shared_ptr MoqxRelay::getSubscribeWriteback( SubscriptionRegistry::FilterChainResult MoqxRelay::buildFilterChain(const FullTrackName& ftn, std::shared_ptr forwarder) { - // Build chain: TopNFilter → TerminationFilter → (cache?) → Forwarder - // This ensures property values are observed in both PUBLISH and SUBSCRIBE paths. - auto baseConsumer = cache_ ? cache_->getSubscribeWriteback(ftn, forwarder) - : std::static_pointer_cast(forwarder); - auto terminationFilter = - std::make_shared(shared_from_this(), ftn, std::move(baseConsumer)); + // Build chain: TopNFilter → TerminationFilter → Forwarder + // Cache (if present) attaches as a passive subscriber of the forwarder so + // that it receives objects without affecting the forwarding-subscriber count + // or the upstream subscription lifecycle. + if (cache_) { + forwarder->addSubscriber( + /*session=*/nullptr, + /*forward=*/true, + cache_->makePassiveConsumer(ftn), + /*passive=*/true + ); + } + auto terminationFilter = std::make_shared( + shared_from_this(), + ftn, + std::static_pointer_cast(forwarder) + ); auto topNFilter = std::make_shared(ftn, std::static_pointer_cast(terminationFilter)); topNFilter->setActivityThreshold(activityThreshold_); diff --git a/src/relay/NullConsumers.h b/src/relay/NullConsumers.h new file mode 100644 index 00000000..1527ef43 --- /dev/null +++ b/src/relay/NullConsumers.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "moxygen/MoQConsumers.h" + +#include + +namespace moxygen { + +// SubgroupConsumer that discards all calls. Used as the terminal sink when +// a structural consumer (e.g. SubscribeWriteback) is required but no +// downstream delivery is needed. +class NullSubgroupConsumer : public SubgroupConsumer { +public: + folly::Expected + object(uint64_t, Payload, Extensions, bool) override { + return folly::unit; + } + + folly::Expected + beginObject(uint64_t, uint64_t, Payload, Extensions) override { + return folly::unit; + } + + folly::Expected objectPayload(Payload, bool) override { + return ObjectPublishStatus::IN_PROGRESS; + } + + folly::Expected endOfGroup(uint64_t) override { + return folly::unit; + } + + folly::Expected endOfTrackAndGroup(uint64_t) override { + return folly::unit; + } + + folly::Expected endOfSubgroup() override { return folly::unit; } + + void reset(ResetStreamErrorCode) override {} +}; + +// TrackConsumer that discards all calls. beginSubgroup() returns a +// NullSubgroupConsumer. +class NullTrackConsumer : public TrackConsumer { +public: + folly::Expected setTrackAlias(TrackAlias) override { + return folly::unit; + } + + folly::Expected, MoQPublishError> + beginSubgroup(uint64_t, uint64_t, Priority, bool) override { + return std::make_shared(); + } + + folly::Expected, MoQPublishError> awaitStreamCredit() override { + return folly::makeSemiFuture(); + } + + folly::Expected + objectStream(const ObjectHeader&, Payload, bool) override { + return folly::unit; + } + + folly::Expected + datagram(const ObjectHeader&, Payload, bool) override { + return folly::unit; + } + + folly::Expected publishDone(PublishDone) override { + return folly::unit; + } +}; + +} // namespace moxygen From 9b5688b52d73798aeadc4a64e836fa9df2932978 Mon Sep 17 00:00:00 2001 From: afrind Date: Mon, 1 Jun 2026 02:33:23 -0400 Subject: [PATCH 6/9] relay: per-thread local forwarder data path (use_local_forwarders) Adds an optional data-plane mode in which each I/O thread owns its own MoQForwarder per track, so fanning objects out to subscribers on the same thread needs no cross-thread hop. Gated by the new use_local_forwarders option (which requires use_relay_thread); off by default. Theory of operation -------------------- The relay now runs in one of three modes, chosen per session by createPublisherFilter()/createSubscriberFilter(): 1. Single-threaded (relayExec_ unset): relay state and all forwarders live on the session I/O thread; MoqxRelay is the session handler directly. 2. Cross-exec (relayExec_ set, local forwarders off): relay state is isolated on a dedicated relay executor. Session handlers are Publisher/SubscriberCrossExecFilter, which hop to the relay exec before touching state. One primary forwarder per track lives on the relay exec and fans out to every subscriber via a per-callback cross-exec hop. 3. Local forwarders (relayExec_ set, local forwarders on): the primary forwarder for a track lives on its publisher's I/O thread. Each subscriber I/O thread keeps a thread-local forwarder (LocalForwarderRegistry in tlForwarders_) that attaches to the primary as a single channel subscriber. The primary thus hops once per subscriber *thread*; each local forwarder then fans out to its same-thread subscribers with no further hop. Subscribe (local mode) is split so the relay-exec critical section stays small: subscribeFromSubscriberThread orchestrates on the subscriber thread, dispatching subscribeStatefulWork (registry lookup, first-vs-subsequent decision) to the relay exec, then wiring the local->primary channel; for the first subscriber, completeFirstSubscriber issues the upstream subscribe and applies the SubscribeOk on the primary's exec. Publish (local mode) makes the publisher's local forwarder the primary and registers it on the relay exec via setupPublisherPrimary; the relay chain (TopNFilter -> termination -> cache) attaches to it as a passive channel subscriber so it observes all objects without counting as a forwarding subscriber. Lifecycle callbacks flow LocalForwarderCallback -> CrossExecForwarderCallback -> ChannelForwarderCallback (on the primary's exec), which propagates forward and new-group-request changes upstream via requestUpdate and removes the channel subscriber on onEmpty. LocalForwarderRegistry removal is identity-checked so teardown is order-independent. PendingForwarderCallback captures forwardChanged/newGroupRequested/onEmpty during the setup window for replay once the real callback is installed. WeakRelayForwarderCallback breaks the registry -> forwarder -> callback -> relay reference cycle and is now also used by the cross-exec (non-local) mode. Config and tests ---------------- - use_local_forwarders config field with ConfigResolver validation (requires use_relay_thread), threaded through MoqxRelayContext; --local-forwarders flag added to scripts/perf-test.sh. - The relay test suite gains a third parameterized mode, LocalForwarderMT, run alongside SingleThread and MultiThread. --- CMakeLists.txt | 1 + deps/moxygen | 2 +- scripts/perf-test.sh | 8 +- src/MoqxRelay.cpp | 1034 +++++++++++++++++++++- src/MoqxRelay.h | 155 +++- src/MoqxRelayContext.cpp | 19 +- src/MoqxRelayContext.h | 3 +- src/SubscriptionRegistry.cpp | 1 + src/SubscriptionRegistry.h | 1 + src/config/Config.h | 1 + src/config/ConfigResolver.cpp | 6 + src/config/loader/ParsedConfig.h | 6 + src/main.cpp | 8 +- src/relay/CrossExecForwarderCallback.cpp | 10 + src/relay/CrossExecForwarderCallback.h | 1 + src/relay/LocalForwarderCallback.h | 70 ++ src/relay/LocalForwarderRegistry.h | 81 ++ src/relay/PublisherCrossExecFilter.h | 2 +- src/relay/SubscriberCrossExecFilter.h | 2 +- src/relay/WeakRelayForwarderCallback.cpp | 31 + src/relay/WeakRelayForwarderCallback.h | 32 + test/MoqxRelayNGRTests.cpp | 10 + test/MoqxRelayPublishTests.cpp | 1 + test/MoqxRelayTestFixture.cpp | 19 +- test/MoqxRelayTestFixture.h | 6 +- test/MoqxRelayTestModes.cpp | 4 +- 26 files changed, 1438 insertions(+), 76 deletions(-) create mode 100644 src/relay/LocalForwarderCallback.h create mode 100644 src/relay/LocalForwarderRegistry.h create mode 100644 src/relay/WeakRelayForwarderCallback.cpp create mode 100644 src/relay/WeakRelayForwarderCallback.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cee1aa72..c94c7a8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,6 +139,7 @@ add_library(moqx_core STATIC src/relay/PropertyRanking.cpp src/relay/CrossExecFilter.cpp src/relay/CrossExecForwarderCallback.cpp + src/relay/WeakRelayForwarderCallback.cpp src/relay/PublisherCrossExecFilter.cpp src/relay/SubscriberCrossExecFilter.cpp ) diff --git a/deps/moxygen b/deps/moxygen index 2dff34e0..8850292d 160000 --- a/deps/moxygen +++ b/deps/moxygen @@ -1 +1 @@ -Subproject commit 2dff34e0eaf67953c3512c1d6e5be6164770ce38 +Subproject commit 8850292d49db9ca70a7bf627b2ba3d5539e9a2d0 diff --git a/scripts/perf-test.sh b/scripts/perf-test.sh index ca3691ae..807122f5 100755 --- a/scripts/perf-test.sh +++ b/scripts/perf-test.sh @@ -15,6 +15,8 @@ # --delivery-timeout N Delivery timeout in ms (default: 500) # --transport TYPE quic or webtransport (default: quic) # --no-relay-thread Disable relay exec thread (use_relay_thread: false) +# --local-forwarders Enable per-subscriber-thread local forwarders +# (use_local_forwarders: true; requires relay thread) # --io-threads N Number of relay IO threads (default: 1) # --threads N Number of perf client threads (default: 2) # --relay-log SPEC folly XLOG config passed as --logging=SPEC to relay @@ -52,6 +54,7 @@ DURATION=30 DELIVERY_TIMEOUT=500 TRANSPORT="quic" USE_RELAY_THREAD="true" +USE_LOCAL_FORWARDERS="false" IO_THREADS=1 CLIENT_THREADS=2 RELAY_LOG_SPEC="" @@ -80,6 +83,7 @@ while [[ $# -gt 0 ]]; do --delivery-timeout) DELIVERY_TIMEOUT="$2"; shift 2 ;; --transport) TRANSPORT="$2"; shift 2 ;; --no-relay-thread) USE_RELAY_THREAD="false"; shift ;; + --local-forwarders) USE_LOCAL_FORWARDERS="true"; shift ;; --io-threads) IO_THREADS="$2"; shift 2 ;; --threads) CLIENT_THREADS="$2"; shift 2 ;; --relay-log) RELAY_LOG_SPEC="$2"; shift 2 ;; @@ -206,6 +210,7 @@ cat >"$RELAY_CFG" </dev/null || true -echo "Starting relay (use_relay_thread=$USE_RELAY_THREAD, io_threads=$IO_THREADS, transport=$TRANSPORT, mvfst_bpf_steering=$BPF_STEERING)..." +echo "Starting relay (use_relay_thread=$USE_RELAY_THREAD, local_forwarders=$USE_LOCAL_FORWARDERS, io_threads=$IO_THREADS, transport=$TRANSPORT, mvfst_bpf_steering=$BPF_STEERING)..." RELAY_LOGGING_ARG=() [[ -n "$RELAY_LOG_SPEC" ]] && RELAY_LOGGING_ARG=("--logging=$RELAY_LOG_SPEC") if [[ -n "$JEMALLOC" ]]; then diff --git a/src/MoqxRelay.cpp b/src/MoqxRelay.cpp index 591247e7..d6a926fa 100644 --- a/src/MoqxRelay.cpp +++ b/src/MoqxRelay.cpp @@ -7,9 +7,13 @@ */ #include "MoqxRelay.h" +#include "relay/CrossExecFilter.h" #include "relay/CrossExecForwarderCallback.h" -#include "relay/RelayExecUtil.h" +#include "relay/LocalForwarderCallback.h" +#include "relay/NullConsumers.h" +#include "relay/PublisherCrossExecFilter.h" #include "relay/SubscriberCrossExecFilter.h" +#include "relay/WeakRelayForwarderCallback.h" #include #include #include @@ -318,14 +322,52 @@ MoqxRelay::publish(PublishRequest pub, std::shared_ptr(ftn, pub.largest); + forwarder->setExtensions(pub.extensions); + auto relayChainFilter = std::make_shared(relayExec_, nullptr); + // TODO: update forward=true when observers register with top-N (and think about cache) + forwarder->addChannelSubscriber(relayExec_, /*forward=*/false, relayChainFilter); + auto setup = + publishWithSession(std::move(pub), std::move(handle), std::move(session), forwarder); + if (setup.hasError()) { + return folly::makeUnexpected(setup.error()); + } + auto topNView = registry_.getTopNView(ftn); + if (topNView && topNView->topNFilter) { + relayChainFilter->setDownstream(topNView->topNFilter); + } + return PublishConsumerAndReplyTask{ + forwarder, + folly::coro::makeTask>( + folly::Expected(std::move(setup.value().publishOk)) + ) + }; + } + maybeSetSessionExec(*session); - return publishWithSession(std::move(pub), std::move(handle), std::move(session)); + auto setup = publishWithSession(std::move(pub), std::move(handle), std::move(session)); + if (setup.hasError()) { + return folly::makeUnexpected(setup.error()); + } + return PublishConsumerAndReplyTask{ + std::move(setup.value().consumer), + folly::coro::makeTask>( + folly::Expected(std::move(setup.value().publishOk)) + ) + }; } -Subscriber::PublishResult MoqxRelay::publishWithSession( +MoqxRelay::PublishSetupResult MoqxRelay::publishWithSession( PublishRequest pub, std::shared_ptr handle, - std::shared_ptr session + std::shared_ptr session, + std::shared_ptr forwarder ) { // Handle duplicate publisher at relay level before registering in the tree. // Move the forwarder out and erase the entry BEFORE calling publishDone. @@ -338,8 +380,10 @@ Subscriber::PublishResult MoqxRelay::publishWithSession( // subgroups). onPublishDone() already reset handle and drained the // forwarder, so skip both calls to avoid a null deref and a double // publishDone. - auto forwarder = std::make_shared(pub.fullTrackName, pub.largest); - forwarder->setExtensions(pub.extensions); + if (!forwarder) { + forwarder = std::make_shared(pub.fullTrackName, pub.largest); + forwarder->setExtensions(pub.extensions); + } auto publisherWrapped = maybeWrapPublisher(relayExec_, session); auto publishEntry = registry_.createFromPublish( @@ -389,13 +433,17 @@ Subscriber::PublishResult MoqxRelay::publishWithSession( } ); - if (relayExec_) { - forwarder->setCallback( - std::make_shared(relayExec_, forwarder, shared_from_this()) - ); - } else { + if (relayExec_ && !useLocalForwarders_) { + // Non-local: forwarder lives on relayExec_; callbacks fire directly — no + // CrossExec dispatch needed. Weak ref breaks the registry → forwarder → + // callback → relay cycle. + forwarder->setCallback(std::make_shared(weak_from_this())); + } else if (!relayExec_) { forwarder->setCallback(shared_from_this()); } + // useLocalForwarders_ case: the local forwarder lives on publisherExec and + // already had its CrossExecForwarderCallback installed by setupPublisherLocal, + // which dispatches onEmpty to relayExec_. Must not overwrite it here. uint64_t nSubscribers = 0; bool hasTrackFilterSub = false; @@ -409,7 +457,14 @@ Subscriber::PublishResult MoqxRelay::publishWithSession( if (outSession != session && (info.options == SubscribeNamespaceOptions::PUBLISH || info.options == SubscribeNamespaceOptions::BOTH)) { nSubscribers++; - if (!addSubscriberAndPublish(outSession, forwarder, info.forward, /*pinned=*/true)) { + auto* publisherExec = relayExec_ ? session->getExecutor() : nullptr; + if (!addSubscriberAndPublish( + outSession, + forwarder, + info.forward, + /*pinned=*/true, + publisherExec + )) { XLOG(ERR) << "addSubscriberAndPublish failed for " << forwarder->fullTrackName(); continue; } @@ -421,9 +476,9 @@ Subscriber::PublishResult MoqxRelay::publishWithSession( // When subscribers join later via subscribeNamespace, forwardChanged() sends REQUEST_UPDATE. bool shouldForward = (nSubscribers > 0) || hasTrackFilterSub; - return PublishConsumerAndReplyTask{ + return PublishSetup{ publishEntry.consumer, - folly::coro::makeTask>(PublishOk{ + PublishOk{ pub.requestID, /*forward=*/shouldForward, kDefaultPriority, @@ -431,16 +486,14 @@ Subscriber::PublishResult MoqxRelay::publishWithSession( LocationType::AbsoluteRange, kLocationMin, kLocationMax.group - }) + } }; } namespace { -// Free-function coroutine: awaits the publish reply and calls onPublishOk. -// Holds forwarder alive because subscriber keeps a raw ref into it. folly::coro::Task awaitPublishReply( - std::shared_ptr forwarder, + std::shared_ptr forwarder, // keeps subscriber's raw ref alive std::shared_ptr subscriber, folly::coro::Task> reply ) { @@ -477,7 +530,6 @@ std::optional MoqxRelay::startPublish( return std::nullopt; } subscriber->pinned = pinned; - XLOG(DBG4) << "added subscriber for ftn=" << forwarder->fullTrackName(); Subscriber::PublishResult pub; if (subscriberExec) { SubscriberCrossExecFilter wrapped(subscriberExec, session); @@ -495,17 +547,28 @@ std::optional MoqxRelay::startPublish( } bool MoqxRelay::addSubscriberAndPublish( - std::shared_ptr session, + std::shared_ptr subscriberSession, std::shared_ptr forwarder, bool forward, - bool pinned + bool pinned, + folly::Executor* publisherExec ) { + if (relayExec_ && useLocalForwarders_) { + XCHECK(publisherExec + ) << "addSubscriberAndPublish: publisherExec required in local-forwarder mode"; + co_withExecutor( + subscriberSession->getExecutor(), + publishViaLocalForwarder(subscriberSession, forwarder, publisherExec, forward, pinned) + ) + .start(); + return true; + } auto p = startPublish( - session, + subscriberSession, forwarder, forward, pinned, - relayExec_ ? session->getExecutor() : nullptr + relayExec_ ? subscriberSession->getExecutor() : nullptr ); if (!p) { return false; @@ -514,7 +577,7 @@ bool MoqxRelay::addSubscriberAndPublish( // publishDone) are always on the same thread and cannot race. For // single-thread (relayExec_ == nullptr) this is the subscriber's exec. co_withExecutor( - relayExec_ ? static_cast(relayExec_) : session->getExecutor(), + relayExec_ ? static_cast(relayExec_) : subscriberSession->getExecutor(), awaitPublishReply(forwarder, std::move(p->subscriber), std::move(p->reply)) ) .start(); @@ -596,10 +659,26 @@ std::shared_ptr MoqxRelay::getSubscribeWriteback( SubscriptionRegistry::FilterChainResult MoqxRelay::buildFilterChain(const FullTrackName& ftn, std::shared_ptr forwarder) { - // Build chain: TopNFilter → TerminationFilter → Forwarder - // Cache (if present) attaches as a passive subscriber of the forwarder so - // that it receives objects without affecting the forwarding-subscriber count - // or the upstream subscription lifecycle. + if (relayExec_ && useLocalForwarders_) { + // Multi-iothread with local forwarders: publisher writes directly to forwarder on primaryExec. + // relayChainFilter (added by publish()) fans off to topNFilter/termination/cache. + std::shared_ptr chainEnd = + cache_ ? cache_->makePassiveConsumer(ftn) : std::make_shared(); + auto terminationFilter = + std::make_shared(shared_from_this(), ftn, std::move(chainEnd)); + auto topNFilter = std::make_shared( + ftn, + std::static_pointer_cast(terminationFilter) + ); + topNFilter->setActivityThreshold(activityThreshold_); + return SubscriptionRegistry::FilterChainResult{ + .consumer = std::static_pointer_cast(forwarder), + .topNFilter = topNFilter + }; + } + + // Single-threaded: chain wraps forwarder directly (no cross-exec needed). + // Cache attaches as a passive subscriber of the forwarder. if (cache_) { forwarder->addSubscriber( /*session=*/nullptr, @@ -616,7 +695,6 @@ MoqxRelay::buildFilterChain(const FullTrackName& ftn, std::shared_ptr(ftn, std::static_pointer_cast(terminationFilter)); topNFilter->setActivityThreshold(activityThreshold_); - return SubscriptionRegistry::FilterChainResult{ .consumer = std::static_pointer_cast(topNFilter), .topNFilter = topNFilter @@ -758,7 +836,14 @@ folly::coro::Task MoqxRelay::subscribeNames (subNs.options == SubscribeNamespaceOptions::BOTH || subNs.options == SubscribeNamespaceOptions::PUBLISH)) { if (publishSession != session) { - if (!addSubscriberAndPublish(session, forwarder, subNs.forward, /*pinned=*/true)) { + auto* publisherExec = relayExec_ ? publishSession->getExecutor() : nullptr; + if (!addSubscriberAndPublish( + session, + forwarder, + subNs.forward, + /*pinned=*/true, + publisherExec + )) { XLOG(ERR) << "addSubscriberAndPublish failed for " << ftn; return; } @@ -808,6 +893,883 @@ MoqxRelay::PublishState MoqxRelay::findPublishState(const FullTrackName& ftn) { return state; } +// === LocalSubscribeFilter, createPublisherFilter, and createSubscriberFilter === + +// Installed as the publish handler for client sessions when useLocalForwarders_ +// is true. Overrides subscribe() to call subscribeFromSubscriberThread() directly +// on the subscriber's executor instead of hopping to relayExec_ first. +// All other Publisher methods fall through to PublisherCrossExecFilter which +// still dispatches to relayExec_. +class MoqxRelay::LocalSubscribeFilter final : public PublisherCrossExecFilter { +public: + LocalSubscribeFilter(folly::Executor* relayExec, std::shared_ptr relay) + : PublisherCrossExecFilter(relayExec, relay), relay_(std::move(relay)) {} + + folly::coro::Task subscribe( + moxygen::SubscribeRequest subReq, + std::shared_ptr consumer + ) override { + if (subReq.fullTrackName.trackNamespace.empty()) { + co_return folly::makeUnexpected(moxygen::SubscribeError{ + subReq.requestID, + moxygen::SubscribeErrorCode::TRACK_NOT_EXIST, + "namespace required" + }); + } + auto session = moxygen::MoQSession::getRequestSession(); + auto* subscriberExec = session->getExecutor(); + // No executor hop: subscribeFromSubscriberThread starts on subscriberExec. + co_return co_await relay_->subscribeFromSubscriberThread( + std::move(subReq), + std::move(consumer), + std::move(session), + subscriberExec + ); + } + +private: + std::shared_ptr relay_; +}; + +std::shared_ptr MoqxRelay::createPublisherFilter() { + if (relayExec_ && useLocalForwarders_) { + return std::make_shared(relayExec_, shared_from_this()); + } + if (relayExec_) { + return std::make_shared(relayExec_, shared_from_this()); + } + return shared_from_this(); +} + +// Installed as the subscribe handler for client sessions when useLocalForwarders_ +// is true. Overrides publish() to create a local publisher forwarder on the +// publisher's executor, avoiding the cross-exec hop to relayExec_ for data. +// All other Subscriber methods (publishNamespace, goaway) fall through to +// SubscriberCrossExecFilter which still dispatches to relayExec_. +class MoqxRelay::LocalPublishFilter final : public SubscriberCrossExecFilter { +public: + LocalPublishFilter(folly::Executor* relayExec, std::shared_ptr relay) + : SubscriberCrossExecFilter(relayExec, relay), relay_(std::move(relay)) {} + + PublishResult publish( + moxygen::PublishRequest pub, + std::shared_ptr handle + ) override { + auto session = moxygen::MoQSession::getRequestSession(); + return relay_ + ->publishFromPublisherThread(std::move(pub), std::move(handle), std::move(session)); + } + +private: + std::shared_ptr relay_; +}; + +std::shared_ptr MoqxRelay::createSubscriberFilter() { + if (relayExec_ && useLocalForwarders_) { + return std::make_shared(relayExec_, shared_from_this()); + } + if (relayExec_) { + return std::make_shared(relayExec_, shared_from_this()); + } + return shared_from_this(); +} + +Subscriber::PublishResult MoqxRelay::publishFromPublisherThread( + PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session +) { + if (!pub.fullTrackName.trackNamespace.startsWith(allowedNamespacePrefix_)) { + return folly::makeUnexpected( + PublishError{pub.requestID, PublishErrorCode::UNINTERESTED, "bad namespace"} + ); + } + if (pub.fullTrackName.trackNamespace.empty()) { + return folly::makeUnexpected( + PublishError{pub.requestID, PublishErrorCode::INTERNAL_ERROR, "namespace required"} + ); + } + + auto ftn = pub.fullTrackName; + // The publisher's local forwarder IS the primary forwarder — it lives on + // publisherExec and is registered in the registry by setupPublisherPrimary. + auto localPubFwd = std::make_shared(ftn, pub.largest); + localPubFwd->setExtensions(pub.extensions); + + // Cache the publisher's local forwarder in this thread's registry so a + // subscriber on the same iothread reuses it instead of building a second + // forwarder. The registry holds a strong ref; the forwarder's onPublishDone + // callback removes the entry to release it (identity-checked). + if (!tlForwarders_.get()) { + tlForwarders_.reset(new LocalForwarderRegistry()); + } + + // Set the forwarder callback here on publisherExec (where localPubFwd lives), + // before the reply task hops to relayExec_. publishWithSession is called from + // relayExec_ via setupPublisherPrimary and must not touch the callback there. + // + // The LocalForwarderCallback wrapper removes localPubFwd from tlForwarders_ on + // its own thread when the publisher terminates (onPublishDone). removeOnEmpty + // is false: the publisher's forwarder must survive subscriber churn — it is + // removed only when the source ends, not when its last subscriber leaves. + { + auto relayAdapter = std::make_shared( + std::weak_ptr(shared_from_this()) + ); + auto crossExec = std::make_shared( + relayExec_, + localPubFwd, + std::move(relayAdapter) + ); + localPubFwd->setCallback(std::make_shared( + tlForwarders_.get(), + ftn, + std::move(crossExec), + /*removeOnEmpty=*/false + )); + } + + // The publisher's forwarder is authoritative — claim the slot, displacing any + // stale subscribe-path local forwarder so same-thread subscribers reuse THIS + // forwarder via the fast path. + tlForwarders_->set(ftn, localPubFwd); + + // Channel sub for the relay chain (topNFilter → terminationFilter → cache). + // FIFO on relayExec_ guarantees setupPublisherPrimary wires the downstream + // before any object dispatches from localPubFwd arrive on relayExec_. + auto relayChainFilter = std::make_shared(relayExec_, nullptr); + // forward=true + passive=true: internal relay chain observes all objects but + // does not count as a real forwarding subscriber (see completeFirstSubscriber). + localPubFwd + ->addChannelSubscriber(relayExec_, /*forward=*/true, relayChainFilter, /*passive=*/true); + + auto reply = folly::coro::co_invoke( + [exec = relayExec_, + relay = shared_from_this(), + pub = std::move(pub), + handle = std::move(handle), + session = std::move(session), + localPubFwd, + relayChainFilter]() mutable -> folly::coro::Task> { + co_return co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(exec), + relay->setupPublisherPrimary( + std::move(pub), + std::move(handle), + std::move(session), + std::move(localPubFwd), + std::move(relayChainFilter) + ) + ); + } + ); + + // The publisher writes directly to its local forwarder. When the publisher + // terminates, localPubFwd->publishDone fires onPublishDone on its callback + // (LocalForwarderCallback), which removes the tlForwarders_ entry on this + // thread — no separate termination filter needed. + auto consumer = std::static_pointer_cast(std::move(localPubFwd)); + return PublishConsumerAndReplyTask{std::move(consumer), std::move(reply)}; +} + +folly::coro::Task> MoqxRelay::setupPublisherPrimary( + PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session, + std::shared_ptr primaryFwd, + std::shared_ptr relayChainFilter +) { + // Running on relayExec_. Register the publisher's local forwarder as the + // primary — no second forwarder here; it lives on publisherExec. + auto ftn = pub.fullTrackName; + auto setup = + publishWithSession(std::move(pub), std::move(handle), std::move(session), primaryFwd); + if (setup.hasError()) { + co_return folly::makeUnexpected(setup.error()); + } + + auto topNView = registry_.getTopNView(ftn); + XCHECK(topNView && topNView->topNFilter) + << "setupPublisherPrimary: topNFilter always present in MT mode"; + relayChainFilter->setDownstream(topNView->topNFilter); + + co_return setup.value().publishOk; +} + +// === Multi-iothread subscribe helpers === + +folly::coro::Task +MoqxRelay::subscribeStatefulWork(SubscribeRequest subReq) { + const auto& ftn = subReq.fullTrackName; + + if (!registry_.exists(ftn) && upstream_ && + !namespaceTree_.findPublisherSession(ftn.trackNamespace)) { + co_await upstream_->waitForConnected(kUpstreamConnectWaitTimeout); + } + + auto firstOrSubsequent = registry_.getOrCreateFromSubscribe( + ftn, + shared_from_this(), + [this, &ftn](std::shared_ptr f) { return buildFilterChain(ftn, std::move(f)); } + ); + + if (auto* first = std::get_if(&firstOrSubsequent)) { + auto upstreamSession = namespaceTree_.findPublisherSession(ftn.trackNamespace); + if (!upstreamSession) { + co_return StatefulSubscribeResult{ + nullptr, + nullptr, + folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::TRACK_NOT_EXIST, + "no such namespace or track" + }), + std::nullopt + }; + } + + const auto clientRequestID = subReq.requestID; + SubscribeRequest upstreamSubReq = subReq; + upstreamSubReq.priority = kDefaultUpstreamPriority; + upstreamSubReq.groupOrder = GroupOrder::Default; + upstreamSubReq.locType = LocationType::LargestObject; + upstreamSubReq.forward = false; // updated to real value in completeFirstSubscriber + upstreamSubReq.requestID = upstreamSession->peekNextRequestID(); + + // first->consumer is the primary forwarder (lives on primaryExec == upstreamSession's + // executor). No cross-exec wrapping — upstream delivers on that executor directly. + auto upstreamConsumer = first->consumer; + StatefulSubscribeResult + result{first->forwarder, upstreamSession->getExecutor(), std::nullopt, std::nullopt}; + result.firstSetup.emplace(StatefulSubscribeResult::FirstSubscriberSetup{ + upstreamSession, + std::move(upstreamSubReq), + std::move(upstreamConsumer), + std::move(first->pending), + clientRequestID + }); + co_return result; + + } else { + auto sub = co_await std::get>( + std::move(firstOrSubsequent) + ); + auto upstreamView = registry_.getUpstreamView(ftn); + auto* primaryExec = upstreamView ? upstreamView->publisherExec : nullptr; + co_return StatefulSubscribeResult{sub.forwarder, primaryExec, std::nullopt, std::nullopt}; + } +} + +namespace { + +// Free coroutine helpers for ChannelForwarderCallback — avoids lambda-coroutine +// capture lifetime issues (lambda lives on caller stack; coroutine frame outlives it). +folly::coro::Task channelSubscribeUpdate( + std::shared_ptr handle, + bool forward +) { + auto res = co_await handle->requestUpdate(RequestUpdate{ + RequestID(0), + handle->subscribeOk().requestID, + kLocationMin, + kLocationMax.group, + kDefaultPriority, + forward + }); + if (res.hasError()) { + XLOG(ERR) << "requestUpdate failed: " << res.error().reasonPhrase; + } +} + +folly::coro::Task channelNewGroupRequestUpdate( + std::shared_ptr handle, + uint64_t group +) { + RequestUpdate update; + update.requestID = RequestID(0); + update.existingRequestID = handle->subscribeOk().requestID; + update.params.insertParam( + Parameter(folly::to_underlying(TrackRequestParamKey::NEW_GROUP_REQUEST), group) + ); + auto res = co_await handle->requestUpdate(std::move(update)); + if (res.hasError()) { + XLOG(ERR) << "NEW_GROUP_REQUEST update failed: " << res.error().reasonPhrase; + } +} + +// === MoQForwarder::Callback chain overview === +// +// MoQForwarder fires three callbacks: onEmpty (last subscriber left), +// forwardChanged (forwarding subscriber count crossed zero), and +// newGroupRequested (subscriber issued a NEW_GROUP_REQUEST). +// +// Single-threaded mode: +// forwarder.callback = MoqxRelay (direct, no hop) +// +// Multi-threaded — primary forwarder (lives on primaryExec): +// primaryFwd.callback = +// CrossExecForwarderCallback(relayExec_, primaryFwd, +// WeakRelayForwarderCallback(relay)) +// +// [primaryExec] CrossExecForwarderCallback: captures ftn by value, +// dispatches to relayExec_ fire-and-forget +// ↓ +// [relayExec_] WeakRelayForwarderCallback: recovers relay via weak_ptr, +// calls onEmptyImpl / forwardChangedImpl / newGroupRequestedImpl +// +// Multi-threaded — local forwarder (lives on subscriberExec, subscribe path): +// During setup window: +// localFwd.callback = PendingForwarderCallback +// captures events; replayed onto finalCallback after setup +// +// After setup: +// localFwd.callback = +// LocalForwarderCallback(localReg, ftn, +// CrossExecForwarderCallback(primaryExec, localFwd, +// ChannelForwarderCallback(primaryFwd, subscriberExec, primaryExec))) +// +// [subscriberExec] LocalForwarderCallback: removes from localReg on onEmpty, +// passes through forwardChanged / newGroupRequested +// ↓ (CrossExecForwarderCallback dispatches to primaryExec) +// [primaryExec] ChannelForwarderCallback: +// onEmpty → primaryFwd->removeChannelSubscriberByExec(subscriberExec) +// (may cascade into primaryFwd's own callback chain above) +// forwardChanged / newGroupRequested → launch background coro +// → requestUpdate(handle_) +// +// Weak-ptr discipline: +// WeakRelayForwarderCallback holds weak_ptr to break the cycle +// registry → forwarder → callback → relay → registry. +// CrossExecForwarderCallback holds weak_ptr to avoid a permanent +// ownership cycle; it locks eagerly on the calling thread (where the forwarder +// is alive) and moves the shared_ptr into the lambda to keep it alive across +// the executor hop. + +// Captures forwardChanged/newGroupRequested/onEmpty during setup (getOrCreate→setCallback window) +// so they can be replayed once the real callback is installed. +class PendingForwarderCallback : public moxygen::MoQForwarder::Callback { +public: + PendingForwarderCallback( + openmoq::moqx::LocalForwarderRegistry* localReg, + moxygen::FullTrackName ftn + ) + : localReg_(localReg), ftn_(std::move(ftn)) {} + + void forwardChanged(moxygen::MoQForwarder*, bool f) override { lastForward_ = f; } + void newGroupRequested(moxygen::MoQForwarder*, uint64_t g) override { + maxGroup_ = std::max(maxGroup_.value_or(0), g); + } + void onEmpty(moxygen::MoQForwarder* forwarder) override { + localReg_->remove(ftn_, forwarder); + sawOnEmpty_ = true; + } + + openmoq::moqx::LocalForwarderRegistry* localReg_; + moxygen::FullTrackName ftn_; + std::optional lastForward_; + std::optional maxGroup_; + bool sawOnEmpty_{false}; +}; + +// Runs on the primary forwarder's executor (publisher's iothread). Propagates +// channel-subscriber lifecycle events to the primary and upstream handle. +class ChannelForwarderCallback : public moxygen::MoQForwarder::Callback { +public: + ChannelForwarderCallback( + std::weak_ptr weakPrimary, + folly::Executor* subscriberExec, + folly::Executor* primaryExec + ) + : weakPrimary_(std::move(weakPrimary)), subscriberExec_(subscriberExec), + primaryExec_(primaryExec) {} + + // Called on primaryExec immediately after addChannelSubscriber returns. + void setHandle(std::shared_ptr h) { + handle_ = std::move(h); + } + + // Called on primaryExec after addChannelSubscriber. Holds the last shared_ptr + // so that onEmpty can defer destruction to subscriberExec_, ensuring all + // in-flight this-capturing lambdas on subscriberExec_ run before the filter + // is destroyed. + void setFilter(std::shared_ptr filter) { crossExecFilter_ = std::move(filter); } + + void onEmpty(moxygen::MoQForwarder* /*localFwd*/) override { + auto primary = weakPrimary_.lock(); + if (primary) { + primary->removeChannelSubscriberByExec(subscriberExec_); + } + // Break the reference cycle that keeps the local-forwarding chain alive: + // localFwd → callback (LocalForwarderCallback → CrossExecForwarderCallback + // → this ChannelForwarderCallback) → handle_ (channel Subscriber) + // → trackConsumer (CrossExecFilter) → localFwd. + // removeChannelSubscriberByExec drops the primary's strong ref to the channel + // Subscriber, but handle_ still pins it (and thus the whole chain). When the + // primary was already destroyed (replace scenario), removeChannelSubscriberByExec + // never ran, so handle_ is the only remaining root. Reset it unconditionally. + handle_.reset(); + // Post filter destruction to subscriberExec_ so FIFO ordering guarantees + // all previously-enqueued this-capturing lambdas run before the destructor. + if (crossExecFilter_) { + subscriberExec_->add([f = std::move(crossExecFilter_)]() {}); + } + } + + void forwardChanged(moxygen::MoQForwarder*, bool forward) override { + if (!handle_) { + return; + } + folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec_), + channelSubscribeUpdate(handle_, forward) + ) + .start(); + } + + void newGroupRequested(moxygen::MoQForwarder*, uint64_t group) override { + if (!handle_) { + return; + } + folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec_), + channelNewGroupRequestUpdate(handle_, group) + ) + .start(); + } + +private: + std::weak_ptr weakPrimary_; + folly::Executor* subscriberExec_; + folly::Executor* primaryExec_; + std::shared_ptr handle_; + std::shared_ptr crossExecFilter_; +}; + +// Builds the local→primary callback chain, hops to primaryExec to install the +// channel subscriber, then returns. The caller resumes on its own pinned executor +// via co_withExecutor's natural unwind (relayExec_ or subscriberExec depending on +// which path is calling). +folly::coro::Task> wireLocalToPrimary( + openmoq::moqx::LocalForwarderRegistry* localReg, + moxygen::FullTrackName ftn, + std::shared_ptr localFwd, + std::shared_ptr primaryFwd, + folly::Executor* primaryExec, + folly::Executor* subscriberExec, + std::shared_ptr crossExecFilter, + bool forward +) { + auto channelCb = + std::make_shared(primaryFwd, subscriberExec, primaryExec); + auto crossExecCb = std::make_shared(primaryExec, localFwd, channelCb); + auto finalCallback = + std::make_shared(localReg, std::move(ftn), std::move(crossExecCb)); + co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec), + [&]() -> folly::coro::Task { + auto chanHandle = + primaryFwd->addChannelSubscriber(subscriberExec, forward, crossExecFilter); + if (chanHandle) { + channelCb->setHandle(chanHandle); + } + channelCb->setFilter(crossExecFilter); + co_return; + }() + ); + co_return finalCallback; +} + +// Installs finalCallback on localFwd and replays any forwardChanged/newGroupRequested +// events captured during setup. Must run on subscriberExec. +void replayPendingEvents( + moxygen::MoQForwarder* localFwd, + const std::shared_ptr& finalCallback, + const PendingForwarderCallback& pendingCb, + bool forward +) { + localFwd->setCallback(finalCallback); + if (pendingCb.lastForward_ && *pendingCb.lastForward_ != forward) { + finalCallback->forwardChanged(localFwd, *pendingCb.lastForward_); + } + if (pendingCb.maxGroup_) { + finalCallback->newGroupRequested(localFwd, *pendingCb.maxGroup_); + } +} +} // namespace + +folly::coro::Task MoqxRelay::publishViaLocalForwarder( + std::shared_ptr subscriberSession, + std::shared_ptr primaryFwd, + folly::Executor* primaryExec, + bool forward, + bool pinned +) { + auto* subscriberExec = subscriberSession->getExecutor(); + const auto& ftn = primaryFwd->fullTrackName(); + + // Fast path: local forwarder already exists on this thread. + if (auto* localReg = tlForwarders_.get()) { + if (auto localFwd = localReg->get(ftn)) { + auto p = startPublish(subscriberSession, localFwd, forward, pinned, nullptr); + if (p) { + co_await awaitPublishReply(localFwd, std::move(p->subscriber), std::move(p->reply)); + } + co_return; + } + } + + if (!tlForwarders_.get()) { + tlForwarders_.reset(new LocalForwarderRegistry()); + } + auto* localReg = tlForwarders_.get(); + auto [localFwd, isNew] = localReg->getOrCreate(ftn, [&] { + auto fwd = std::make_shared(ftn, primaryFwd->largest()); + fwd->setExtensions(primaryFwd->extensions()); + return fwd; + }); + + auto p = startPublish(subscriberSession, localFwd, forward, pinned, nullptr); + if (!p) { + if (isNew) { + localReg->remove(ftn, localFwd.get()); + } + co_return; + } + + if (!isNew) { + co_await awaitPublishReply(localFwd, std::move(p->subscriber), std::move(p->reply)); + co_return; + } + + // isNew=true: wire localFwd into primaryFwd as a channel subscriber. + auto pendingCb = std::make_shared(localReg, ftn); + localFwd->setCallback(pendingCb); + // deepCopyPayload=true (default): each subscriber thread owns its IOBuf chain, + // avoiding cross-thread contention on the shared atomic refcount. + auto crossExecFilter = std::make_shared(subscriberExec, localFwd); + bool fwd = (localFwd->numForwardingSubscribers() > 0); + + auto finalCallback = co_await wireLocalToPrimary( + localReg, + ftn, + localFwd, + primaryFwd, + primaryExec, + subscriberExec, + crossExecFilter, + fwd + ); + + // Natural unwind: back on subscriberExec. + + if (pendingCb->sawOnEmpty_) { + folly::via(primaryExec, [pf = primaryFwd, ex = subscriberExec]() noexcept { + pf->removeChannelSubscriberByExec(ex); + }); + localFwd->publishDone(PublishDone{ + RequestID(0), + PublishDoneStatusCode::INTERNAL_ERROR, + 0, + "all subscribers cancelled during setup" + }); + co_return; + } + + replayPendingEvents(localFwd.get(), finalCallback, *pendingCb, fwd); + co_await awaitPublishReply(localFwd, std::move(p->subscriber), std::move(p->reply)); +} + +folly::coro::Task MoqxRelay::completeFirstSubscriber( + StatefulSubscribeResult::FirstSubscriberSetup& setup, + std::shared_ptr primaryFwd, + bool forward +) { + // Add relay-exec channel sub (topNFilter/terminationFilter/cache path). + // setDownstream wired on relayExec_ before pending.complete(). + auto relayChainFilter = std::make_shared(relayExec_, nullptr); + // forward=true + passive=true: the relay's own chain (top-N/termination/cache) + // must observe every object (forward=true) so top-N and cache see the full + // stream, but it must not count as a real forwarding subscriber — passive=true + // keeps it out of forwardingSubscribers_ (so it never toggles upstream + // forwardChanged) and out of the onEmpty quorum (so the primary's onEmpty + // still fires once the last real cross-exec subscriber leaves, letting the + // primary and its upstream subscription be pruned). + primaryFwd + ->addChannelSubscriber(relayExec_, /*forward=*/true, relayChainFilter, /*passive=*/true); + setup.upstreamSubReq.forward = forward; + auto subRes = co_await setup.upstreamSession->subscribe( + setup.upstreamSubReq, + std::move(setup.upstreamConsumer) + ); + if (subRes.hasError()) { + co_return FirstSubscriberResult{ + std::move(relayChainFilter), + folly::makeUnexpected(SubscribeError{ + setup.clientRequestID, + subRes.error().errorCode, + folly::to("upstream subscribe failed: ", subRes.error().reasonPhrase) + }) + }; + } + // Process subscribeOk on primaryExec (forwarder's executor). + const auto& ok = subRes.value()->subscribeOk(); + auto reqID = ok.requestID; + auto exts = ok.extensions; // copy before move of subRes + auto largest = ok.largest; // copy before move of subRes + if (largest) { + primaryFwd->updateLargest(largest->group, largest->object); + } + primaryFwd->setExtensions(exts); + primaryFwd->tryProcessNewGroupRequest(setup.upstreamSubReq.params, /*fire=*/false); + co_return FirstSubscriberResult{ + std::move(relayChainFilter), + UpstreamOk{std::move(subRes.value()), reqID, std::move(exts), std::move(largest)} + }; +} + +folly::coro::Task MoqxRelay::subscribeFromSubscriberThread( + SubscribeRequest subReq, + std::shared_ptr consumer, + std::shared_ptr session, + folly::Executor* subscriberExec +) { + const auto& ftn = subReq.fullTrackName; + + // Fast path: local forwarder already exists on this thread. + if (auto* localReg = tlForwarders_.get()) { + if (auto localFwd = localReg->get(ftn)) { + if (localFwd->largest() && subReq.locType == LocationType::AbsoluteRange && + subReq.endGroup < localFwd->largest()->group) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INVALID_RANGE, + "Range in the past, use FETCH" + }); + } + auto sub = localFwd->addSubscriber(session, subReq, std::move(consumer)); + if (!sub) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "failed to add subscriber" + }); + } + localFwd->tryProcessNewGroupRequest(subReq.params); + co_return sub; + } + } + + // Ensure thread-local registry. + if (!tlForwarders_.get()) { + tlForwarders_.reset(new LocalForwarderRegistry()); + } + auto* localReg = tlForwarders_.get(); + + // getOrCreate before the relay hop: serializes same-iothread races. + // isNew=true means this thread owns setup; isNew=false means another setup is in + // progress (or complete) on this thread and we can just attach. + auto [localFwd, isNew] = + localReg->getOrCreate(ftn, [&] { return std::make_shared(ftn); }); + + if (!isNew) { + if (localFwd->largest() && subReq.locType == LocationType::AbsoluteRange && + subReq.endGroup < localFwd->largest()->group) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INVALID_RANGE, + "Range in the past, use FETCH" + }); + } + auto sub = localFwd->addSubscriber(session, subReq, std::move(consumer)); + if (!sub) { + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "failed to add subscriber" + }); + } + localFwd->tryProcessNewGroupRequest(subReq.params); + co_return sub; + } + + // isNew=true: this thread owns setup. Install PendingForwarderCallback first so + // forwardChanged/newGroupRequested/onEmpty events during setup are captured for replay. + auto pendingCb = std::make_shared(localReg, ftn); + localFwd->setCallback(pendingCb); + + // deepCopyPayload=true (default): each subscriber thread owns its IOBuf chain, + // avoiding cross-thread contention on the shared atomic refcount. + auto crossExecFilter = std::make_shared(subscriberExec, localFwd); + + // addSubscriber before the relay hop: numForwardingSubscribers() must be correct + // when addChannelSubscriber runs on primaryExec, so forward flag is right from the start. + auto sub = localFwd->addSubscriber(session, subReq, std::move(consumer)); + if (!sub) { + localReg->remove(ftn, localFwd.get()); + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "failed to add subscriber" + }); + } + bool forward = (localFwd->numForwardingSubscribers() > 0); + + // Out-vars set by the relay/primary chain, read after unwind on subscriberExec. + std::optional upstreamError; + std::optional upstreamOk; + std::shared_ptr finalCallback; + std::shared_ptr primaryFwd; + folly::Executor* primaryExec{nullptr}; + // Set on firstSetup path; wired to topNFilter on relayExec_ before pending.complete(). + std::shared_ptr relayChainFilter; + + // Single hop to relayExec_, with a nested hop to primaryExec so that the relay-exec + // cleanup (cache + pending.complete) runs on the natural unwind rather than as a + // separate explicit hop. + co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(relayExec_), + [&]() -> folly::coro::Task { + auto sr = co_await subscribeStatefulWork(subReq); + if (sr.error) { + upstreamError = std::move(*sr.error); // SubscribeResult (Expected with error) + co_return; + } + + primaryFwd = sr.primaryForwarder; + primaryExec = sr.primaryExec; + + if (primaryFwd && primaryExec) { + finalCallback = co_await wireLocalToPrimary( + localReg, + ftn, + localFwd, + primaryFwd, + primaryExec, + subscriberExec, + crossExecFilter, + forward + ); + // Natural unwind: back on relayExec_. + + if (sr.firstSetup) { + auto r = co_await folly::coro::co_withExecutor( + folly::getKeepAliveToken(primaryExec), + completeFirstSubscriber(*sr.firstSetup, primaryFwd, forward) + ); + relayChainFilter = std::move(r.relayChainFilter); + if (r.result.hasError()) { + upstreamError = folly::makeUnexpected(std::move(r.result.error())); + } else { + upstreamOk = std::move(r.result.value()); + } + } + } + + // Natural unwind: back on relayExec_. + if (upstreamError) { + // Channel subscriber(s) installed before subscribe failed; clean up. + if (primaryFwd && primaryExec) { + auto re = relayExec_; + auto rcf = relayChainFilter; + folly::via( + primaryExec, + [pf = primaryFwd, ex = subscriberExec, re, rcf = std::move(rcf)]() noexcept { + pf->removeChannelSubscriberByExec(ex); + if (rcf) { + pf->removeChannelSubscriberByExec(re); + } + } + ); + } + // pending destructor fires when sr is destroyed, cleaning registry entry. + co_return; + } + if (!sr.firstSetup) { + co_return; + } + // Wire relay chain before pending.complete() so buffered objects see topNFilter. + if (relayChainFilter) { + auto topNView = registry_.getTopNView(ftn); + if (topNView && topNView->topNFilter) { + relayChainFilter->setDownstream(topNView->topNFilter); + } + } + if (upstreamOk) { + if (cache_) { + cache_->setTrackExtensions(ftn, upstreamOk->extensions); + } + auto& setup = *sr.firstSetup; + if (!setup.pending.complete( + std::move(upstreamOk->handle), + upstreamOk->requestID, + setup.upstreamSession, + maybeWrapPublisher(relayExec_, setup.upstreamSession) + )) { + upstreamError = folly::makeUnexpected(SubscribeError{ + setup.clientRequestID, + SubscribeErrorCode::INTERNAL_ERROR, + "publisher reconnected during subscribe" + }); + } + } + }() + ); + + // Natural unwind: back on subscriberExec. + + if (pendingCb->sawOnEmpty_) { + // All subscribers left during setup; localReg entry already removed in + // PendingForwarderCallback::onEmpty. + // TODO: "everyone left during setup" and "everyone left + new subscriber joined" + // are edge cases that need further scrutiny and tests. For now do a best-effort + // cleanup of the channel subscriber if it was installed, and return an error. + // The relay registry entry (and upstream subscription if pending.complete ran) + // remains, so a subsequent subscribe on any thread will hit the SubsequentSubscriber + // path and work correctly. + if (primaryFwd && primaryExec) { + auto re = relayExec_; + auto rcf = relayChainFilter; + folly::via( + primaryExec, + [pf = primaryFwd, ex = subscriberExec, re, rcf = std::move(rcf)]() noexcept { + pf->removeChannelSubscriberByExec(ex); + if (rcf) { + pf->removeChannelSubscriberByExec(re); + } + } + ); + } + co_return folly::makeUnexpected(SubscribeError{ + subReq.requestID, + SubscribeErrorCode::INTERNAL_ERROR, + "all subscribers cancelled during setup" + }); + } + + if (upstreamError) { + localFwd->publishDone(PublishDone{ + RequestID(0), + PublishDoneStatusCode::INTERNAL_ERROR, + 0, + upstreamError->error().reasonPhrase + }); + localReg->remove(ftn, localFwd.get()); + co_return std::move(*upstreamError); + } + + if (upstreamOk) { + localFwd->setExtensions(upstreamOk->extensions); + if (upstreamOk->largest) { + localFwd->updateLargest(upstreamOk->largest->group, upstreamOk->largest->object); + } + } + replayPendingEvents(localFwd.get(), finalCallback, *pendingCb, forward); + localFwd->tryProcessNewGroupRequest(subReq.params); + co_return sub; +} + +// === End multi-iothread subscribe helpers === + folly::coro::Task MoqxRelay::subscribe(SubscribeRequest subReq, std::shared_ptr consumer) { return subscribeImpl(std::move(subReq), std::move(consumer)); @@ -1281,14 +2243,12 @@ void MoqxRelay::onTrackSelected( return; } - auto exec = session->getExecutor(); - XCHECK(exec) << "onTrackSelected: null executor for session " << session.get(); - - // TODO: Consider batching multiple addSubscriberAndPublish calls on the same - // executor when multiple tracks are selected for the same session in a single - // ranking update. + auto upstreamView = registry_.getUpstreamView(ftn); + XCHECK(!relayExec_ || (upstreamView && upstreamView->publisherExec)) + << "onTrackSelected: relayExec set but no publisherExec for " << ftn; + auto* publisherExec = relayExec_ ? upstreamView->publisherExec : nullptr; // TRACK_FILTER subscribers are unpinned so onTrackEvicted can remove them. - addSubscriberAndPublish(session, trackForwarder, forward, /*pinned=*/false); + addSubscriberAndPublish(session, trackForwarder, forward, /*pinned=*/false, publisherExec); } void MoqxRelay::onTrackEvicted(const FullTrackName& ftn, std::shared_ptr session) { diff --git a/src/MoqxRelay.h b/src/MoqxRelay.h index f2f9393b..7895d996 100644 --- a/src/MoqxRelay.h +++ b/src/MoqxRelay.h @@ -13,12 +13,16 @@ #include "SubscriptionRegistry.h" #include "UpstreamProvider.h" #include "config/Config.h" +#include "relay/CrossExecFilter.h" +#include "relay/CrossExecForwarderCallback.h" +#include "relay/LocalForwarderRegistry.h" #include "relay/PropertyRanking.h" #include "relay/RelayExecUtil.h" #include #include #include +#include #include #include #include @@ -26,9 +30,7 @@ #include #include -namespace openmoq::moqx { -class CrossExecForwarderCallback; -} // namespace openmoq::moqx +namespace openmoq::moqx {} // namespace openmoq::moqx namespace openmoq::moqx { @@ -136,6 +138,21 @@ class MoqxRelay : public moxygen::Publisher, folly::Executor* getRelayExec() const { return relayExec_; } + // When true, uses per-thread local forwarders to minimize cross-thread + // hops in the data plane (one cross-exec hop per callback per thread). + // Defaults to false (one cross-exec hop per plane callback per subscriber + // plus one for the publisher). + void setUseLocalForwarders(bool enable) { useLocalForwarders_ = enable; } + + // Returns the appropriate Publisher filter for a client session. + // Uses LocalSubscribeFilter when relayExec_ + useLocalForwarders_ are set, + // PublisherCrossExecFilter when only relayExec_ is set, or this directly. + std::shared_ptr createPublisherFilter(); + + // Returns the appropriate Subscriber filter for a client session. + // Uses SubscriberCrossExecFilter when relayExec_ is set, or this directly. + std::shared_ptr createSubscriberFilter(); + void setAllowedNamespacePrefix(moxygen::TrackNamespace allowed) { allowedNamespacePrefix_ = std::move(allowed); } @@ -252,6 +269,8 @@ class MoqxRelay : public moxygen::Publisher, private: class NamespaceSubscription; class TerminationFilter; + class LocalSubscribeFilter; + class LocalPublishFilter; void unsubscribeNamespace( const moxygen::TrackNamespace& prefix, @@ -268,9 +287,8 @@ class MoqxRelay : public moxygen::Publisher, void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; - // FTN-keyed impl variants — called by CrossExecForwarderCallback (relay exec) - // or directly from the non-cross-exec callbacks above. - friend class CrossExecForwarderCallback; + // FTN-keyed impl variants — called by the MoQForwarder::Callback overrides + // above (single-thread) or by WeakRelayForwarderCallback on relay exec. void onEmptyImpl(const moxygen::FullTrackName& ftn); void forwardChangedImpl(const moxygen::FullTrackName& ftn, bool forward); void newGroupRequestedImpl(const moxygen::FullTrackName& ftn, uint64_t group); @@ -281,9 +299,9 @@ class MoqxRelay : public moxygen::Publisher, std::shared_ptr nodePtr ); - // Sync setup: addSubscriber → set pinned → session->publish (via - // SubscriberCrossExecFilter when subscriberExec is non-null) → set - // trackConsumer. Returns nullopt and cleans up on any synchronous failure. + // Sync setup: addSubscriber → set pinned → session->publish (optionally via + // SubscriberCrossExecFilter when subscriberExec is non-null) → set trackConsumer. + // Returns nullopt and cleans up on any synchronous failure. struct PreparedPublish { std::shared_ptr subscriber; folly::coro::Task> reply; @@ -296,13 +314,50 @@ class MoqxRelay : public moxygen::Publisher, folly::Executor* subscriberExec ); - // Calls startPublish and fires the reply as a free-running coroutine. - // Returns false and cleans up on any synchronous failure. + // Runs on publisherExec. Calls startPublish and co_awaits the reply. + // subscriberExec non-null → cross-exec publish dispatch. + // Runs on subscriberExec. Gets or creates the thread-local forwarder for ftn, + // wires it to primaryFwd as a channel subscriber (isNew path), and awaits the + // publish reply. Shared by addSubscriberAndPublish and any future callers that + // need to attach a subscriber via a local forwarder. + folly::coro::Task publishViaLocalForwarder( + std::shared_ptr subscriberSession, + std::shared_ptr primaryFwd, + folly::Executor* primaryExec, + bool forward, + bool pinned + ); + + // In relay+local-forwarder mode: dispatches publishViaLocalForwarder to + // subscriberExec. Otherwise: calls startPublish sync and fires reply async. + // Returns false on synchronous failure. bool addSubscriberAndPublish( - std::shared_ptr session, + std::shared_ptr subscriberSession, std::shared_ptr forwarder, bool forward, - bool pinned + bool pinned, + folly::Executor* publisherExec = nullptr + ); + + // Called from LocalPublishFilter::publish() on publisherExec. Creates a local + // publisher forwarder as the session consumer and asynchronously sets up the + // primary forwarder on relayExec_ via setupPublisherPrimary(). + moxygen::Subscriber::PublishResult publishFromPublisherThread( + moxygen::PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session + ); + + // Runs on relayExec_. Registers primaryFwd (the publisher's local forwarder, + // created on publisherExec) in the registry and wires relayChainFilter to the + // topNFilter so the relay chain receives objects. + folly::coro::Task> + setupPublisherPrimary( + moxygen::PublishRequest pub, + std::shared_ptr handle, + std::shared_ptr session, + std::shared_ptr primaryFwd, + std::shared_ptr relayChainFilter ); folly::coro::Task @@ -365,6 +420,60 @@ class MoqxRelay : public moxygen::Publisher, std::shared_ptr consumer ); + // Result of subscribeStatefulWork (Phase 1, runs on relay exec). + struct StatefulSubscribeResult { + std::shared_ptr primaryForwarder; + folly::Executor* primaryExec{nullptr}; // owning executor of primaryForwarder + std::optional error; // set on failure + + // Set only for the FirstSubscriber path. Must be moved into + // completeFirstSubscriber on relayExec_ after the channel subscriber is + // installed on primaryExec. Pending destructor fires on abandoned move. + struct FirstSubscriberSetup { + std::shared_ptr upstreamSession; + moxygen::SubscribeRequest upstreamSubReq; + std::shared_ptr upstreamConsumer; + SubscriptionRegistry::UpstreamSubscribePending pending; + moxygen::RequestID clientRequestID; + }; + std::optional firstSetup; + }; + + // Phase 1: registry lookup + FirstSubscriber setup. Does NOT subscribe + // upstream; returns firstSetup for the caller to complete after installing + // the channel subscriber. + folly::coro::Task subscribeStatefulWork(moxygen::SubscribeRequest subReq + ); + + struct UpstreamOk { + std::shared_ptr handle; + moxygen::RequestID requestID; + moxygen::Extensions extensions; + std::optional largest; + }; + struct FirstSubscriberResult { + std::shared_ptr relayChainFilter; // always set; needed for cleanup on error + folly::Expected result; + }; + + // Runs on primaryExec. Adds the relay-exec channel subscriber, issues the + // upstream subscribe, and processes SubscribeOk on the primary forwarder. + folly::coro::Task completeFirstSubscriber( + StatefulSubscribeResult::FirstSubscriberSetup& setup, + std::shared_ptr primaryFwd, + bool forward + ); + + // Multi-iothread subscribe: subscriber-thread orchestrator. + // Dispatches subscribeStatefulWork to relayExec_, then on the subscriber thread + // creates a local forwarder, wires a channel subscriber, and returns. + folly::coro::Task subscribeFromSubscriberThread( + moxygen::SubscribeRequest subReq, + std::shared_ptr consumer, + std::shared_ptr session, + folly::Executor* subscriberExec + ); + // Impl methods — run on relayExec_ when set, or inline when relayExec_==nullptr. folly::coro::Task subscribeImpl(moxygen::SubscribeRequest subReq, std::shared_ptr consumer); @@ -382,13 +491,26 @@ class MoqxRelay : public moxygen::Publisher, ); folly::coro::Task onUpstreamConnectImpl(std::shared_ptr session); + // Synchronous result of publishWithSession: the consumer the publisher writes + // to and the PublishOk to return to the publisher. Returned synchronously so + // the reply coro (coPublish) can co_return the PublishOk immediately after + // setup without waiting for any downstream peer handshake. + struct PublishSetup { + std::shared_ptr consumer; + moxygen::PublishOk publishOk; + }; + using PublishSetupResult = folly::Expected; + // Contains all the inline publish() logic, taking session explicitly so it // can be called from either the I/O thread (relayExec_==nullptr) or from // coPublish on relay exec (where getRequestSession() would return null). - PublishResult publishWithSession( + // If forwarder is non-null it is used as-is (pre-created by publish()); otherwise + // a new forwarder is created from pub (single-threaded or subscribeNamespace path). + PublishSetupResult publishWithSession( moxygen::PublishRequest pub, std::shared_ptr handle, - std::shared_ptr session + std::shared_ptr session, + std::shared_ptr forwarder = nullptr ); std::shared_ptr ownedRelayExec_; @@ -412,6 +534,9 @@ class MoqxRelay : public moxygen::Publisher, } return maybeWrapPublisher(relayExec_, std::move(session)); } + + bool useLocalForwarders_{false}; + folly::ThreadLocalPtr tlForwarders_; std::unique_ptr cache_; uint64_t maxDeselected_{kDefaultMaxDeselected}; diff --git a/src/MoqxRelayContext.cpp b/src/MoqxRelayContext.cpp index e08dea1a..aa445203 100644 --- a/src/MoqxRelayContext.cpp +++ b/src/MoqxRelayContext.cpp @@ -23,7 +23,8 @@ namespace openmoq::moqx { MoqxRelayContext::MoqxRelayContext( const folly::F14FastMap& services, const std::string& relayID, - bool useRelayThread + bool useRelayThread, + bool useLocalForwarders ) : serviceMatcher_(services), relayID_(relayID) { if (useRelayThread && !services.empty()) { @@ -37,6 +38,7 @@ MoqxRelayContext::MoqxRelayContext( for (const auto& [name, svc] : services) { auto relay = std::make_shared(svc.cache, relayID); relay->setRelayExec(std::make_shared(evbs[i++].get())); + relay->setUseLocalForwarders(useLocalForwarders); services_.emplace(name, ServiceEntry{svc, std::move(relay)}); } } else { @@ -194,18 +196,9 @@ folly::Expected MoqxRelayContext::validateAu // Route: set per-service relay as handler, wrapping in cross-exec filters if needed auto it = services_.find(*matchedName); CHECK(it != services_.end()) << "Service '" << *matchedName << "' matched but no entry found"; - auto* relayExec = it->second.relay->getRelayExec(); - if (relayExec) { - session->setPublishHandler( - std::make_shared(relayExec, it->second.relay) - ); - session->setSubscribeHandler( - std::make_shared(relayExec, it->second.relay) - ); - } else { - session->setPublishHandler(it->second.relay); - session->setSubscribeHandler(it->second.relay); - } + auto& relay = it->second.relay; + session->setPublishHandler(relay->createPublisherFilter()); + session->setSubscribeHandler(relay->createSubscriberFilter()); return folly::unit; } diff --git a/src/MoqxRelayContext.h b/src/MoqxRelayContext.h index ebbf3e4b..de1c7e67 100644 --- a/src/MoqxRelayContext.h +++ b/src/MoqxRelayContext.h @@ -56,7 +56,8 @@ class MoqxRelayContext { MoqxRelayContext( const folly::F14FastMap& services, const std::string& relayID, - bool useRelayThread = true + bool useRelayThread = true, + bool useLocalForwarders = false ); void setStatsRegistry(std::shared_ptr registry); diff --git a/src/SubscriptionRegistry.cpp b/src/SubscriptionRegistry.cpp index bfe27e0e..3b1e999d 100644 --- a/src/SubscriptionRegistry.cpp +++ b/src/SubscriptionRegistry.cpp @@ -183,6 +183,7 @@ SubscriptionRegistry::getUpstreamView(const moxygen::FullTrackName& ftn) const { rsub.publisher, rsub.handle, rsub.requestID, + rsub.upstream ? rsub.upstream->getExecutor() : nullptr, rsub.isPublish, rsub.promise.isFulfilled() }; diff --git a/src/SubscriptionRegistry.h b/src/SubscriptionRegistry.h index 76faf574..22b42c9a 100644 --- a/src/SubscriptionRegistry.h +++ b/src/SubscriptionRegistry.h @@ -134,6 +134,7 @@ class SubscriptionRegistry { std::shared_ptr publisher; std::shared_ptr handle; moxygen::RequestID requestID; + folly::Executor* publisherExec{nullptr}; // executor the primary forwarder lives on bool isPublish; bool isReady; // promise fulfilled }; diff --git a/src/config/Config.h b/src/config/Config.h index 8efeccff..53791db9 100644 --- a/src/config/Config.h +++ b/src/config/Config.h @@ -197,6 +197,7 @@ struct Config { std::string relayID; // always set: from config or randomly generated uint32_t threads{1}; bool useRelayThread{true}; + bool useLocalForwarders{false}; bool mvfstBpfSteering{true}; }; diff --git a/src/config/ConfigResolver.cpp b/src/config/ConfigResolver.cpp index c97e2885..1a2986b5 100644 --- a/src/config/ConfigResolver.cpp +++ b/src/config/ConfigResolver.cpp @@ -829,6 +829,11 @@ folly::Expected resolveConfig(const ParsedConfig& c errors.push_back("use_relay_thread must be true when threads > 1"); } + const bool useLocalForwarders = config.use_local_forwarders.value().value_or(false); + if (useLocalForwarders && !useRelayThread) { + errors.push_back("use_local_forwarders requires use_relay_thread to be true"); + } + const bool mvfstBpfSteering = config.mvfst_bpf_steering.value().value_or(true); if (!errors.empty()) { @@ -881,6 +886,7 @@ folly::Expected resolveConfig(const ParsedConfig& c .relayID = std::move(relayID), .threads = threads, .useRelayThread = useRelayThread, + .useLocalForwarders = useLocalForwarders, .mvfstBpfSteering = mvfstBpfSteering, }, .warnings = std::move(warnings), diff --git a/src/config/loader/ParsedConfig.h b/src/config/loader/ParsedConfig.h index b075ff32..102c5dd8 100644 --- a/src/config/loader/ParsedConfig.h +++ b/src/config/loader/ParsedConfig.h @@ -392,6 +392,12 @@ struct ParsedConfig { "Disable for baseline performance comparison.", std::optional> use_relay_thread; + rfl::Description< + "Use per-subscriber-thread local forwarders to minimize cross-thread hops on the " + "data path (requires use_relay_thread; default: false). Disable to run all subscribes " + "on the relay thread via subscribeImpl.", + std::optional> + use_local_forwarders; rfl::Description< "Attach a classic BPF reuseport filter to steer QUIC packets to the correct mvfst worker " "based on the connection ID's workerId field (Linux only, mvfst stack only, default: true). " diff --git a/src/main.cpp b/src/main.cpp index bb570813..545ccc9f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -110,8 +110,12 @@ int main(int argc, char* argv[]) { // === 6. Initialize services === // Construct and configure the application's own services // (MoqxRelayContext, MoqxRelayServer, etc.) - auto context = - std::make_shared(config.services, config.relayID, config.useRelayThread); + auto context = std::make_shared( + config.services, + config.relayID, + config.useRelayThread, + config.useLocalForwarders + ); // === 6a. Stats registry === auto statsRegistry = std::make_shared(); diff --git a/src/relay/CrossExecForwarderCallback.cpp b/src/relay/CrossExecForwarderCallback.cpp index 14968bc4..7765c53b 100644 --- a/src/relay/CrossExecForwarderCallback.cpp +++ b/src/relay/CrossExecForwarderCallback.cpp @@ -18,6 +18,16 @@ void CrossExecForwarderCallback::onEmpty(moxygen::MoQForwarder* /*forwarder*/) { }); } +void CrossExecForwarderCallback::onPublishDone(moxygen::MoQForwarder* /*forwarder*/) { + auto f = forwarder_.lock(); + if (!f) { + return; + } + targetExec_->add([f = std::move(f), downstream = downstream_]() mutable { + downstream->onPublishDone(f.get()); + }); +} + void CrossExecForwarderCallback::forwardChanged( moxygen::MoQForwarder* /*forwarder*/, bool forward diff --git a/src/relay/CrossExecForwarderCallback.h b/src/relay/CrossExecForwarderCallback.h index 33e6add2..5f359d94 100644 --- a/src/relay/CrossExecForwarderCallback.h +++ b/src/relay/CrossExecForwarderCallback.h @@ -27,6 +27,7 @@ class CrossExecForwarderCallback final : public moxygen::MoQForwarder::Callback downstream_(std::move(downstream)) {} void onEmpty(moxygen::MoQForwarder* forwarder) override; + void onPublishDone(moxygen::MoQForwarder* forwarder) override; void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; diff --git a/src/relay/LocalForwarderCallback.h b/src/relay/LocalForwarderCallback.h new file mode 100644 index 00000000..f393dc03 --- /dev/null +++ b/src/relay/LocalForwarderCallback.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "relay/LocalForwarderRegistry.h" + +#include +#include + +namespace openmoq::moqx { + +// Runs on the local forwarder's executor. Delegates lifecycle events to +// downstream (a CrossExecForwarderCallback targeting the primary's executor) +// and owns the forwarder's removal from tlForwarders_. +// +// Removal is identity-checked (only vacates the slot if it still holds THIS +// forwarder), making teardown order-independent: a terminated forwarder can +// never clobber a newer one that has claimed the same track name. +// +// removeOnEmpty distinguishes the two roles: +// - subscribe-path local forwarder (removeOnEmpty=true): when its last +// subscriber leaves, its channel sub is pulled from the primary and it is +// dead — remove on onEmpty as well as onPublishDone. +// - publisher's primary forwarder (removeOnEmpty=false): it must survive +// subscriber churn (new subscribers may arrive while the publisher is +// live), so it is removed ONLY when the source terminates (onPublishDone). +class LocalForwarderCallback : public moxygen::MoQForwarder::Callback { +public: + LocalForwarderCallback( + LocalForwarderRegistry* localReg, + moxygen::FullTrackName ftn, + std::shared_ptr downstream, + bool removeOnEmpty = true + ) + : localReg_(localReg), ftn_(std::move(ftn)), downstream_(std::move(downstream)), + removeOnEmpty_(removeOnEmpty) {} + + void onEmpty(moxygen::MoQForwarder* forwarder) override { + downstream_->onEmpty(forwarder); + if (removeOnEmpty_) { + localReg_->remove(ftn_, forwarder); + } + } + + void onPublishDone(moxygen::MoQForwarder* forwarder) override { + downstream_->onPublishDone(forwarder); + // Source-driven teardown (publisher/upstream terminated). Identity-checked. + localReg_->remove(ftn_, forwarder); + } + + void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override { + downstream_->forwardChanged(forwarder, forward); + } + + void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override { + downstream_->newGroupRequested(forwarder, group); + } + +private: + LocalForwarderRegistry* localReg_; + moxygen::FullTrackName ftn_; + std::shared_ptr downstream_; + bool removeOnEmpty_; +}; + +} // namespace openmoq::moqx diff --git a/src/relay/LocalForwarderRegistry.h b/src/relay/LocalForwarderRegistry.h new file mode 100644 index 00000000..85986646 --- /dev/null +++ b/src/relay/LocalForwarderRegistry.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include + +namespace openmoq::moqx { + +// Thread-local registry mapping FullTrackName → local MoQForwarder on one +// iothread. All methods must be called on the owning thread. One instance per +// iothread, stored in MoqxRelay::tlForwarders_. +class LocalForwarderRegistry { +public: + struct GetOrCreateResult { + std::shared_ptr localForwarder; + // true only on first creation; caller must cross-exec to publisher thread + // and add a CrossExecFilter subscriber to the primary forwarder. + bool isNew; + }; + + // Returns the existing local forwarder for ftn, or calls factory() to create + // one. factory() is called at most once per ftn per thread lifetime. + GetOrCreateResult getOrCreate( + const moxygen::FullTrackName& ftn, + folly::FunctionRef()> factory + ) { + auto it = forwarders_.find(ftn); + if (it != forwarders_.end()) { + return {it->second, /*isNew=*/false}; + } + auto forwarder = factory(); + forwarders_.emplace(ftn, forwarder); + return {std::move(forwarder), /*isNew=*/true}; + } + + // Claim the slot for ftn unconditionally, returning the previous occupant (if + // any). The publisher's forwarder is authoritative for a track on its thread: + // a stale subscribe-path local forwarder under the same name is displaced + // here, and drains itself via the source-termination cascade (its identity- + // checked removal then no-ops, since this forwarder now owns the slot). + std::shared_ptr + set(const moxygen::FullTrackName& ftn, std::shared_ptr forwarder) { + auto prev = std::move(forwarders_[ftn]); + forwarders_[ftn] = std::move(forwarder); + return prev; + } + + std::shared_ptr get(const moxygen::FullTrackName& ftn) const { + auto it = forwarders_.find(ftn); + return it != forwarders_.end() ? it->second : nullptr; + } + + // Identity-checked removal: erase the entry for ftn only if it still points + // at `expected`. Called from a forwarder's onPublishDone, which fires on this + // thread. The identity check makes teardown order-independent: a terminated + // forwarder can only vacate its own slot, so it never clobbers a newer + // forwarder that has since claimed the same track name. + void remove(const moxygen::FullTrackName& ftn, const moxygen::MoQForwarder* expected) { + auto it = forwarders_.find(ftn); + if (it == forwarders_.end() || it->second.get() != expected) { + return; + } + forwarders_.erase(it); + } + +private: + folly::F14FastMap< + moxygen::FullTrackName, + std::shared_ptr, + moxygen::FullTrackName::hash> + forwarders_; +}; + +} // namespace openmoq::moqx diff --git a/src/relay/PublisherCrossExecFilter.h b/src/relay/PublisherCrossExecFilter.h index c3c7499e..0ad2f570 100644 --- a/src/relay/PublisherCrossExecFilter.h +++ b/src/relay/PublisherCrossExecFilter.h @@ -17,7 +17,7 @@ namespace openmoq::moqx { // the result to the caller. goaway() is fire-and-forget. // // Requires targetExec_ to be a FIFO executor if call ordering matters. -class PublisherCrossExecFilter final : public moxygen::Publisher { +class PublisherCrossExecFilter : public moxygen::Publisher { public: PublisherCrossExecFilter(folly::Executor* targetExec, std::shared_ptr inner) : targetExec_(targetExec), inner_(std::move(inner)) {} diff --git a/src/relay/SubscriberCrossExecFilter.h b/src/relay/SubscriberCrossExecFilter.h index 1f12cf99..d9866b55 100644 --- a/src/relay/SubscriberCrossExecFilter.h +++ b/src/relay/SubscriberCrossExecFilter.h @@ -24,7 +24,7 @@ class CrossExecFilter; // goaway() is fire-and-forget. // // Requires targetExec_ to be a FIFO executor if call ordering matters. -class SubscriberCrossExecFilter final : public moxygen::Subscriber { +class SubscriberCrossExecFilter : public moxygen::Subscriber { public: SubscriberCrossExecFilter(folly::Executor* targetExec, std::shared_ptr inner) : targetExec_(targetExec), inner_(std::move(inner)) {} diff --git a/src/relay/WeakRelayForwarderCallback.cpp b/src/relay/WeakRelayForwarderCallback.cpp new file mode 100644 index 00000000..b37babf3 --- /dev/null +++ b/src/relay/WeakRelayForwarderCallback.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "WeakRelayForwarderCallback.h" +namespace openmoq::moqx { + +void WeakRelayForwarderCallback::onEmpty(moxygen::MoQForwarder* forwarder) { + if (auto r = relay_.lock()) { + r->onEmpty(forwarder); + } +} + +void WeakRelayForwarderCallback::forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) { + if (auto r = relay_.lock()) { + r->forwardChanged(forwarder, forward); + } +} + +void WeakRelayForwarderCallback::newGroupRequested( + moxygen::MoQForwarder* forwarder, + uint64_t group +) { + if (auto r = relay_.lock()) { + r->newGroupRequested(forwarder, group); + } +} + +} // namespace openmoq::moqx diff --git a/src/relay/WeakRelayForwarderCallback.h b/src/relay/WeakRelayForwarderCallback.h new file mode 100644 index 00000000..61749eca --- /dev/null +++ b/src/relay/WeakRelayForwarderCallback.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) OpenMOQ contributors. + * This source code is licensed under the Apache 2.0 license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace openmoq::moqx { + +// Relay-side MoQForwarder::Callback target. Holds a weak_ptr to the relay +// (as its MoQForwarder::Callback base) to avoid a reference cycle +// (registry → forwarder → callback → relay). Intended to be used as the +// downstream of CrossExecForwarderCallback, which handles cross-exec +// dispatch and forwarder pointer recovery. +class WeakRelayForwarderCallback final : public moxygen::MoQForwarder::Callback { +public: + explicit WeakRelayForwarderCallback(std::weak_ptr relay) + : relay_(std::move(relay)) {} + + void onEmpty(moxygen::MoQForwarder* forwarder) override; + void forwardChanged(moxygen::MoQForwarder* forwarder, bool forward) override; + void newGroupRequested(moxygen::MoQForwarder* forwarder, uint64_t group) override; + +private: + std::weak_ptr relay_; +}; + +} // namespace openmoq::moqx diff --git a/test/MoqxRelayNGRTests.cpp b/test/MoqxRelayNGRTests.cpp index 3a998f6e..88a76930 100644 --- a/test/MoqxRelayNGRTests.cpp +++ b/test/MoqxRelayNGRTests.cpp @@ -286,6 +286,11 @@ TEST_P(MoQRelayTest, PublishOkNewNGRForwardedUpstream) { EXPECT_TRUE(driveUntil([&] { return updates.load() >= 2; })) << "NGR cascade incomplete: " << updates.load() << "/2 requestUpdates"; + // Lock in the assertion before teardown: in LocalForwarderMT the trailing + // forwardChanged(false) → requestUpdate at subscriber removal lands + // asynchronously and would otherwise over-saturate the expectation. + ASSERT_TRUE(testing::Mock::VerifyAndClearExpectations(publishHandle.get())); + removeSession(publisherSession); removeSession(subscriberSession); driveIfMultiThread(); @@ -365,6 +370,11 @@ TEST_P(MoQRelayTest, PublishOkDuplicateNGRNotForwardedUpstream) { EXPECT_TRUE(driveUntil([&] { return updates.load() >= 2; })) << "NGR cascade incomplete: " << updates.load() << "/2 requestUpdates"; + // Lock in the assertion before teardown: in LocalForwarderMT the trailing + // forwardChanged(false) → requestUpdate at subscriber removal lands + // asynchronously and would otherwise over-saturate the expectation. + ASSERT_TRUE(testing::Mock::VerifyAndClearExpectations(publishHandle.get())); + removeSession(publisherSession); removeSession(subscriber1); removeSession(subscriber2); diff --git a/test/MoqxRelayPublishTests.cpp b/test/MoqxRelayPublishTests.cpp index 0419fa71..50c1cbe9 100644 --- a/test/MoqxRelayPublishTests.cpp +++ b/test/MoqxRelayPublishTests.cpp @@ -204,6 +204,7 @@ TEST_P(MoQRelayTest, PublishExtensionsForwardedToLateJoiners) { removeSession(subscriber1); removeSession(subscriber2); driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed + driveIfMultiThread(); // flush relay cleanup so it drops session refs before mocks are destroyed } // Regression test: publisher reconnect after disconnect with active subscriber diff --git a/test/MoqxRelayTestFixture.cpp b/test/MoqxRelayTestFixture.cpp index 75ec9e53..802552e2 100644 --- a/test/MoqxRelayTestFixture.cpp +++ b/test/MoqxRelayTestFixture.cpp @@ -43,12 +43,22 @@ void MoQRelayTest::SetUp() { relayThread_ = std::make_unique("relay-test"); relayEvb_ = relayThread_->getEventBase(); relay_->setRelayExec(relayEvb_); + relay_->setUseLocalForwarders(false); exec_->setRelayEvb(relayEvb_); ASSERT_NE(relay_->getRelayExec(), nullptr); publisherInterface_ = std::make_shared(relay_->getRelayExec(), relay_); subscriberInterface_ = std::make_shared(relay_->getRelayExec(), relay_); + } else if (relayMode() == RelayMode::LocalForwarderMT) { + relayThread_ = std::make_unique("relay-test"); + relayEvb_ = relayThread_->getEventBase(); + relay_->setRelayExec(relayEvb_); + relay_->setUseLocalForwarders(true); + exec_->setRelayEvb(relayEvb_); + ASSERT_NE(relay_->getRelayExec(), nullptr); + publisherInterface_ = relay_->createPublisherFilter(); + subscriberInterface_ = relay_->createSubscriberFilter(); } } @@ -56,8 +66,13 @@ void MoQRelayTest::resetRelay(std::shared_ptr relay) { relay_ = std::move(relay); if (relayEvb_) { relay_->setRelayExec(relayEvb_); - publisherInterface_ = std::make_shared(relayEvb_, relay_); - subscriberInterface_ = std::make_shared(relayEvb_, relay_); + if (relayMode() == RelayMode::LocalForwarderMT) { + publisherInterface_ = relay_->createPublisherFilter(); + subscriberInterface_ = relay_->createSubscriberFilter(); + } else { + publisherInterface_ = std::make_shared(relayEvb_, relay_); + subscriberInterface_ = std::make_shared(relayEvb_, relay_); + } } } diff --git a/test/MoqxRelayTestFixture.h b/test/MoqxRelayTestFixture.h index 460b6c66..90a49cbd 100644 --- a/test/MoqxRelayTestFixture.h +++ b/test/MoqxRelayTestFixture.h @@ -35,7 +35,8 @@ namespace moxygen::test { enum class RelayMode { SingleThread, MultiThread, - // Future: Mode3 — add here, then add a branch in SetUp() and INSTANTIATE entry + LocalForwarderMT, + // Future: LocalForwarderMTCrossThread — separate publisherThread_ for pub/sub split }; inline void PrintTo(RelayMode mode, std::ostream* os) { @@ -46,6 +47,9 @@ inline void PrintTo(RelayMode mode, std::ostream* os) { case RelayMode::MultiThread: *os << "MultiThread"; return; + case RelayMode::LocalForwarderMT: + *os << "LocalForwarderMT"; + return; } } diff --git a/test/MoqxRelayTestModes.cpp b/test/MoqxRelayTestModes.cpp index bc8d5427..8e88bfec 100644 --- a/test/MoqxRelayTestModes.cpp +++ b/test/MoqxRelayTestModes.cpp @@ -9,13 +9,15 @@ namespace moxygen::test { INSTANTIATE_TEST_SUITE_P( AllModes, MoQRelayTest, - ::testing::Values(RelayMode::SingleThread, RelayMode::MultiThread), + ::testing::Values(RelayMode::SingleThread, RelayMode::MultiThread, RelayMode::LocalForwarderMT), [](const ::testing::TestParamInfo& info) -> std::string { switch (info.param) { case RelayMode::SingleThread: return "SingleThread"; case RelayMode::MultiThread: return "MultiThread"; + case RelayMode::LocalForwarderMT: + return "LocalForwarderMT"; } return "Unknown"; } From 71630f2d946a39450db2c28df12c9442376b4c77 Mon Sep 17 00:00:00 2001 From: afrind Date: Mon, 1 Jun 2026 02:48:46 -0400 Subject: [PATCH 7/9] config: add udp_socket_buffer_bytes listener config option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exposes udpSendBufferBytes/udpRecvBufferBytes via MoQServer::Options, plumbed through ParsedConfig → ConfigResolver → MoqxRelayServer. 0 (default) keeps the existing MoQServer built-in 1 MB behaviour. Also comments out reorderingThreshold (removed from moxygen API). --- scripts/perf-test.sh | 7 +++++++ src/MoqxRelayServer.cpp | 6 +++++- src/config/Config.h | 4 ++++ src/config/ConfigResolver.cpp | 2 ++ src/config/loader/ParsedConfig.h | 5 +++++ 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/scripts/perf-test.sh b/scripts/perf-test.sh index 807122f5..1665a634 100755 --- a/scripts/perf-test.sh +++ b/scripts/perf-test.sh @@ -36,6 +36,9 @@ # locally. The binary is expected at # /tmp/moqperf_test_client on the remote host. # HOST may include a user prefix (user@host). +# --udp-socket-buffer-bytes N +# UDP socket send/recv buffer size in bytes for the +# relay listener (defaults to net.core.wmem_max). # # Linux-only options (not supported on macOS): # --metrics, --perf-duration, --perf-events, --perf-stat, --jemalloc, @@ -65,6 +68,7 @@ PERF_DURATION=0 PERF_EVENTS="" RUN_PERF_STAT=false REMOTE_CLIENT_HOST="" +UDP_SOCKET_BUFFER_BYTES=$(cat /proc/sys/net/core/wmem_max 2>/dev/null || echo 1048576) TRACE_SCRIPT="" CLIENT_EXTRA_ARGS=() @@ -97,6 +101,7 @@ while [[ $# -gt 0 ]]; do --trace-script) TRACE_SCRIPT="$2"; shift 2 ;; --client-args) read -ra CLIENT_EXTRA_ARGS <<< "$2"; shift 2 ;; --remote-client) REMOTE_CLIENT_HOST="$2"; shift 2 ;; + --udp-socket-buffer-bytes) UDP_SOCKET_BUFFER_BYTES="$2"; shift 2 ;; *) echo "Unknown option: $1" >&2; exit 1 ;; esac done @@ -225,6 +230,7 @@ listeners: enable_gso: true max_conn_packets_sent_per_loop: 16 max_server_recv_packets_per_loop: 32 + udp_socket_buffer_bytes: $UDP_SOCKET_BUFFER_BYTES bbr2: exit_startup_on_loss: true enable_recovery_in_startup: true @@ -266,6 +272,7 @@ cp "$RELAY_CFG" "$LOG_DIR/relay.yaml" echo "delivery_timeout: $DELIVERY_TIMEOUT" echo "client_threads: $CLIENT_THREADS" echo "jemalloc: ${JEMALLOC:-none}" + echo "udp_socket_buf: $UDP_SOCKET_BUFFER_BYTES" echo "mvfst_bpf_steering: $BPF_STEERING" [[ "$PERF_DURATION" -gt 0 ]] && echo "perf_duration: ${PERF_DURATION}s (delay=$(( 3 * SUBSCRIBER_MAX / RAMP ))s)" || true [[ -n "$PERF_EVENTS" ]] && echo "perf_events: $PERF_EVENTS" || true diff --git a/src/MoqxRelayServer.cpp b/src/MoqxRelayServer.cpp index aedc4047..210e5c49 100644 --- a/src/MoqxRelayServer.cpp +++ b/src/MoqxRelayServer.cpp @@ -133,7 +133,11 @@ MoqxRelayServer::MoqxRelayServer( : MoQServer( buildFizzContext(listenerCfg), listenerCfg.endpoint, - buildTransportSettings(listenerCfg.quic, listenerCfg.mvfst) + MoQServer::Options{ + .transportSettings = buildTransportSettings(listenerCfg.quic, listenerCfg.mvfst), + .udpSendBufferBytes = listenerCfg.mvfst.udpSocketBufferBytes, + .udpRecvBufferBytes = listenerCfg.mvfst.udpSocketBufferBytes, + } ), listenerCfg_(listenerCfg), context_(std::move(context)), ioExecutor_(ioExecutor) {} diff --git a/src/config/Config.h b/src/config/Config.h index 53791db9..9969f2d1 100644 --- a/src/config/Config.h +++ b/src/config/Config.h @@ -128,6 +128,10 @@ struct MvfstConfig { float ceTarget{0.0f}; // 0 = disabled }; + // UDP socket send/receive buffer size in bytes. 0 = use MoQServer default (1 MB). + // Increase when high fan-out causes EAGAIN bursts under load. + uint64_t udpSocketBufferBytes{0}; + BBR bbr; BBR2 bbr2; Cubic cubic; diff --git a/src/config/ConfigResolver.cpp b/src/config/ConfigResolver.cpp index 1a2986b5..b073f2c3 100644 --- a/src/config/ConfigResolver.cpp +++ b/src/config/ConfigResolver.cpp @@ -508,6 +508,8 @@ void applyMvfstOverride(MvfstConfig& base, const ParsedMvfstConfig& overlay) { if (auto v = ls->ce_target.value()) base.l4s.ceTarget = *v; } + if (auto v = overlay.udp_socket_buffer_bytes.value()) + base.udpSocketBufferBytes = *v; } MvfstConfig mergeMvfstConfig( diff --git a/src/config/loader/ParsedConfig.h b/src/config/loader/ParsedConfig.h index 102c5dd8..e3a5c163 100644 --- a/src/config/loader/ParsedConfig.h +++ b/src/config/loader/ParsedConfig.h @@ -225,6 +225,11 @@ struct ParsedMvfstConfig { "L4S tunables (ECN CE target). Usable alongside any ECN-aware CC algorithm.", std::optional> l4s; + rfl::Description< + "UDP socket send and receive buffer size in bytes. Default: 0 (use MoQServer built-in " + "default of 1 MB). Increase to reduce EAGAIN errors under high fan-out burst load.", + std::optional> + udp_socket_buffer_bytes; }; struct ParsedListenerConfig { From 5abf0d5ecbe5a1ba6f5c8ee60c579d11b20e2c0a Mon Sep 17 00:00:00 2001 From: afrind Date: Mon, 1 Jun 2026 03:00:09 -0400 Subject: [PATCH 8/9] perf-test: increase packets read per loop to 256 --- scripts/perf-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/perf-test.sh b/scripts/perf-test.sh index 1665a634..efc9d887 100755 --- a/scripts/perf-test.sh +++ b/scripts/perf-test.sh @@ -229,7 +229,7 @@ listeners: mvfst: enable_gso: true max_conn_packets_sent_per_loop: 16 - max_server_recv_packets_per_loop: 32 + max_server_recv_packets_per_loop: 256 udp_socket_buffer_bytes: $UDP_SOCKET_BUFFER_BYTES bbr2: exit_startup_on_loss: true From 8abbc203b60804b6fff770e4a84c57866bebc422 Mon Sep 17 00:00:00 2001 From: afrind Date: Mon, 1 Jun 2026 03:16:45 -0400 Subject: [PATCH 9/9] relay: make stop() idempotent and keep context_ alive during teardown Guard stop() with a stopped_ flag instead of context_ presence, and stop resetting context_. terminateClientSession can run after stop() returns, from handleClientSession coroutines still draining on the evb/IO threads, so context_ must outlive stop(). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/MoqxPicoRelayServer.cpp | 6 ++++-- src/MoqxPicoRelayServer.h | 1 + src/MoqxRelayServer.cpp | 7 ++++--- src/MoqxRelayServer.h | 1 + 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/MoqxPicoRelayServer.cpp b/src/MoqxPicoRelayServer.cpp index 62aed595..4f8a171d 100644 --- a/src/MoqxPicoRelayServer.cpp +++ b/src/MoqxPicoRelayServer.cpp @@ -93,12 +93,14 @@ MoqxPicoRelayServer::~MoqxPicoRelayServer() { } void MoqxPicoRelayServer::stop() { - if (!context_) { + if (stopped_) { return; } + stopped_ = true; + // Keep context_ alive: terminateClientSession can run after stop() returns, + // from handleClientSession coroutines still draining on the evb. context_->stop(); evb_->runImmediatelyOrRunInEventBaseThreadAndWait([this] { MoQPicoQuicEventBaseServer::stop(); }); - context_.reset(); } void MoqxPicoRelayServer::setStatsRegistry(std::shared_ptr registry) { diff --git a/src/MoqxPicoRelayServer.h b/src/MoqxPicoRelayServer.h index cda2fbf4..e35fc882 100644 --- a/src/MoqxPicoRelayServer.h +++ b/src/MoqxPicoRelayServer.h @@ -64,6 +64,7 @@ class MoqxPicoRelayServer : public moxygen::MoQPicoQuicEventBaseServer { config::ListenerConfig listenerCfg_; std::shared_ptr context_; folly::EventBase* evb_; + bool stopped_{false}; }; } // namespace openmoq::moqx diff --git a/src/MoqxRelayServer.cpp b/src/MoqxRelayServer.cpp index 210e5c49..886d1834 100644 --- a/src/MoqxRelayServer.cpp +++ b/src/MoqxRelayServer.cpp @@ -147,12 +147,13 @@ MoqxRelayServer::~MoqxRelayServer() { } void MoqxRelayServer::stop() { - if (!context_) { + if (stopped_) { return; } - // QuicServer::shutdown drives terminateClientSession, which uses context_. + stopped_ = true; + // Keep context_ alive: terminateClientSession can run after stop() returns, + // from handleClientSession coroutines still draining on IO threads. MoQServer::stop(); - context_.reset(); } void MoqxRelayServer::setStatsRegistry(std::shared_ptr registry) { diff --git a/src/MoqxRelayServer.h b/src/MoqxRelayServer.h index 0dd0fb5d..f99524c4 100644 --- a/src/MoqxRelayServer.h +++ b/src/MoqxRelayServer.h @@ -57,6 +57,7 @@ class MoqxRelayServer : public moxygen::MoQServer { config::ListenerConfig listenerCfg_; std::shared_ptr context_; folly::IOThreadPoolExecutor* ioExecutor_; + bool stopped_{false}; }; } // namespace openmoq::moqx