diff --git a/CCPMemory.cpp b/CCPMemory.cpp index e5bb53b..ab7ec90 100644 --- a/CCPMemory.cpp +++ b/CCPMemory.cpp @@ -290,10 +290,7 @@ static inline void* CcpPlatformMalloc( size_t size ) void* p = HeapAlloc( s_heap, 0, size ); UpdateCount( size ); #if ENABLE_TELEMETRY_MEMORY_TRACKING - if ( p && CcpTelemetryIsConnected() ) - { -// TracySecureAlloc( p, size ); - } + CcpTelemetryTrackAllocation( p, size ); #endif return p; } @@ -308,16 +305,16 @@ static inline void* CcpPlatformCalloc( size_t items, size_t size ) void* p = HeapAlloc( s_heap, HEAP_ZERO_MEMORY, bytes ); UpdateCount( bytes ); #if ENABLE_TELEMETRY_MEMORY_TRACKING - if ( p && CcpTelemetryIsConnected() ) - { -// TracySecureAlloc( p, size ); - } + CcpTelemetryTrackAllocation( p, bytes ); #endif return p; } static inline void CcpPlatformFree( void* p ) { +#if ENABLE_TELEMETRY_MEMORY_TRACKING + CcpTelemetryTrackDeallocation( p ); +#endif UpdateCount( p, false ); HeapFree( s_heap, 0, p ); } @@ -339,9 +336,7 @@ static inline void* CcpPlatformMalloc( size_t size ) s_memuse += realSize; #if ENABLE_TELEMETRY_MEMORY_TRACKING - if ( CcpTelemetryIsConnected() ) { -// TracySecureAlloc( p, realSize ); - } + CcpTelemetryTrackAllocation( p, realSize ); #endif } #if defined(__ANDROID__) @@ -363,10 +358,7 @@ static inline void* CcpPlatformCalloc( size_t items, size_t size ) s_memuse += realSize; #if ENABLE_TELEMETRY_MEMORY_TRACKING - if ( CcpTelemetryIsConnected() ) - { -// TracySecureAlloc( p, realSize ); - } + CcpTelemetryTrackAllocation( p, realSize ); #endif } return p; @@ -376,6 +368,9 @@ static inline void CcpPlatformFree( void* p ) { #if defined(__ANDROID__) p = reinterpret_cast( p ) - 1; +#endif +#if ENABLE_TELEMETRY_MEMORY_TRACKING + CcpTelemetryTrackDeallocation( p ); #endif s_memuse -= CCPMSize( p ); free( p ); @@ -542,13 +537,6 @@ void CCPFree( void* p ) { if( p ) { -#if ENABLE_TELEMETRY_MEMORY_TRACKING - if ( CcpTelemetryIsConnected() ) - { -// TracySecureFree( p ); - } -#endif - if( s_guardAllocations ) { 
CCPFreeWithGuard( p ); diff --git a/CMakeLists.txt b/CMakeLists.txt index 72f3b1c..4279b02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,14 @@ if(WITH_TELEMETRY) find_package(Tracy CONFIG REQUIRED) target_compile_definitions(CcpCore PUBLIC CCP_TELEMETRY_ENABLED=1) target_link_libraries(CcpCore PUBLIC Tracy::TracyClient) + # Tracy's vcpkg package is always built as Release (with NDEBUG). CcpTelemetry.cpp + # must also be compiled with NDEBUG to match Tracy's struct layout: in 0.13.1 the + # Profiler class has a debug-only member (std::atomic_bool m_inUse, #ifndef NDEBUG) + # that shifts m_programNameLock and other trailing fields by 8 bytes, causing + # SetProgramName() — inlined from TracyProfiler.hpp — to lock the wrong memory + # address and fail with EINVAL. CCP_ASSERT_ENABLED is set explicitly via CMake so + # CCP_ASSERT macros are unaffected by this NDEBUG definition. + set_source_files_properties(CcpTelemetry.cpp PROPERTIES COMPILE_DEFINITIONS NDEBUG) else() target_compile_definitions(CcpCore PUBLIC CCP_TELEMETRY_ENABLED=0) endif() diff --git a/CcpTelemetry.cpp b/CcpTelemetry.cpp index 5bdf40c..5a68b2a 100644 --- a/CcpTelemetry.cpp +++ b/CcpTelemetry.cpp @@ -22,7 +22,7 @@ std::atomic s_profilerState{ProfilerState::Stopped}; FiberNameStore s_fiberNameStore; // Persisted fiber name string store, including the empty "root" fiber name -thread_local FiberNameStore::const_iterator t_activeFiber{ s_fiberNameStore.begin() }; // default to having no fiber +thread_local FiberNameStore::const_iterator t_activeFiber{ s_fiberNameStore.end() }; // default to having no fiber template<> struct std::less @@ -35,7 +35,7 @@ struct std::less typedef std::map> TaskletZoneStore; thread_local TaskletZoneStore t_taskletZoneStore; // Per-thread record of zones instrumented from python -thread_local TaskletZoneStore::iterator t_activeTaskletZoneStore{ t_taskletZoneStore.begin() }; +thread_local TaskletZoneStore::iterator t_activeTaskletZoneStore{ 
t_taskletZoneStore.end() }; thread_local std::set t_manuallyTrackedZones; // Keep track of zones created through `CcpTelemetryEnterZone` to ensure that we only pop off the zone store's stack when leaving a manually created zone constexpr std::chrono::milliseconds s_cleanupDelay{5000}; @@ -85,6 +85,11 @@ bool CcpTelemetryIsStarted() return s_profilerState.load( std::memory_order_acquire ) == ProfilerState::Started; } +bool CcpMemoryProfilingIsEnabled() +{ + return s_config.trackMemoryAllocations; +} + void CcpRegisterMutex( class CcpMutex& m, const char* owner, const char* name ) { // Store the name for future Telemetry sessions, even if we're already connected. @@ -214,6 +219,7 @@ void CcpTelemetryTick() { ( *handler.first )( CCP_TELEMETRY_STOPPED, handler.second ); } + break; } case ProfilerState::Stopped: // Nothing to do @@ -224,6 +230,21 @@ void CcpTelemetryTick() } } +void CcpTelemetryTrackAllocation( void* p, size_t size ) +{ + if ( p && CcpMemoryProfilingIsEnabled() && CcpTelemetryIsConnected() ) { // a null p is a failed allocation: never report address 0, mirroring the guard in CcpTelemetryTrackDeallocation + TracySecureAlloc( p, size ); + } +} + +void CcpTelemetryTrackDeallocation( void* p ) +{ + if ( p && CcpMemoryProfilingIsEnabled() && CcpTelemetryIsConnected() ) + { + TracySecureFree( p ); + } +} + uint32_t CcpTelemetryGetTickCount() { return s_telemetryTick; @@ -274,7 +295,9 @@ void CcpTelemetrySetActiveFiber( FiberNameStore::const_iterator elem ) if ( existing != t_taskletZoneStore.end() && ! 
( t_taskletZoneStore.key_comp()( t_activeFiber, existing->first ) ) ) { t_activeTaskletZoneStore = existing; - } else { + } + else + { t_activeTaskletZoneStore = t_taskletZoneStore.emplace_hint( existing, t_activeFiber, std::stack() ); } // CCP_LOG_CH( s_ch, "[Fiber %p] [Store %p] Setting active tasklet zone store", t_activeFiber, t_activeTaskletZoneStore ); @@ -383,7 +406,9 @@ void CcpTelemetryLeaveZone( void* key ) { t_activeTaskletZoneStore->second.pop(); } - t_manuallyTrackedZones.erase( key ); + if ( t_activeTaskletZoneStore->second.empty() ) { + t_manuallyTrackedZones.erase( key ); + } } } @@ -415,6 +440,11 @@ bool CcpTelemetryIsStarted() return false; } +bool CcpMemoryProfilingIsEnabled() +{ + return false; +} + void CcpRegisterThread( CcpThreadId_t threadId, const char* name ) { } diff --git a/include/CCPLog.h b/include/CCPLog.h index a508db8..0727cee 100644 --- a/include/CCPLog.h +++ b/include/CCPLog.h @@ -187,7 +187,7 @@ CARBON_CORE_API const char* GetLastErrorMessage(); // Throws a std exception with string 'message' and logs out the 'message' inline void Throw( const char* message ) { - CCP_LOGERR( message ); + CCP_LOGERR( "%s", message ); CCP_LOGWARN( "Exception thrown" ); throw std::runtime_error( message ); } diff --git a/include/CcpTelemetry.h b/include/CcpTelemetry.h index 0c73691..e229faf 100644 --- a/include/CcpTelemetry.h +++ b/include/CcpTelemetry.h @@ -44,6 +44,7 @@ struct CcpTelemetryConfig { std::string applicationName; std::chrono::milliseconds captureDuration{}; + bool trackMemoryAllocations{false}; }; [[deprecated( "Use `CcpStartTelemetry( const CcpTelemetryConfig& config ) instead" )]] CARBON_CORE_API bool CcpStartTelemetry( const char* server, int connectionType, uint32_t maxThreadCount ); @@ -66,6 +67,7 @@ CARBON_CORE_API void CcpUnregisterTelemetryEventHandler( CcpOnTelemetryEventHand CARBON_CORE_API bool CcpTelemetryIsConnectionRequested(); CARBON_CORE_API bool CcpTelemetryIsConnected(); CARBON_CORE_API bool CcpTelemetryIsStarted(); 
+CARBON_CORE_API bool CcpMemoryProfilingIsEnabled(); CARBON_CORE_API void CcpTelemetrySetActiveFiber( const std::string& name ); CARBON_CORE_API const std::string& CcpTelemetryGetActiveFiber(); @@ -95,4 +97,6 @@ CARBON_CORE_API void CcpTelemetryEnterZone( void* key, const char* name, const c CARBON_CORE_API void CcpTelemetryLeaveZone( void* key ); CARBON_CORE_API void CcpTelemetryZoneAddText( void* key, const char* text ); +void CcpTelemetryTrackAllocation( void*, size_t ); +void CcpTelemetryTrackDeallocation( void* ); #endif diff --git a/tests/CCPLog.cpp b/tests/CCPLog.cpp index eb48528..4fbfd67 100644 --- a/tests/CCPLog.cpp +++ b/tests/CCPLog.cpp @@ -42,7 +42,7 @@ class CCPLog : public ::testing::Test TEST_F ( CCPLog, TestCanLogSingleLine ) { CCP::RegisterLogEcho( LogTracker, CCP::LOGTYPE_INFO, true ); - const char *s = "One line"; + constexpr const char *s = "One line"; CCP_LOG(s); EXPECT_EQ( 1, logstack.size() ); @@ -52,7 +52,7 @@ TEST_F ( CCPLog, TestCanLogSingleLine ) TEST_F ( CCPLog, TestDefaultLogLevelIsInfo ) { CCP::RegisterLogEcho( LogTracker, CCP::LOGTYPE_INFO, true ); - const char *s = "One line"; + constexpr const char *s = "One line"; CCP_LOG(s); EXPECT_EQ( 1, logstack.size() ); @@ -116,15 +116,15 @@ TEST_F ( CCPLog, CanUnregisterCallbackHandler ) TEST_F ( CCPLog, GetLastErrorMessageReturnsLastError ) { - const char* expected = "Something has gone horribly wrong."; - CCP_LOGERR(expected); + constexpr const char* expected = "Something has gone horribly wrong."; + CCP_LOGERR( expected ); const char* actual = CCP::GetLastErrorMessage(); EXPECT_STREQ( expected, actual ); } TEST_F ( CCPLog, ThrowLastErrorThrowsAnError ) { - const char* error_str = "Something has gone horribly wrong."; + constexpr const char* error_str = "Something has gone horribly wrong."; CCP_LOGERR( error_str ); EXPECT_ANY_THROW( {CCP::ThrowLastError();} ); } @@ -133,7 +133,7 @@ TEST_F ( CCPLog, LogEnormousLine ) { CCP::RegisterLogEcho( LogTracker, CCP::LOGTYPE_INFO, true ); 
CCP::SetLogMainThreadId(); - const char* enormous_line = + constexpr const char* enormous_line = "Testing a really long log line. The log server can't handle log lines\n" "that are longer than 253 characters, so we need a test case for it.\n" "Somewhere along the line we need to split the log message into several\n" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e6bc47a..17f117f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,6 +3,7 @@ enable_testing() find_package(GTest CONFIG REQUIRED) +find_package(lz4 CONFIG REQUIRED) add_executable(CcpCoreTest CcpAtomic.cpp CCPCallstack.cpp @@ -19,8 +20,12 @@ add_executable(CcpCoreTest StringConversions.cpp TempFile.cpp CCPLog.cpp + TracyTestClient.cpp ) -target_link_libraries(CcpCoreTest PRIVATE CcpCore GTest::gtest GTest::gtest_main) +target_link_libraries(CcpCoreTest PRIVATE CcpCore GTest::gtest GTest::gtest_main lz4::lz4) +if(WIN32) + target_link_libraries(CcpCoreTest PRIVATE ws2_32) +endif() if(APPLE) target_link_libraries(CcpCoreTest PRIVATE "-framework CoreFoundation") endif() diff --git a/tests/CcpTelemetry.cpp b/tests/CcpTelemetry.cpp index 91ffbf4..27b166c 100644 --- a/tests/CcpTelemetry.cpp +++ b/tests/CcpTelemetry.cpp @@ -2,52 +2,230 @@ #include +#include + #include +// How can we test telemetry-related functionality to ensure our bookkeeping +// there is sane? +// The problem is that such tests need `ProfilerState::Started` in order to +// create a valid zone context for testing. This, in turn, needs +// `TracyIsConnected` to be true. +// +// A first thought may be to simply redefine the macro to always return true. +// However, this is not possible because it is set inside the Tracy header. +// +// The next idea, then, would be to mock the `Profiler` class. However, this +// also is not possible because the `Profiler` class is not virtual. +// +// This leads to the next idea of choosing the concrete `Profiler` class type +// based on a template parameter. 
This is not possible either because the macros +// exposed by Tracy would not honor any such template parameter. +// +// With all this in mind, there is another aspect to consider: +// If we wanted to inspect more of the functionality, then we almost certainly +// want to provide a test implementation of the tracy network protocol. Fortunately, +// tracy itself already provides many of the building blocks for this. So this +// includes the AI-written, but human-reviewed test client. +#include "TracyTestClient.h" + class CcpTelemetryTest : public ::testing::Test { protected: CcpTelemetryTest() = default; ~CcpTelemetryTest() override = default; - void SetUp() override { + void SetUp() override + { + ::testing::Test::SetUp(); + SetUp(true); + } + + void SetUp(bool doTestClientConnect) + { CcpTelemetryConfig conf{ "Telemetry Tests" }; EXPECT_EQ( conf.captureDuration, std::chrono::milliseconds::zero() ); CcpStartTelemetry( conf ); - while ( !TracyIsStarted ) + + // Tick until the profiler's listen socket is up. + while( !TracyIsStarted ) { - CcpTelemetryTick(); - std::this_thread::yield(); + TickTelemetry(); } + + // Connect on a background thread so this thread can keep ticking Tracy. + // The handshake requires both sides to run concurrently: Tracy's worker + // sends data and may block on Send() until the client reads it. + auto connectFuture = doTestClientConnect + ? std::async( std::launch::async, [this] { return m_tracyClient.Connect(); } ) + : std::async( std::launch::deferred, [] { return true; } ); + + // Tick until CcpTelemetry recognises the connection and enters Started state. + while( !CcpTelemetryIsConnected() ) + { + TickTelemetry(); + } + + ASSERT_TRUE( connectFuture.get() ) << "Could not connect to Tracy profiler"; } - void TearDown() override { + void TearDown() override + { + // m_tracyClient.Disconnect(); // Remove explicit call to Disconnect() because current implementation does NOT call tracy::ShutdownProfiler(). 
CcpStopTelemetry(); + ::testing::Test::TearDown(); + } + + void TickTelemetry( std::chrono::milliseconds duration = std::chrono::milliseconds( 500 ) ) + { + const auto deadline = std::chrono::steady_clock::now() + duration; + while( std::chrono::steady_clock::now() < deadline ) + { + CcpTelemetryTick(); + std::this_thread::sleep_for( std::chrono::milliseconds( 5 ) ); + } } const std::string expectedNoFiber; - const std::string expectedFiberName{"TestFiber"}; - const std::string expectedFiberName2{"TestFiber"}; + const std::string expectedFiberName1{ "TestFiber1" }; + const std::string expectedFiberName2{ "TestFiber2" }; + + TracyTestClient m_tracyClient; }; TEST_F( CcpTelemetryTest, TestFiberSwitching ) { - CcpTelemetrySetActiveFiber( expectedFiberName ); + CcpTelemetrySetActiveFiber( expectedFiberName1 ); const auto& observedFiberName1 = CcpTelemetryGetActiveFiber(); - EXPECT_EQ( observedFiberName1, expectedFiberName ); + EXPECT_EQ( observedFiberName1, expectedFiberName1 ); + + // Switching to a new name CcpTelemetrySetActiveFiber( expectedFiberName2 ); const auto& observedFiberName2 = CcpTelemetryGetActiveFiber(); EXPECT_EQ( observedFiberName2, expectedFiberName2 ); + + // Switching back to the original name + CcpTelemetrySetActiveFiber( expectedFiberName1 ); const auto& observedFiberName3 = CcpTelemetryGetActiveFiber(); - CcpTelemetrySetActiveFiber( expectedFiberName ); EXPECT_EQ( observedFiberName1.c_str(), observedFiberName3.c_str() ); + + // Switching to the "Root name" (no name) CcpTelemetrySetActiveFiber( "" ); EXPECT_EQ( CcpTelemetryGetActiveFiber(), expectedNoFiber ); } TEST_F( CcpTelemetryTest, RemovingActiveFiberClearsIt ) { - CcpTelemetrySetActiveFiber( expectedFiberName ); - CcpTelemetryRemoveFiber( expectedFiberName ); + CcpTelemetrySetActiveFiber( expectedFiberName1 ); + CcpTelemetryRemoveFiber( expectedFiberName1 ); EXPECT_EQ( CcpTelemetryGetActiveFiber(), expectedNoFiber ); } + +TEST_F( CcpTelemetryTest, SimpleZoneTest ) +{ + static int key = 
4711; + const std::string zoneName{ "TestZone" }; + EXPECT_TRUE( CcpTelemetryIsConnected() ); + CcpTelemetryEnterZone( &key, zoneName.c_str(), __FILE__, __LINE__ ); + + // Tracy's worker sleeps up to 10 ms between queue flushes, so give it + // time to process and send the zone event before asserting. + TickTelemetry(); + + EXPECT_EQ( 1, m_tracyClient.GetZoneBeginCount() ); + auto tracyZones = m_tracyClient.GetZones(); + // CcpTelemetryEnterZone passes the zone name as the Tracy "function" field + // (via the 6-param ___tracy_alloc_srcloc), so match against the function field only. + auto pred = [&zoneName]( const TracyTestClient::ZoneInfo& elem ) -> bool { + return elem.function == zoneName; + }; + EXPECT_NE( tracyZones.end(), std::find_if( tracyZones.begin(), tracyZones.end(), pred ) ); + + CcpTelemetryLeaveZone( &key ); + TickTelemetry(); + EXPECT_EQ( 1, m_tracyClient.GetZoneEndCount() ); + tracyZones = m_tracyClient.GetZones(); + EXPECT_EQ( tracyZones.end(), std::find_if( tracyZones.begin(), tracyZones.end(), pred ) ); +} + +TEST_F( CcpTelemetryTest, StackedZones ) +{ + // A stacked zone is a zone that has the same key as a previously created zone. 
+ static int key = 4711; + CcpTelemetryEnterZone( &key, "TestZone", __FILE__, __LINE__ ); + CcpTelemetryEnterZone( &key, "TestZone2", __FILE__, __LINE__ ); + TickTelemetry(); + auto tracyZones = m_tracyClient.GetZones(); + EXPECT_EQ( 2, tracyZones.size() ); + CcpTelemetryLeaveZone( &key ); + TickTelemetry(); + tracyZones = m_tracyClient.GetZones(); + EXPECT_EQ( 1, tracyZones.size() ); + CcpTelemetryLeaveZone( &key ); + TickTelemetry(); + EXPECT_TRUE( m_tracyClient.GetZones().empty() ); +} + +TEST_F( CcpTelemetryTest, ReStartAfterStop ) +{ + // Setup takes care of connecting to the TracyTestClient + EXPECT_TRUE( m_tracyClient.IsConnected() ); + + static int key1 = 1001; + const std::string zoneName1{ "FirstZone" }; + CcpTelemetryEnterZone( &key1, zoneName1.c_str(), __FILE__, __LINE__ ); + + TickTelemetry(); + auto tracyZones = m_tracyClient.GetZones(); + auto pred = [&zoneName1]( const TracyTestClient::ZoneInfo& elem ) -> bool { + return elem.function == zoneName1; + }; + EXPECT_NE( tracyZones.end(), std::find_if( tracyZones.begin(), tracyZones.end(), pred ) ); + EXPECT_EQ( 1, tracyZones.size() ); + EXPECT_EQ( 1, m_tracyClient.GetZoneBeginCount() ); + EXPECT_EQ( 0, m_tracyClient.GetZoneEndCount() ); + + CcpTelemetryLeaveZone( &key1 ); + + TickTelemetry(); + EXPECT_TRUE( m_tracyClient.GetZones().empty() ); + EXPECT_EQ( 1, m_tracyClient.GetZoneEndCount() ); + EXPECT_TRUE( CcpTelemetryIsConnected() ); + EXPECT_TRUE( m_tracyClient.IsConnected() ); + + // Now simulate "Stop Telemetry" operation and Tick until we're in "Stopped" state + CcpStopTelemetry(); + TickTelemetry(); // This processes the "StopRequested" state. + TickTelemetry(); // This processes the "Stopped" state. 
+ EXPECT_TRUE( m_tracyClient.IsConnected() ) << "Connection should still be true at this point because the TracyTestClient hasn't been disconnected"; + EXPECT_FALSE( CcpTelemetryIsStarted() ) << "Internal profiler state should have changed: Started->StopRequested->Stopped"; + + // Simulate a new call to StartTelemetry (no reconnect: the test client socket is still open) + SetUp( false ); + EXPECT_TRUE( m_tracyClient.IsConnected() ) << "Connection should still be true because the TracyTestClient has never been disconnected"; + EXPECT_TRUE( CcpTelemetryIsStarted() ) << "Internal profiler state should be Started again after the restart"; + + // Emit a new Zone, on the 2nd Start and validate + static int key2 = 1002; + const std::string zoneName2{ "SecondZone" }; + CcpTelemetryEnterZone( &key2, zoneName2.c_str(), __FILE__, __LINE__ ); + + TickTelemetry(); + auto tracyZones2ndStart = m_tracyClient.GetZones(); + auto pred2nd = [&zoneName2]( const TracyTestClient::ZoneInfo& elem ) -> bool { + return elem.function == zoneName2; + }; + EXPECT_NE( tracyZones2ndStart.end(), std::find_if( tracyZones2ndStart.begin(), tracyZones2ndStart.end(), pred2nd ) ); + EXPECT_EQ( 1, tracyZones2ndStart.size() ); + EXPECT_EQ( 2, m_tracyClient.GetZoneBeginCount() ) << "The total Begin Zone count should be 2, even after Stop/Start"; + EXPECT_EQ( 1, m_tracyClient.GetZoneEndCount() ) << "The total End Zone count should be 1 at this point, because only FirstZone has ended"; + + CcpTelemetryLeaveZone( &key2 ); + + TickTelemetry(); + EXPECT_TRUE( m_tracyClient.GetZones().empty() ); + EXPECT_EQ( 2, m_tracyClient.GetZoneEndCount() ) << "The total End Zone count should be 2, FirstZone (before the Stop) and SecondZone from after the Stop/Start"; + EXPECT_TRUE( CcpTelemetryIsConnected() ); + EXPECT_TRUE( m_tracyClient.IsConnected() ); +} + diff --git a/tests/TracyTestClient.cpp b/tests/TracyTestClient.cpp new file mode 100644 index 0000000..e7fa898 --- /dev/null +++ b/tests/TracyTestClient.cpp @@ -0,0 +1,714 @@ +// Copyright © 2025 CCP 
ehf. +#include "TracyTestClient.h" + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +# include + using socket_t = SOCKET; + static constexpr socket_t kInvalidSocket = INVALID_SOCKET; +# define sock_close( s ) ::closesocket( s ) +#else +# include +# include +# include +# include +# include + using socket_t = int; + static constexpr socket_t kInvalidSocket = -1; +# define sock_close( s ) ::close( s ) +#endif + +static constexpr int kReadTimeoutMs = 100; + +// --------------------------------------------------------------------------- +// TCP socket (POSIX + Winsock) +// --------------------------------------------------------------------------- + +namespace { + +struct TcpSocket +{ + socket_t fd = kInvalidSocket; + + bool ConnectBlocking( const char* addr, uint16_t port ) + { + fd = ::socket( AF_INET, SOCK_STREAM, 0 ); + if( fd == kInvalidSocket ) return false; + struct sockaddr_in sa{}; + sa.sin_family = AF_INET; + sa.sin_port = htons( port ); + if( ::inet_pton( AF_INET, addr, &sa.sin_addr ) != 1 ) + { + sock_close( fd ); fd = kInvalidSocket; return false; + } + if( ::connect( fd, reinterpret_cast( &sa ), sizeof( sa ) ) != 0 ) + { + sock_close( fd ); fd = kInvalidSocket; return false; + } + return true; + } + + void Send( const void* buf, int len ) + { + ::send( fd, static_cast( buf ), len, 0 ); + } + + bool ReadRaw( void* buf, int len, int timeoutMs ) + { + auto* p = static_cast( buf ); + while( len > 0 ) + { + fd_set fds; + FD_ZERO( &fds ); + FD_SET( fd, &fds ); + struct timeval tv{}; + tv.tv_sec = timeoutMs / 1000; + tv.tv_usec = ( timeoutMs % 1000 ) * 1000; + // nfds is ignored on Windows; on POSIX it must be fd + 1. 
+#ifdef _WIN32 + if( ::select( 0, &fds, nullptr, nullptr, &tv ) <= 0 ) return false; +#else + if( ::select( fd + 1, &fds, nullptr, nullptr, &tv ) <= 0 ) return false; +#endif + const int n = static_cast( ::recv( fd, p, len, 0 ) ); + if( n <= 0 ) return false; + p += n; + len -= n; + } + return true; + } + + void Close() + { + if( fd != kInvalidSocket ) { sock_close( fd ); fd = kInvalidSocket; } + } + + bool IsValid() const { return fd != kInvalidSocket; } +}; + +// --------------------------------------------------------------------------- +// Tracy wire-protocol constants and types (Tracy 0.13.1) +// Derived from TracyProtocol.hpp / TracyQueue.hpp — no Tracy headers needed. +// --------------------------------------------------------------------------- + +static constexpr uint32_t kProtocolVersion = 76; +static constexpr uint32_t kTargetFrameSize = 256 * 1024; +static constexpr unsigned kLZ4Size = kTargetFrameSize + ( kTargetFrameSize / 255 ) + 16; +static constexpr int kHandshakeShibbolethSize = 8; +static const char kHandshakeShibboleth[kHandshakeShibbolethSize] = { 'T', 'r', 'a', 'c', 'y', 'P', 'r', 'f' }; + +enum HandshakeStatus : uint8_t +{ + HandshakePending = 0, + HandshakeWelcome = 1, + HandshakeProtocolMismatch = 2, + HandshakeNotAvailable = 3, + HandshakeDropped = 4, +}; + +#pragma pack( push, 1 ) + +struct WelcomeMessage +{ + double timerMul; + int64_t initBegin; + int64_t initEnd; + uint64_t resolution; + uint64_t epoch; + uint64_t exectime; + uint64_t pid; + int64_t samplingPeriod; + uint8_t flags; + uint8_t cpuArch; + char cpuManufacturer[12]; + uint32_t cpuId; + char programName[64]; + char hostInfo[1024]; +}; + +struct OnDemandPayloadMessage +{ + uint64_t frames; + uint64_t currentTime; +}; + +// Only the server-query value we actually emit. 
+static constexpr uint8_t kServerQueryFiberName = 7; + +struct ServerQueryPacket +{ + uint8_t type; + uint64_t ptr; + uint32_t extra; +}; + +// Minimal queue-item structs — only fields accessed in ProcessDecompressedData. +struct QueueHeader { uint8_t idx; }; +struct QueueThreadContext { uint32_t thread; }; + +struct QueueFiberEnter +{ + int64_t time; + uint64_t fiber; + uint32_t thread; + int32_t groupHint; +}; + +struct QueueFiberLeave +{ + int64_t time; + uint32_t thread; +}; + +struct QueueStringTransfer { uint64_t ptr; }; + +// QueueItem matches Tracy's 32-byte packed union layout. +struct QueueItem +{ + QueueHeader hdr; + union { + QueueThreadContext threadCtx; + QueueFiberEnter fiberEnter; + QueueFiberLeave fiberLeave; + QueueStringTransfer stringTransfer; + char _pad[31]; + }; +}; +static_assert( sizeof( QueueItem ) == 32, "QueueItem size mismatch" ); + +#pragma pack( pop ) + +// QueueType index constants (QueueType enum : uint8_t from TracyQueue.hpp). +static constexpr uint8_t kQueueZoneBeginAllocSrcLoc = 7; +static constexpr uint8_t kQueueZoneBeginAllocSrcLocCallstack = 8; +static constexpr uint8_t kQueueZoneBegin = 15; +static constexpr uint8_t kQueueZoneBeginCallstack = 16; +static constexpr uint8_t kQueueZoneEnd = 17; +static constexpr uint8_t kQueueFiberEnter = 58; +static constexpr uint8_t kQueueFiberLeave = 59; +static constexpr uint8_t kQueueTerminate = 60; +static constexpr uint8_t kQueueThreadContext = 62; +static constexpr uint8_t kQueueSingleStringData = 99; +static constexpr uint8_t kQueueSecondStringData = 100; +static constexpr uint8_t kQueueStringDataFirst = 104; // indices >= this carry QueueStringTransfer +static constexpr uint8_t kQueueSourceLocationPayload = 107; +static constexpr uint8_t kQueueFrameImageData = 111; +static constexpr uint8_t kQueueSymbolCode = 114; +static constexpr uint8_t kQueueSourceCode = 115; +static constexpr uint8_t kQueueFiberName = 116; +static constexpr uint8_t kQueueNumTypes = 117; + +// Wire-format byte size 
of each queue item indexed by QueueType. +// Mirrors QueueDataSize[] from TracyQueue.hpp with #pragma pack(1) struct sizes. +static const size_t kQueueDataSize[kQueueNumTypes] = { + 1, // 0 ZoneText + 1, // 1 ZoneName + 9, // 2 Message {int64_t} + 12, // 3 MessageColor {int64_t, 3×uint8_t} + 9, // 4 MessageCallstack + 12, // 5 MessageColorCallstack + 9, // 6 MessageAppInfo + 9, // 7 ZoneBeginAllocSrcLoc {int64_t} + 9, // 8 ZoneBeginAllocSrcLocCallstack + 1, // 9 CallstackSerial + 1, // 10 Callstack + 1, // 11 CallstackAlloc + 13, // 12 CallstackSample {int64_t, uint32_t} + 13, // 13 CallstackSampleContextSwitch + 10, // 14 FrameImage {uint32_t, uint16_t, uint16_t, uint8_t} + 17, // 15 ZoneBegin {int64_t, uint64_t} + 17, // 16 ZoneBeginCallstack + 9, // 17 ZoneEnd {int64_t} + 17, // 18 LockWait {uint32_t, uint32_t, int64_t} + 17, // 19 LockObtain + 13, // 20 LockRelease {uint32_t, int64_t} + 17, // 21 LockSharedWait + 17, // 22 LockSharedObtain + 17, // 23 LockSharedRelease {uint32_t, int64_t, uint32_t} + 5, // 24 LockName {uint32_t} + 27, // 25 MemAlloc {int64_t, uint32_t, uint64_t, char[6]} + 27, // 26 MemAllocNamed + 21, // 27 MemFree {int64_t, uint32_t, uint64_t} + 21, // 28 MemFreeNamed + 27, // 29 MemAllocCallstack + 27, // 30 MemAllocCallstackNamed + 21, // 31 MemFreeCallstack + 21, // 32 MemFreeCallstackNamed + 21, // 33 MemDiscard + 21, // 34 MemDiscardCallstack + 24, // 35 GpuZoneBegin {int64_t, uint32_t, uint16_t, uint8_t, uint64_t} + 24, // 36 GpuZoneBeginCallstack + 16, // 37 GpuZoneBeginAllocSrcLoc {int64_t, uint32_t, uint16_t, uint8_t} + 16, // 38 GpuZoneBeginAllocSrcLocCallstack + 16, // 39 GpuZoneEnd + 24, // 40 GpuZoneBeginSerial + 24, // 41 GpuZoneBeginCallstackSerial + 16, // 42 GpuZoneBeginAllocSrcLocSerial + 16, // 43 GpuZoneBeginAllocSrcLocCallstackSerial + 16, // 44 GpuZoneEndSerial + 25, // 45 PlotDataInt {uint64_t, int64_t, int64_t} + 21, // 46 PlotDataFloat {uint64_t, int64_t, float} + 25, // 47 PlotDataDouble {uint64_t, int64_t, double} + 
23, // 48 ContextSwitch {int64_t, 2×uint32_t, 4×uint8_t, 2×int8_t} + 16, // 49 ThreadWakeup {int64_t, uint32_t, uint8_t, 2×int8_t} + 12, // 50 GpuTime {int64_t, uint16_t, uint8_t} + 2, // 51 GpuContextName {uint8_t} + 10, // 52 GpuAnnotationName {int64_t, uint8_t} + 10, // 53 CallstackFrameSize {uint64_t, uint8_t} + 13, // 54 SymbolInformation {uint32_t, uint64_t} + 1, // 55 ExternalNameMetadata (not wire-transferred) + 1, // 56 SymbolCodeMetadata (not wire-transferred) + 1, // 57 SourceCodeMetadata (not wire-transferred) + 25, // 58 FiberEnter {int64_t, uint64_t, uint32_t, int32_t} + 13, // 59 FiberLeave {int64_t, uint32_t} + 1, // 60 Terminate + 1, // 61 KeepAlive + 5, // 62 ThreadContext {uint32_t} + 26, // 63 GpuCalibration {int64_t, int64_t, int64_t, uint8_t} + 18, // 64 GpuTimeSync {int64_t, int64_t, uint8_t} + 1, // 65 Crash + 17, // 66 CrashReport {int64_t, uint64_t} + 5, // 67 ZoneValidation {uint32_t} + 4, // 68 ZoneColor {3×uint8_t} + 9, // 69 ZoneValue {uint64_t} + 17, // 70 FrameMarkMsg {int64_t, uint64_t} + 17, // 71 FrameMarkMsgStart + 17, // 72 FrameMarkMsgEnd + 13, // 73 FrameVsync {int64_t, uint32_t} + 32, // 74 SourceLocation {3×uint64_t, uint32_t, 3×uint8_t} + 22, // 75 LockAnnounce {uint32_t, int64_t, uint64_t, uint8_t} + 13, // 76 LockTerminate {uint32_t, int64_t} + 17, // 77 LockMark {2×uint32_t, uint64_t} + 17, // 78 MessageLiteral {int64_t, uint64_t} + 20, // 79 MessageLiteralColor {int64_t, 3×uint8_t, uint64_t} + 17, // 80 MessageLiteralCallstack + 20, // 81 MessageLiteralColorCallstack + 28, // 82 GpuNewContext {2×int64_t, uint32_t, float, 3×uint8_t} + 17, // 83 CallstackFrame {uint32_t, uint64_t, uint32_t} + 13, // 84 SysTimeReport {int64_t, float} + 25, // 85 SysPowerReport {3×int64_t/uint64_t} + 17, // 86 TidToPid {2×uint64_t} + 17, // 87 HwSampleCpuCycle {uint64_t, int64_t} + 17, // 88 HwSampleInstructionRetired + 17, // 89 HwSampleCacheReference + 17, // 90 HwSampleCacheMiss + 17, // 91 HwSampleBranchRetired + 17, // 92 
HwSampleBranchMiss + 16, // 93 PlotConfig {uint64_t, 3×uint8_t, uint32_t} + 18, // 94 ParamSetup {uint32_t, uint64_t, uint8_t, int32_t} + 1, // 95 AckServerQueryNoop + 5, // 96 AckSourceCodeNotAvailable {uint32_t} + 1, // 97 AckSymbolCodeNotAvailable + 17, // 98 CpuTopology {4×uint32_t} + 1, // 99 SingleStringData (variable-length; header only in fixed table) + 1, // 100 SecondStringData (variable-length; header only in fixed table) + 9, // 101 MemNamePayload {uint64_t} + 9, // 102 ThreadGroupHint {uint32_t, int32_t} + 24, // 103 GpuZoneAnnotation {int64_t, double, uint32_t, uint16_t, uint8_t} + // indices >= kQueueStringDataFirst carry QueueStringTransfer + variable string + 9, // 104 StringData {QueueStringTransfer} + 9, // 105 ThreadName + 9, // 106 PlotName + 9, // 107 SourceLocationPayload + 9, // 108 CallstackPayload + 9, // 109 CallstackAllocPayload + 9, // 110 FrameName + 9, // 111 FrameImageData + 9, // 112 ExternalName + 9, // 113 ExternalThreadName + 9, // 114 SymbolCode + 9, // 115 SourceCode + 9, // 116 FiberName +}; + +} // anonymous namespace + +// --------------------------------------------------------------------------- +// TracyTestClient +// --------------------------------------------------------------------------- + +TracyTestClient::TracyTestClient() + : m_socket( new TcpSocket() ) + , m_lz4Stream( LZ4_createStreamDecode() ) + , m_ringBuffer( new char[kTargetFrameSize * 3] ) // 3 frames: RecvLoop rewinds only once the offset exceeds 2 * kTargetFrameSize, and a full frame may still be decompressed from there +{ +#ifdef _WIN32 + WSADATA wsaData; + WSAStartup( MAKEWORD( 2, 2 ), &wsaData ); +#endif +} + +TracyTestClient::~TracyTestClient() +{ + Disconnect(); + delete static_cast( m_socket ); + LZ4_freeStreamDecode( static_cast( m_lz4Stream ) ); + delete[] m_ringBuffer; +#ifdef _WIN32 + WSACleanup(); +#endif +} + +bool TracyTestClient::Connect( const char* addr, uint16_t port, int timeoutMs ) +{ + auto& sock = *static_cast( m_socket ); + const auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds( timeoutMs ); + + // Retry until we connect or time out, 
since the profiler's listen socket may + // not be ready immediately after TracyIsStarted becomes true. + while( std::chrono::steady_clock::now() < deadline ) + { + if( sock.ConnectBlocking( addr, port ) ) + break; + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } + if( !sock.IsValid() ) + return false; + + // Send handshake shibboleth and protocol version. + sock.Send( kHandshakeShibboleth, kHandshakeShibbolethSize ); + uint32_t proto = kProtocolVersion; + sock.Send( &proto, sizeof( proto ) ); + + // Receive handshake status. + HandshakeStatus status; + if( !sock.ReadRaw( &status, sizeof( status ), 2000 ) || status != HandshakeWelcome ) + { + sock.Close(); + return false; + } + + // Receive the welcome message. + WelcomeMessage welcome; + if( !sock.ReadRaw( &welcome, sizeof( welcome ), 5000 ) ) + { + sock.Close(); + return false; + } + + // With TRACY_ON_DEMAND the profiler sends an extra OnDemandPayloadMessage. + OnDemandPayloadMessage onDemand; + if( !sock.ReadRaw( &onDemand, sizeof( onDemand ), 5000 ) ) + { + sock.Close(); + return false; + } + + // Reset the LZ4 streaming context for the new connection. 
+ LZ4_setStreamDecode( static_cast( m_lz4Stream ), nullptr, 0 ); + m_bufferOffset = 0; + + m_connected.store( true, std::memory_order_release ); + m_shutdown.store( false, std::memory_order_relaxed ); + m_recvThread = std::thread( &TracyTestClient::RecvLoop, this ); + return true; +} + +void TracyTestClient::Disconnect() +{ + if( !m_connected.load( std::memory_order_acquire ) && !m_recvThread.joinable() ) + return; + + m_shutdown.store( true, std::memory_order_release ); + + if( m_recvThread.joinable() ) + m_recvThread.join(); + + m_connected.store( false, std::memory_order_release ); +} + +bool TracyTestClient::IsConnected() const +{ + return m_connected.load( std::memory_order_acquire ); +} + +std::vector TracyTestClient::GetZones() const +{ + std::lock_guard lock( m_dataMutex ); + std::vector result; + for( const auto& [tid, stack] : m_threadZoneStacks ) + result.insert( result.end(), stack.begin(), stack.end() ); + for( const auto& [fptr, stack] : m_fiberZoneStacks ) + result.insert( result.end(), stack.begin(), stack.end() ); + return result; +} + +TracyTestClient::ZoneStack TracyTestClient::GetZonesForThread( uint32_t threadId ) const +{ + std::lock_guard lock( m_dataMutex ); + auto it = m_threadZoneStacks.find( threadId ); + if( it == m_threadZoneStacks.end() ) + return {}; + return it->second; +} + +TracyTestClient::ZoneStack TracyTestClient::GetZonesForFiber( const std::string& fiberName ) const +{ + std::lock_guard lock( m_dataMutex ); + for( const auto& [ptr, name] : m_fiberNames ) + { + if( name == fiberName ) + { + auto it = m_fiberZoneStacks.find( ptr ); + if( it != m_fiberZoneStacks.end() ) + return it->second; + } + } + return {}; +} + +std::vector TracyTestClient::GetFiberNames() const +{ + std::lock_guard lock( m_dataMutex ); + std::vector names; + names.reserve( m_fiberNames.size() ); + for( const auto& [ptr, name] : m_fiberNames ) + names.push_back( name ); + return names; +} + +// 
--------------------------------------------------------------------------- +// Private helpers +// --------------------------------------------------------------------------- + +void TracyTestClient::SendQueryLocked( uint8_t queryType, uint64_t ptr, uint32_t extra ) +{ + ServerQueryPacket pkt; + pkt.type = queryType; + pkt.ptr = ptr; + pkt.extra = extra; + std::lock_guard lock( m_sendMutex ); + static_cast( m_socket )->Send( &pkt, static_cast( sizeof( pkt ) ) ); +} + +// Receive loop: reads LZ4-compressed frames and decompresses them. +void TracyTestClient::RecvLoop() +{ + auto& sock = *static_cast( m_socket ); + auto* lz4 = static_cast( m_lz4Stream ); + std::unique_ptr lz4Buf( new char[kLZ4Size] ); + + while( !m_shutdown.load( std::memory_order_relaxed ) ) + { + // Each LZ4 frame is prefixed by its compressed size. + uint32_t compressedSz = 0; + if( !sock.ReadRaw( &compressedSz, sizeof( compressedSz ), kReadTimeoutMs ) ) + continue; + + if( compressedSz > static_cast( kLZ4Size ) ) + { + fprintf( stderr, "Corrupt frame: %zu\n", static_cast( compressedSz ) ); fflush( stderr ); + break; + } + + if( !sock.ReadRaw( lz4Buf.get(), static_cast( compressedSz ), kReadTimeoutMs ) ) + { + fprintf( stderr, "ReadRaw failed to read compressed data\n" ); fflush( stderr ); + break; + } + + // Decompress into the ring buffer using the streaming context so that + // the previous block acts as the LZ4 dictionary. + char* dst = m_ringBuffer + m_bufferOffset; + const int decompressedSz = LZ4_decompress_safe_continue( + lz4, lz4Buf.get(), dst, + static_cast( compressedSz ), static_cast( kTargetFrameSize ) ); + if( decompressedSz < 0 ) + break; // decompression error + + ProcessDecompressedData( dst, decompressedSz ); + + m_bufferOffset += decompressedSz; + if( m_bufferOffset > static_cast( kTargetFrameSize * 2 ) ) + m_bufferOffset = 0; + } + + // Close the socket so Tracy's worker thread sees the connection drop and + // can finish its own shutdown sequence. 
This is necessary whether we exit + // because kQueueTerminate was received or because Disconnect() set m_shutdown. + // TcpSocket::Close() is idempotent, so a double-close from Disconnect() is safe. + sock.Close(); + m_connected.store( false, std::memory_order_release ); +} + +TracyTestClient::ZoneStack& TracyTestClient::CurrentStack( uint32_t thread ) +{ + auto fiberIt = m_threadCurrentFiber.find( thread ); + if( fiberIt != m_threadCurrentFiber.end() && fiberIt->second != 0 ) + return m_fiberZoneStacks[fiberIt->second]; + return m_threadZoneStacks[thread]; +} + +// Parse the decompressed byte stream and update internal state. +void TracyTestClient::ProcessDecompressedData( const char* data, int sz ) +{ + const char* ptr = data; + const char* const end = data + sz; + + while( ptr < end ) + { + const auto* item = reinterpret_cast( ptr ); + const uint8_t idx = item->hdr.idx; + + if( idx >= kQueueStringDataFirst ) + { + // String transfer item: fixed header + QueueStringTransfer, followed by + // a length-prefixed string payload. + if( ptr + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ) > end ) + break; + const uint64_t strPtr = item->stringTransfer.ptr; + ptr += sizeof( QueueHeader ) + sizeof( QueueStringTransfer ); + + if( idx == kQueueFrameImageData || + idx == kQueueSymbolCode || + idx == kQueueSourceCode ) + { + // Large binary payload with uint32_t length prefix. + if( ptr + sizeof( uint32_t ) > end ) break; + uint32_t strSz = 0; + std::memcpy( &strSz, ptr, sizeof( strSz ) ); + ptr += sizeof( strSz ); + if( ptr + strSz > end ) break; + ptr += strSz; + } + else + { + // Normal string payload with uint16_t length prefix. + if( ptr + sizeof( uint16_t ) > end ) break; + uint16_t strSz = 0; + std::memcpy( &strSz, ptr, sizeof( strSz ) ); + ptr += sizeof( strSz ); + if( ptr + strSz > end ) break; + + if( idx == kQueueSourceLocationPayload ) + { + // The profiler sends this immediately before ZoneBeginAllocSrcLoc. 
+ // Format: [uint32_t color][uint32_t line][function\0][source\0][name] + if( strSz >= 9 ) + { + const char* p = ptr; + p += 4; // skip color + uint32_t line = 0; + std::memcpy( &line, p, 4 ); + p += 4; + const char* function = p; + p += std::strlen( function ) + 1; + const char* source = p; + p += std::strlen( source ) + 1; + const size_t nameLen = static_cast( strSz ) - static_cast( p - ptr ); + + std::lock_guard lock( m_dataMutex ); + m_pendingZone = {}; + m_pendingZone.function = function; + m_pendingZone.source = source; + m_pendingZone.line = line; + if( nameLen > 0 ) + m_pendingZone.name = std::string( p, nameLen ); + m_hasPendingZone = true; + } + } + else if( idx == kQueueFiberName ) + { + std::string name( ptr, strSz ); + std::lock_guard lock( m_dataMutex ); + m_fiberNames[strPtr] = std::move( name ); + } + + ptr += strSz; + } + } + else + { + // Fixed-size item (or SingleStringData / SecondStringData special cases). + if( idx == kQueueSingleStringData || idx == kQueueSecondStringData ) + { + ptr += sizeof( QueueHeader ); + if( ptr + sizeof( uint16_t ) > end ) return; + uint16_t strSz = 0; + std::memcpy( &strSz, ptr, sizeof( strSz ) ); + ptr += sizeof( strSz ); + if( ptr + strSz > end ) return; + ptr += strSz; + } + else + { + if( idx >= kQueueNumTypes ) return; + const size_t itemSz = kQueueDataSize[idx]; + if( ptr + itemSz > end ) return; + + switch( idx ) + { + case kQueueThreadContext: + m_currentThread = item->threadCtx.thread; + break; + + case kQueueZoneBeginAllocSrcLoc: + case kQueueZoneBeginAllocSrcLocCallstack: + { + m_zoneBeginCount.fetch_add( 1, std::memory_order_relaxed ); + const uint32_t thread = m_currentThread; + std::lock_guard lock( m_dataMutex ); + if( m_hasPendingZone ) + { + CurrentStack( thread ).push_back( m_pendingZone ); + m_hasPendingZone = false; + } + break; + } + + case kQueueZoneBegin: + case kQueueZoneBeginCallstack: + m_zoneBeginCount.fetch_add( 1, std::memory_order_relaxed ); + break; + + case kQueueZoneEnd: + { + 
m_zoneEndCount.fetch_add( 1, std::memory_order_relaxed ); + const uint32_t thread = m_currentThread; + std::lock_guard lock( m_dataMutex ); + auto& stack = CurrentStack( thread ); + if( !stack.empty() ) + stack.pop_back(); + break; + } + + case kQueueFiberEnter: + { + const uint64_t fiberPtr = item->fiberEnter.fiber; + const uint32_t thread = item->fiberEnter.thread; + std::lock_guard lock( m_dataMutex ); + m_threadCurrentFiber[thread] = fiberPtr; + if( m_queriedFibers.insert( fiberPtr ).second ) + SendQueryLocked( kServerQueryFiberName, fiberPtr ); + break; + } + + case kQueueFiberLeave: + { + const uint32_t thread = item->fiberLeave.thread; + std::lock_guard lock( m_dataMutex ); + m_threadCurrentFiber[thread] = 0; + break; + } + + case kQueueTerminate: + { + m_shutdown.store( true, std::memory_order_release ); + break; + } + + default: + break; + } + + ptr += itemSz; + } + } + } +} diff --git a/tests/TracyTestClient.h b/tests/TracyTestClient.h new file mode 100644 index 0000000..410b378 --- /dev/null +++ b/tests/TracyTestClient.h @@ -0,0 +1,93 @@ +// Copyright © 2025 CCP ehf. +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// A minimal Tracy profiler client for use in unit tests. +// Connects to the Tracy profiler embedded in the test executable, +// receives and parses the event stream, and exposes the collected +// data so tests can make assertions about profiler activity. +class TracyTestClient +{ +public: + struct ZoneInfo + { + std::string name; + std::string function; + std::string source; + uint32_t line = 0; + }; + + using ZoneStack = std::vector; + + TracyTestClient(); + ~TracyTestClient(); + + // Try to connect to the Tracy profiler at addr:port. + // Retries until timeoutMs elapses. Returns true on success. 
+ bool Connect( const char* addr = "127.0.0.1", uint16_t port = 8086, int timeoutMs = 5000 ); + void Disconnect(); + bool IsConnected() const; + + int GetZoneBeginCount() const { return m_zoneBeginCount.load( std::memory_order_relaxed ); } + int GetZoneEndCount() const { return m_zoneEndCount.load( std::memory_order_relaxed ); } + + // Returns all currently open zones across all threads and fibers (flattened). + std::vector GetZones() const; + // Returns the zone stack currently open for the given thread (not including fiber zones). + ZoneStack GetZonesForThread( uint32_t threadId ) const; + // Returns the zone stack currently open for the named fiber. + ZoneStack GetZonesForFiber( const std::string& fiberName ) const; + + std::vector GetFiberNames() const; + + TracyTestClient( const TracyTestClient& ) = delete; + TracyTestClient& operator=( const TracyTestClient& ) = delete; + +private: + void RecvLoop(); + void ProcessDecompressedData( const char* data, int sz ); + void SendQueryLocked( uint8_t queryType, uint64_t ptr, uint32_t extra = 0 ); + + // Returns a reference to the zone stack for the current thread/fiber context. + // Must be called with m_dataMutex held. + ZoneStack& CurrentStack( uint32_t thread ); + + // Opaque handles to Tracy types, allocated on heap to keep Tracy headers out of this header. + void* m_socket = nullptr; // tracy::Socket* + void* m_lz4Stream = nullptr; // LZ4_streamDecode_t* + + // Ring buffer matching Tracy's decompression scheme: + // must be 2 × TargetFrameSize (= 2 × 256 KiB) to serve as LZ4 dictionary. + char* m_ringBuffer = nullptr; + int m_bufferOffset = 0; + + std::thread m_recvThread; + std::atomic m_connected{ false }; + std::atomic m_shutdown{ false }; + std::atomic m_zoneBeginCount{ 0 }; + std::atomic m_zoneEndCount{ 0 }; + + // Current thread established by ThreadContext events (recv thread only, no mutex needed). 
+ uint32_t m_currentThread = 0; + + mutable std::mutex m_dataMutex; + std::mutex m_sendMutex; + + // Source location received from a SourceLocationPayload event, + // to be consumed by the following ZoneBeginAllocSrcLoc event. + ZoneInfo m_pendingZone; + bool m_hasPendingZone = false; + + std::unordered_map m_threadCurrentFiber; // thread id → active fiber ptr (0 = none) + std::unordered_map m_threadZoneStacks; // thread id → zone stack + std::unordered_map m_fiberZoneStacks; // fiber ptr → zone stack + std::unordered_map m_fiberNames; // fiber ptr → name + std::unordered_set m_queriedFibers; // ptrs already queried +}; diff --git a/vcpkg.json b/vcpkg.json index db2ee9a..b128da3 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -16,6 +16,10 @@ "name": "gtest", "version>=": "1.16.0" }, + { + "name": "lz4", + "version>=": "1.9.4" + }, { "name": "python3", "version>=": "3.12.9#1",