From c48049abd7f0402ee684f752a2c7d55ff30ae30c Mon Sep 17 00:00:00 2001 From: caikun Date: Tue, 24 Oct 2023 11:34:11 +0800 Subject: [PATCH 1/4] support device activity --- libkineto/CMakeLists.txt | 17 ++++++-- libkineto/include/DeviceActivityInterface.h | 44 ++++++++++++++++++++ libkineto/src/ActivityProfilerController.cpp | 6 +++ libkineto/src/ActivityProfilerProxy.cpp | 19 +++++++++ libkineto/src/CuptiActivityProfiler.cpp | 31 ++++++++------ libkineto/src/CuptiActivityProfiler.h | 4 ++ 6 files changed, 106 insertions(+), 15 deletions(-) create mode 100644 libkineto/include/DeviceActivityInterface.h diff --git a/libkineto/CMakeLists.txt b/libkineto/CMakeLists.txt index 0ef479f65..d25bbd359 100644 --- a/libkineto/CMakeLists.txt +++ b/libkineto/CMakeLists.txt @@ -25,10 +25,13 @@ endfunction() project(kineto VERSION 0.1 LANGUAGES CXX C) +add_compile_options(-fPIC) + set(KINETO_LIBRARY_TYPE "default" CACHE STRING "Type of library (default, static or shared) to build") set_property(CACHE KINETO_LIBRARY_TYPE PROPERTY STRINGS default shared) option(KINETO_BUILD_TESTS "Build kineto unit tests" ON) +option(KINETO_USE_DEVICE_ACTIVITY "using DeviceActivityInterface to collect device activity" OFF) set(LIBKINETO_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") set(LIBKINETO_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") @@ -53,21 +56,24 @@ endif() # Set LIBKINETO_NOCUPTI to explicitly disable CUPTI # Otherwise, CUPTI is disabled if not found -IF (NOT CUDA_SOURCE_DIR OR NOT CUPTI_INCLUDE_DIR OR NOT CUDA_cupti_LIBRARY) +IF (NOT CUDA_SOURCE_DIR OR NOT CUPTI_INCLUDE_DIR OR NOT CUDA_cupti_LIBRARY OR KINETO_USE_DEVICE_ACTIVITY) set(LIBKINETO_NOCUPTI ON CACHE BOOL "" FORCE) endif() -IF (NOT ROCM_SOURCE_DIR AND NOT ROCTRACER_INCLUDE_DIR) +IF (NOT ROCM_SOURCE_DIR AND NOT ROCTRACER_INCLUDE_DIR OR KINETO_USE_DEVICE_ACTIVITY) set(LIBKINETO_NOROCTRACER ON CACHE BOOL "" FORCE) endif() # Define file lists -if (LIBKINETO_NOCUPTI AND LIBKINETO_NOROCTRACER) +if (LIBKINETO_NOCUPTI AND LIBKINETO_NOROCTRACER AND NOT KINETO_USE_DEVICE_ACTIVITY) get_filelist("get_libkineto_cpu_only_srcs(with_api=False)" LIBKINETO_SRCS) message(INFO " CUPTI unavailable or disabled - not building GPU profilers") elseif(NOT LIBKINETO_NOROCTRACER) get_filelist("get_libkineto_roctracer_srcs(with_api=False)" LIBKINETO_SRCS) message(INFO " Building with roctracer") +elseif(KINETO_USE_DEVICE_ACTIVITY) + get_filelist("get_libkineto_cpu_only_srcs(with_api=False)" LIBKINETO_SRCS) + message(INFO " Building with device activity") else() get_filelist("get_libkineto_cupti_srcs(with_api=False)" LIBKINETO_SRCS) endif() @@ -105,6 +111,11 @@ if (NOT LIBKINETO_NOROCTRACER) target_compile_options(kineto_base PRIVATE "-D__HIP_PLATFORM_AMD__") endif() +if (KINETO_USE_DEVICE_ACTIVITY) + list(APPEND KINETO_COMPILE_OPTIONS "-DHAS_DEVICE_ACTIVITY") +endif() +message(STATUS "Kineto: KINETO_COMPILE_OPTIONS = ${KINETO_COMPILE_OPTIONS}") + target_compile_options(kineto_base PRIVATE "${KINETO_COMPILE_OPTIONS}") target_compile_options(kineto_api PRIVATE "${KINETO_COMPILE_OPTIONS}") diff --git a/libkineto/include/DeviceActivityInterface.h b/libkineto/include/DeviceActivityInterface.h new file mode 100644 index 000000000..52cdf4c23 --- /dev/null +++ b/libkineto/include/DeviceActivityInterface.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include +#include + +#include "ActivityType.h" +#include "ITraceActivity.h" + +namespace libkineto { + +class ActivityLogger; + +class DeviceActivityInterface { + public: + enum CorrelationFlowType { + Default, + User, + End, + }; + + virtual ~DeviceActivityInterface() {} + + virtual void pushCorrelationID(uint64_t id, CorrelationFlowType type) = 0; + virtual void popCorrelationID(CorrelationFlowType type) = 0; + + virtual void enableActivities(const std::set& selectedActivities) = 0; + virtual void disableActivities(const std::set& selectedActivities) = 0; + virtual void clearActivities() = 0; + virtual void teardownContext() = 0; + virtual void setMaxBufferSize(int32_t size) = 0; + + virtual int32_t processActivities(ActivityLogger& logger, + std::function linkedActivity, + int64_t startTime, int64_t endTime) = 0; + + public: + std::atomic_bool stopCollection{false}; +}; + +extern DeviceActivityInterface* device_activity_singleton; + +} // namespace libkineto \ No newline at end of file diff --git a/libkineto/src/ActivityProfilerController.cpp b/libkineto/src/ActivityProfilerController.cpp index c8d1e0817..2b7f7f186 100644 --- a/libkineto/src/ActivityProfilerController.cpp +++ b/libkineto/src/ActivityProfilerController.cpp @@ -18,6 +18,9 @@ #ifdef HAS_ROCTRACER #include "RoctracerActivityApi.h" #endif +#ifdef HAS_DEVICE_ACTIVITY +#include "DeviceActivityInterface.h" +#endif #include "ThreadUtil.h" #include "output_json.h" #include "output_membuf.h" @@ -48,6 +51,9 @@ ActivityProfilerController::ActivityProfilerController( #ifdef HAS_ROCTRACER profiler_ = std::make_unique( RoctracerActivityApi::singleton(), cpuOnly); +#elif HAS_DEVICE_ACTIVITY + profiler_ = std::make_unique( + *device_activity_singleton, cpuOnly); #else profiler_ = std::make_unique( CuptiActivityApi::singleton(), cpuOnly); diff --git a/libkineto/src/ActivityProfilerProxy.cpp b/libkineto/src/ActivityProfilerProxy.cpp index e471c898c..f0492c951 100644 --- a/libkineto/src/ActivityProfilerProxy.cpp +++ b/libkineto/src/ActivityProfilerProxy.cpp @@ -16,6 +16,9 @@ #ifdef HAS_ROCTRACER #include "RoctracerActivityApi.h" #endif +#ifdef HAS_DEVICE_ACTIVITY +#include "DeviceActivityInterface.h" +#endif namespace KINETO_NAMESPACE { @@ -99,6 +102,10 @@ void ActivityProfilerProxy::pushCorrelationId(uint64_t id) { RoctracerActivityApi::pushCorrelationID(id, RoctracerActivityApi::CorrelationFlowType::Default); #endif +#ifdef HAS_DEVICE_ACTIVITY + device_activity_singleton->pushCorrelationID(id, + DeviceActivityInterface::CorrelationFlowType::Default); +#endif } void ActivityProfilerProxy::popCorrelationId() { @@ -108,16 +115,28 @@ void ActivityProfilerProxy::popCorrelationId() { RoctracerActivityApi::popCorrelationID( RoctracerActivityApi::CorrelationFlowType::Default); #endif +#ifdef HAS_DEVICE_ACTIVITY + device_activity_singleton->popCorrelationID( + DeviceActivityInterface::CorrelationFlowType::Default); +#endif } void ActivityProfilerProxy::pushUserCorrelationId(uint64_t id) { CuptiActivityApi::pushCorrelationID(id, CuptiActivityApi::CorrelationFlowType::User); +#ifdef HAS_DEVICE_ACTIVITY + device_activity_singleton->pushCorrelationID(id, + DeviceActivityInterface::CorrelationFlowType::User); +#endif } void ActivityProfilerProxy::popUserCorrelationId() { CuptiActivityApi::popCorrelationID( CuptiActivityApi::CorrelationFlowType::User); +#ifdef HAS_DEVICE_ACTIVITY + device_activity_singleton->popCorrelationID( + DeviceActivityInterface::CorrelationFlowType::User); +#endif } void ActivityProfilerProxy::transferCpuTrace( diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 893fbd552..fefc0ae59 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -37,6 +37,9 @@ #ifdef HAS_ROCTRACER #include "RoctracerActivityApi.h" #endif +#ifdef HAS_DEVICE_ACTIVITY +#include "DeviceActivityInterface.h" +#endif #include "output_base.h" #include "Logger.h" @@ -137,6 +140,10 @@ void CuptiActivityProfiler::transferCpuTrace( CuptiActivityProfiler::CuptiActivityProfiler( RoctracerActivityApi& cupti, bool cpuOnly) +#elif HAS_DEVICE_ACTIVITY +CuptiActivityProfiler::CuptiActivityProfiler( + DeviceActivityInterface& cupti, + bool cpuOnly) #else CuptiActivityProfiler::CuptiActivityProfiler( CuptiActivityApi& cupti, @@ -215,7 +222,7 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) { } } #endif // HAS_CUPTI -#ifdef HAS_ROCTRACER +#if defined(HAS_ROCTRACER) || defined(HAS_DEVICE_ACTIVITY) if (!cpuOnly_) { VLOG(0) << "Retrieving GPU activity buffers"; const int count = cupti_.processActivities( @@ -226,7 +233,7 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) { LOG(INFO) << "Processed " << count << " GPU records"; LOGGER_OBSERVER_ADD_EVENT_COUNT(count); } -#endif // HAS_ROCTRACER +#endif // HAS_ROCTRACER || HAS_DEVICE_ACTIVITY for (const auto& session : sessions_) { LOG(INFO) << "Processing child profiler trace"; @@ -629,7 +636,7 @@ void CuptiActivityProfiler::configure( LOGGER_OBSERVER_SET_GROUP_TRACE_ID(config_->requestGroupTraceID()); } -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) +#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) || defined(HAS_DEVICE_ACTIVITY) if (!cpuOnly_) { // Enabling CUPTI activity tracing incurs a larger perf hit at first, // presumably because structures are allocated and initialized, callbacks @@ -653,7 +660,7 @@ void CuptiActivityProfiler::configure( setupOverhead_, duration_cast(t2 - timestamp).count()); } } -#endif // HAS_CUPTI || HAS_ROCTRACER +#endif // HAS_CUPTI || HAS_ROCTRACER || HAS_DEVICE_ACTIVITY if (profilers_.size() > 0) { configureChildProfilers(); @@ -699,7 +706,7 @@ void CuptiActivityProfiler::startTraceInternal( void CuptiActivityProfiler::stopTraceInternal( const time_point& now) { captureWindowEndTime_ = libkineto::timeSinceEpoch(now); -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) +#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) || defined(HAS_DEVICE_ACTIVITY) if (!cpuOnly_) { time_point timestamp; if (VLOG_IS_ON(1)) { @@ -716,7 +723,7 @@ void CuptiActivityProfiler::stopTraceInternal( setupOverhead_, duration_cast(t2 - timestamp).count()); } } -#endif // HAS_CUPTI || HAS_ROCTRACER +#endif // HAS_CUPTI || HAS_ROCTRACER || HAS_DEVICE_ACTIVITY if (currentRunloopState_ == RunloopState::CollectTrace) { VLOG(0) << "CollectTrace -> ProcessTrace"; @@ -756,7 +763,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( case RunloopState::Warmup: VLOG(1) << "State: Warmup"; warmup_done = derivedConfig_->isWarmupDone(now, currentIter); -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) +#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) || defined(HAS_DEVICE_ACTIVITY) // Flushing can take a while so avoid doing it close to the start time if (!cpuOnly_ && currentIter < 0 && (derivedConfig_->isProfilingByIteration() || @@ -772,7 +779,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( VLOG(0) << "Warmup -> WaitForRequest"; break; } -#endif // HAS_CUPTI || HAS_ROCTRACER +#endif // HAS_CUPTI || HAS_ROCTRACER || HAS_DEVICE_ACTIVITY if (warmup_done) { UST_LOGGER_MARK_COMPLETED(kWarmUpStage); @@ -805,9 +812,9 @@ const time_point CuptiActivityProfiler::performRunLoopStep( collection_done = derivedConfig_->isCollectionDone(now, currentIter); if (collection_done -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) +#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) || defined(HAS_DEVICE_ACTIVITY) || cupti_.stopCollection -#endif // HAS_CUPTI || HAS_ROCTRACER +#endif // HAS_CUPTI || HAS_ROCTRACER || HAS_DEVICE_ACTIVITY ){ // Update runloop state first to prevent further updates to shared state LOG(INFO) << "Tracing complete."; @@ -931,12 +938,12 @@ void CuptiActivityProfiler::finalizeTrace(const Config& config, ActivityLogger& } void CuptiActivityProfiler::resetTraceData() { -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) +#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) || defined(HAS_DEVICE_ACTIVITY) if (!cpuOnly_) { cupti_.clearActivities(); cupti_.teardownContext(); } -#endif // HAS_CUPTI || HAS_ROCTRACER +#endif // HAS_CUPTI || HAS_ROCTRACER || HAS_DEVICE_ACTIVITY activityMap_.clear(); cpuCorrelationMap_.clear(); correlatedCudaActivities_.clear(); diff --git a/libkineto/src/CuptiActivityProfiler.h b/libkineto/src/CuptiActivityProfiler.h index cc1fcdfc1..f8e4cd2f3 100644 --- a/libkineto/src/CuptiActivityProfiler.h +++ b/libkineto/src/CuptiActivityProfiler.h @@ -43,6 +43,7 @@ namespace KINETO_NAMESPACE { class Config; class CuptiActivityApi; class RoctracerActivityApi; +class DeviceActivityInterface; // This struct is a derived snapshot of the Config. And should not // be mutable after construction. @@ -107,6 +108,7 @@ class CuptiActivityProfiler { public: CuptiActivityProfiler(CuptiActivityApi& cupti, bool cpuOnly); CuptiActivityProfiler(RoctracerActivityApi& rai, bool cpuOnly); + CuptiActivityProfiler(DeviceActivityInterface& deviceActivityApi, bool cpuOnly); CuptiActivityProfiler(const CuptiActivityProfiler&) = delete; CuptiActivityProfiler& operator=(const CuptiActivityProfiler&) = delete; @@ -345,6 +347,8 @@ class CuptiActivityProfiler { // Calls to CUPTI is encapsulated behind this interface #ifdef HAS_ROCTRACER RoctracerActivityApi& cupti_; // Design failure here +#elif HAS_DEVICE_ACTIVITY + DeviceActivityInterface& cupti_; #else CuptiActivityApi& cupti_; #endif From 83dd677ef3fc754165400ab492c32600ab10d4e9 Mon Sep 17 00:00:00 2001 From: caikun Date: Thu, 26 Oct 2023 12:41:19 +0800 Subject: [PATCH 2/4] get GLIBCXX_USE_CXX11_ABI from torch --- libkineto/CMakeLists.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libkineto/CMakeLists.txt b/libkineto/CMakeLists.txt index d25bbd359..a855aa1bc 100644 --- a/libkineto/CMakeLists.txt +++ b/libkineto/CMakeLists.txt @@ -27,6 +27,22 @@ project(kineto VERSION 0.1 LANGUAGES CXX C) add_compile_options(-fPIC) +execute_process( + COMMAND + sh -x -c + "python -c 'import torch;print(1 if torch.compiled_with_cxx11_abi() else 0)'" + OUTPUT_VARIABLE KINETO_COMPILED_WITH_CXX11_ABI) +if(KINETO_COMPILED_WITH_CXX11_ABI GREATER 0) + set(KINETO_COMPILED_WITH_CXX11_ABI 1) +else() + set(KINETO_COMPILED_WITH_CXX11_ABI 0) +endif() +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=${KINETO_COMPILED_WITH_CXX11_ABI}" +) +add_compile_options(-D_GLIBCXX_USE_CXX11_ABI=${KINETO_COMPILED_WITH_CXX11_ABI}) +message(STATUS "KINETO_COMPILED_WITH_CXX11_ABI:" ${KINETO_COMPILED_WITH_CXX11_ABI}) + set(KINETO_LIBRARY_TYPE "default" CACHE STRING "Type of library (default, static or shared) to build") set_property(CACHE KINETO_LIBRARY_TYPE PROPERTY STRINGS default shared) From eeebc7a1b7962d98e56c8505de0c1c35a6bf3f92 Mon Sep 17 00:00:00 2001 From: caikun Date: Mon, 6 Nov 2023 12:31:47 +0800 Subject: [PATCH 3/4] add KINETO_COMPILED_WITH_CXX11_ABI --- libkineto/CMakeLists.txt | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/libkineto/CMakeLists.txt b/libkineto/CMakeLists.txt index a855aa1bc..5f181d858 100644 --- a/libkineto/CMakeLists.txt +++ b/libkineto/CMakeLists.txt @@ -27,15 +27,17 @@ project(kineto VERSION 0.1 LANGUAGES CXX C) add_compile_options(-fPIC) -execute_process( - COMMAND - sh -x -c - "python -c 'import torch;print(1 if torch.compiled_with_cxx11_abi() else 0)'" - OUTPUT_VARIABLE KINETO_COMPILED_WITH_CXX11_ABI) -if(KINETO_COMPILED_WITH_CXX11_ABI GREATER 0) - set(KINETO_COMPILED_WITH_CXX11_ABI 1) -else() - set(KINETO_COMPILED_WITH_CXX11_ABI 0) +if(NOT DEFINED KINETO_COMPILED_WITH_CXX11_ABI) + execute_process( + COMMAND + sh -x -c + "python -c 'import torch;print(1 if torch.compiled_with_cxx11_abi() else 0)'" + OUTPUT_VARIABLE KINETO_COMPILED_WITH_CXX11_ABI) + if(KINETO_COMPILED_WITH_CXX11_ABI GREATER 0) + set(KINETO_COMPILED_WITH_CXX11_ABI 1) + else() + set(KINETO_COMPILED_WITH_CXX11_ABI 0) + endif() endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=${KINETO_COMPILED_WITH_CXX11_ABI}" From c1bed2f2dc3779dec2a63025ea1b72a957f4badf Mon Sep 17 00:00:00 2001 From: caikun Date: Thu, 9 Nov 2023 15:54:38 +0800 Subject: [PATCH 4/4] update gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index ce186381c..8b75cc553 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ # ignore common items .idea .vscode +build/ +build*