diff --git a/.github/actions/setup-typeart/action.yml b/.github/actions/setup-typeart/action.yml new file mode 100644 index 00000000..cbe854a1 --- /dev/null +++ b/.github/actions/setup-typeart/action.yml @@ -0,0 +1,82 @@ +name: 'Setup TypeART Environment' +description: 'Sets up LLVM, Clang, OpenMPI, and other dependencies for TypeART CI' + +inputs: + llvm-version: + description: 'LLVM version to install' + required: true + typeart-typegen-legacy: + description: 'Set TYPEART_TYPEGEN_IR env variable' + required: false + install-libcxx: + description: 'Install libc++' + required: false + default: 'false' + install-lcov: + description: 'Install lcov' + required: false + default: 'false' + install-omp: + description: 'Install LLVM OpenMP runtime' + required: false + default: 'true' + setup-mold: + description: 'Setup Mold Linker' + required: false + default: 'false' + +runs: + using: "composite" + steps: + - name: LLVM apt + if: ${{ inputs.llvm-version >= 19 }} + shell: bash + run: | + wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc > /dev/null + echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-${{ inputs.llvm-version }} main" | sudo tee /etc/apt/sources.list.d/llvm-${{ inputs.llvm-version }}.list + + - name: Update apt + shell: bash + run: sudo apt-get update + + - name: Install LLVM + shell: bash + run: sudo apt-get install -y libllvm${{ inputs.llvm-version }} llvm-${{ inputs.llvm-version }} llvm-${{ inputs.llvm-version }}-dev + + - name: Install LLVM OpenMP runtime + if: ${{ inputs.install-omp == 'true' }} + shell: bash + run: sudo apt-get install -y libomp-${{ inputs.llvm-version }}-dev + + - name: Install Clang + shell: bash + run: sudo apt-get install -y clang-${{ inputs.llvm-version }} clang-tidy-${{ inputs.llvm-version }} + + - name: Install libc++ + if: ${{ inputs.install-libcxx == 'true' }} + shell: bash + run: sudo apt-get install -y --no-install-recommends libc++-${{ inputs.llvm-version }}-dev libc++abi-${{ inputs.llvm-version }}-dev + 
+ - name: Install OpenMPI + shell: bash + run: sudo apt-get install -y libopenmpi-dev openmpi-bin + + - name: Install lcov + if: ${{ inputs.install-lcov == 'true' }} + shell: bash + run: sudo apt-get install -y lcov + + - name: Setup Mold Linker + if: ${{ inputs.setup-mold == 'true' && inputs.llvm-version > 14 }} + uses: rui314/setup-mold@v1 + + - name: Setup env + shell: bash + run: | + sudo ln -f -s /usr/bin/clang-${{ inputs.llvm-version }} /usr/bin/clang + sudo ln -f -s /usr/bin/clang++-${{ inputs.llvm-version }} /usr/bin/clang++ + echo "LLVM_CMAKE_DIR=/usr/lib/llvm-${{ inputs.llvm-version }}/cmake" >> $GITHUB_ENV + echo "EXTERNAL_LIT=/usr/lib/llvm-${{ inputs.llvm-version >= 20 && 18 || inputs.llvm-version }}/build/utils/lit/lit.py" >> $GITHUB_ENV + if [ -n "${{ inputs.typeart-typegen-legacy }}" ]; then + echo "TYPEART_TYPEGEN_IR=${{ inputs.typeart-typegen-legacy }}" >> $GITHUB_ENV + fi diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index 8c420922..5f511500 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -81,80 +81,182 @@ jobs: steps: - uses: actions/checkout@v6 - - name: LLVM apt - if: ${{ matrix.platform.llvm-version >= 19 }} - run: | - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-${{ matrix.platform.llvm-version }} main" | sudo tee /etc/apt/sources.list.d/llvm-${{ matrix.platform.llvm-version }}.list - - - name: Update apt - run: sudo apt-get update + - name: Setup TypeART Environment + uses: ./.github/actions/setup-typeart + with: + llvm-version: ${{ matrix.platform.llvm-version }} + typeart-typegen-legacy: ${{ matrix.platform.typeart-typegen-legacy }} + install-libcxx: ${{ matrix.preset.libcxx || 'false' }} + install-lcov: ${{ matrix.preset.coverage || 'false' }} + install-omp: 'true' + setup-mold: 'true' - - name: Install LLVM - run: sudo apt-get install libllvm${{ matrix.platform.llvm-version }} 
llvm-${{ matrix.platform.llvm-version }} llvm-${{ matrix.platform.llvm-version }}-dev + - name: Configure TypeART + run: cmake -B build --preset ${{ matrix.preset.name }} -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} - - name: Install LLVM OpenMP runtime - run: sudo apt-get install libomp-${{ matrix.platform.llvm-version }}-dev + - name: Build TypeART + run: cmake --build build --parallel 2 - - name: Install Clang - run: sudo apt-get install clang-${{ matrix.platform.llvm-version }} clang-tidy-${{ matrix.platform.llvm-version }} + - name: Prepare TypeART coverage + if: matrix.preset.coverage + run: cmake --build build --target typeart-lcov-clean - - name: Install libc++ - if: matrix.preset.libcxx - run: sudo apt-get install --no-install-recommends libc++-${{ matrix.platform.llvm-version }}-dev libc++abi-${{ matrix.platform.llvm-version }}-dev + - name: Test TypeART lit-suite + if: matrix.preset.skip_test == false + run: cmake --build build --target check-typeart - - name: Install OpenMPI - run: sudo apt-get install libopenmpi-dev openmpi-bin + - name: Build coverage report + if: matrix.preset.coverage + run: cmake --build build --target typeart-lcov-html - - name: Install lcov + - name: Coveralls (parallel) if: matrix.preset.coverage - run: sudo apt-get install lcov + uses: coverallsapp/github-action@v2.3.6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: build/typeart.coverage + flag-name: ${{ matrix.preset.name }}-${{ matrix.platform.llvm-version }}-${{ matrix.platform.typeart-typegen-legacy }} + parallel: true + + cuda-suite: + strategy: + fail-fast: false + matrix: + include: + - llvm-version: 14 + os: ubuntu-22.04 + cuda: 11.8.0 + - llvm-version: 22 + os: ubuntu-24.04 + cuda: 12.6.0 + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v6 + + - uses: Jimver/cuda-toolkit@v0.2.35 + id: cuda-toolkit + with: + cuda: "${{ matrix.cuda }}" + method: network + sub-packages: '["nvcc", "cudart", "cudart-dev"]' + 
non-cuda-sub-packages: '["libcurand", "libcurand-dev"]' + + - name: Setup TypeART Environment + uses: ./.github/actions/setup-typeart + with: + llvm-version: ${{ matrix.llvm-version }} + typeart-typegen-legacy: 0 + install-lcov: 'true' + install-omp: 'true' + setup-mold: 'true' + + - name: Configure TypeART + run: cmake -B build --preset ci-cov-thread-safe -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} + + - name: Build TypeART + run: cmake --build build --parallel 2 - - name: Setup Mold Linker - if: ${{ matrix.platform.llvm-version > 14 }} - uses: rui314/setup-mold@v1 + - name: Prepare TypeART coverage + run: cmake --build build --target typeart-lcov-clean + + - name: Test TypeART cuda-suite + run: cmake --build build --target check-typeart - - name: Setup env + - name: Build coverage report + run: cmake --build build --target typeart-lcov-html + + - name: Coveralls (parallel) + uses: coverallsapp/github-action@v2.3.6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: build/typeart.coverage + flag-name: cuda-suite-${{ matrix.llvm-version }}-${{ matrix.cuda }} + parallel: true + + hip-suite: + runs-on: ubuntu-24.04 + + env: + ROCM_PATH: /opt/rocm + ROCM_VERSION: 6.4.4 + + steps: + - uses: actions/checkout@v6 + + - name: Setup ROCm Toolchain + run: | + sudo mkdir --parents --mode=0755 /etc/apt/keyrings + wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/rocm.gpg + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" | sudo tee /etc/apt/sources.list.d/rocm.list + echo -e "Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600" | sudo tee /etc/apt/preferences.d/rocm-pin-600 + + - name: Install ROCm ${{ env.ROCM_VERSION }} run: | - sudo ln -f -s /usr/bin/clang-${{ matrix.platform.llvm-version }} /usr/bin/clang - sudo ln -f -s /usr/bin/clang++-${{ matrix.platform.llvm-version }} /usr/bin/clang++ - echo 
"LLVM_CMAKE_DIR=/usr/lib/llvm-${{ matrix.platform.llvm-version }}/cmake" >> $GITHUB_ENV - echo "EXTERNAL_LIT=/usr/lib/llvm-${{ matrix.platform.llvm-version >= 20 && 18 || matrix.platform.llvm-version }}/build/utils/lit/lit.py" >> $GITHUB_ENV - echo "TYPEART_TYPEGEN_IR=${{ matrix.platform.typeart-typegen-legacy }}" >> $GITHUB_ENV + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + rocm-llvm \ + rocm-llvm-dev \ + rocm-dev + + - name: Setup ROCm Environment + run: | + sudo ln -sfn /opt/rocm-${ROCM_VERSION} /opt/rocm + echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH + echo "/opt/rocm/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=/opt/rocm/llvm/lib:/opt/rocm/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> $GITHUB_ENV + + - name: Setup TypeART Environment + uses: ./.github/actions/setup-typeart + with: + llvm-version: 18 + typeart-typegen-legacy: 0 + install-lcov: 'true' + install-omp: 'false' + setup-mold: 'true' - name: Configure TypeART - run: cmake -B build --preset ${{ matrix.preset.name }} -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} + run: | + cmake -B build --preset ci-cov-thread-safe \ + -DCMAKE_C_COMPILER=/opt/rocm/bin/amdclang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/bin/amdclang++ \ + -DLLVM_DIR=${ROCM_PATH}/llvm/lib/cmake/llvm -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} \ + -DTYPEART_CLANG_EXEC=/opt/rocm/bin/amdclang \ + -DTYPEART_CLANGCXX_EXEC=/opt/rocm/bin/amdclang++ \ + -DTYPEART_OPT_EXEC=/opt/rocm/llvm/bin/opt \ + -DTYPEART_LLC_EXEC=/opt/rocm/llvm/bin/llc \ + -DTYPEART_LLVMCONFIG_COMMAND=/opt/rocm/llvm/bin/llvm-config \ + -DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=ON - name: Build TypeART run: cmake --build build --parallel 2 - name: Prepare TypeART coverage - if: matrix.preset.coverage run: cmake --build build --target typeart-lcov-clean - - name: Test TypeART lit-suite - if: matrix.preset.skip_test == false + - name: Test TypeART hip-suite run: cmake --build build --target check-typeart - name: Build coverage report - if: matrix.preset.coverage run: cmake 
--build build --target typeart-lcov-html - name: Coveralls (parallel) - if: matrix.preset.coverage uses: coverallsapp/github-action@v2.3.6 with: github-token: ${{ secrets.GITHUB_TOKEN }} path-to-lcov: build/typeart.coverage - flag-name: ${{ matrix.preset.name }}-${{ matrix.platform.llvm-version }}-${{ matrix.platform.typeart-typegen-legacy }} + flag-name: hip-suite-rocm-${{ env.ROCM_VERSION }} parallel: true finish-coverage: - needs: lit-suite + if: ${{ always() }} + needs: [lit-suite, cuda-suite, hip-suite] runs-on: ubuntu-24.04 steps: - name: Coveralls Finished + uses: coverallsapp/github-action@v2.3.6 with: github-token: ${{ secrets.GITHUB_TOKEN }} - parallel-finished: true \ No newline at end of file + parallel-finished: true diff --git a/cmake/modules/coverage-lcov.cmake b/cmake/modules/coverage-lcov.cmake index c97a985b..61893485 100644 --- a/cmake/modules/coverage-lcov.cmake +++ b/cmake/modules/coverage-lcov.cmake @@ -5,6 +5,23 @@ if(TYPEART_LCOV_EXEC-NOTFOUND OR TYPEART_GENHTML_EXEC-NOTFOUND) message(WARNING "lcov and genhtml command needed for coverage.") endif() +# Detect whether lcov supports: --ignore-errors unused: +# - avoids CUDA error "geninfo: ERROR: 'exclude' pattern '*/Version.cpp' is unused" +set(TYPEART_LCOV_IGNORE_UNUSED) +if(TYPEART_LCOV_EXEC) + execute_process( + COMMAND ${TYPEART_LCOV_EXEC} --version + OUTPUT_VARIABLE TYPEART_LCOV_VERSION_STRING + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(TYPEART_LCOV_VERSION_STRING MATCHES "LCOV version ([0-9]+)\\.([0-9]+)") + set(TYPEART_LCOV_VERSION_MAJOR ${CMAKE_MATCH_1}) + if(TYPEART_LCOV_VERSION_MAJOR GREATER_EQUAL 2) + set(TYPEART_LCOV_IGNORE_UNUSED --ignore-errors unused) + endif() + endif() +endif() + add_custom_target( typeart-lcov-clean COMMAND ${TYPEART_LCOV_EXEC} -d ${CMAKE_BINARY_DIR} -z @@ -27,7 +44,7 @@ endif() add_custom_target( typeart-lcov-make - COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} + COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} 
${TYPEART_LCOV_IGNORE_UNUSED} --no-external -c -d ${CMAKE_BINARY_DIR} -b ${CMAKE_SOURCE_DIR} -o typeart.coverage COMMAND ${TYPEART_LCOV_EXEC} --rc derive_function_end_line=0 --remove typeart.coverage '${CMAKE_BINARY_DIR}/*' -o typeart.coverage ) @@ -50,7 +67,7 @@ function(typeart_target_lcov target) add_custom_target( typeart-lcov-make-${target} - COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} + COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} ${TYPEART_LCOV_IGNORE_UNUSED} --no-external -c -d ${CMAKE_BINARY_DIR} -b ${LCOV_TARGET_SOURCE_DIR} -o counter-${target}.pro COMMAND ${TYPEART_LCOV_EXEC} --remove counter-${target}.pro '${CMAKE_BINARY_DIR}/*' diff --git a/cmake/typeartToolchainOptions.cmake b/cmake/typeartToolchainOptions.cmake index 741c9b3e..a1444b4a 100644 --- a/cmake/typeartToolchainOptions.cmake +++ b/cmake/typeartToolchainOptions.cmake @@ -163,6 +163,20 @@ set_package_properties(Python3 PROPERTIES "The Python3 interpreter is used for lit-testing and the MPI interceptor tool code generation." ) +find_package(CUDAToolkit QUIET) +set_package_properties(CUDAToolkit PROPERTIES + TYPE OPTIONAL + PURPOSE + "CUDA toolkit enables host-side CUDA instrumentation and runtime helpers." +) + +find_package(hip QUIET) +set_package_properties(hip PROPERTIES + TYPE OPTIONAL + PURPOSE + "HIP enables host-side HIP instrumentation and runtime helpers." 
+) + typeart_find_llvm_progs(TYPEART_CLANG_EXEC "clang-${LLVM_VERSION_MAJOR};clang" DEFAULT_EXE "clang") typeart_find_llvm_progs(TYPEART_CLANGCXX_EXEC "clang++-${LLVM_VERSION_MAJOR};clang++" DEFAULT_EXE "clang++") typeart_find_llvm_progs(TYPEART_LLC_EXEC "llc-${LLVM_VERSION_MAJOR};llc" DEFAULT_EXE "llc") diff --git a/externals/dimeta/CMakeLists.txt b/externals/dimeta/CMakeLists.txt index 85d23e22..40fd1525 100644 --- a/externals/dimeta/CMakeLists.txt +++ b/externals/dimeta/CMakeLists.txt @@ -1,7 +1,7 @@ FetchContent_Declare( llvm-dimeta GIT_REPOSITORY https://github.com/ahueck/llvm-dimeta - GIT_TAG v0.5.1 + GIT_TAG devel GIT_SHALLOW 1 ) diff --git a/lib/passes/Commandline.cpp b/lib/passes/Commandline.cpp index 9619e166..b4171625 100644 --- a/lib/passes/Commandline.cpp +++ b/lib/passes/Commandline.cpp @@ -74,6 +74,11 @@ static cl::opt cl_typeart_instrument_heap(Commandlin cl::init(ConfigStdArgValues::heap), cl::cat(typeart_category)); +static cl::opt cl_typeart_instrument_gpu(CommandlineStdArgs::gpu, + cl::desc(ConfigStdArgDescriptions::gpu), + cl::init(ConfigStdArgValues::gpu), + cl::cat(typeart_category)); + static cl::opt cl_typeart_instrument_global(CommandlineStdArgs::global, cl::desc(ConfigStdArgDescriptions::global), cl::init(ConfigStdArgValues::global), @@ -206,6 +211,7 @@ CommandLineOptions::CommandLineOptions() { make_entry(ConfigStdArgs::types, cl_typeart_type_file), make_entry(ConfigStdArgs::stats, cl_typeart_stats), make_entry(ConfigStdArgs::heap, cl_typeart_instrument_heap), + make_entry(ConfigStdArgs::gpu, cl_typeart_instrument_gpu), make_entry(ConfigStdArgs::global, cl_typeart_instrument_global), make_entry(ConfigStdArgs::stack, cl_typeart_instrument_stack), make_entry(ConfigStdArgs::type_serialization, cl_typeart_type_serialization), @@ -226,6 +232,7 @@ CommandLineOptions::CommandLineOptions() { make_occurr_entry(ConfigStdArgs::types, cl_typeart_type_file), make_occurr_entry(ConfigStdArgs::stats, cl_typeart_stats), 
make_occurr_entry(ConfigStdArgs::heap, cl_typeart_instrument_heap), + make_occurr_entry(ConfigStdArgs::gpu, cl_typeart_instrument_gpu), make_occurr_entry(ConfigStdArgs::global, cl_typeart_instrument_global), make_occurr_entry(ConfigStdArgs::stack, cl_typeart_instrument_stack), make_occurr_entry(ConfigStdArgs::type_serialization, cl_typeart_type_serialization), diff --git a/lib/passes/TypeARTPass.cpp b/lib/passes/TypeARTPass.cpp index cf63ac3b..19851132 100644 --- a/lib/passes/TypeARTPass.cpp +++ b/lib/passes/TypeARTPass.cpp @@ -25,6 +25,8 @@ #include "instrumentation/TypeARTFunctions.h" #include "instrumentation/TypeIDProvider.h" #include "support/ConfigurationBase.h" +#include "support/CudaUtil.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "support/ModuleDumper.h" #include "support/Table.h" @@ -269,6 +271,10 @@ class TypeArtPass : public llvm::PassInfoMixin { llvm::PreservedAnalyses run(llvm::Module& m, llvm::ModuleAnalysisManager&) { + if (gpu::is_device_module(m)) { + LOG_DEBUG("Skipping GPU device module: " << m.getName()); + return llvm::PreservedAnalyses::all(); + } bool changed{false}; changed |= doInitialization(m); const bool heap = configuration()[config::ConfigStdArgs::heap]; // Must happen after doInit @@ -324,6 +330,10 @@ bool runOnFunc(llvm::Function& f) { return false; } + if (cuda::is_cuda_helper_function(f)) { + return false; + } + if (!meminst_finder->hasFunctionData(f)) { LOG_WARNING("No allocation data could be retrieved for function: " << f.getName()); return false; diff --git a/lib/passes/analysis/MemOpData.h b/lib/passes/analysis/MemOpData.h index 34d173e5..2af24f4e 100644 --- a/lib/passes/analysis/MemOpData.h +++ b/lib/passes/analysis/MemOpData.h @@ -31,7 +31,7 @@ class IntrinsicInst; } // namespace llvm namespace typeart { -enum class MemOpKind : uint8_t { +enum class MemOpKind : uint16_t { NewLike = 1 << 0, // allocates, never null MallocLike = 1 << 1 | NewLike, // allocates, maybe null AlignedAllocLike = 1 << 2, // 
allocates aligned, maybe null @@ -39,12 +39,20 @@ enum class MemOpKind : uint8_t { ReallocLike = 1 << 4, // re-allocated (existing) memory FreeLike = 1 << 5, // free memory DeleteLike = 1 << 6, // delete (cpp) memory + CudaMallocLike = 1 << 7, // cuda out-parameter allocation + HipMallocLike = 1 << 8, // hip out-parameter allocation MallocOrCallocLike = MallocLike | CallocLike | AlignedAllocLike, AllocLike = MallocOrCallocLike, AnyAlloc = AllocLike | ReallocLike, - AnyFree = FreeLike | DeleteLike + AnyFree = FreeLike | DeleteLike, + GpuMallocLike = CudaMallocLike | HipMallocLike }; +inline bool is_kind(MemOpKind kind, MemOpKind mask) { + return (static_cast>(kind) & + static_cast>(mask)) != 0; +} + struct MemOps { inline std::optional kind(llvm::StringRef function) const { if (auto alloc = allocKind(function)) { @@ -101,6 +109,18 @@ struct MemOps { {"_ZnajSt11align_val_tRKSt9nothrow_t", MemOpKind::MallocLike}, /*new[](unsigned int, align_val_t, nothrow)*/ {"_ZnamSt11align_val_t", MemOpKind::NewLike}, /*new[](unsigned long, align_val_t)*/ {"_ZnamSt11align_val_tRKSt9nothrow_t", MemOpKind::MallocLike}, /*new[](unsigned long, align_val_t, nothrow)*/ + {"cudaMalloc", MemOpKind::CudaMallocLike}, + {"cudaHostAlloc", MemOpKind::CudaMallocLike}, + {"cudaMallocHost", MemOpKind::CudaMallocLike}, + {"cudaMallocManaged", MemOpKind::CudaMallocLike}, + {"cudaMallocAsync", MemOpKind::CudaMallocLike}, + {"cudaMallocFromPoolAsync", MemOpKind::CudaMallocLike}, + {"hipMalloc", MemOpKind::HipMallocLike}, + {"hipMallocHost", MemOpKind::HipMallocLike}, + {"hipHostMalloc", MemOpKind::HipMallocLike}, + {"hipMallocManaged", MemOpKind::HipMallocLike}, + {"hipMallocAsync", MemOpKind::HipMallocLike}, + {"hipMallocFromPoolAsync", MemOpKind::HipMallocLike}, }; const llvm::StringMap dealloc_map{ @@ -119,6 +139,12 @@ struct MemOps { {"_ZdlPvmSt11align_val_t", MemOpKind::DeleteLike}, /* delete(void*, unsigned long, align_val_t) */ {"_ZdaPvjSt11align_val_t", MemOpKind::DeleteLike}, /* 
delete[](void*, unsigned int, align_val_t) */ {"_ZdaPvmSt11align_val_t", MemOpKind::DeleteLike}, /* delete[](void*, unsigned long, align_val_t) */ + {"cudaFree", MemOpKind::FreeLike}, + {"cudaFreeHost", MemOpKind::FreeLike}, + {"cudaFreeAsync", MemOpKind::FreeLike}, + {"hipFree", MemOpKind::FreeLike}, + {"hipFreeHost", MemOpKind::FreeLike}, + {"hipFreeAsync", MemOpKind::FreeLike}, }; //clang-format off }; diff --git a/lib/passes/analysis/MemOpVisitor.cpp b/lib/passes/analysis/MemOpVisitor.cpp index 9b835cc8..57a81d61 100644 --- a/lib/passes/analysis/MemOpVisitor.cpp +++ b/lib/passes/analysis/MemOpVisitor.cpp @@ -17,6 +17,7 @@ #include "configuration/Configuration.h" #include "support/ConfigurationBase.h" #include "support/Error.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "support/TypeUtil.h" #include "support/Util.h" @@ -54,9 +55,14 @@ MemOpVisitor::MemOpVisitor() : MemOpVisitor(true, true) { } MemOpVisitor::MemOpVisitor(const config::Configuration& config) - : MemOpVisitor(config[config::ConfigStdArgs::stack], config[config::ConfigStdArgs::heap]) { + : MemOpVisitor(config[config::ConfigStdArgs::stack], config[config::ConfigStdArgs::heap], + config[config::ConfigStdArgs::gpu]) { } -MemOpVisitor::MemOpVisitor(bool stack, bool heap) : collect_allocas(stack), collect_heap(heap) { +MemOpVisitor::MemOpVisitor(bool stack, bool heap) : MemOpVisitor(stack, heap, true) { +} + +MemOpVisitor::MemOpVisitor(bool stack, bool heap, bool gpu) + : collect_allocas(stack), collect_heap(heap), collect_gpu(gpu) { } void MemOpVisitor::collect(llvm::Function& function) { @@ -91,14 +97,17 @@ void MemOpVisitor::visitCallBase(llvm::CallBase& cb) { if (!collect_heap) { return; } + const auto* called_function = cb.getCalledFunction(); + if (!collect_gpu && called_function != nullptr && gpu::is_gpu_function(*called_function)) { + return; + } const auto isInSet = [&](const auto& fMap) -> std::optional { - const auto* f = cb.getCalledFunction(); - if (!f) { + if 
(called_function == nullptr) { // TODO handle calls through, e.g., function pointers? - seems infeasible // LOG_INFO("Encountered indirect call, skipping."); return {}; } - const auto name = f->getName().str(); + const auto name = called_function->getName().str(); const auto res = fMap.find(name); if (res != fMap.end()) { @@ -222,10 +231,17 @@ void collect_casts_from_stack(llvm::StoreInst* store_inst, MallocBcasts& out_bca } } -std::pair collectRelevantMallocUsers(llvm::CallBase& call_inst) { +std::pair collectRelevantMallocUsers(llvm::CallBase& call_inst, MemOpKind kind) { auto geps = MallocGeps{}; auto bcasts = MallocBcasts{}; + if (is_kind(kind, MemOpKind::GpuMallocLike)) { + if (auto bitcast = gpu::bitcast_for(call_inst, kind); bitcast.has_value()) { + bcasts.insert(*bitcast); + } + return {geps, bcasts}; + } + for (auto* user : call_inst.users()) { if (auto* bit_cast = llvm::dyn_cast(user)) { bcasts.insert(bit_cast); @@ -325,7 +341,7 @@ std::optional handleArrayCookie(llvm::CallBase& ci, const Mallo } void MemOpVisitor::visitMallocLike(llvm::CallBase& ci, MemOpKind k) { - auto [geps, bcasts] = collectRelevantMallocUsers(ci); + auto [geps, bcasts] = collectRelevantMallocUsers(ci, k); auto primary_cast = bcasts.empty() ? 
nullptr : *bcasts.begin(); auto array_cookie = handleArrayCookie(ci, geps, bcasts, primary_cast); if (primary_cast == nullptr) { diff --git a/lib/passes/analysis/MemOpVisitor.h b/lib/passes/analysis/MemOpVisitor.h index fc982549..bc626854 100644 --- a/lib/passes/analysis/MemOpVisitor.h +++ b/lib/passes/analysis/MemOpVisitor.h @@ -39,11 +39,13 @@ struct MemOpVisitor : public llvm::InstVisitor { MemOps mem_operations{}; bool collect_allocas; bool collect_heap; + bool collect_gpu; public: MemOpVisitor(); explicit MemOpVisitor(const config::Configuration& config); MemOpVisitor(bool stack, bool heap); + MemOpVisitor(bool stack, bool heap, bool gpu); void collect(llvm::Function& function); void collectGlobals(llvm::Module& module); void clear(); diff --git a/lib/passes/configuration/EnvironmentConfiguration.cpp b/lib/passes/configuration/EnvironmentConfiguration.cpp index 76182011..c36eb9c4 100644 --- a/lib/passes/configuration/EnvironmentConfiguration.cpp +++ b/lib/passes/configuration/EnvironmentConfiguration.cpp @@ -110,6 +110,7 @@ EnvironmentFlagsOptions::EnvironmentFlagsOptions() { EnvironmentStdArgsValues::stats), make_entry(ConfigStdArgs::heap, EnvironmentStdArgs::heap, EnvironmentStdArgsValues::heap), + make_entry(ConfigStdArgs::gpu, EnvironmentStdArgs::gpu, EnvironmentStdArgsValues::gpu), make_entry(ConfigStdArgs::global, EnvironmentStdArgs::global, EnvironmentStdArgsValues::global), make_entry(ConfigStdArgs::stack, EnvironmentStdArgs::stack, @@ -150,6 +151,7 @@ EnvironmentFlagsOptions::EnvironmentFlagsOptions() { make_occurr_entry(ConfigStdArgs::types, config::EnvironmentStdArgs::types), make_occurr_entry(ConfigStdArgs::stats, EnvironmentStdArgs::stats), make_occurr_entry(ConfigStdArgs::heap, EnvironmentStdArgs::heap), + make_occurr_entry(ConfigStdArgs::gpu, EnvironmentStdArgs::gpu), make_occurr_entry(ConfigStdArgs::global, EnvironmentStdArgs::global), make_occurr_entry(ConfigStdArgs::stack, EnvironmentStdArgs::stack), 
make_occurr_entry(ConfigStdArgs::type_serialization, EnvironmentStdArgs::type_serialization), diff --git a/lib/passes/configuration/PassBuilderUtil.h b/lib/passes/configuration/PassBuilderUtil.h index cea89fcd..94fac7ce 100644 --- a/lib/passes/configuration/PassBuilderUtil.h +++ b/lib/passes/configuration/PassBuilderUtil.h @@ -16,6 +16,7 @@ #define TYPEART_PASS_BUILDER_UTIL_H #include "support/Logger.h" +#include "support/Util.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -29,11 +30,7 @@ inline bool checkParametrizedPassName(llvm::StringRef Name, llvm::StringRef Pass // normal pass name w/o parameters == default parameters if (Name.empty()) return true; -#if LLVM_VERSION_MAJOR > 15 - return Name.starts_with("<") && Name.ends_with(">"); -#else - return Name.startswith("<") && Name.endswith(">"); -#endif + return starts_with_any_of(Name, "<") && ends_with_any_of(Name, ">"); } /// This performs customized parsing of pass name with parameters. diff --git a/lib/passes/configuration/PassConfiguration.cpp b/lib/passes/configuration/PassConfiguration.cpp index 5c067571..ad911f74 100644 --- a/lib/passes/configuration/PassConfiguration.cpp +++ b/lib/passes/configuration/PassConfiguration.cpp @@ -59,6 +59,12 @@ PassConfig parse_typeart_config_with_occurrence(llvm::StringRef parameters) { continue; } + if (parameter_name == ConfigStdArgs::gpu) { + result.gpu = enable; + occurrence_map[ConfigStdArgs::gpu] = true; + continue; + } + if (parameter_name == ConfigStdArgs::stack) { result.stack = enable; occurrence_map[ConfigStdArgs::stack] = true; @@ -162,4 +168,4 @@ PassConfig parse_typeart_config_with_occurrence(llvm::StringRef parameters) { return {result, occurrence_map}; } -} // namespace typeart::config::pass \ No newline at end of file +} // namespace typeart::config::pass diff --git a/lib/passes/configuration/TypeARTOptions.cpp b/lib/passes/configuration/TypeARTOptions.cpp index 1c82427f..30b0d0fd 100644 --- 
a/lib/passes/configuration/TypeARTOptions.cpp +++ b/lib/passes/configuration/TypeARTOptions.cpp @@ -90,6 +90,7 @@ struct llvm::yaml::MappingTraits { using typeart::config::ConfigStdArgs; yml_io.mapRequired(ConfigStdArgs::types, info.types); yml_io.mapRequired(ConfigStdArgs::heap, info.heap); + yml_io.mapOptional(ConfigStdArgs::gpu, info.gpu); yml_io.mapRequired(ConfigStdArgs::stack, info.stack); yml_io.mapOptional(ConfigStdArgs::global, info.global); yml_io.mapOptional(ConfigStdArgs::stats, info.statistics); @@ -138,6 +139,7 @@ TypeARTConfigOptions construct_with(Constructor&& make_entry) { make_entry(ConfigStdArgs::types, config.types); make_entry(ConfigStdArgs::stats, config.statistics); make_entry(ConfigStdArgs::heap, config.heap); + make_entry(ConfigStdArgs::gpu, config.gpu); make_entry(ConfigStdArgs::global, config.global); make_entry(ConfigStdArgs::stack, config.stack); make_entry(ConfigStdArgs::stack_lifetime, config.stack_lifetime); @@ -186,6 +188,7 @@ OptionsMap options_to_map(const TypeARTConfigOptions& config) { make_entry(ConfigStdArgs::types, config.types), make_entry(ConfigStdArgs::stats, config.statistics), make_entry(ConfigStdArgs::heap, config.heap), + make_entry(ConfigStdArgs::gpu, config.gpu), make_entry(ConfigStdArgs::global, config.global), make_entry(ConfigStdArgs::stack, config.stack), make_entry(ConfigStdArgs::stack_lifetime, config.stack_lifetime), diff --git a/lib/passes/configuration/TypeARTOptions.h b/lib/passes/configuration/TypeARTOptions.h index 03e7884d..56b99424 100644 --- a/lib/passes/configuration/TypeARTOptions.h +++ b/lib/passes/configuration/TypeARTOptions.h @@ -46,6 +46,7 @@ struct TypeARTAnalysisOptions { struct TypeARTConfigOptions { std::string types{ConfigStdArgValues::types}; bool heap{ConfigStdArgValues::heap}; + bool gpu{ConfigStdArgValues::gpu}; bool stack{ConfigStdArgValues::stack}; bool global{ConfigStdArgValues::global}; bool statistics{ConfigStdArgValues::stats}; diff --git 
a/lib/passes/instrumentation/CallBackFunctionInserter.cpp b/lib/passes/instrumentation/CallBackFunctionInserter.cpp index cd89f9f7..b9c1d361 100644 --- a/lib/passes/instrumentation/CallBackFunctionInserter.cpp +++ b/lib/passes/instrumentation/CallBackFunctionInserter.cpp @@ -5,6 +5,7 @@ #include "support/ConfigurationBase.h" #include "support/Logger.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -55,8 +56,9 @@ llvm::CallInst* CallbackFunctionInserter::create_instrumentation_call(llvm::IRBu const auto callback_id = ifunc_for_function(callback_type, instruction_or_value); auto type_id_param_out = type_id_handler_->getOrRegister(args.typeid_value); - const auto mode = llvm::isa(type_id_param_out) ? mode_ : TypeSerializationImplementation::FILE; - auto function = function_query_->getFunctionFor(callback_id, mode); + const bool has_global_type_payload = !llvm::isa(type_id_param_out); + const auto mode = has_global_type_payload ? 
mode_ : TypeSerializationImplementation::FILE; + auto function = function_query_->getFunctionFor(callback_id, mode); return IRB.CreateCall(function, llvm::ArrayRef{args.pointer_value, type_id_param_out, args.element_count}); @@ -91,4 +93,4 @@ std::unique_ptr make_callback_inserter(const config::Co return std::make_unique(configuration, std::move(type_id_handler), function_query); } -} // namespace typeart \ No newline at end of file +} // namespace typeart diff --git a/lib/passes/instrumentation/MemOpArgCollector.cpp b/lib/passes/instrumentation/MemOpArgCollector.cpp index bd6315ed..f1531c1e 100644 --- a/lib/passes/instrumentation/MemOpArgCollector.cpp +++ b/lib/passes/instrumentation/MemOpArgCollector.cpp @@ -104,6 +104,16 @@ HeapArgList MemOpArgCollector::collectHeap(const MallocDataList& mallocs) { case MemOpKind::AlignedAllocLike: byte_count = malloc_call->getArgOperand(1); break; + case MemOpKind::CudaMallocLike: + [[fallthrough]]; + case MemOpKind::HipMallocLike: + byte_count = malloc_call->getArgOperand(1); + if (mdata.primary != nullptr) { + pointer = mdata.primary->getOperand(0); + } else { + pointer = malloc_call->getArgOperand(0); + } + break; default: LOG_ERROR("Unknown malloc kind. Not instrumenting. 
" << util::dump(*malloc_call)); // TODO see above continues diff --git a/lib/passes/instrumentation/MemOpInstrumentation.cpp b/lib/passes/instrumentation/MemOpInstrumentation.cpp index 950bfd8d..2d742c80 100644 --- a/lib/passes/instrumentation/MemOpInstrumentation.cpp +++ b/lib/passes/instrumentation/MemOpInstrumentation.cpp @@ -72,8 +72,13 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { const bool is_llvm_ir_type = static_cast(type_gen) == static_cast(TypegenImplementation::IR); for (const auto& [malloc, args] : heap) { - auto kind = malloc.kind; - auto* malloc_call = args.get_as(ArgMap::ID::pointer); + auto kind = malloc.kind; + Instruction* malloc_call{nullptr}; + if (is_kind(malloc.kind, MemOpKind::GpuMallocLike)) { + malloc_call = llvm::cast(malloc.call); + } else { + malloc_call = args.get_as(ArgMap::ID::pointer); + } Instruction* insertBefore = malloc_call->getNextNode(); if (malloc.array_cookie) { @@ -89,6 +94,7 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { auto typeid_value = args.get_as(ArgMap::ID::type_id); auto type_size_value = args.get_value(ArgMap::ID::type_size); + Value* pointer_value = args.get_value(ArgMap::ID::pointer); bool single_byte_type{false}; if (auto* const_int = llvm::dyn_cast(type_size_value)) { @@ -143,12 +149,28 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { target_memory_address); break; } + case MemOpKind::CudaMallocLike: + [[fallthrough]]; + case MemOpKind::HipMallocLike: { + auto* runtime_ptr_type = instrumentation_helper->getTypeFor(IType::ptr); +#if LLVM_VERSION_MAJOR >= 15 + auto* loaded_ptr = IRB.CreateLoad(runtime_ptr_type, pointer_value); +#else + auto* pointer_slot_type = llvm::PointerType::get(runtime_ptr_type, 0); + auto* pointer_slot = IRB.CreateBitOrPointerCast(pointer_value, pointer_slot_type); + auto* loaded_ptr = IRB.CreateLoad(runtime_ptr_type, pointer_slot); +#endif + pointer_value = IRB.CreateBitOrPointerCast(loaded_ptr, 
instrumentation_helper->getTypeFor(IType::ptr)); + auto bytes = args.get_value(ArgMap::ID::byte_count); + element_count = calculate_element_count(bytes); + break; + } default: LOG_ERROR("Unknown malloc kind. Not instrumenting. " << util::dump(*malloc_call)); continue; } - function_instrumenter_->insert_heap_instrumentation(IRB, malloc.call, {malloc_call, element_count, typeid_value}); + function_instrumenter_->insert_heap_instrumentation(IRB, malloc.call, {pointer_value, element_count, typeid_value}); // const auto callback_id = ifunc_for_function(IFunc::heap, malloc.call); // auto type_id_param = function_instrumenter->getOrRegister(typeid_value); @@ -294,4 +316,4 @@ InstrCount MemOpInstrumentation::instrumentGlobal(const GlobalArgList& globals) return counter; } -} // namespace typeart \ No newline at end of file +} // namespace typeart diff --git a/lib/passes/instrumentation/TypeARTFunctions.cpp b/lib/passes/instrumentation/TypeARTFunctions.cpp index 0a1a8ea5..0828ca00 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.cpp +++ b/lib/passes/instrumentation/TypeARTFunctions.cpp @@ -16,6 +16,8 @@ #include "configuration/Configuration.h" #include "instrumentation/TypeIDProvider.h" #include "support/ConfigurationBase.h" +#include "support/CudaUtil.h" +#include "support/HipUtil.h" #include "support/Logger.h" #include "support/OmpUtil.h" @@ -49,9 +51,11 @@ namespace typeart { namespace detail { std::string get_func_suffix(IFunc id) { switch (id) { - // case IFunc::free_cuda: - // case IFunc::heap_cuda: - // return "_cuda"; + case IFunc::free_cuda: + case IFunc::heap_cuda: + case IFunc::free_hip: + case IFunc::heap_hip: + return "_gpu"; case IFunc::free_omp: case IFunc::heap_omp: case IFunc::stack_omp: @@ -62,13 +66,21 @@ std::string get_func_suffix(IFunc id) { } } -enum class IFuncType : unsigned { standard, omp, cuda }; +enum class IFuncType : unsigned { standard, omp, cuda, hip }; IFuncType ifunc_type_for(llvm::Function* f) { if (f == nullptr) { return 
IFuncType::standard; } + if (cuda::is_cuda_function(*f)) { + return IFuncType::cuda; + } + + if (hip::is_hip_function(*f)) { + return IFuncType::hip; + } + if (util::omp::isOmpContext(f)) { return IFuncType::omp; } @@ -88,28 +100,38 @@ IFunc ifunc_for_function(IFunc general_type, llvm::Value* value) { } else if (llvm::isa(value)) { type = detail::ifunc_type_for(nullptr); } else if (auto callbase = llvm::dyn_cast(value)) { - type = detail::ifunc_type_for(callbase->getFunction()); - // auto maybe_cuda = detail::ifunc_type_for(callbase->getCalledFunction()); - // if (maybe_cuda == detail::IFuncType::cuda) { - // type = detail::IFuncType::cuda; - // } + type = detail::ifunc_type_for(callbase->getFunction()); + auto called_context = detail::ifunc_type_for(callbase->getCalledFunction()); + if (called_context == detail::IFuncType::cuda || called_context == detail::IFuncType::hip) { + type = called_context; + } } if (detail::IFuncType::standard == type) { return general_type; } - // if (detail::IFuncType::cuda == type) { - // switch (general_type) { - // case IFunc::heap: - // return IFunc::heap_cuda; - // case IFunc::free: - // return IFunc::free_cuda; - // default: - // return general_type; - // // llvm_unreachable("IFunc not supported for CUDA."); - // } - // } + if (detail::IFuncType::cuda == type) { + switch (general_type) { + case IFunc::heap: + return IFunc::heap_cuda; + case IFunc::free: + return IFunc::free_cuda; + default: + return general_type; + } + } + + if (detail::IFuncType::hip == type) { + switch (general_type) { + case IFunc::heap: + return IFunc::heap_hip; + case IFunc::free: + return IFunc::free_hip; + default: + return general_type; + } + } switch (general_type) { case IFunc::stack: @@ -176,6 +198,7 @@ llvm::Function* TAFunctionDeclarator::make_function(IFunc func_id, llvm::StringR const auto name = make_fname(basename, args); if (auto it = function_map.find(name); it != function_map.end()) { + typeart_functions.putFunctionFor(func_id, it->second); 
return it->second; } @@ -287,6 +310,8 @@ TypeArtFunc typeart_alloc_omp = typeart_alloc; TypeArtFunc typeart_alloc_stacks_omp = typeart_alloc_stack; TypeArtFunc typeart_free_omp = typeart_free; TypeArtFunc typeart_leave_scope_omp = typeart_leave_scope; +TypeArtFunc typeart_alloc_cuda = typeart_alloc; +TypeArtFunc typeart_free_cuda = typeart_free; TypeArtFunc typeart_alloc_mty{"__typeart_alloc_mty"}; TypeArtFunc typeart_alloc_stack_mty{"__typeart_alloc_stack_mty"}; @@ -295,6 +320,8 @@ TypeArtFunc typeart_register_type{"__typeart_register_type"}; TypeArtFunc typeart_alloc_omp_mty = typeart_alloc_mty; TypeArtFunc typeart_alloc_stacks_omp_mty = typeart_alloc_stack_mty; +TypeArtFunc typeart_alloc_cuda_mty = typeart_alloc_mty; + } // namespace callbacks std::unique_ptr declare_instrumentation_functions(llvm::Module& m, @@ -318,6 +345,7 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& decl_alternatives.make_function(IFunc::stack, typeart_alloc_stack_mty.name, alloc_arg_types_mty); typeart_alloc_global_mty.f = decl_alternatives.make_function(IFunc::global, typeart_alloc_global_mty.name, alloc_arg_types_mty); + // functions_alternative.putFunctionFor(IFunc::heap_cuda, llvm::cast(typeart_alloc_mty.f)); typeart_register_type.f = decl.make_function(IFunc::type, typeart_register_type.name, free_arg_types); typeart_alloc.f = decl.make_function(IFunc::heap, typeart_alloc.name, alloc_arg_types); @@ -332,11 +360,20 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& typeart_leave_scope_omp.f = decl.make_function(IFunc::scope_omp, typeart_leave_scope_omp.name, leavescope_arg_types); + typeart_alloc_cuda.f = decl.make_function(IFunc::heap_cuda, typeart_alloc_cuda.name, alloc_arg_types); + typeart_free_cuda.f = decl.make_function(IFunc::free_cuda, typeart_free_cuda.name, free_arg_types); + decl.make_function(IFunc::heap_hip, typeart_alloc_cuda.name, alloc_arg_types); + decl.make_function(IFunc::free_hip, typeart_free_cuda.name, free_arg_types); + 
typeart_alloc_omp_mty.f = decl_alternatives.make_function(IFunc::heap_omp, typeart_alloc_omp_mty.name, alloc_arg_types_mty); typeart_alloc_stacks_omp_mty.f = decl_alternatives.make_function(IFunc::stack_omp, typeart_alloc_stacks_omp_mty.name, alloc_arg_types_mty); + typeart_alloc_cuda_mty.f = + decl_alternatives.make_function(IFunc::heap_cuda, typeart_alloc_cuda_mty.name, alloc_arg_types_mty); + decl_alternatives.make_function(IFunc::heap_hip, typeart_alloc_cuda_mty.name, alloc_arg_types_mty); + return std::make_unique(functions, functions_alternative); } diff --git a/lib/passes/instrumentation/TypeARTFunctions.h b/lib/passes/instrumentation/TypeARTFunctions.h index 21c135da..e43a103c 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.h +++ b/lib/passes/instrumentation/TypeARTFunctions.h @@ -32,7 +32,22 @@ namespace config { class Configuration; } -enum class IFunc : unsigned { heap, stack, global, free, scope, heap_omp, stack_omp, free_omp, scope_omp, type }; +enum class IFunc : unsigned { + heap, + stack, + global, + free, + scope, + heap_omp, + stack_omp, + free_omp, + scope_omp, + heap_cuda, + free_cuda, + heap_hip, + free_hip, + type +}; IFunc ifunc_for_function(IFunc general_type, llvm::Value* value); diff --git a/lib/passes/support/CudaUtil.h b/lib/passes/support/CudaUtil.h new file mode 100644 index 00000000..bc1ae58b --- /dev/null +++ b/lib/passes/support/CudaUtil.h @@ -0,0 +1,101 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. 
+// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_CUDAUTIL_H +#define TYPEART_CUDAUTIL_H + +#include "analysis/MemOpData.h" +#include "support/Util.h" + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#include +#include +#include + +namespace typeart::cuda { + +inline std::optional bitcast_for(llvm::Value* cuda_ptr) { + std::optional fallback; + for (auto& use : cuda_ptr->uses()) { + auto* use_value = use.get(); + auto* bitcast = llvm::dyn_cast(use_value); + if (bitcast == nullptr) { + continue; + } + + if (auto* primary_bitcast = llvm::dyn_cast(bitcast->getOperand(0))) { + return primary_bitcast; + } + + fallback = bitcast; + return fallback; + } + return fallback; +} + +inline std::optional bitcast_for(const llvm::CallBase& cuda_call) { + return bitcast_for(cuda_call.getArgOperand(0)); +} + +inline bool is_device_module(const llvm::Module& module) { +#if LLVM_VERSION_MAJOR >= 21 + const auto triple = module.getTargetTriple().str(); +#else + const auto triple = module.getTargetTriple(); +#endif + return llvm::StringRef{triple}.find("nvptx") != llvm::StringRef::npos; +} + +inline bool is_device_stub(const llvm::Function& function) { + const auto function_name = util::demangle(function.getName()); + return function_name.find("__device_stub__") != std::string::npos; +} + +inline bool is_dim3_init(const llvm::Function& function) { + const auto function_name = util::demangle(function.getName()); + return function_name.find("dim3::dim3") != std::string::npos; +} + +inline bool is_cuda_function(const llvm::Function& function) { + const auto function_name = llvm::StringRef{function.getName()}; + return util::starts_with_any_of(function_name, "cuda"); +} + +inline bool is_cuda_helper_function(const llvm::Function& function) { + if 
(is_device_stub(function) || is_dim3_init(function)) { + return true; + } + const auto function_name = llvm::StringRef{function.getName()}; + return util::starts_with_any_of(function_name, "__cuda"); +} + +inline bool is_templated_malloc_like(llvm::StringRef name) { + const auto templ_start_pos = name.find_first_of('<'); + if (templ_start_pos == llvm::StringRef::npos) { + return false; + } + auto extracted_fn = name.substr(0, templ_start_pos); + MemOps ops; + return ops.allocKind(extracted_fn) == MemOpKind::CudaMallocLike; +} + +inline bool is_templated_malloc_like(const llvm::Function& function) { + const std::string name = util::try_demangle(function); + return is_templated_malloc_like(name); +} + +} // namespace typeart::cuda + +#endif // TYPEART_CUDAUTIL_H diff --git a/lib/passes/support/GpuUtil.h b/lib/passes/support/GpuUtil.h new file mode 100644 index 00000000..f695a1a4 --- /dev/null +++ b/lib/passes/support/GpuUtil.h @@ -0,0 +1,68 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. 
+// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_GPUUTIL_H +#define TYPEART_GPUUTIL_H + +#include "analysis/MemOpData.h" +#include "support/CudaUtil.h" +#include "support/HipUtil.h" + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#include + +namespace typeart::gpu { + +inline bool is_device_module(const llvm::Module& module) { + return cuda::is_device_module(module) || hip::is_device_module(module); +} + +inline std::optional bitcast_for(const llvm::CallBase& cb, MemOpKind kind) { + if (kind == MemOpKind::CudaMallocLike) { + return cuda::bitcast_for(cb); + } + if (kind == MemOpKind::HipMallocLike) { + return hip::bitcast_for(cb); + } + return std::nullopt; +} + +inline bool is_templated_malloc_like(llvm::StringRef name, MemOpKind kind) { + if (kind == MemOpKind::CudaMallocLike) { + return cuda::is_templated_malloc_like(name); + } + if (kind == MemOpKind::HipMallocLike) { + return hip::is_templated_malloc_like(name); + } + return false; +} + +inline bool is_templated_malloc_like(const llvm::Function& function, MemOpKind kind) { + if (kind == MemOpKind::CudaMallocLike) { + return cuda::is_templated_malloc_like(function); + } + if (kind == MemOpKind::HipMallocLike) { + return hip::is_templated_malloc_like(function); + } + return false; +} + +inline bool is_gpu_function(const llvm::Function& function) { + return cuda::is_cuda_function(function) || hip::is_hip_function(function); +} + +} // namespace typeart::gpu + +#endif // TYPEART_GPUUTIL_H diff --git a/lib/passes/support/HipUtil.h b/lib/passes/support/HipUtil.h new file mode 100644 index 00000000..e22d967d --- /dev/null +++ b/lib/passes/support/HipUtil.h @@ -0,0 +1,83 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. 
+// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_HIPUTIL_H +#define TYPEART_HIPUTIL_H + +#include "analysis/MemOpData.h" +#include "support/Util.h" + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#include +#include +#include + +namespace typeart::hip { + +inline std::optional bitcast_for(llvm::Value* hip_ptr) { + std::optional fallback; + for (auto& use : hip_ptr->uses()) { + auto* use_value = use.get(); + auto* bitcast = llvm::dyn_cast(use_value); + if (bitcast == nullptr) { + continue; + } + + if (auto* primary_bitcast = llvm::dyn_cast(bitcast->getOperand(0))) { + return primary_bitcast; + } + + fallback = bitcast; + return fallback; + } + return fallback; +} + +inline std::optional bitcast_for(const llvm::CallBase& hip_call) { + return bitcast_for(hip_call.getArgOperand(0)); +} + +inline bool is_device_module(const llvm::Module& module) { +#if LLVM_VERSION_MAJOR >= 21 + const auto triple = module.getTargetTriple().str(); +#else + const auto triple = module.getTargetTriple(); +#endif + return llvm::StringRef{triple}.find("amdgcn") != llvm::StringRef::npos; +} + +inline bool is_hip_function(const llvm::Function& function) { + const auto function_name = util::try_demangle(function); + return util::starts_with_any_of(function_name, "hip"); +} + +inline bool is_templated_malloc_like(llvm::StringRef name) { + const auto templ_start_pos = name.find_first_of('<'); + if (templ_start_pos == llvm::StringRef::npos) { + return false; + } + auto extracted_fn = name.substr(0, templ_start_pos); + MemOps ops; + return ops.allocKind(extracted_fn) == MemOpKind::HipMallocLike; +} + +inline bool is_templated_malloc_like(const llvm::Function& function) { + const std::string name = util::try_demangle(function); + return is_templated_malloc_like(name); +} + +} 
// namespace typeart::hip + +#endif // TYPEART_HIPUTIL_H diff --git a/lib/passes/support/TypeUtil.cpp b/lib/passes/support/TypeUtil.cpp index 94c9fe75..653ec0d4 100644 --- a/lib/passes/support/TypeUtil.cpp +++ b/lib/passes/support/TypeUtil.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" +#include + using namespace llvm; namespace typeart::util::type { @@ -106,4 +108,17 @@ unsigned getPointerSizeInBytes(llvm::Type* /*ptrT*/, const llvm::DataLayout& dl) return dl.getPointerSizeInBits() / 8; } +std::optional getPointerElementType(llvm::Type* ptr_type) { + auto* pointer_type = dyn_cast_or_null(ptr_type); + if (pointer_type == nullptr) { + return {}; + } + +#if LLVM_VERSION_MAJOR < 15 + return pointer_type->getPointerElementType(); +#else + return {}; +#endif +} + } // namespace typeart::util::type diff --git a/lib/passes/support/TypeUtil.h b/lib/passes/support/TypeUtil.h index 129f6d99..684e4055 100644 --- a/lib/passes/support/TypeUtil.h +++ b/lib/passes/support/TypeUtil.h @@ -20,6 +20,8 @@ class AllocaInst; class LLVMContext; } // namespace llvm +#include + namespace typeart::util::type { #if LLVM_VERSION_MAJOR < 15 @@ -44,6 +46,8 @@ unsigned getStructSizeInBytes(llvm::Type* structT, const llvm::DataLayout& dl); unsigned getPointerSizeInBytes(llvm::Type* ptrT, const llvm::DataLayout& dl); +std::optional getPointerElementType(llvm::Type* ptr_type); + } // namespace typeart::util::type #endif // TYPEART_TYPE_UTIL_H diff --git a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp index 90a19cc3..5c2feb32 100644 --- a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp +++ b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp @@ -13,6 +13,8 @@ #include "../TypeIDGenerator.h" #include "Dimeta.h" #include "DimetaData.h" +#include "analysis/MemOpData.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "typegen/TypeGenerator.h" #include "typelib/TypeDatabase.h" @@ -97,6 +99,29 @@ auto 
apply_function(const Type& type, Func&& handle_qualified_type) { } // namespace detail namespace workaround { +namespace detail { +void remove_pointer_level_impl(dimeta::LocatedType& val) { + const auto remove_pointer_level = [](auto& qual) { + auto pointer_like_iter = llvm::find_if(qual, [](auto qualifier) { + switch (qualifier) { + case dimeta::Qualifier::kPtr: + case dimeta::Qualifier::kRef: + case dimeta::Qualifier::kPtrToMember: + return true; + default: + break; + } + return false; + }); + if (pointer_like_iter != std::end(qual)) { + LOG_DEBUG("Removing pointer level " << static_cast(*pointer_like_iter)) + qual.erase(pointer_like_iter); + } + }; + std::visit([&](auto&& qualified_type) { remove_pointer_level(qualified_type.qual); }, val.type); +} +} // namespace detail + void remove_pointer_level(const llvm::AllocaInst* alloc, dimeta::LocatedType& val) { // If the alloca instruction is not a pointer, but the located_type has a pointer-like qualifier, we remove it. // Workaround for inlining issue, see test typemapping/05_milc_inline_metadata.c @@ -105,27 +130,16 @@ void remove_pointer_level(const llvm::AllocaInst* alloc, dimeta::LocatedType& va // this will cause MPI handle arrays (typedef "ptr to opaque struct") to be considered a pointer if (!alloc->getAllocatedType()->isPointerTy() && !alloc->getAllocatedType()->isArrayTy()) { LOG_DEBUG("Alloca is not a pointer type: " << *alloc->getAllocatedType()) - - const auto remove_pointer_level = [](auto& qual) { - auto pointer_like_iter = llvm::find_if(qual, [](auto qualifier) { - switch (qualifier) { - case dimeta::Qualifier::kPtr: - case dimeta::Qualifier::kRef: - case dimeta::Qualifier::kPtrToMember: - return true; - default: - break; - } - return false; - }); - if (pointer_like_iter != std::end(qual)) { - LOG_DEBUG("Removing pointer level " << static_cast(*pointer_like_iter)) - qual.erase(pointer_like_iter); - } - }; - std::visit([&](auto&& qualified_type) { remove_pointer_level(qualified_type.qual); }, 
val.type); + detail::remove_pointer_level_impl(val); } } + +void remove_pointer_level(const llvm::CallBase* call, dimeta::LocatedType& val) { + // If the call base is a templated cudaMalloc<...> call, currently we need to remove a single pointer level to + // correctly determine the allocated type + detail::remove_pointer_level_impl(val); +} + } // namespace workaround template @@ -480,6 +494,17 @@ class DimetaTypeManager final : public TypeIDGenerator { if (val) { LOG_DEBUG("Registering malloc-like") + const auto function_name = val->location.function; + MemOps mem_operations; + auto kind = call->getCalledFunction() != nullptr ? mem_operations.kind(call->getCalledFunction()->getName()) + : std::nullopt; + + if (kind && is_kind(kind.value(), MemOpKind::GpuMallocLike) && + gpu::is_templated_malloc_like(function_name, kind.value())) { + LOG_DEBUG("Workaround for pointer level of call base " << function_name) + workaround::remove_pointer_level(call, val.value()); + } + return {getOrRegister(val->type, true), array_size(val->type)}; } } else if (auto* alloc = llvm::dyn_cast(type)) { diff --git a/lib/passes/typegen/ir/TypeManager.cpp b/lib/passes/typegen/ir/TypeManager.cpp index daf26dbb..2c346700 100644 --- a/lib/passes/typegen/ir/TypeManager.cpp +++ b/lib/passes/typegen/ir/TypeManager.cpp @@ -15,6 +15,7 @@ #include "IRTypeGen.h" #include "StructTypeHandler.h" #include "VectorTypeHandler.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "support/TypeUtil.h" #include "support/Util.h" @@ -277,9 +278,27 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { auto malloc_call = mdata.call; const llvm::DataLayout& dl = malloc_call->getModule()->getDataLayout(); BitCastInst* primaryBitcast = mdata.primary; + llvm::Type* allocation_type = nullptr; + + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike) && primaryBitcast == nullptr) { + if (auto bitcast = gpu::bitcast_for(*malloc_call, mdata.kind); bitcast.has_value()) { + primaryBitcast = 
*bitcast; + } + } + + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike)) { + allocation_type = llvm::Type::getInt8Ty(malloc_call->getContext()); + } else { + auto pointee_type = tu::getPointerElementType(malloc_call->getType()); + allocation_type = !pointee_type ? llvm::Type::getInt8Ty(malloc_call->getContext()) : *pointee_type; + } + + int typeId = getOrRegisterType(allocation_type, dl); // retrieveTypeID(tu::getVoidType(c)); + + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike)) { + typeId = TYPEART_POINTER; + } - int typeId = getOrRegisterType(malloc_call->getType()->getPointerElementType(), - dl); // retrieveTypeID(tu::getVoidType(c)); if (typeId == TYPEART_UNKNOWN_TYPE) { LOG_ERROR("Unknown heap type. Not instrumenting. " << util::dump(*malloc_call)); // TODO notify caller that we skipped: via lambda callback function @@ -287,11 +306,34 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { }; // Number of bytes per element, 1 for void* - unsigned typeSize = tu::getTypeSizeInBytes(malloc_call->getType()->getPointerElementType(), dl); + unsigned typeSize = tu::getTypeSizeInBytes(allocation_type, dl); + + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike)) { + typeSize = 1; + } // Use the first cast as the determining type (if there is any) if (primaryBitcast != nullptr) { - auto* dstPtrType = primaryBitcast->getDestTy()->getPointerElementType(); + llvm::Type* dstPtrType = nullptr; + if (auto pointee_type = tu::getPointerElementType(primaryBitcast->getDestTy()); pointee_type.has_value()) { + dstPtrType = *pointee_type; + } + // Basically: getSrcTy()->getPointerElementType()->getPointerElementType(): + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike) && dstPtrType == nullptr) { + if (auto pointee_type = tu::getPointerElementType(primaryBitcast->getSrcTy()); pointee_type.has_value()) { + dstPtrType = *pointee_type; + } + if (dstPtrType != nullptr && dstPtrType->isPointerTy()) { + if (auto nested = tu::getPointerElementType(dstPtrType); 
nested.has_value()) { + dstPtrType = *nested; + } + } + } + + if (dstPtrType == nullptr) { + LOG_WARNING("Could not resolve non-opaque pointee type for allocation cast. Keeping fallback type.") + return {typeId, 0}; + } typeSize = tu::getTypeSizeInBytes(dstPtrType, dl); diff --git a/lib/runtime/AllocationTracking.cpp b/lib/runtime/AllocationTracking.cpp index afcbf973..ce03c931 100644 --- a/lib/runtime/AllocationTracking.cpp +++ b/lib/runtime/AllocationTracking.cpp @@ -289,6 +289,18 @@ void __typeart_leave_scope_omp(int alloca_count) { typeart::RuntimeSystem::get().allocation_tracker().onLeaveScope(alloca_count, retAddr); } +void __typeart_alloc_gpu(const void* addr, int typeId, size_t count) { + TYPEART_RUNTIME_GUARD; + const void* retAddr = __builtin_return_address(0); + typeart::RuntimeSystem::get().allocation_tracker().onAlloc(addr, typeId, count, retAddr); +} + +void __typeart_free_gpu(const void* addr) { + TYPEART_RUNTIME_GUARD; + const void* retAddr = __builtin_return_address(0); + typeart::RuntimeSystem::get().allocation_tracker().onFreeHeap(addr, retAddr); +} + void __typeart_alloc_mty(const void* addr, const void* info, size_t count) { TYPEART_RUNTIME_GUARD; const void* retAddr = __builtin_return_address(0); @@ -298,6 +310,15 @@ void __typeart_alloc_mty(const void* addr, const void* info, size_t count) { rt.allocation_tracker().onAlloc(addr, type_id, count, retAddr); } +void __typeart_alloc_mty_gpu(const void* addr, const void* info, size_t count) { + TYPEART_RUNTIME_GUARD; + const void* retAddr = __builtin_return_address(0); + const auto type_id = reinterpret_cast(info)->type_id; + auto& rt = typeart::RuntimeSystem::get(); + assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match"); + rt.allocation_tracker().onAlloc(addr, type_id, count, retAddr); +} + void __typeart_alloc_stack_mty(const void* addr, const void* info, size_t count) { TYPEART_RUNTIME_GUARD; const void* retAddr = __builtin_return_address(0); 
diff --git a/lib/runtime/CallbackInterface.h b/lib/runtime/CallbackInterface.h index 83ec2d06..95a3f885 100644 --- a/lib/runtime/CallbackInterface.h +++ b/lib/runtime/CallbackInterface.h @@ -39,6 +39,9 @@ TYPEART_EXPORT void __typeart_free_omp(const void* addr); TYPEART_EXPORT void __typeart_alloc_stack_omp(const void* addr, int type_id, size_t count); TYPEART_EXPORT void __typeart_leave_scope_omp(int alloca_count); +TYPEART_EXPORT void __typeart_alloc_gpu(const void* addr, int type_id, size_t count); +TYPEART_EXPORT void __typeart_free_gpu(const void* addr); + // Called for inlined type definitions mode TYPEART_EXPORT void __typeart_alloc_mty(const void* addr, const void* info, size_t count); TYPEART_EXPORT void __typeart_alloc_global_mty(const void* addr, const void* info, size_t count); @@ -47,6 +50,8 @@ TYPEART_EXPORT void __typeart_register_type(const void* type); TYPEART_EXPORT void __typeart_alloc_global_mty_omp(const void* addr, const void* info, size_t count); TYPEART_EXPORT void __typeart_alloc_stack_mty_omp(const void* addr, const void* info, size_t count); + +TYPEART_EXPORT void __typeart_alloc_mty_gpu(const void* addr, const void* info, size_t count); #ifdef __cplusplus } #endif diff --git a/lib/support/ConfigurationBaseOptions.h b/lib/support/ConfigurationBaseOptions.h index f7386c8e..e8bc1fc3 100644 --- a/lib/support/ConfigurationBaseOptions.h +++ b/lib/support/ConfigurationBaseOptions.h @@ -18,6 +18,7 @@ TYPEART_CONFIG_OPTION(types, "types", std::string, "typeart-types.yaml", "Locati "TYPES") TYPEART_CONFIG_OPTION(stats, "stats", bool, false, "Show statistics for TypeArt type pass.", "STATS") TYPEART_CONFIG_OPTION(heap, "heap", bool, true, "Instrument heap allocation/free instructions.", "HEAP") +TYPEART_CONFIG_OPTION(gpu, "gpu", bool, false, "Instrument GPU allocation/free instructions.", "GPU") TYPEART_CONFIG_OPTION(stack, "stack", bool, false, "Instrument stack allocations.", "STACK") TYPEART_CONFIG_OPTION(global, "global", bool, false, 
"Instrument global allocations.", "GLOBAL") TYPEART_CONFIG_OPTION(stack_lifetime, "stack-lifetime", bool, true, diff --git a/scripts/typeart-wrapperv2.in b/scripts/typeart-wrapperv2.in index 494cb4b9..dfea2317 100644 --- a/scripts/typeart-wrapperv2.in +++ b/scripts/typeart-wrapperv2.in @@ -69,15 +69,64 @@ function typeart_is_linking_fn() { return 1 } +function typeart_parse_typeart_option_fn() { + local typeart_option="$1" + local option_payload="${typeart_option#--typeart-}" + local option_name="${option_payload%%=*}" + local option_value="${option_payload#*=}" + + if [ -z "$option_name" ] || [ "$option_payload" = "$option_value" ]; then + echo "TypeART wrapper option requires --typeart-