From 8d672f5fccd147d4f7797dbd8694cfd8eb434f6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Sat, 25 Apr 2026 21:20:47 +0200 Subject: [PATCH 01/24] Initial port of CUDA support --- cmake/typeartToolchainOptions.cmake | 7 ++ lib/passes/TypeARTPass.cpp | 10 +++ lib/passes/analysis/MemOpData.h | 7 ++ lib/passes/analysis/MemOpVisitor.cpp | 12 ++- lib/passes/configuration/PassBuilderUtil.h | 7 +- .../CallBackFunctionInserter.cpp | 8 +- .../instrumentation/MemOpArgCollector.cpp | 8 ++ .../instrumentation/MemOpInstrumentation.cpp | 28 ++++++- .../instrumentation/TypeARTFunctions.cpp | 48 ++++++----- lib/passes/instrumentation/TypeARTFunctions.h | 15 +++- lib/passes/support/CudaUtil.h | 84 +++++++++++++++++++ lib/passes/support/TypeUtil.cpp | 15 ++++ lib/passes/support/TypeUtil.h | 4 + lib/passes/typegen/ir/TypeManager.cpp | 50 ++++++++++- lib/runtime/AllocationTracking.cpp | 12 +++ lib/runtime/CMakeLists.txt | 22 +++++ lib/runtime/CallbackInterface.h | 3 + lib/runtime/CudaRuntimeInterface.h | 36 ++++++++ lib/runtime/CudaSupport.cpp | 43 ++++++++++ test/CMakeLists.txt | 65 ++++++++++++++ test/cuda/pass/01_cudamalloc.c | 16 ++++ test/cuda/pass/02_cudafree.c | 14 ++++ test/cuda/pass/03_cudahostalloc.c | 14 ++++ test/cuda/pass/06_cudamalloc_nonvoid.cpp | 25 ++++++ test/cuda/pass/07_axpy.c | 32 +++++++ test/lit.cfg | 12 +++ test/lit.site.cfg.in | 4 + test/pass/misc/05_make_all_callbacks.c | 4 +- 28 files changed, 565 insertions(+), 40 deletions(-) create mode 100644 lib/passes/support/CudaUtil.h create mode 100644 lib/runtime/CudaRuntimeInterface.h create mode 100644 lib/runtime/CudaSupport.cpp create mode 100644 test/cuda/pass/01_cudamalloc.c create mode 100644 test/cuda/pass/02_cudafree.c create mode 100644 test/cuda/pass/03_cudahostalloc.c create mode 100644 test/cuda/pass/06_cudamalloc_nonvoid.cpp create mode 100644 test/cuda/pass/07_axpy.c diff --git a/cmake/typeartToolchainOptions.cmake b/cmake/typeartToolchainOptions.cmake index 741c9b3e..e0f3f8ed 100644 --- a/cmake/typeartToolchainOptions.cmake +++ b/cmake/typeartToolchainOptions.cmake @@ -163,6 +163,13 @@ set_package_properties(Python3 PROPERTIES "The Python3 interpreter is used for lit-testing and the MPI interceptor tool code generation." ) +find_package(CUDAToolkit QUIET) +set_package_properties(CUDAToolkit PROPERTIES + TYPE OPTIONAL + PURPOSE + "CUDA toolkit enables host-side CUDA instrumentation and runtime helpers." +) + typeart_find_llvm_progs(TYPEART_CLANG_EXEC "clang-${LLVM_VERSION_MAJOR};clang" DEFAULT_EXE "clang") typeart_find_llvm_progs(TYPEART_CLANGCXX_EXEC "clang++-${LLVM_VERSION_MAJOR};clang++" DEFAULT_EXE "clang++") typeart_find_llvm_progs(TYPEART_LLC_EXEC "llc-${LLVM_VERSION_MAJOR};llc" DEFAULT_EXE "llc") diff --git a/lib/passes/TypeARTPass.cpp b/lib/passes/TypeARTPass.cpp index cf63ac3b..79256a4b 100644 --- a/lib/passes/TypeARTPass.cpp +++ b/lib/passes/TypeARTPass.cpp @@ -25,6 +25,7 @@ #include "instrumentation/TypeARTFunctions.h" #include "instrumentation/TypeIDProvider.h" #include "support/ConfigurationBase.h" +#include "support/CudaUtil.h" #include "support/Logger.h" #include "support/ModuleDumper.h" #include "support/Table.h" @@ -280,6 +281,11 @@ run(llvm::Module& m, llvm::ModuleAnalysisManager&) { } bool runOnModule(llvm::Module& m) { + if (cuda::is_device_module(m)) { + LOG_DEBUG("Skipping CUDA device module: " << m.getName()); + return false; + } + meminst_finder->runOnModule(m); const bool instrument_global = configuration()[config::ConfigStdArgs::global]; bool globals_were_instrumented{false}; @@ -324,6 +330,10 @@ bool runOnFunc(llvm::Function& f) { return false; } + if (cuda::is_cuda_helper_function(f)) { + return false; + } + if (!meminst_finder->hasFunctionData(f)) { LOG_WARNING("No allocation data could be retrieved for function: " << f.getName()); return false; diff --git a/lib/passes/analysis/MemOpData.h b/lib/passes/analysis/MemOpData.h index 34d173e5..b96db234 100644 --- a/lib/passes/analysis/MemOpData.h +++ b/lib/passes/analysis/MemOpData.h @@ -39,6 +39,7 @@ enum class MemOpKind : uint8_t { ReallocLike = 1 << 4, // re-allocated (existing) memory FreeLike = 1 << 5, // free memory DeleteLike = 1 << 6, // delete (cpp) memory + CudaMallocLike = 1 << 7, // cuda out-parameter allocation MallocOrCallocLike = MallocLike | CallocLike | AlignedAllocLike, AllocLike = MallocOrCallocLike, AnyAlloc = AllocLike | ReallocLike, @@ -101,6 +102,10 @@ struct MemOps { {"_ZnajSt11align_val_tRKSt9nothrow_t", MemOpKind::MallocLike}, /*new[](unsigned int, align_val_t, nothrow)*/ {"_ZnamSt11align_val_t", MemOpKind::NewLike}, /*new[](unsigned long, align_val_t)*/ {"_ZnamSt11align_val_tRKSt9nothrow_t", MemOpKind::MallocLike}, /*new[](unsigned long, align_val_t, nothrow)*/ + {"cudaMalloc", MemOpKind::CudaMallocLike}, + {"cudaHostAlloc", MemOpKind::CudaMallocLike}, + {"cudaMallocHost", MemOpKind::CudaMallocLike}, + {"cudaMallocManaged", MemOpKind::CudaMallocLike}, }; const llvm::StringMap dealloc_map{ @@ -119,6 +124,8 @@ struct MemOps { {"_ZdlPvmSt11align_val_t", MemOpKind::DeleteLike}, /* delete(void*, unsigned long, align_val_t) */ {"_ZdaPvjSt11align_val_t", MemOpKind::DeleteLike}, /* delete[](void*, unsigned int, align_val_t) */ {"_ZdaPvmSt11align_val_t", MemOpKind::DeleteLike}, /* delete[](void*, unsigned long, align_val_t) */ + {"cudaFree", MemOpKind::FreeLike}, + {"cudaFreeHost", MemOpKind::FreeLike}, }; //clang-format off }; diff --git a/lib/passes/analysis/MemOpVisitor.cpp b/lib/passes/analysis/MemOpVisitor.cpp index 9b835cc8..c42a92ea 100644 --- a/lib/passes/analysis/MemOpVisitor.cpp +++ b/lib/passes/analysis/MemOpVisitor.cpp @@ -16,6 +16,7 @@ #include "compat/CallSite.h" #include "configuration/Configuration.h" #include "support/ConfigurationBase.h" +#include "support/CudaUtil.h" #include "support/Error.h" #include "support/Logger.h" #include "support/TypeUtil.h" @@ -222,10 +223,17 @@ void collect_casts_from_stack(llvm::StoreInst* store_inst, MallocBcasts& out_bca } } -std::pair collectRelevantMallocUsers(llvm::CallBase& call_inst) { +std::pair collectRelevantMallocUsers(llvm::CallBase& call_inst, MemOpKind kind) { auto geps = MallocGeps{}; auto bcasts = MallocBcasts{}; + if (kind == MemOpKind::CudaMallocLike) { + if (auto bitcast = cuda::bitcast_for(call_inst); bitcast.has_value()) { + bcasts.insert(*bitcast); + } + return {geps, bcasts}; + } + for (auto* user : call_inst.users()) { if (auto* bit_cast = llvm::dyn_cast(user)) { bcasts.insert(bit_cast); @@ -325,7 +333,7 @@ std::optional handleArrayCookie(llvm::CallBase& ci, const Mallo } void MemOpVisitor::visitMallocLike(llvm::CallBase& ci, MemOpKind k) { - auto [geps, bcasts] = collectRelevantMallocUsers(ci); + auto [geps, bcasts] = collectRelevantMallocUsers(ci, k); auto primary_cast = bcasts.empty() ? nullptr : *bcasts.begin(); auto array_cookie = handleArrayCookie(ci, geps, bcasts, primary_cast); if (primary_cast == nullptr) { diff --git a/lib/passes/configuration/PassBuilderUtil.h b/lib/passes/configuration/PassBuilderUtil.h index cea89fcd..94fac7ce 100644 --- a/lib/passes/configuration/PassBuilderUtil.h +++ b/lib/passes/configuration/PassBuilderUtil.h @@ -16,6 +16,7 @@ #define TYPEART_PASS_BUILDER_UTIL_H #include "support/Logger.h" +#include "support/Util.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -29,11 +30,7 @@ inline bool checkParametrizedPassName(llvm::StringRef Name, llvm::StringRef Pass // normal pass name w/o parameters == default parameters if (Name.empty()) return true; -#if LLVM_VERSION_MAJOR > 15 - return Name.starts_with("<") && Name.ends_with(">"); -#else - return Name.startswith("<") && Name.endswith(">"); -#endif + return starts_with_any_of(Name, "<") && ends_with_any_of(Name, ">"); } /// This performs customized parsing of pass name with parameters. diff --git a/lib/passes/instrumentation/CallBackFunctionInserter.cpp b/lib/passes/instrumentation/CallBackFunctionInserter.cpp index cd89f9f7..b9c1d361 100644 --- a/lib/passes/instrumentation/CallBackFunctionInserter.cpp +++ b/lib/passes/instrumentation/CallBackFunctionInserter.cpp @@ -5,6 +5,7 @@ #include "support/ConfigurationBase.h" #include "support/Logger.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -55,8 +56,9 @@ llvm::CallInst* CallbackFunctionInserter::create_instrumentation_call(llvm::IRBu const auto callback_id = ifunc_for_function(callback_type, instruction_or_value); auto type_id_param_out = type_id_handler_->getOrRegister(args.typeid_value); - const auto mode = llvm::isa(type_id_param_out) ? mode_ : TypeSerializationImplementation::FILE; - auto function = function_query_->getFunctionFor(callback_id, mode); + const bool has_global_type_payload = !llvm::isa(type_id_param_out); + const auto mode = has_global_type_payload ? mode_ : TypeSerializationImplementation::FILE; + auto function = function_query_->getFunctionFor(callback_id, mode); return IRB.CreateCall(function, llvm::ArrayRef{args.pointer_value, type_id_param_out, args.element_count}); @@ -91,4 +93,4 @@ std::unique_ptr make_callback_inserter(const config::Co return std::make_unique(configuration, std::move(type_id_handler), function_query); } -} // namespace typeart \ No newline at end of file +} // namespace typeart diff --git a/lib/passes/instrumentation/MemOpArgCollector.cpp b/lib/passes/instrumentation/MemOpArgCollector.cpp index bd6315ed..b70ba713 100644 --- a/lib/passes/instrumentation/MemOpArgCollector.cpp +++ b/lib/passes/instrumentation/MemOpArgCollector.cpp @@ -104,6 +104,14 @@ HeapArgList MemOpArgCollector::collectHeap(const MallocDataList& mallocs) { case MemOpKind::AlignedAllocLike: byte_count = malloc_call->getArgOperand(1); break; + case MemOpKind::CudaMallocLike: + byte_count = malloc_call->getArgOperand(1); + if (mdata.primary != nullptr) { + pointer = mdata.primary->getOperand(0); + } else { + pointer = malloc_call->getArgOperand(0); + } + break; default: LOG_ERROR("Unknown malloc kind. Not instrumenting. " << util::dump(*malloc_call)); // TODO see above continues diff --git a/lib/passes/instrumentation/MemOpInstrumentation.cpp b/lib/passes/instrumentation/MemOpInstrumentation.cpp index 950bfd8d..d16a99f5 100644 --- a/lib/passes/instrumentation/MemOpInstrumentation.cpp +++ b/lib/passes/instrumentation/MemOpInstrumentation.cpp @@ -72,8 +72,13 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { const bool is_llvm_ir_type = static_cast(type_gen) == static_cast(TypegenImplementation::IR); for (const auto& [malloc, args] : heap) { - auto kind = malloc.kind; - auto* malloc_call = args.get_as(ArgMap::ID::pointer); + auto kind = malloc.kind; + Instruction* malloc_call{nullptr}; + if (malloc.kind == MemOpKind::CudaMallocLike) { + malloc_call = llvm::cast(malloc.call); + } else { + malloc_call = args.get_as(ArgMap::ID::pointer); + } Instruction* insertBefore = malloc_call->getNextNode(); if (malloc.array_cookie) { @@ -89,6 +94,7 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { auto typeid_value = args.get_as(ArgMap::ID::type_id); auto type_size_value = args.get_value(ArgMap::ID::type_size); + Value* pointer_value = args.get_value(ArgMap::ID::pointer); bool single_byte_type{false}; if (auto* const_int = llvm::dyn_cast(type_size_value)) { @@ -143,12 +149,26 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { target_memory_address); break; } + case MemOpKind::CudaMallocLike: { + auto* runtime_ptr_type = instrumentation_helper->getTypeFor(IType::ptr); +#if LLVM_VERSION_MAJOR >= 15 + auto* loaded_ptr = IRB.CreateLoad(runtime_ptr_type, pointer_value); +#else + auto* pointer_slot_type = llvm::PointerType::get(runtime_ptr_type, 0); + auto* pointer_slot = IRB.CreateBitOrPointerCast(pointer_value, pointer_slot_type); + auto* loaded_ptr = IRB.CreateLoad(runtime_ptr_type, pointer_slot); +#endif + pointer_value = IRB.CreateBitOrPointerCast(loaded_ptr, instrumentation_helper->getTypeFor(IType::ptr)); + auto bytes = args.get_value(ArgMap::ID::byte_count); + element_count = calculate_element_count(bytes); + break; + } default: LOG_ERROR("Unknown malloc kind. Not instrumenting. " << util::dump(*malloc_call)); continue; } - function_instrumenter_->insert_heap_instrumentation(IRB, malloc.call, {malloc_call, element_count, typeid_value}); + function_instrumenter_->insert_heap_instrumentation(IRB, malloc.call, {pointer_value, element_count, typeid_value}); // const auto callback_id = ifunc_for_function(IFunc::heap, malloc.call); // auto type_id_param = function_instrumenter->getOrRegister(typeid_value); @@ -294,4 +314,4 @@ InstrCount MemOpInstrumentation::instrumentGlobal(const GlobalArgList& globals) return counter; } -} // namespace typeart \ No newline at end of file +} // namespace typeart diff --git a/lib/passes/instrumentation/TypeARTFunctions.cpp b/lib/passes/instrumentation/TypeARTFunctions.cpp index 0a1a8ea5..67168d47 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.cpp +++ b/lib/passes/instrumentation/TypeARTFunctions.cpp @@ -16,6 +16,7 @@ #include "configuration/Configuration.h" #include "instrumentation/TypeIDProvider.h" #include "support/ConfigurationBase.h" +#include "support/CudaUtil.h" #include "support/Logger.h" #include "support/OmpUtil.h" @@ -49,9 +50,9 @@ namespace typeart { namespace detail { std::string get_func_suffix(IFunc id) { switch (id) { - // case IFunc::free_cuda: - // case IFunc::heap_cuda: - // return "_cuda"; + case IFunc::free_cuda: + case IFunc::heap_cuda: + return "_cuda"; case IFunc::free_omp: case IFunc::heap_omp: case IFunc::stack_omp: @@ -69,6 +70,10 @@ IFuncType ifunc_type_for(llvm::Function* f) { return IFuncType::standard; } + if (cuda::is_cuda_function(*f)) { + return IFuncType::cuda; + } + if (util::omp::isOmpContext(f)) { return IFuncType::omp; } @@ -88,28 +93,27 @@ IFunc ifunc_for_function(IFunc general_type, llvm::Value* value) { } else if (llvm::isa(value)) { type = detail::ifunc_type_for(nullptr); } else if (auto callbase = llvm::dyn_cast(value)) { - type = detail::ifunc_type_for(callbase->getFunction()); - // auto maybe_cuda = detail::ifunc_type_for(callbase->getCalledFunction()); - // if (maybe_cuda == detail::IFuncType::cuda) { - // type = detail::IFuncType::cuda; - // } + type = detail::ifunc_type_for(callbase->getFunction()); + auto maybe_cuda = detail::ifunc_type_for(callbase->getCalledFunction()); + if (maybe_cuda == detail::IFuncType::cuda) { + type = detail::IFuncType::cuda; + } } if (detail::IFuncType::standard == type) { return general_type; } - // if (detail::IFuncType::cuda == type) { - // switch (general_type) { - // case IFunc::heap: - // return IFunc::heap_cuda; - // case IFunc::free: - // return IFunc::free_cuda; - // default: - // return general_type; - // // llvm_unreachable("IFunc not supported for CUDA."); - // } - // } + if (detail::IFuncType::cuda == type) { + switch (general_type) { + case IFunc::heap: + return IFunc::heap_cuda; + case IFunc::free: + return IFunc::free_cuda; + default: + return general_type; + } + } switch (general_type) { case IFunc::stack: @@ -287,6 +291,8 @@ TypeArtFunc typeart_alloc_omp = typeart_alloc; TypeArtFunc typeart_alloc_stacks_omp = typeart_alloc_stack; TypeArtFunc typeart_free_omp = typeart_free; TypeArtFunc typeart_leave_scope_omp = typeart_leave_scope; +TypeArtFunc typeart_alloc_cuda = typeart_alloc; +TypeArtFunc typeart_free_cuda = typeart_free; TypeArtFunc typeart_alloc_mty{"__typeart_alloc_mty"}; TypeArtFunc typeart_alloc_stack_mty{"__typeart_alloc_stack_mty"}; @@ -318,6 +324,7 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& decl_alternatives.make_function(IFunc::stack, typeart_alloc_stack_mty.name, alloc_arg_types_mty); typeart_alloc_global_mty.f = decl_alternatives.make_function(IFunc::global, typeart_alloc_global_mty.name, alloc_arg_types_mty); + functions_alternative.putFunctionFor(IFunc::heap_cuda, llvm::cast(typeart_alloc_mty.f)); typeart_register_type.f = decl.make_function(IFunc::type, typeart_register_type.name, free_arg_types); typeart_alloc.f = decl.make_function(IFunc::heap, typeart_alloc.name, alloc_arg_types); @@ -332,6 +339,9 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& typeart_leave_scope_omp.f = decl.make_function(IFunc::scope_omp, typeart_leave_scope_omp.name, leavescope_arg_types); + typeart_alloc_cuda.f = decl.make_function(IFunc::heap_cuda, typeart_alloc_cuda.name, alloc_arg_types); + typeart_free_cuda.f = decl.make_function(IFunc::free_cuda, typeart_free_cuda.name, free_arg_types); + typeart_alloc_omp_mty.f = decl_alternatives.make_function(IFunc::heap_omp, typeart_alloc_omp_mty.name, alloc_arg_types_mty); typeart_alloc_stacks_omp_mty.f = diff --git a/lib/passes/instrumentation/TypeARTFunctions.h b/lib/passes/instrumentation/TypeARTFunctions.h index 21c135da..3128d92e 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.h +++ b/lib/passes/instrumentation/TypeARTFunctions.h @@ -32,7 +32,20 @@ namespace config { class Configuration; } -enum class IFunc : unsigned { heap, stack, global, free, scope, heap_omp, stack_omp, free_omp, scope_omp, type }; +enum class IFunc : unsigned { + heap, + stack, + global, + free, + scope, + heap_omp, + stack_omp, + free_omp, + scope_omp, + heap_cuda, + free_cuda, + type +}; IFunc ifunc_for_function(IFunc general_type, llvm::Value* value); diff --git a/lib/passes/support/CudaUtil.h b/lib/passes/support/CudaUtil.h new file mode 100644 index 00000000..69eba5a0 --- /dev/null +++ b/lib/passes/support/CudaUtil.h @@ -0,0 +1,84 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. +// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_CUDAUTIL_H +#define TYPEART_CUDAUTIL_H + +#include "support/Util.h" + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#include +#include + +namespace typeart::cuda { + +inline std::optional bitcast_for(llvm::Value* cuda_ptr) { + std::optional fallback; + for (auto& use : cuda_ptr->uses()) { + auto* use_value = use.get(); + auto* bitcast = llvm::dyn_cast(use_value); + if (bitcast == nullptr) { + continue; + } + + if (auto* primary_bitcast = llvm::dyn_cast(bitcast->getOperand(0))) { + return primary_bitcast; + } + + fallback = bitcast; + return fallback; + } + return fallback; +} + +inline std::optional bitcast_for(const llvm::CallBase& cuda_call) { + return bitcast_for(cuda_call.getArgOperand(0)); +} + +inline bool is_device_module(const llvm::Module& module) { +#if LLVM_VERSION_MAJOR >= 20 + const auto triple = module.getTargetTriple().str(); +#else + const auto triple = module.getTargetTriple(); +#endif + return llvm::StringRef{triple}.find("nvptx") != llvm::StringRef::npos; +} + +inline bool is_device_stub(const llvm::Function& function) { + const auto function_name = util::demangle(function.getName()); + return function_name.find("__device_stub__") != std::string::npos; +} + +inline bool is_dim3_init(const llvm::Function& function) { + const auto function_name = util::demangle(function.getName()); + return function_name.find("dim3::dim3") != std::string::npos; +} + +inline bool is_cuda_function(const llvm::Function& function) { + const auto function_name = llvm::StringRef{function.getName()}; + return util::starts_with_any_of(function_name, "cuda"); +} + +inline bool is_cuda_helper_function(const llvm::Function& function) { + if (is_device_stub(function) || is_dim3_init(function)) { + return true; + } + const auto function_name = llvm::StringRef{function.getName()}; + return util::starts_with_any_of(function_name, "__cuda"); +} + +} // namespace typeart::cuda + +#endif // TYPEART_CUDAUTIL_H diff --git a/lib/passes/support/TypeUtil.cpp b/lib/passes/support/TypeUtil.cpp index 94c9fe75..653ec0d4 100644 --- a/lib/passes/support/TypeUtil.cpp +++ b/lib/passes/support/TypeUtil.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" +#include + using namespace llvm; namespace typeart::util::type { @@ -106,4 +108,17 @@ unsigned getPointerSizeInBytes(llvm::Type* /*ptrT*/, const llvm::DataLayout& dl) return dl.getPointerSizeInBits() / 8; } +std::optional getPointerElementType(llvm::Type* ptr_type) { + auto* pointer_type = dyn_cast_or_null(ptr_type); + if (pointer_type == nullptr) { + return {}; + } + +#if LLVM_VERSION_MAJOR < 15 + return pointer_type->getPointerElementType(); +#else + return {}; +#endif +} + } // namespace typeart::util::type diff --git a/lib/passes/support/TypeUtil.h b/lib/passes/support/TypeUtil.h index 129f6d99..684e4055 100644 --- a/lib/passes/support/TypeUtil.h +++ b/lib/passes/support/TypeUtil.h @@ -20,6 +20,8 @@ class AllocaInst; class LLVMContext; } // namespace llvm +#include + namespace typeart::util::type { #if LLVM_VERSION_MAJOR < 15 @@ -44,6 +46,8 @@ unsigned getStructSizeInBytes(llvm::Type* structT, const llvm::DataLayout& dl); unsigned getPointerSizeInBytes(llvm::Type* ptrT, const llvm::DataLayout& dl); +std::optional getPointerElementType(llvm::Type* ptr_type); + } // namespace typeart::util::type #endif // TYPEART_TYPE_UTIL_H diff --git a/lib/passes/typegen/ir/TypeManager.cpp b/lib/passes/typegen/ir/TypeManager.cpp index daf26dbb..1927f920 100644 --- a/lib/passes/typegen/ir/TypeManager.cpp +++ b/lib/passes/typegen/ir/TypeManager.cpp @@ -15,6 +15,7 @@ #include "IRTypeGen.h" #include "StructTypeHandler.h" #include "VectorTypeHandler.h" +#include "support/CudaUtil.h" #include "support/Logger.h" #include "support/TypeUtil.h" #include "support/Util.h" @@ -277,9 +278,27 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { auto malloc_call = mdata.call; const llvm::DataLayout& dl = malloc_call->getModule()->getDataLayout(); BitCastInst* primaryBitcast = mdata.primary; + llvm::Type* allocation_type = nullptr; + + if (mdata.kind == MemOpKind::CudaMallocLike && primaryBitcast == nullptr) { + if (auto bitcast = cuda::bitcast_for(*malloc_call); bitcast.has_value()) { + primaryBitcast = *bitcast; + } + } + + if (mdata.kind == MemOpKind::CudaMallocLike) { + allocation_type = llvm::Type::getInt8Ty(malloc_call->getContext()); + } else { + auto pointee_type = tu::getPointerElementType(malloc_call->getType()); + allocation_type = !pointee_type ? llvm::Type::getInt8Ty(malloc_call->getContext()) : *pointee_type; + } + + int typeId = getOrRegisterType(allocation_type, dl); // retrieveTypeID(tu::getVoidType(c)); + + if (mdata.kind == MemOpKind::CudaMallocLike) { + typeId = TYPEART_POINTER; + } - int typeId = getOrRegisterType(malloc_call->getType()->getPointerElementType(), - dl); // retrieveTypeID(tu::getVoidType(c)); if (typeId == TYPEART_UNKNOWN_TYPE) { LOG_ERROR("Unknown heap type. Not instrumenting. " << util::dump(*malloc_call)); // TODO notify caller that we skipped: via lambda callback function @@ -287,11 +306,34 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { }; // Number of bytes per element, 1 for void* - unsigned typeSize = tu::getTypeSizeInBytes(malloc_call->getType()->getPointerElementType(), dl); + unsigned typeSize = tu::getTypeSizeInBytes(allocation_type, dl); + + if (mdata.kind == MemOpKind::CudaMallocLike) { + typeSize = 1; + } // Use the first cast as the determining type (if there is any) if (primaryBitcast != nullptr) { - auto* dstPtrType = primaryBitcast->getDestTy()->getPointerElementType(); + llvm::Type* dstPtrType = nullptr; + if (auto pointee_type = tu::getPointerElementType(primaryBitcast->getDestTy()); pointee_type.has_value()) { + dstPtrType = *pointee_type; + } + // Basically: getSrcTy()->getPointerElementType()->getPointerElementType(): + if (mdata.kind == MemOpKind::CudaMallocLike && dstPtrType == nullptr) { + if (auto pointee_type = tu::getPointerElementType(primaryBitcast->getSrcTy()); pointee_type.has_value()) { + dstPtrType = *pointee_type; + } + if (dstPtrType != nullptr && dstPtrType->isPointerTy()) { + if (auto nested = tu::getPointerElementType(dstPtrType); nested.has_value()) { + dstPtrType = *nested; + } + } + } + + if (dstPtrType == nullptr) { + LOG_WARNING("Could not resolve non-opaque pointee type for allocation cast. Keeping fallback type.") + return {typeId, 0}; + } typeSize = tu::getTypeSizeInBytes(dstPtrType, dl); diff --git a/lib/runtime/AllocationTracking.cpp b/lib/runtime/AllocationTracking.cpp index afcbf973..d27b5d0f 100644 --- a/lib/runtime/AllocationTracking.cpp +++ b/lib/runtime/AllocationTracking.cpp @@ -289,6 +289,18 @@ void __typeart_leave_scope_omp(int alloca_count) { typeart::RuntimeSystem::get().allocation_tracker().onLeaveScope(alloca_count, retAddr); } +void __typeart_alloc_cuda(const void* addr, int typeId, size_t count) { + TYPEART_RUNTIME_GUARD; + const void* retAddr = __builtin_return_address(0); + typeart::RuntimeSystem::get().allocation_tracker().onAlloc(addr, typeId, count, retAddr); +} + +void __typeart_free_cuda(const void* addr) { + TYPEART_RUNTIME_GUARD; + const void* retAddr = __builtin_return_address(0); + typeart::RuntimeSystem::get().allocation_tracker().onFreeHeap(addr, retAddr); +} + void __typeart_alloc_mty(const void* addr, const void* info, size_t count) { TYPEART_RUNTIME_GUARD; const void* retAddr = __builtin_return_address(0); diff --git a/lib/runtime/CMakeLists.txt b/lib/runtime/CMakeLists.txt index c889e7d7..c8467f71 100644 --- a/lib/runtime/CMakeLists.txt +++ b/lib/runtime/CMakeLists.txt @@ -18,6 +18,7 @@ add_custom_command( set(RUNTIME_LIB_SOURCES AccessCounter.h CallbackInterface.h + CudaRuntimeInterface.h RuntimeData.h RuntimeInterface.h TypeResolution.cpp @@ -27,6 +28,7 @@ set(RUNTIME_LIB_SOURCES TypeResolution.h Runtime.cpp Runtime.h + CudaSupport.cpp ${TYPEART_META_SOURCE} $<$:../support/MPILogger.cpp> ) @@ -99,6 +101,10 @@ target_include_directories( target_include_directories(${TYPEART_PREFIX}_Runtime SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) +if(CUDAToolkit_FOUND) + target_include_directories(${TYPEART_PREFIX}_Runtime SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) +endif() + target_compile_definitions( ${TYPEART_PREFIX}_Runtime PRIVATE TYPEART_LOG_LEVEL=${TYPEART_LOG_LEVEL_RT} @@ -108,8 +114,18 @@ target_compile_definitions( $<$:TYPEART_ABSEIL> $<$:USE_SAFEPTR> $<$:TYPEART_DISABLE_THREAD_SAFETY> + $<$:TYPEART_HAS_CUDA=1> ) +if(CUDAToolkit_FOUND) + if(TARGET CUDA::cudart) + target_link_libraries(${TYPEART_PREFIX}_Runtime PRIVATE CUDA::cudart) + endif() + if(TARGET CUDA::cuda_driver) + target_link_libraries(${TYPEART_PREFIX}_Runtime PRIVATE CUDA::cuda_driver) + endif() +endif() + typeart_target_compile_options(${TYPEART_PREFIX}_Runtime) typeart_target_define_file_basename(${TYPEART_PREFIX}_Runtime) typeart_target_coverage_options(${TYPEART_PREFIX}_Runtime) @@ -137,6 +153,12 @@ install(FILES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} ) +if(CUDAToolkit_FOUND) + install(FILES CudaRuntimeInterface.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} + ) +endif() + install( TARGETS ${TYPEART_PREFIX}_Runtime EXPORT ${TARGETS_EXPORT_NAME} diff --git a/lib/runtime/CallbackInterface.h b/lib/runtime/CallbackInterface.h index 83ec2d06..b9db02b9 100644 --- a/lib/runtime/CallbackInterface.h +++ b/lib/runtime/CallbackInterface.h @@ -39,6 +39,9 @@ TYPEART_EXPORT void __typeart_free_omp(const void* addr); TYPEART_EXPORT void __typeart_alloc_stack_omp(const void* addr, int type_id, size_t count); TYPEART_EXPORT void __typeart_leave_scope_omp(int alloca_count); +TYPEART_EXPORT void __typeart_alloc_cuda(const void* addr, int type_id, size_t count); +TYPEART_EXPORT void __typeart_free_cuda(const void* addr); + // Called for inlined type definitions mode TYPEART_EXPORT void __typeart_alloc_mty(const void* addr, const void* info, size_t count); TYPEART_EXPORT void __typeart_alloc_global_mty(const void* addr, const void* info, size_t count); diff --git a/lib/runtime/CudaRuntimeInterface.h b/lib/runtime/CudaRuntimeInterface.h new file mode 100644 index 00000000..2b42ee96 --- /dev/null +++ b/lib/runtime/CudaRuntimeInterface.h @@ -0,0 +1,36 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. +// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_CUDARUNTIMEINTERFACE_H +#define TYPEART_CUDARUNTIMEINTERFACE_H + +#include "RuntimeExport.h" +#include "RuntimeInterface.h" + +#ifdef __cplusplus +#include +#else +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +TYPEART_EXPORT typeart_status typeart_cuda_is_device_ptr(const void* addr, bool* is_device_ptr_flag); + +#ifdef __cplusplus +} +#endif + +#endif // TYPEART_CUDARUNTIMEINTERFACE_H diff --git a/lib/runtime/CudaSupport.cpp b/lib/runtime/CudaSupport.cpp new file mode 100644 index 00000000..3f358bc5 --- /dev/null +++ b/lib/runtime/CudaSupport.cpp @@ -0,0 +1,43 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. +// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#include "CudaRuntimeInterface.h" + +#ifdef TYPEART_HAS_CUDA +#include +#endif + +typeart_status typeart_cuda_is_device_ptr(const void* addr, bool* is_device_ptr_flag) { + if (is_device_ptr_flag == nullptr) { + return TYPEART_ERROR; + } + +#ifdef TYPEART_HAS_CUDA + CUmemorytype mem_type; + CUresult status = + cuPointerGetAttribute(&mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, reinterpret_cast(addr)); + if (status != CUDA_SUCCESS) { + *is_device_ptr_flag = false; + if (status == CUDA_ERROR_INVALID_VALUE) { + return TYPEART_OK; + } + return TYPEART_ERROR; + } + + *is_device_ptr_flag = (mem_type == CU_MEMORYTYPE_DEVICE); + return TYPEART_OK; +#else + (void)addr; + *is_device_ptr_flag = false; + return TYPEART_OK; +#endif +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 13ae0faa..361044d0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,6 +14,37 @@ typeart_find_llvm_progs(TYPEART_LIT_EXEC # add_subdirectory(compat) +function(cuda_runnable_detect cuda_is_runnable) + set(${cuda_is_runnable} 0 PARENT_SCOPE) + if(NOT CUDAToolkit_FOUND) + return() + endif() + + find_program(TYPEART_NVIDIA_SMI_EXEC nvidia-smi) + mark_as_advanced(TYPEART_NVIDIA_SMI_EXEC) + if(NOT TYPEART_NVIDIA_SMI_EXEC) + return() + endif() + + execute_process( + COMMAND ${TYPEART_NVIDIA_SMI_EXEC} -L + RESULT_VARIABLE smi_result + OUTPUT_VARIABLE smi_out + ERROR_QUIET + ) + if(NOT smi_result EQUAL 0) + return() + endif() + + string(REGEX MATCHALL "GPU [0-9]+:" smi_gpu_list ${smi_out}) + list(LENGTH smi_gpu_list TYPEART_CUDA_GPU_COUNT) + if(TYPEART_CUDA_GPU_COUNT GREATER 0) + set(${cuda_is_runnable} 1 PARENT_SCOPE) + endif() +endfunction() + +cuda_runnable_detect(TYPEART_CUDA_RUNNABLE) + macro(pythonize_bool truth_var var) if(${truth_var}) set(${var} True) @@ -43,6 +74,37 @@ function(typeart_configure_lit_site input output) pythonize_bool(Threads_FOUND TYPEARTPASS_THREADS) pythonize_bool(TYPEART_DISABLE_THREAD_SAFETY TYPEARTPASS_THREAD_UNSAFE) + set(TYPEARTPASS_CUDA_FLAGS "") + set(TYPEARTPASS_CUDA_PATH "") + if(CUDAToolkit_FOUND) + if(DEFINED CUDAToolkit_TARGET_DIR AND NOT "${CUDAToolkit_TARGET_DIR}" STREQUAL "") + set(TYPEARTPASS_CUDA_PATH "${CUDAToolkit_TARGET_DIR}") + elseif(DEFINED CUDAToolkit_LIBRARY_ROOT AND NOT "${CUDAToolkit_LIBRARY_ROOT}" STREQUAL "") + set(TYPEARTPASS_CUDA_PATH "${CUDAToolkit_LIBRARY_ROOT}") + elseif(DEFINED CUDAToolkit_BIN_DIR AND NOT "${CUDAToolkit_BIN_DIR}" STREQUAL "") + cmake_path(GET CUDAToolkit_BIN_DIR PARENT_PATH TYPEARTPASS_CUDA_PATH) + endif() + + foreach(cuda_inc ${CUDAToolkit_INCLUDE_DIRS}) + string(APPEND TYPEARTPASS_CUDA_FLAGS " -isystem ${cuda_inc}") + endforeach() + else() + find_path(TYPEART_CUDA_RUNTIME_HEADER_DIR cuda_runtime_api.h + HINTS /usr/local/cuda/include /usr/include /opt/cuda/include + ) + if(TYPEART_CUDA_RUNTIME_HEADER_DIR) + cmake_path(GET TYPEART_CUDA_RUNTIME_HEADER_DIR PARENT_PATH TYPEARTPASS_CUDA_PATH) + endif() + endif() + + set(TYPEART_CUDA_STATIC_AVAILABLE FALSE) + if(CUDAToolkit_FOUND OR NOT "${TYPEARTPASS_CUDA_PATH}" STREQUAL "") + set(TYPEART_CUDA_STATIC_AVAILABLE TRUE) + endif() + + pythonize_bool(TYPEART_CUDA_STATIC_AVAILABLE TYPEARTPASS_CUDA_STATIC) + pythonize_bool(TYPEART_CUDA_RUNNABLE TYPEARTPASS_CUDA_RUNTIME) + pythonize_bool(MPI_C_FOUND TYPEARTPASS_MPI_C) pythonize_bool(MPI_CXX_FOUND TYPEARTPASS_MPI_CXX) pythonize_bool(TYPEART_CI_RUN TYPEARTPASS_CI_RUN) @@ -143,6 +205,7 @@ endif() set(TYPEART_SUITES all pass + cuda runtime script typemapping @@ -155,8 +218,10 @@ set(TYPEART_SUITES_WORKERS ${NUM_CPU} 1 1 + 1 ${NUM_CPU} 1 + 1 ) typeart_add_lit_target(SUITES ${TYPEART_SUITES} WORKERS ${TYPEART_SUITES_WORKERS}) diff --git a/test/cuda/pass/01_cudamalloc.c b/test/cuda/pass/01_cudamalloc.c new file mode 100644 index 00000000..a5b5f183 --- /dev/null +++ b/test/cuda/pass/01_cudamalloc.c @@ -0,0 +1,16 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: cuda_static + +// CHECK: call i32 @cudaMalloc +// CHECK-NEXT: [[CUDA_PTR:%[0-9a-z]+]] = load {{.*}}, {{.*}} +// CHECK-NEXT: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], + +int main() { + const int N = 20; + float* d_x; + + cudaMalloc((void**)&d_x, N * sizeof(float)); + + return 0; +} diff --git a/test/cuda/pass/02_cudafree.c b/test/cuda/pass/02_cudafree.c new file mode 100644 index 00000000..b91c4b9a --- /dev/null +++ b/test/cuda/pass/02_cudafree.c @@ -0,0 +1,14 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: cuda_static + +// CHECK: call i32 @cudaFree({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[0-9a-z]+]]) +// CHECK-NEXT: __typeart_free_cuda({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) + +int main() { + float* d_x; + + cudaFree(d_x); + + return 0; +} diff --git a/test/cuda/pass/03_cudahostalloc.c b/test/cuda/pass/03_cudahostalloc.c new file mode 100644 index 00000000..ab7d9f2c --- /dev/null +++ b/test/cuda/pass/03_cudahostalloc.c @@ -0,0 +1,14 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: cuda_static + +// CHECK: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 20) + +int main() { + const int N = 20; + float* d_x; + + cudaHostAlloc((void**)&d_x, N * sizeof(float), cudaHostAllocDefault); + + return 0; +} diff --git a/test/cuda/pass/06_cudamalloc_nonvoid.cpp b/test/cuda/pass/06_cudamalloc_nonvoid.cpp new file mode 100644 index 00000000..1f07164f --- /dev/null +++ b/test/cuda/pass/06_cudamalloc_nonvoid.cpp @@ -0,0 +1,25 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: cuda_static + +// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) + +struct X { + int a; + int b; +}; + +int main() { + const int N = 20; + float* d_x; + double* dd_y; + X* sd_z; + + cudaMalloc(&d_x, N * sizeof(float)); + cudaMalloc(&dd_y, N * sizeof(double)); + cudaMalloc(&sd_z, N * sizeof(X)); + + return 0; +} diff --git a/test/cuda/pass/07_axpy.c b/test/cuda/pass/07_axpy.c new file mode 100644 index 00000000..279053cf --- /dev/null +++ b/test/cuda/pass/07_axpy.c @@ -0,0 +1,32 @@ +// RUN: %apply %s -x cuda --cuda-gpu-arch=sm_72 2>&1 | %filecheck %s + +// REQUIRES: cuda + +// CHECK: Malloc : 2 + +__global__ void axpy(float a, float* x, float* y) { + y[threadIdx.x] = a * x[threadIdx.x]; +} + +int main(int argc, char* argv[]) { + const int kDataLen = 4; + + float a = 2.0f; + float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; + float host_y[kDataLen]; + + float* device_x; + float* device_y; + cudaMalloc((void**)&device_x, kDataLen * sizeof(float)); + cudaMalloc((void**)&device_y, kDataLen * sizeof(float)); + + cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); + + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + cudaDeviceSynchronize(); + cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); + + cudaDeviceReset(); + return 0; +} diff --git a/test/lit.cfg b/test/lit.cfg index fc8dbd45..ea2158a7 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -58,6 +58,11 @@ if config.has_legacy_wrapper: if config.is_ci: config.available_features.add('ci') +if getattr(config, 'cuda_static', False): + config.available_features.add('cuda') +if getattr(config, 'cuda_runtime', False): + config.available_features.add('cuda_runtime') + profile_files = getattr(config, 'profile_file', None) typeart_base_lib_dir= getattr(config, 'typeart_base_lib_dir', None) typeart_lib_root = getattr(config, 'typeart_lib_dir', None) @@ -69,6 +74,11 @@ transform_pass = '{}/{}'.format(typeart_lib_root, transform_name) # std_plugin_args = 'typeart --typeart-stats=true' std_plugin_args_newpm = 'typeart&1 | \ -// RUN: %filecheck %s +// RUN: %c-to-llvm -Wimplicit-function-declaration %s -I%runtime_path -I%base_path/lib/runtime | %apply-typeart +// --typeart-stack=true -S 2>&1 | \ RUN: %filecheck %s #include "CallbackInterface.h" From 35b2ea8a39fdbe07ffb2101acdfa6ee207b4be64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Sun, 26 Apr 2026 10:54:43 +0200 Subject: [PATCH 02/24] Fix tests --- test/cuda/pass/07_axpy.c | 4 ++-- test/lit.cfg | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/cuda/pass/07_axpy.c b/test/cuda/pass/07_axpy.c index 279053cf..c661593f 100644 --- a/test/cuda/pass/07_axpy.c +++ b/test/cuda/pass/07_axpy.c @@ -1,6 +1,6 @@ -// RUN: %apply %s -x cuda --cuda-gpu-arch=sm_72 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s -// REQUIRES: cuda +// REQUIRES: cuda_static // CHECK: Malloc : 2 diff --git a/test/lit.cfg b/test/lit.cfg index ea2158a7..a4fdda17 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -58,9 +58,9 @@ if config.has_legacy_wrapper: if config.is_ci: config.available_features.add('ci') -if getattr(config, 'cuda_static', False): - config.available_features.add('cuda') -if getattr(config, 'cuda_runtime', False): +if config.cuda_static: + config.available_features.add('cuda_static') +if config.cuda_runtime: config.available_features.add('cuda_runtime') profile_files = getattr(config, 'profile_file', None) From 26005ffeaa53e104158cdfaf33556ee157fc5a1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Sun, 26 Apr 2026 11:20:18 +0200 Subject: [PATCH 03/24] Initial tests pass --- lib/passes/TypeARTPass.cpp | 9 ++++----- test/cuda/runtime/07_axpy.c | 34 ++++++++++++++++++++++++++++++++++ test/lit.cfg | 3 +++ test/lit.site.cfg.in | 1 + 4 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 test/cuda/runtime/07_axpy.c diff --git a/lib/passes/TypeARTPass.cpp b/lib/passes/TypeARTPass.cpp index 79256a4b..ba3d4b2f 100644 --- a/lib/passes/TypeARTPass.cpp +++ b/lib/passes/TypeARTPass.cpp @@ -270,6 +270,10 @@ class TypeArtPass : public llvm::PassInfoMixin { llvm::PreservedAnalyses run(llvm::Module& m, llvm::ModuleAnalysisManager&) { + if (cuda::is_device_module(m)) { + LOG_DEBUG("Skipping CUDA device module: " << m.getName()); + return llvm::PreservedAnalyses::all(); + } bool changed{false}; changed |= doInitialization(m); const bool heap = configuration()[config::ConfigStdArgs::heap]; // Must happen after doInit @@ -281,11 +285,6 @@ run(llvm::Module& m, llvm::ModuleAnalysisManager&) { } bool runOnModule(llvm::Module& m) { - if (cuda::is_device_module(m)) { - LOG_DEBUG("Skipping CUDA device module: " << m.getName()); - return false; - } - meminst_finder->runOnModule(m); const bool instrument_global = configuration()[config::ConfigStdArgs::global]; bool globals_were_instrumented{false}; diff --git a/test/cuda/runtime/07_axpy.c b/test/cuda/runtime/07_axpy.c new file mode 100644 index 00000000..2ca6a2e6 --- /dev/null +++ b/test/cuda/runtime/07_axpy.c @@ -0,0 +1,34 @@ +// RUN: %wrapper-cc -x cuda --cuda-gpu-arch=sm_50 %cuda_link %s -o %s.exe +// RUN: %s.exe 2>&1 | %filecheck %s + +// REQUIRES: cuda_runtime +// UNSUPPORTED: sanitizer + +// CHECK: Total heap{{[ ]*}}: 2 , 2 , - + +__global__ void axpy(float a, float* x, float* y) { + y[threadIdx.x] = a * x[threadIdx.x]; +} + +int main(int argc, char* argv[]) { + const int kDataLen = 4; + + float a = 2.0f; + float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; + float host_y[kDataLen]; + + float* device_x; + float* device_y; + cudaMalloc((void**)&device_x, kDataLen * sizeof(float)); + cudaMalloc((void**)&device_y, kDataLen * sizeof(float)); + + cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); + + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + cudaDeviceSynchronize(); + cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); + + cudaDeviceReset(); + return 0; +} diff --git a/test/lit.cfg b/test/lit.cfg index a4fdda17..928bf8e8 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -79,6 +79,7 @@ cuda_host_args = '-x cuda --cuda-host-only -nocudalib' if cuda_path: cuda_host_args += ' --cuda-path={}'.format(cuda_path) cuda_flags = getattr(config, 'cuda_flags', '') +cuda_link_flags = f"-L{config.cuda_lib} -Wl,-rpath,{config.cuda_lib} -lcudart" type_file = 'typeart-types.yaml' openmp_c_flags = getattr(config, 'openmp_c_flags', None) openmp_cxx_flags = getattr(config, 'openmp_cxx_flags', None) @@ -140,6 +141,8 @@ config.substitutions.append(('%arg_std', std_plugin_args_newpm)) config.substitutions.append(('%omp_c_flags', openmp_c_flags)) config.substitutions.append(('%omp_cpp_flags', openmp_cxx_flags)) +config.substitutions.append(('%cuda_link', cuda_link_flags)) + # TODO refactor typeart arguments (and add args for enabling heap/stack/globals etc.) # config.substitutions.append(('%arg_stack', '-typeart-stack')) # config.substitutions.append(('%arg_heap', '-typeart-heap')) diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index 96fb248e..b399460c 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -43,6 +43,7 @@ config.cuda_static=@TYPEARTPASS_CUDA_STATIC@ config.cuda_runtime=@TYPEARTPASS_CUDA_RUNTIME@ config.cuda_flags="@TYPEARTPASS_CUDA_FLAGS@" config.cuda_path="@TYPEARTPASS_CUDA_PATH@" +config.cuda_lib="@CUDAToolkit_LIBRARY_DIR@" config.python_interp = "@Python3_EXECUTABLE@" From a37e9619ae17474ddc208f852289ffb4e1a55c32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Mon, 27 Apr 2026 15:11:29 +0200 Subject: [PATCH 04/24] Workaround handling of templated cuda allocation fn --- lib/passes/typegen/dimeta/DimetaTypeGen.cpp | 64 +++++++++++++++------ test/cuda/pass/01_cudamalloc.c | 4 +- test/cuda/pass/03_cudahostalloc.c | 4 +- test/cuda/pass/06_cudamalloc_nonvoid.cpp | 6 +- test/cuda/runtime/07_axpy.c | 8 +++ 5 files changed, 61 insertions(+), 25 deletions(-) diff --git a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp index 90a19cc3..cf19791d 100644 --- a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp +++ b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp @@ -13,6 +13,7 @@ #include "../TypeIDGenerator.h" #include "Dimeta.h" #include "DimetaData.h" +#include "analysis/MemOpData.h" #include "support/Logger.h" #include "typegen/TypeGenerator.h" #include "typelib/TypeDatabase.h" @@ -97,6 +98,29 @@ auto apply_function(const Type& type, Func&& handle_qualified_type) { } // namespace detail namespace workaround { +namespace detail { +void remove_pointer_level_impl(dimeta::LocatedType& val) { + const auto remove_pointer_level = [](auto& qual) { + auto pointer_like_iter = llvm::find_if(qual, [](auto qualifier) { + switch (qualifier) { + case dimeta::Qualifier::kPtr: + case dimeta::Qualifier::kRef: + case dimeta::Qualifier::kPtrToMember: + return true; + default: + break; + } + return false; + }); + if (pointer_like_iter != std::end(qual)) { + LOG_DEBUG("Removing pointer level " << static_cast(*pointer_like_iter)) + qual.erase(pointer_like_iter); + } + }; + std::visit([&](auto&& qualified_type) { remove_pointer_level(qualified_type.qual); }, val.type); +} +} // namespace detail + void remove_pointer_level(const llvm::AllocaInst* alloc, dimeta::LocatedType& val) { // If the alloca instruction is not a pointer, but the located_type has a pointer-like qualifier, we remove it. // Workaround for inlining issue, see test typemapping/05_milc_inline_metadata.c @@ -105,27 +129,16 @@ void remove_pointer_level(const llvm::AllocaInst* alloc, dimeta::LocatedType& va // this will cause MPI handle arrays (typedef "ptr to opaque struct") to be considered a pointer if (!alloc->getAllocatedType()->isPointerTy() && !alloc->getAllocatedType()->isArrayTy()) { LOG_DEBUG("Alloca is not a pointer type: " << *alloc->getAllocatedType()) - - const auto remove_pointer_level = [](auto& qual) { - auto pointer_like_iter = llvm::find_if(qual, [](auto qualifier) { - switch (qualifier) { - case dimeta::Qualifier::kPtr: - case dimeta::Qualifier::kRef: - case dimeta::Qualifier::kPtrToMember: - return true; - default: - break; - } - return false; - }); - if (pointer_like_iter != std::end(qual)) { - LOG_DEBUG("Removing pointer level " << static_cast(*pointer_like_iter)) - qual.erase(pointer_like_iter); - } - }; - std::visit([&](auto&& qualified_type) { remove_pointer_level(qualified_type.qual); }, val.type); + detail::remove_pointer_level_impl(val); } } + +void remove_pointer_level(const llvm::CallBase* call, dimeta::LocatedType& val) { + // If the call base is a templated cudaMalloc<...> call, current we need to remove a single pointer level to correct + // determine the allocated type + detail::remove_pointer_level_impl(val); +} + } // namespace workaround template @@ -479,6 +492,19 @@ class DimetaTypeManager final : public TypeIDGenerator { if (val) { LOG_DEBUG("Registering malloc-like") + const auto is_template_fn = [](const auto& func_str) { + return !func_str.empty() && func_str.back() == '>' && func_str.find('<') != std::string::npos; + }; + + const auto function_name = val->location.function; + if (call->getCalledFunction() != nullptr && is_template_fn(function_name)) { + MemOps mem_operations; + auto kind = mem_operations.kind(call->getCalledFunction()->getName()); + if (kind && kind.value() == MemOpKind::CudaMallocLike) { + LOG_DEBUG("Workaround for pointer level of call base " << function_name) + workaround::remove_pointer_level(call, val.value()); + } + } return {getOrRegister(val->type, true), array_size(val->type)}; } diff --git a/test/cuda/pass/01_cudamalloc.c b/test/cuda/pass/01_cudamalloc.c index a5b5f183..efceb49a 100644 --- a/test/cuda/pass/01_cudamalloc.c +++ b/test/cuda/pass/01_cudamalloc.c @@ -2,8 +2,8 @@ // REQUIRES: cuda_static -// CHECK: call i32 @cudaMalloc -// CHECK-NEXT: [[CUDA_PTR:%[0-9a-z]+]] = load {{.*}}, {{.*}} +// CHECK: call i32 @cudaMalloc({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// CHECK-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] // CHECK-NEXT: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], int main() { diff --git a/test/cuda/pass/03_cudahostalloc.c b/test/cuda/pass/03_cudahostalloc.c index ab7d9f2c..01c25109 100644 --- a/test/cuda/pass/03_cudahostalloc.c +++ b/test/cuda/pass/03_cudahostalloc.c @@ -2,7 +2,9 @@ // REQUIRES: cuda_static -// CHECK: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 20) +// CHECK: call i32 @cudaHostAlloc({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// CHECK-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] +// CHECK-NEXT: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], int main() { const int N = 20; diff --git a/test/cuda/pass/06_cudamalloc_nonvoid.cpp b/test/cuda/pass/06_cudamalloc_nonvoid.cpp index 1f07164f..07e9297d 100644 --- a/test/cuda/pass/06_cudamalloc_nonvoid.cpp +++ b/test/cuda/pass/06_cudamalloc_nonvoid.cpp @@ -2,9 +2,9 @@ // REQUIRES: cuda_static -// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) -// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) -// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) +// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) +// CHECK: __typeart_alloc_mty({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, {{(ptr|i32)}} {{[@_a-z0-9A-Z]+}}, i64 {{.*}}) struct X { int a; diff --git a/test/cuda/runtime/07_axpy.c b/test/cuda/runtime/07_axpy.c index 2ca6a2e6..fb89b8f7 100644 --- a/test/cuda/runtime/07_axpy.c +++ b/test/cuda/runtime/07_axpy.c @@ -4,8 +4,10 @@ // REQUIRES: cuda_runtime // UNSUPPORTED: sanitizer +// CHECK: [0]=2 [1]=4 [2]=6 [3]=8 // CHECK: Total heap{{[ ]*}}: 2 , 2 , - +#include __global__ void axpy(float a, float* x, float* y) { y[threadIdx.x] = a * x[threadIdx.x]; } @@ -30,5 +32,11 @@ int main(int argc, char* argv[]) { cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); cudaDeviceReset(); + + for (int i = 0; i < kDataLen; ++i) { + printf("[%i]=%.0f ", i, host_y[i]); + } + printf("\n"); + return 0; } From 4fb4d795bc6b6fe0eec599d701c124891724e742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Mon, 27 Apr 2026 15:31:08 +0200 Subject: [PATCH 05/24] Instead of cuda, use gpu for callbacks --- lib/passes/instrumentation/TypeARTFunctions.cpp | 9 +++++++-- lib/runtime/AllocationTracking.cpp | 13 +++++++++++-- lib/runtime/CallbackInterface.h | 6 ++++-- test/cuda/pass/01_cudamalloc.c | 2 +- test/cuda/pass/02_cudafree.c | 2 +- test/cuda/pass/03_cudahostalloc.c | 2 +- test/cuda/pass/06_cudamalloc_nonvoid.cpp | 6 +++--- 7 files changed, 28 insertions(+), 12 deletions(-) diff --git a/lib/passes/instrumentation/TypeARTFunctions.cpp b/lib/passes/instrumentation/TypeARTFunctions.cpp index 67168d47..17c295fa 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.cpp +++ b/lib/passes/instrumentation/TypeARTFunctions.cpp @@ -52,7 +52,7 @@ std::string get_func_suffix(IFunc id) { switch (id) { case IFunc::free_cuda: case IFunc::heap_cuda: - return "_cuda"; + return "_gpu"; case IFunc::free_omp: case IFunc::heap_omp: case IFunc::stack_omp: @@ -301,6 +301,8 @@ TypeArtFunc typeart_register_type{"__typeart_register_type"}; TypeArtFunc typeart_alloc_omp_mty = typeart_alloc_mty; TypeArtFunc typeart_alloc_stacks_omp_mty = typeart_alloc_stack_mty; +TypeArtFunc typeart_alloc_cuda_mty = typeart_alloc_mty; + } // namespace callbacks std::unique_ptr declare_instrumentation_functions(llvm::Module& m, @@ -324,7 +326,7 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& decl_alternatives.make_function(IFunc::stack, typeart_alloc_stack_mty.name, alloc_arg_types_mty); typeart_alloc_global_mty.f = decl_alternatives.make_function(IFunc::global, typeart_alloc_global_mty.name, alloc_arg_types_mty); - functions_alternative.putFunctionFor(IFunc::heap_cuda, llvm::cast(typeart_alloc_mty.f)); + // functions_alternative.putFunctionFor(IFunc::heap_cuda, llvm::cast(typeart_alloc_mty.f)); typeart_register_type.f = decl.make_function(IFunc::type, typeart_register_type.name, free_arg_types); typeart_alloc.f = decl.make_function(IFunc::heap, typeart_alloc.name, alloc_arg_types); @@ -347,6 +349,9 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& typeart_alloc_stacks_omp_mty.f = decl_alternatives.make_function(IFunc::stack_omp, typeart_alloc_stacks_omp_mty.name, alloc_arg_types_mty); + typeart_alloc_cuda_mty.f = + decl_alternatives.make_function(IFunc::heap_cuda, typeart_alloc_cuda_mty.name, alloc_arg_types_mty); + return std::make_unique(functions, functions_alternative); } diff --git a/lib/runtime/AllocationTracking.cpp b/lib/runtime/AllocationTracking.cpp index d27b5d0f..ce03c931 100644 --- a/lib/runtime/AllocationTracking.cpp +++ b/lib/runtime/AllocationTracking.cpp @@ -289,13 +289,13 @@ void __typeart_leave_scope_omp(int alloca_count) { typeart::RuntimeSystem::get().allocation_tracker().onLeaveScope(alloca_count, retAddr); } -void __typeart_alloc_cuda(const void* addr, int typeId, size_t count) { +void __typeart_alloc_gpu(const void* addr, int typeId, size_t count) { TYPEART_RUNTIME_GUARD; const void* retAddr = __builtin_return_address(0); typeart::RuntimeSystem::get().allocation_tracker().onAlloc(addr, typeId, count, retAddr); } -void __typeart_free_cuda(const void* addr) { +void __typeart_free_gpu(const void* addr) { TYPEART_RUNTIME_GUARD; const void* retAddr = __builtin_return_address(0); typeart::RuntimeSystem::get().allocation_tracker().onFreeHeap(addr, retAddr); @@ -310,6 +310,15 @@ void __typeart_alloc_mty(const void* addr, const void* info, size_t count) { rt.allocation_tracker().onAlloc(addr, type_id, count, retAddr); } +void __typeart_alloc_mty_gpu(const void* addr, const void* info, size_t count) { + TYPEART_RUNTIME_GUARD; + const void* retAddr = __builtin_return_address(0); + const auto type_id = reinterpret_cast(info)->type_id; + auto& rt = typeart::RuntimeSystem::get(); + assert(type_id == rt.type_translator().get_type_id_for(info) && "Type ID of global and lookup must match"); + rt.allocation_tracker().onAlloc(addr, type_id, count, retAddr); +} + void __typeart_alloc_stack_mty(const void* addr, const void* info, size_t count) { TYPEART_RUNTIME_GUARD; const void* retAddr = __builtin_return_address(0); diff --git a/lib/runtime/CallbackInterface.h b/lib/runtime/CallbackInterface.h index b9db02b9..95a3f885 100644 --- a/lib/runtime/CallbackInterface.h +++ b/lib/runtime/CallbackInterface.h @@ -39,8 +39,8 @@ TYPEART_EXPORT void __typeart_free_omp(const void* addr); TYPEART_EXPORT void __typeart_alloc_stack_omp(const void* addr, int type_id, size_t count); TYPEART_EXPORT void __typeart_leave_scope_omp(int alloca_count); -TYPEART_EXPORT void __typeart_alloc_cuda(const void* addr, int type_id, size_t count); -TYPEART_EXPORT void __typeart_free_cuda(const void* addr); +TYPEART_EXPORT void __typeart_alloc_gpu(const void* addr, int type_id, size_t count); +TYPEART_EXPORT void __typeart_free_gpu(const void* addr); // Called for inlined type definitions mode TYPEART_EXPORT void __typeart_alloc_mty(const void* addr, const void* info, size_t count); @@ -50,6 +50,8 @@ TYPEART_EXPORT void __typeart_register_type(const void* type); TYPEART_EXPORT void __typeart_alloc_global_mty_omp(const void* addr, const void* info, size_t count); TYPEART_EXPORT void __typeart_alloc_stack_mty_omp(const void* addr, const void* info, size_t count); + +TYPEART_EXPORT void __typeart_alloc_mty_gpu(const void* addr, const void* info, size_t count); #ifdef __cplusplus } #endif diff --git a/test/cuda/pass/01_cudamalloc.c b/test/cuda/pass/01_cudamalloc.c index efceb49a..2e1c7822 100644 --- a/test/cuda/pass/01_cudamalloc.c +++ b/test/cuda/pass/01_cudamalloc.c @@ -4,7 +4,7 @@ // CHECK: call i32 @cudaMalloc({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], // CHECK-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] -// CHECK-NEXT: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], +// CHECK-NEXT: call void @__typeart_alloc_gpu({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], int main() { const int N = 20; diff --git a/test/cuda/pass/02_cudafree.c b/test/cuda/pass/02_cudafree.c index b91c4b9a..be1da9ed 100644 --- a/test/cuda/pass/02_cudafree.c +++ b/test/cuda/pass/02_cudafree.c @@ -3,7 +3,7 @@ // REQUIRES: cuda_static // CHECK: call i32 @cudaFree({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[0-9a-z]+]]) -// CHECK-NEXT: __typeart_free_cuda({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) +// CHECK-NEXT: __typeart_free_gpu({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) int main() { float* d_x; diff --git a/test/cuda/pass/03_cudahostalloc.c b/test/cuda/pass/03_cudahostalloc.c index 01c25109..097611ae 100644 --- a/test/cuda/pass/03_cudahostalloc.c +++ b/test/cuda/pass/03_cudahostalloc.c @@ -4,7 +4,7 @@ // CHECK: call i32 @cudaHostAlloc({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], // CHECK-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] -// CHECK-NEXT: call void @__typeart_alloc_cuda({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], +// CHECK-NEXT: call void @__typeart_alloc_gpu({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], int main() { const int N = 20; diff --git a/test/cuda/pass/06_cudamalloc_nonvoid.cpp b/test/cuda/pass/06_cudamalloc_nonvoid.cpp index 07e9297d..53551de3 100644 --- a/test/cuda/pass/06_cudamalloc_nonvoid.cpp +++ b/test/cuda/pass/06_cudamalloc_nonvoid.cpp @@ -2,9 +2,9 @@ // REQUIRES: cuda_static -// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) -// CHECK: __typeart_alloc_cuda({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) -// CHECK: __typeart_alloc_mty({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, {{(ptr|i32)}} {{[@_a-z0-9A-Z]+}}, i64 {{.*}}) +// CHECK: __typeart_alloc_gpu({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) +// CHECK: __typeart_alloc_gpu({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) +// CHECK: __typeart_alloc_mty_gpu({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, {{(ptr|i32)}} {{[@_a-z0-9A-Z]+}}, i64 {{.*}}) struct X { int a; From eaf5170e535772c8ba44dfbe4b1b6f96e317003e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 28 Apr 2026 11:02:57 +0200 Subject: [PATCH 06/24] Solidify the cuda template workaround --- lib/passes/support/CudaUtil.h | 17 +++++++++++++++++ lib/passes/typegen/dimeta/DimetaTypeGen.cpp | 8 +++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/passes/support/CudaUtil.h b/lib/passes/support/CudaUtil.h index 69eba5a0..af4add08 100644 --- a/lib/passes/support/CudaUtil.h +++ b/lib/passes/support/CudaUtil.h @@ -13,12 +13,14 @@ #ifndef TYPEART_CUDAUTIL_H #define TYPEART_CUDAUTIL_H +#include "analysis/MemOpData.h" #include "support/Util.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include #include #include @@ -79,6 +81,21 @@ inline bool is_cuda_helper_function(const llvm::Function& function) { return util::starts_with_any_of(function_name, "__cuda"); } +inline bool is_templated_malloc_like(llvm::StringRef name) { + const auto templ_start_pos = name.find_first_of('<'); + if (templ_start_pos == llvm::StringRef::npos) { + return false; + } + auto extracted_fn = name.substr(0, templ_start_pos); + MemOps ops; + return ops.allocKind(extracted_fn) == MemOpKind::CudaMallocLike; +} + +inline bool is_templated_malloc_like(const llvm::Function& function) { + const std::string name = util::try_demangle(function); + return is_templated_malloc_like(name); +} + } // namespace typeart::cuda #endif // TYPEART_CUDAUTIL_H diff --git a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp index cf19791d..528bce9d 100644 --- a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp +++ b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp @@ -14,6 +14,7 @@ #include "Dimeta.h" #include "DimetaData.h" #include "analysis/MemOpData.h" +#include "support/CudaUtil.h" #include "support/Logger.h" #include "typegen/TypeGenerator.h" #include "typelib/TypeDatabase.h" @@ -492,15 +493,12 @@ class DimetaTypeManager final : public TypeIDGenerator { if (val) { LOG_DEBUG("Registering malloc-like") - const auto is_template_fn = [](const auto& func_str) { - return !func_str.empty() && func_str.back() == '>' && func_str.find('<') != std::string::npos; - }; const auto function_name = val->location.function; - if (call->getCalledFunction() != nullptr && is_template_fn(function_name)) { + if (call->getCalledFunction() != nullptr && cuda::is_templated_malloc_like(function_name)) { MemOps mem_operations; auto kind = mem_operations.kind(call->getCalledFunction()->getName()); - if (kind && kind.value() == MemOpKind::CudaMallocLike) { + if (kind == MemOpKind::CudaMallocLike) { LOG_DEBUG("Workaround for pointer level of call base " << function_name) workaround::remove_pointer_level(call, val.value()); } From 627794f2dee46c0bacc8f97958714b2dd37f3135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 28 Apr 2026 12:04:16 +0200 Subject: [PATCH 07/24] Tests pass with LLVM 14 --- test/cuda/pass/01_cudamalloc.c | 14 ++++++++++---- test/cuda/pass/03_cudahostalloc.c | 14 ++++++++++---- test/cuda/pass/06_cudamalloc_nonvoid.cpp | 12 ++++++++---- test/lit.cfg | 4 ++++ 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/test/cuda/pass/01_cudamalloc.c b/test/cuda/pass/01_cudamalloc.c index 2e1c7822..9aa7cde3 100644 --- a/test/cuda/pass/01_cudamalloc.c +++ b/test/cuda/pass/01_cudamalloc.c @@ -1,10 +1,16 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static -// CHECK: call i32 @cudaMalloc({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], -// CHECK-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] -// CHECK-NEXT: call void @__typeart_alloc_gpu({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], +// LLVM: call i32 @cudaMalloc(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr {{.*}}[[CUDA_PTR]], i32 23, i64 20) + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMalloc(i8** {{.*}}[[CAST1]], +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) int main() { const int N = 20; diff --git a/test/cuda/pass/03_cudahostalloc.c b/test/cuda/pass/03_cudahostalloc.c index 097611ae..9ea9477c 100644 --- a/test/cuda/pass/03_cudahostalloc.c +++ b/test/cuda/pass/03_cudahostalloc.c @@ -1,10 +1,16 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static -// CHECK: call i32 @cudaHostAlloc({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], -// CHECK-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] -// CHECK-NEXT: call void @__typeart_alloc_gpu({{(ptr|i8\*)}} {{.*}}[[CUDA_PTR]], +// LLVM: call i32 @cudaHostAlloc(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr {{.*}}[[CUDA_PTR]], + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaHostAlloc(i8** {{.*}}[[CAST1]], +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) int main() { const int N = 20; diff --git a/test/cuda/pass/06_cudamalloc_nonvoid.cpp b/test/cuda/pass/06_cudamalloc_nonvoid.cpp index 53551de3..1c58cf8d 100644 --- a/test/cuda/pass/06_cudamalloc_nonvoid.cpp +++ b/test/cuda/pass/06_cudamalloc_nonvoid.cpp @@ -1,10 +1,14 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static -// CHECK: __typeart_alloc_gpu({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) -// CHECK: __typeart_alloc_gpu({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) -// CHECK: __typeart_alloc_mty_gpu({{(ptr|i8\*)}} %{{[0-9a-z_]+}}, {{(ptr|i32)}} {{[@_a-z0-9A-Z]+}}, i64 {{.*}}) +// LLVM: __typeart_alloc_gpu(ptr %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) +// LLVM: __typeart_alloc_gpu(ptr %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) +// LLVM: __typeart_alloc_mty_gpu(ptr %{{[0-9a-z_]+}}, ptr {{[@_a-z0-9A-Z]+}}, i64 {{.*}}) + +// LLVM_LEGACY: __typeart_alloc_gpu(i8* %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) +// LLVM_LEGACY: __typeart_alloc_gpu(i8* %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) +// LLVM_LEGACY: __typeart_alloc_gpu(i8* %{{[0-9a-z_]+}}, i32 2{{[0-9][0-9]}}, i64 {{.*}}) struct X { int a; diff --git a/test/lit.cfg b/test/lit.cfg index 928bf8e8..d565c392 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -117,6 +117,10 @@ config.substitutions.append(('%clang-cc', clang_cc)) config.substitutions.append(('%opt', opt)) config.substitutions.append(('%filecheck', filecheck)) config.substitutions.append(('%llc', llc)) +if config.llvm_version < 15: + config.substitutions.append(('%llvm-version-check', 'LLVM_LEGACY')) +else: + config.substitutions.append(('%llvm-version-check', 'LLVM')) # Substitutions: executables use "-" separator, variables use underscore config.substitutions.append(('%base_path', config.typeart_project_dir)) From 0faa9b124756c3b692eeee24488305ec9f31e93b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 28 Apr 2026 13:10:56 +0200 Subject: [PATCH 08/24] Extended allocation function set --- lib/passes/analysis/MemOpData.h | 2 ++ lib/runtime/CMakeLists.txt | 1 - test/cuda/pass/04_cudamalloc_host.c | 22 ++++++++++++++ test/cuda/pass/05_cudamalloc_managed.c | 22 ++++++++++++++ test/cuda/pass/08_cudamalloc_async.c | 41 ++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 test/cuda/pass/04_cudamalloc_host.c create mode 100644 test/cuda/pass/05_cudamalloc_managed.c create mode 100644 test/cuda/pass/08_cudamalloc_async.c diff --git a/lib/passes/analysis/MemOpData.h b/lib/passes/analysis/MemOpData.h index b96db234..f2d2145b 100644 --- a/lib/passes/analysis/MemOpData.h +++ b/lib/passes/analysis/MemOpData.h @@ -106,6 +106,8 @@ struct MemOps { {"cudaHostAlloc", MemOpKind::CudaMallocLike}, {"cudaMallocHost", MemOpKind::CudaMallocLike}, {"cudaMallocManaged", MemOpKind::CudaMallocLike}, + {"cudaMallocAsync", MemOpKind::CudaMallocLike}, + {"cudaMallocFromPoolAsync", MemOpKind::CudaMallocLike}, }; const llvm::StringMap dealloc_map{ diff --git a/lib/runtime/CMakeLists.txt b/lib/runtime/CMakeLists.txt index c8467f71..9e2c0e46 100644 --- a/lib/runtime/CMakeLists.txt +++ b/lib/runtime/CMakeLists.txt @@ -28,7 +28,6 @@ set(RUNTIME_LIB_SOURCES TypeResolution.h Runtime.cpp Runtime.h - CudaSupport.cpp ${TYPEART_META_SOURCE} $<$:../support/MPILogger.cpp> ) diff --git a/test/cuda/pass/04_cudamalloc_host.c b/test/cuda/pass/04_cudamalloc_host.c new file mode 100644 index 00000000..fc0923ea --- /dev/null +++ b/test/cuda/pass/04_cudamalloc_host.c @@ -0,0 +1,22 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check + +// REQUIRES: cuda_static + +// LLVM: call i32 @cudaMallocHost(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr {{.*}}[[CUDA_PTR]], i32 23, i64 20) + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMallocHost(i8** {{.*}}[[CAST1]], +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) + +int main() { + const int N = 20; + float* d_x; + + cudaMallocHost((void**)&d_x, N * sizeof(float)); + + return 0; +} diff --git a/test/cuda/pass/05_cudamalloc_managed.c b/test/cuda/pass/05_cudamalloc_managed.c new file mode 100644 index 00000000..47e21d32 --- /dev/null +++ b/test/cuda/pass/05_cudamalloc_managed.c @@ -0,0 +1,22 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check + +// REQUIRES: cuda_static + +// LLVM: call i32 @cudaMallocManaged(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr {{.*}}[[CUDA_PTR]], i32 23, i64 20) + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMallocManaged(i8** {{.*}}[[CAST1]], +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) + +int main() { + const int N = 20; + float* d_x; + + cudaMallocManaged((void**)&d_x, N * sizeof(float)); + + return 0; +} diff --git a/test/cuda/pass/08_cudamalloc_async.c b/test/cuda/pass/08_cudamalloc_async.c new file mode 100644 index 00000000..655450e0 --- /dev/null +++ b/test/cuda/pass/08_cudamalloc_async.c @@ -0,0 +1,41 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check + +// REQUIRES: cuda_static + +// clang-format off +// LLVM: call i32 @cudaMallocAsync(ptr {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64{{.*}} 80, ptr {{.*}}) +// LLVM-NEXT: [[CUDA_PTR_X:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER_X]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr [[CUDA_PTR_X]], i32 23, i64 20) + +// LLVM: call i32 @cudaMallocFromPoolAsync(ptr {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64{{.*}} 80, ptr {{.*}}, ptr {{.*}}) +// LLVM-NEXT: [[CUDA_PTR_Y:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER_Y]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr [[CUDA_PTR_Y]], i32 23, i64 20) + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMallocAsync(i8** {{.*}}[[CAST1]], +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMallocFromPoolAsync(i8** {{.*}}[[CAST1]], +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) +// clang-format on + +int main() { + const int N = 20; + float* d_x; + float* d_y; + + cudaStream_t stream; + cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + + cudaMallocAsync((void**)&d_x, N * sizeof(float), stream); + + cudaMemPool_t pool; + cudaMallocFromPoolAsync((void**)&d_y, N * sizeof(float), pool, stream); + + return 0; +} \ No newline at end of file From fb8058ae390b871ca9b9d5fc074280a2fc08b553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 28 Apr 2026 13:22:24 +0200 Subject: [PATCH 09/24] cudaFree variation --- lib/passes/analysis/MemOpData.h | 1 + test/cuda/pass/02_cudafree.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/lib/passes/analysis/MemOpData.h b/lib/passes/analysis/MemOpData.h index f2d2145b..4fcadeea 100644 --- a/lib/passes/analysis/MemOpData.h +++ b/lib/passes/analysis/MemOpData.h @@ -128,6 +128,7 @@ struct MemOps { {"_ZdaPvmSt11align_val_t", MemOpKind::DeleteLike}, /* delete[](void*, unsigned long, align_val_t) */ {"cudaFree", MemOpKind::FreeLike}, {"cudaFreeHost", MemOpKind::FreeLike}, + {"cudaFreeAsync", MemOpKind::FreeLike}, }; //clang-format off }; diff --git a/test/cuda/pass/02_cudafree.c b/test/cuda/pass/02_cudafree.c index be1da9ed..12e5cbf6 100644 --- a/test/cuda/pass/02_cudafree.c +++ b/test/cuda/pass/02_cudafree.c @@ -5,10 +5,21 @@ // CHECK: call i32 @cudaFree({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[0-9a-z]+]]) // CHECK-NEXT: __typeart_free_gpu({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) +// CHECK: call i32 @cudaFreeHost({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[0-9a-z]+]]) +// CHECK-NEXT: __typeart_free_gpu({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) + +// CHECK: call i32 @cudaFreeAsync({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[0-9a-z]+]], +// CHECK-NEXT: __typeart_free_gpu({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) + int main() { float* d_x; cudaFree(d_x); + cudaFreeHost(d_x); + + cudaStream_t stream; + cudaFreeAsync(d_x, stream); + return 0; } From 46adda7bedf25ea25f00d24686189cedda4ed05b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 28 Apr 2026 14:06:05 +0200 Subject: [PATCH 10/24] Add GPU flag to enable CUDA instrumentation --- lib/passes/Commandline.cpp | 13 ++++++++++--- lib/passes/analysis/MemOpVisitor.cpp | 18 +++++++++++++----- lib/passes/analysis/MemOpVisitor.h | 2 ++ .../configuration/EnvironmentConfiguration.cpp | 3 +++ lib/passes/configuration/PassConfiguration.cpp | 8 +++++++- lib/passes/configuration/TypeARTOptions.cpp | 3 +++ lib/passes/configuration/TypeARTOptions.h | 1 + lib/support/ConfigurationBaseOptions.h | 1 + test/cuda/pass/01_cudamalloc.c | 2 +- test/cuda/pass/02_cudafree.c | 2 +- test/cuda/pass/03_cudahostalloc.c | 2 +- test/cuda/pass/04_cudamalloc_host.c | 2 +- test/cuda/pass/05_cudamalloc_managed.c | 2 +- test/cuda/pass/06_cudamalloc_nonvoid.cpp | 2 +- test/cuda/pass/07_axpy.c | 2 +- test/cuda/pass/08_cudamalloc_async.c | 2 +- test/cuda/runtime/07_axpy.c | 2 +- test/pass/misc/05_make_all_callbacks.c | 4 ++-- test/pass/misc/07_config_file.c | 1 + test/pass/misc/08_config_file_default.c | 1 + test/pass/misc/11_env_flags.c | 1 + test/pass/misc/12_env_flags_single_override.c | 1 + test/pass/misc/13_env_flags_phase_heap.c | 1 + test/pass/misc/14_env_flags_phase_ignored.c | 1 + 24 files changed, 57 insertions(+), 20 deletions(-) diff --git a/lib/passes/Commandline.cpp b/lib/passes/Commandline.cpp index 9619e166..8e4100ee 100644 --- a/lib/passes/Commandline.cpp +++ b/lib/passes/Commandline.cpp @@ -70,9 +70,14 @@ static cl::opt cl_typeart_stats(CommandlineStdArgs: cl::cat(typeart_category)); static cl::opt cl_typeart_instrument_heap(CommandlineStdArgs::heap, - cl::desc(ConfigStdArgDescriptions::heap), - cl::init(ConfigStdArgValues::heap), - cl::cat(typeart_category)); + cl::desc(ConfigStdArgDescriptions::heap), + cl::init(ConfigStdArgValues::heap), + cl::cat(typeart_category)); + +static cl::opt cl_typeart_instrument_gpu(CommandlineStdArgs::gpu, + cl::desc(ConfigStdArgDescriptions::gpu), + cl::init(ConfigStdArgValues::gpu), + cl::cat(typeart_category)); static cl::opt cl_typeart_instrument_global(CommandlineStdArgs::global, cl::desc(ConfigStdArgDescriptions::global), @@ -206,6 +211,7 @@ CommandLineOptions::CommandLineOptions() { make_entry(ConfigStdArgs::types, cl_typeart_type_file), make_entry(ConfigStdArgs::stats, cl_typeart_stats), make_entry(ConfigStdArgs::heap, cl_typeart_instrument_heap), + make_entry(ConfigStdArgs::gpu, cl_typeart_instrument_gpu), make_entry(ConfigStdArgs::global, cl_typeart_instrument_global), make_entry(ConfigStdArgs::stack, cl_typeart_instrument_stack), make_entry(ConfigStdArgs::type_serialization, cl_typeart_type_serialization), @@ -226,6 +232,7 @@ CommandLineOptions::CommandLineOptions() { make_occurr_entry(ConfigStdArgs::types, cl_typeart_type_file), make_occurr_entry(ConfigStdArgs::stats, cl_typeart_stats), make_occurr_entry(ConfigStdArgs::heap, cl_typeart_instrument_heap), + make_occurr_entry(ConfigStdArgs::gpu, cl_typeart_instrument_gpu), make_occurr_entry(ConfigStdArgs::global, cl_typeart_instrument_global), make_occurr_entry(ConfigStdArgs::stack, cl_typeart_instrument_stack), make_occurr_entry(ConfigStdArgs::type_serialization, cl_typeart_type_serialization), diff --git a/lib/passes/analysis/MemOpVisitor.cpp b/lib/passes/analysis/MemOpVisitor.cpp index c42a92ea..e9c98811 100644 --- a/lib/passes/analysis/MemOpVisitor.cpp +++ b/lib/passes/analysis/MemOpVisitor.cpp @@ -55,9 +55,14 @@ MemOpVisitor::MemOpVisitor() : MemOpVisitor(true, true) { } MemOpVisitor::MemOpVisitor(const config::Configuration& config) - : MemOpVisitor(config[config::ConfigStdArgs::stack], config[config::ConfigStdArgs::heap]) { + : MemOpVisitor(config[config::ConfigStdArgs::stack], config[config::ConfigStdArgs::heap], + config[config::ConfigStdArgs::gpu]) { } -MemOpVisitor::MemOpVisitor(bool stack, bool heap) : collect_allocas(stack), collect_heap(heap) { +MemOpVisitor::MemOpVisitor(bool stack, bool heap) : MemOpVisitor(stack, heap, true) { +} + +MemOpVisitor::MemOpVisitor(bool stack, bool heap, bool gpu) + : collect_allocas(stack), collect_heap(heap), collect_gpu(gpu) { } void MemOpVisitor::collect(llvm::Function& function) { @@ -92,14 +97,17 @@ void MemOpVisitor::visitCallBase(llvm::CallBase& cb) { if (!collect_heap) { return; } + const auto* called_function = cb.getCalledFunction(); + if (!collect_gpu && called_function != nullptr && cuda::is_cuda_function(*called_function)) { + return; + } const auto isInSet = [&](const auto& fMap) -> std::optional { - const auto* f = cb.getCalledFunction(); - if (!f) { + if (called_function == nullptr) { // TODO handle calls through, e.g., function pointers? - seems infeasible // LOG_INFO("Encountered indirect call, skipping."); return {}; } - const auto name = f->getName().str(); + const auto name = called_function->getName().str(); const auto res = fMap.find(name); if (res != fMap.end()) { diff --git a/lib/passes/analysis/MemOpVisitor.h b/lib/passes/analysis/MemOpVisitor.h index fc982549..bc626854 100644 --- a/lib/passes/analysis/MemOpVisitor.h +++ b/lib/passes/analysis/MemOpVisitor.h @@ -39,11 +39,13 @@ struct MemOpVisitor : public llvm::InstVisitor { MemOps mem_operations{}; bool collect_allocas; bool collect_heap; + bool collect_gpu; public: MemOpVisitor(); explicit MemOpVisitor(const config::Configuration& config); MemOpVisitor(bool stack, bool heap); + MemOpVisitor(bool stack, bool heap, bool gpu); void collect(llvm::Function& function); void collectGlobals(llvm::Module& module); void clear(); diff --git a/lib/passes/configuration/EnvironmentConfiguration.cpp b/lib/passes/configuration/EnvironmentConfiguration.cpp index 76182011..6eec7254 100644 --- a/lib/passes/configuration/EnvironmentConfiguration.cpp +++ b/lib/passes/configuration/EnvironmentConfiguration.cpp @@ -110,6 +110,8 @@ EnvironmentFlagsOptions::EnvironmentFlagsOptions() { EnvironmentStdArgsValues::stats), make_entry(ConfigStdArgs::heap, EnvironmentStdArgs::heap, EnvironmentStdArgsValues::heap), + make_entry(ConfigStdArgs::gpu, EnvironmentStdArgs::gpu, + EnvironmentStdArgsValues::gpu), make_entry(ConfigStdArgs::global, EnvironmentStdArgs::global, EnvironmentStdArgsValues::global), make_entry(ConfigStdArgs::stack, EnvironmentStdArgs::stack, @@ -150,6 +152,7 @@ EnvironmentFlagsOptions::EnvironmentFlagsOptions() { make_occurr_entry(ConfigStdArgs::types, config::EnvironmentStdArgs::types), make_occurr_entry(ConfigStdArgs::stats, EnvironmentStdArgs::stats), make_occurr_entry(ConfigStdArgs::heap, EnvironmentStdArgs::heap), + make_occurr_entry(ConfigStdArgs::gpu, EnvironmentStdArgs::gpu), make_occurr_entry(ConfigStdArgs::global, EnvironmentStdArgs::global), make_occurr_entry(ConfigStdArgs::stack, EnvironmentStdArgs::stack), make_occurr_entry(ConfigStdArgs::type_serialization, EnvironmentStdArgs::type_serialization), diff --git a/lib/passes/configuration/PassConfiguration.cpp b/lib/passes/configuration/PassConfiguration.cpp index 5c067571..ad911f74 100644 --- a/lib/passes/configuration/PassConfiguration.cpp +++ b/lib/passes/configuration/PassConfiguration.cpp @@ -59,6 +59,12 @@ PassConfig parse_typeart_config_with_occurrence(llvm::StringRef parameters) { continue; } + if (parameter_name == ConfigStdArgs::gpu) { + result.gpu = enable; + occurrence_map[ConfigStdArgs::gpu] = true; + continue; + } + if (parameter_name == ConfigStdArgs::stack) { result.stack = enable; occurrence_map[ConfigStdArgs::stack] = true; @@ -162,4 +168,4 @@ PassConfig parse_typeart_config_with_occurrence(llvm::StringRef parameters) { return {result, occurrence_map}; } -} // namespace typeart::config::pass \ No newline at end of file +} // namespace typeart::config::pass diff --git a/lib/passes/configuration/TypeARTOptions.cpp b/lib/passes/configuration/TypeARTOptions.cpp index 1c82427f..30b0d0fd 100644 --- a/lib/passes/configuration/TypeARTOptions.cpp +++ b/lib/passes/configuration/TypeARTOptions.cpp @@ -90,6 +90,7 @@ struct llvm::yaml::MappingTraits { using typeart::config::ConfigStdArgs; yml_io.mapRequired(ConfigStdArgs::types, info.types); yml_io.mapRequired(ConfigStdArgs::heap, info.heap); + yml_io.mapOptional(ConfigStdArgs::gpu, info.gpu); yml_io.mapRequired(ConfigStdArgs::stack, info.stack); yml_io.mapOptional(ConfigStdArgs::global, info.global); yml_io.mapOptional(ConfigStdArgs::stats, info.statistics); @@ -138,6 +139,7 @@ TypeARTConfigOptions construct_with(Constructor&& make_entry) { make_entry(ConfigStdArgs::types, config.types); make_entry(ConfigStdArgs::stats, config.statistics); make_entry(ConfigStdArgs::heap, config.heap); + make_entry(ConfigStdArgs::gpu, config.gpu); make_entry(ConfigStdArgs::global, config.global); make_entry(ConfigStdArgs::stack, config.stack); make_entry(ConfigStdArgs::stack_lifetime, config.stack_lifetime); @@ -186,6 +188,7 @@ OptionsMap options_to_map(const TypeARTConfigOptions& config) { make_entry(ConfigStdArgs::types, config.types), make_entry(ConfigStdArgs::stats, config.statistics), make_entry(ConfigStdArgs::heap, config.heap), + make_entry(ConfigStdArgs::gpu, config.gpu), make_entry(ConfigStdArgs::global, config.global), make_entry(ConfigStdArgs::stack, config.stack), make_entry(ConfigStdArgs::stack_lifetime, config.stack_lifetime), diff --git a/lib/passes/configuration/TypeARTOptions.h b/lib/passes/configuration/TypeARTOptions.h index 03e7884d..56b99424 100644 --- a/lib/passes/configuration/TypeARTOptions.h +++ b/lib/passes/configuration/TypeARTOptions.h @@ -46,6 +46,7 @@ struct TypeARTAnalysisOptions { struct TypeARTConfigOptions { std::string types{ConfigStdArgValues::types}; bool heap{ConfigStdArgValues::heap}; + bool gpu{ConfigStdArgValues::gpu}; bool stack{ConfigStdArgValues::stack}; bool global{ConfigStdArgValues::global}; bool statistics{ConfigStdArgValues::stats}; diff --git a/lib/support/ConfigurationBaseOptions.h b/lib/support/ConfigurationBaseOptions.h index f7386c8e..e8bc1fc3 100644 --- a/lib/support/ConfigurationBaseOptions.h +++ b/lib/support/ConfigurationBaseOptions.h @@ -18,6 +18,7 @@ TYPEART_CONFIG_OPTION(types, "types", std::string, "typeart-types.yaml", "Locati "TYPES") TYPEART_CONFIG_OPTION(stats, "stats", bool, false, "Show statistics for TypeArt type pass.", "STATS") TYPEART_CONFIG_OPTION(heap, "heap", bool, true, "Instrument heap allocation/free instructions.", "HEAP") +TYPEART_CONFIG_OPTION(gpu, "gpu", bool, false, "Instrument GPU allocation/free instructions.", "GPU") TYPEART_CONFIG_OPTION(stack, "stack", bool, false, "Instrument stack allocations.", "STACK") TYPEART_CONFIG_OPTION(global, "global", bool, false, "Instrument global allocations.", "GLOBAL") TYPEART_CONFIG_OPTION(stack_lifetime, "stack-lifetime", bool, true, diff --git a/test/cuda/pass/01_cudamalloc.c b/test/cuda/pass/01_cudamalloc.c index 9aa7cde3..0e180be3 100644 --- a/test/cuda/pass/01_cudamalloc.c +++ b/test/cuda/pass/01_cudamalloc.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static diff --git a/test/cuda/pass/02_cudafree.c b/test/cuda/pass/02_cudafree.c index 12e5cbf6..273b5674 100644 --- a/test/cuda/pass/02_cudafree.c +++ b/test/cuda/pass/02_cudafree.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s // REQUIRES: cuda_static diff --git a/test/cuda/pass/03_cudahostalloc.c b/test/cuda/pass/03_cudahostalloc.c index 9ea9477c..745b76ca 100644 --- a/test/cuda/pass/03_cudahostalloc.c +++ b/test/cuda/pass/03_cudahostalloc.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static diff --git a/test/cuda/pass/04_cudamalloc_host.c b/test/cuda/pass/04_cudamalloc_host.c index fc0923ea..2930268f 100644 --- a/test/cuda/pass/04_cudamalloc_host.c +++ b/test/cuda/pass/04_cudamalloc_host.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static diff --git a/test/cuda/pass/05_cudamalloc_managed.c b/test/cuda/pass/05_cudamalloc_managed.c index 47e21d32..74b69aa6 100644 --- a/test/cuda/pass/05_cudamalloc_managed.c +++ b/test/cuda/pass/05_cudamalloc_managed.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static diff --git a/test/cuda/pass/06_cudamalloc_nonvoid.cpp b/test/cuda/pass/06_cudamalloc_nonvoid.cpp index 1c58cf8d..8f310fce 100644 --- a/test/cuda/pass/06_cudamalloc_nonvoid.cpp +++ b/test/cuda/pass/06_cudamalloc_nonvoid.cpp @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static diff --git a/test/cuda/pass/07_axpy.c b/test/cuda/pass/07_axpy.c index c661593f..f60ae8cc 100644 --- a/test/cuda/pass/07_axpy.c +++ b/test/cuda/pass/07_axpy.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s // REQUIRES: cuda_static diff --git a/test/cuda/pass/08_cudamalloc_async.c b/test/cuda/pass/08_cudamalloc_async.c index 655450e0..25c1a060 100644 --- a/test/cuda/pass/08_cudamalloc_async.c +++ b/test/cuda/pass/08_cudamalloc_async.c @@ -1,4 +1,4 @@ -// RUN: %cuda-c-to-llvm %s | %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda_static diff --git a/test/cuda/runtime/07_axpy.c b/test/cuda/runtime/07_axpy.c index fb89b8f7..440d87c4 100644 --- a/test/cuda/runtime/07_axpy.c +++ b/test/cuda/runtime/07_axpy.c @@ -1,4 +1,4 @@ -// RUN: %wrapper-cc -x cuda --cuda-gpu-arch=sm_50 %cuda_link %s -o %s.exe +// RUN: TYPEART_GPU=true %wrapper-cc -x cuda --cuda-gpu-arch=sm_50 %cuda_link %s -o %s.exe // RUN: %s.exe 2>&1 | %filecheck %s // REQUIRES: cuda_runtime diff --git a/test/pass/misc/05_make_all_callbacks.c b/test/pass/misc/05_make_all_callbacks.c index f54e3258..b287257d 100644 --- a/test/pass/misc/05_make_all_callbacks.c +++ b/test/pass/misc/05_make_all_callbacks.c @@ -1,5 +1,5 @@ -// RUN: %c-to-llvm -Wimplicit-function-declaration %s -I%runtime_path -I%base_path/lib/runtime | %apply-typeart -// --typeart-stack=true -S 2>&1 | \ RUN: %filecheck %s +// RUN: %c-to-llvm -Wimplicit-function-declaration %s -I%runtime_path -I%base_path/lib/runtime | %apply-typeart \ +// RUN: --typeart-stack=true -S 2>&1 | %filecheck %s #include "CallbackInterface.h" diff --git a/test/pass/misc/07_config_file.c b/test/pass/misc/07_config_file.c index b2386ebf..de6f0bc5 100644 --- a/test/pass/misc/07_config_file.c +++ b/test/pass/misc/07_config_file.c @@ -8,6 +8,7 @@ void test() { // CHECK: types: 07_config_file.c.yaml // CHECK-NEXT: heap: false +// CHECK-NEXT: gpu: false // CHECK-NEXT: stack: true // CHECK-NEXT: global: false // CHECK-NEXT: stats: {{.*}} diff --git a/test/pass/misc/08_config_file_default.c b/test/pass/misc/08_config_file_default.c index 6e908b29..cf426311 100644 --- a/test/pass/misc/08_config_file_default.c +++ b/test/pass/misc/08_config_file_default.c @@ -6,6 +6,7 @@ // CHECK: types: {{.*}}.yaml // CHECK-NEXT: heap: true +// CHECK-NEXT: gpu: false // CHECK-NEXT: stack: false // CHECK-NEXT: global: false // CHECK-NEXT: stats: true diff --git a/test/pass/misc/11_env_flags.c b/test/pass/misc/11_env_flags.c index 5aeceb80..a0b32f19 100644 --- a/test/pass/misc/11_env_flags.c +++ b/test/pass/misc/11_env_flags.c @@ -5,6 +5,7 @@ // CHECK-NEXT: --- // CHECK-NEXT: types: {{.*}} // CHECK-NEXT: heap: false +// CHECK-NEXT: gpu: false // CHECK-NEXT: stack: false // CHECK-NEXT: global: false // CHECK-NEXT: stats: false diff --git a/test/pass/misc/12_env_flags_single_override.c b/test/pass/misc/12_env_flags_single_override.c index 15a1f8b1..b2187556 100644 --- a/test/pass/misc/12_env_flags_single_override.c +++ b/test/pass/misc/12_env_flags_single_override.c @@ -5,6 +5,7 @@ // CHECK-NEXT: --- // CHECK-NEXT: types: {{.*}} // CHECK-NEXT: heap: true +// CHECK-NEXT: gpu: false // CHECK-NEXT: stack: false // CHECK-NEXT: global: false // CHECK-NEXT: stats: false diff --git a/test/pass/misc/13_env_flags_phase_heap.c b/test/pass/misc/13_env_flags_phase_heap.c index 5eb78c18..e2da7517 100644 --- a/test/pass/misc/13_env_flags_phase_heap.c +++ b/test/pass/misc/13_env_flags_phase_heap.c @@ -5,6 +5,7 @@ // CHECK-NEXT: --- // CHECK-NEXT: types: {{.*}} // CHECK-NEXT: heap: true +// CHECK-NEXT: gpu: false // CHECK-NEXT: stack: true // CHECK-NEXT: global: true // CHECK-NEXT: stats: false diff --git a/test/pass/misc/14_env_flags_phase_ignored.c b/test/pass/misc/14_env_flags_phase_ignored.c index 280887bb..358525f2 100644 --- a/test/pass/misc/14_env_flags_phase_ignored.c +++ b/test/pass/misc/14_env_flags_phase_ignored.c @@ -3,6 +3,7 @@ // CHECK: Emitting TypeART configuration content // CHECK: heap: true +// CHECK: gpu: false // CHECK-NOT: stack: true // CHECK-NOT: {{^}}global: true // CHECK-NOT: stats: false From 1ec4793cac11f91cdcd64b4842871cb7ef5ef383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 28 Apr 2026 16:41:09 +0200 Subject: [PATCH 11/24] Initial HIP support --- cmake/typeartToolchainOptions.cmake | 7 ++ externals/dimeta/CMakeLists.txt | 2 +- lib/passes/analysis/MemOpData.h | 19 ++++- lib/passes/analysis/MemOpVisitor.cpp | 8 +- .../instrumentation/MemOpArgCollector.cpp | 2 + .../instrumentation/MemOpInstrumentation.cpp | 6 +- .../instrumentation/TypeARTFunctions.cpp | 32 ++++++-- lib/passes/instrumentation/TypeARTFunctions.h | 2 + lib/passes/support/GpuUtil.h | 63 ++++++++++++++++ lib/passes/support/HipUtil.h | 74 +++++++++++++++++++ lib/passes/typegen/dimeta/DimetaTypeGen.cpp | 15 ++-- lib/passes/typegen/ir/TypeManager.cpp | 14 ++-- test/CMakeLists.txt | 43 ++++++++++- test/hip/pass/01_hipmalloc.c | 17 +++++ test/hip/pass/02_hipfree.c | 15 ++++ test/lit.cfg | 7 ++ test/lit.site.cfg.in | 2 + 17 files changed, 297 insertions(+), 31 deletions(-) create mode 100644 lib/passes/support/GpuUtil.h create mode 100644 lib/passes/support/HipUtil.h create mode 100644 test/hip/pass/01_hipmalloc.c create mode 100644 test/hip/pass/02_hipfree.c diff --git a/cmake/typeartToolchainOptions.cmake b/cmake/typeartToolchainOptions.cmake index e0f3f8ed..a1444b4a 100644 --- a/cmake/typeartToolchainOptions.cmake +++ b/cmake/typeartToolchainOptions.cmake @@ -170,6 +170,13 @@ set_package_properties(CUDAToolkit PROPERTIES "CUDA toolkit enables host-side CUDA instrumentation and runtime helpers." ) +find_package(hip QUIET) +set_package_properties(hip PROPERTIES + TYPE OPTIONAL + PURPOSE + "HIP enables host-side HIP instrumentation and runtime helpers." +) + typeart_find_llvm_progs(TYPEART_CLANG_EXEC "clang-${LLVM_VERSION_MAJOR};clang" DEFAULT_EXE "clang") typeart_find_llvm_progs(TYPEART_CLANGCXX_EXEC "clang++-${LLVM_VERSION_MAJOR};clang++" DEFAULT_EXE "clang++") typeart_find_llvm_progs(TYPEART_LLC_EXEC "llc-${LLVM_VERSION_MAJOR};llc" DEFAULT_EXE "llc") diff --git a/externals/dimeta/CMakeLists.txt b/externals/dimeta/CMakeLists.txt index 85d23e22..d0a19ddc 100644 --- a/externals/dimeta/CMakeLists.txt +++ b/externals/dimeta/CMakeLists.txt @@ -1,7 +1,7 @@ FetchContent_Declare( llvm-dimeta GIT_REPOSITORY https://github.com/ahueck/llvm-dimeta - GIT_TAG v0.5.1 + GIT_TAG feat/hip GIT_SHALLOW 1 ) diff --git a/lib/passes/analysis/MemOpData.h b/lib/passes/analysis/MemOpData.h index 4fcadeea..8255eb01 100644 --- a/lib/passes/analysis/MemOpData.h +++ b/lib/passes/analysis/MemOpData.h @@ -31,7 +31,7 @@ class IntrinsicInst; } // namespace llvm namespace typeart { -enum class MemOpKind : uint8_t { +enum class MemOpKind : uint16_t { NewLike = 1 << 0, // allocates, never null MallocLike = 1 << 1 | NewLike, // allocates, maybe null AlignedAllocLike = 1 << 2, // allocates aligned, maybe null @@ -40,12 +40,19 @@ enum class MemOpKind : uint8_t { FreeLike = 1 << 5, // free memory DeleteLike = 1 << 6, // delete (cpp) memory CudaMallocLike = 1 << 7, // cuda out-parameter allocation + HipMallocLike = 1 << 8, // hip out-parameter allocation MallocOrCallocLike = MallocLike | CallocLike | AlignedAllocLike, AllocLike = MallocOrCallocLike, AnyAlloc = AllocLike | ReallocLike, - AnyFree = FreeLike | DeleteLike + AnyFree = FreeLike | DeleteLike, + GpuMallocLike = CudaMallocLike | HipMallocLike }; +inline bool is_kind(MemOpKind kind, MemOpKind mask) { + return (static_cast>(kind) & + static_cast>(mask)) != 0; +} + struct MemOps { inline std::optional kind(llvm::StringRef function) const { if (auto alloc = allocKind(function)) { @@ -108,6 +115,11 @@ struct MemOps { {"cudaMallocManaged", MemOpKind::CudaMallocLike}, {"cudaMallocAsync", MemOpKind::CudaMallocLike}, {"cudaMallocFromPoolAsync", MemOpKind::CudaMallocLike}, + {"hipMalloc", MemOpKind::HipMallocLike}, + {"hipHostMalloc", MemOpKind::HipMallocLike}, + {"hipMallocManaged", MemOpKind::HipMallocLike}, + {"hipMallocAsync", MemOpKind::HipMallocLike}, + {"hipMallocFromPoolAsync", MemOpKind::HipMallocLike}, }; const llvm::StringMap dealloc_map{ @@ -129,6 +141,9 @@ struct MemOps { {"cudaFree", MemOpKind::FreeLike}, {"cudaFreeHost", MemOpKind::FreeLike}, {"cudaFreeAsync", MemOpKind::FreeLike}, + {"hipFree", MemOpKind::FreeLike}, + {"hipFreeHost", MemOpKind::FreeLike}, + {"hipFreeAsync", MemOpKind::FreeLike}, }; //clang-format off }; diff --git a/lib/passes/analysis/MemOpVisitor.cpp b/lib/passes/analysis/MemOpVisitor.cpp index e9c98811..e77762cd 100644 --- a/lib/passes/analysis/MemOpVisitor.cpp +++ b/lib/passes/analysis/MemOpVisitor.cpp @@ -16,7 +16,7 @@ #include "compat/CallSite.h" #include "configuration/Configuration.h" #include "support/ConfigurationBase.h" -#include "support/CudaUtil.h" +#include "support/GpuUtil.h" #include "support/Error.h" #include "support/Logger.h" #include "support/TypeUtil.h" @@ -98,7 +98,7 @@ void MemOpVisitor::visitCallBase(llvm::CallBase& cb) { return; } const auto* called_function = cb.getCalledFunction(); - if (!collect_gpu && called_function != nullptr && cuda::is_cuda_function(*called_function)) { + if (!collect_gpu && called_function != nullptr && gpu::is_gpu_function(*called_function)) { return; } const auto isInSet = [&](const auto& fMap) -> std::optional { @@ -235,8 +235,8 @@ std::pair collectRelevantMallocUsers(llvm::CallBase& c auto geps = MallocGeps{}; auto bcasts = MallocBcasts{}; - if (kind == MemOpKind::CudaMallocLike) { - if (auto bitcast = cuda::bitcast_for(call_inst); bitcast.has_value()) { + if (is_kind(kind, MemOpKind::GpuMallocLike)) { + if (auto bitcast = gpu::bitcast_for(call_inst, kind); bitcast.has_value()) { bcasts.insert(*bitcast); } return {geps, bcasts}; diff --git a/lib/passes/instrumentation/MemOpArgCollector.cpp b/lib/passes/instrumentation/MemOpArgCollector.cpp index b70ba713..f1531c1e 100644 --- a/lib/passes/instrumentation/MemOpArgCollector.cpp +++ b/lib/passes/instrumentation/MemOpArgCollector.cpp @@ -105,6 +105,8 @@ HeapArgList MemOpArgCollector::collectHeap(const MallocDataList& mallocs) { byte_count = malloc_call->getArgOperand(1); break; case MemOpKind::CudaMallocLike: + [[fallthrough]]; + case MemOpKind::HipMallocLike: byte_count = malloc_call->getArgOperand(1); if (mdata.primary != nullptr) { pointer = mdata.primary->getOperand(0); diff --git a/lib/passes/instrumentation/MemOpInstrumentation.cpp b/lib/passes/instrumentation/MemOpInstrumentation.cpp index d16a99f5..2d742c80 100644 --- a/lib/passes/instrumentation/MemOpInstrumentation.cpp +++ b/lib/passes/instrumentation/MemOpInstrumentation.cpp @@ -74,7 +74,7 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { for (const auto& [malloc, args] : heap) { auto kind = malloc.kind; Instruction* malloc_call{nullptr}; - if (malloc.kind == MemOpKind::CudaMallocLike) { + if (is_kind(malloc.kind, MemOpKind::GpuMallocLike)) { malloc_call = llvm::cast(malloc.call); } else { malloc_call = args.get_as(ArgMap::ID::pointer); @@ -149,7 +149,9 @@ InstrCount MemOpInstrumentation::instrumentHeap(const HeapArgList& heap) { target_memory_address); break; } - case MemOpKind::CudaMallocLike: { + case MemOpKind::CudaMallocLike: + [[fallthrough]]; + case MemOpKind::HipMallocLike: { auto* runtime_ptr_type = instrumentation_helper->getTypeFor(IType::ptr); #if LLVM_VERSION_MAJOR >= 15 auto* loaded_ptr = IRB.CreateLoad(runtime_ptr_type, pointer_value); diff --git a/lib/passes/instrumentation/TypeARTFunctions.cpp b/lib/passes/instrumentation/TypeARTFunctions.cpp index 17c295fa..b2bc1cea 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.cpp +++ b/lib/passes/instrumentation/TypeARTFunctions.cpp @@ -17,6 +17,7 @@ #include "instrumentation/TypeIDProvider.h" #include "support/ConfigurationBase.h" #include "support/CudaUtil.h" +#include "support/HipUtil.h" #include "support/Logger.h" #include "support/OmpUtil.h" @@ -52,6 +53,8 @@ std::string get_func_suffix(IFunc id) { switch (id) { case IFunc::free_cuda: case IFunc::heap_cuda: + case IFunc::free_hip: + case IFunc::heap_hip: return "_gpu"; case IFunc::free_omp: case IFunc::heap_omp: @@ -63,7 +66,7 @@ std::string get_func_suffix(IFunc id) { } } -enum class IFuncType : unsigned { standard, omp, cuda }; +enum class IFuncType : unsigned { standard, omp, cuda, hip }; IFuncType ifunc_type_for(llvm::Function* f) { if (f == nullptr) { @@ -74,6 +77,10 @@ IFuncType ifunc_type_for(llvm::Function* f) { return IFuncType::cuda; } + if (hip::is_hip_function(*f)) { + return IFuncType::hip; + } + if (util::omp::isOmpContext(f)) { return IFuncType::omp; } @@ -93,10 +100,10 @@ IFunc ifunc_for_function(IFunc general_type, llvm::Value* value) { } else if (llvm::isa(value)) { type = detail::ifunc_type_for(nullptr); } else if (auto callbase = llvm::dyn_cast(value)) { - type = detail::ifunc_type_for(callbase->getFunction()); - auto maybe_cuda = detail::ifunc_type_for(callbase->getCalledFunction()); - if (maybe_cuda == detail::IFuncType::cuda) { - type = detail::IFuncType::cuda; + type = detail::ifunc_type_for(callbase->getFunction()); + auto called_context = detail::ifunc_type_for(callbase->getCalledFunction()); + if (called_context == detail::IFuncType::cuda || called_context == detail::IFuncType::hip) { + type = called_context; } } @@ -115,6 +122,17 @@ IFunc ifunc_for_function(IFunc general_type, llvm::Value* value) { } } + if (detail::IFuncType::hip == type) { + switch (general_type) { + case IFunc::heap: + return IFunc::heap_hip; + case IFunc::free: + return IFunc::free_hip; + default: + return general_type; + } + } + switch (general_type) { case IFunc::stack: return IFunc::stack_omp; @@ -180,6 +198,7 @@ llvm::Function* TAFunctionDeclarator::make_function(IFunc func_id, llvm::StringR const auto name = make_fname(basename, args); if (auto it = function_map.find(name); it != function_map.end()) { + typeart_functions.putFunctionFor(func_id, it->second); return it->second; } @@ -343,6 +362,8 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& typeart_alloc_cuda.f = decl.make_function(IFunc::heap_cuda, typeart_alloc_cuda.name, alloc_arg_types); typeart_free_cuda.f = decl.make_function(IFunc::free_cuda, typeart_free_cuda.name, free_arg_types); + decl.make_function(IFunc::heap_hip, typeart_alloc_cuda.name, alloc_arg_types); + decl.make_function(IFunc::free_hip, typeart_free_cuda.name, free_arg_types); typeart_alloc_omp_mty.f = decl_alternatives.make_function(IFunc::heap_omp, typeart_alloc_omp_mty.name, alloc_arg_types_mty); @@ -351,6 +372,7 @@ std::unique_ptr declare_instrumentation_functions(llvm::Module& typeart_alloc_cuda_mty.f = decl_alternatives.make_function(IFunc::heap_cuda, typeart_alloc_cuda_mty.name, alloc_arg_types_mty); + decl_alternatives.make_function(IFunc::heap_hip, typeart_alloc_cuda_mty.name, alloc_arg_types_mty); return std::make_unique(functions, functions_alternative); } diff --git a/lib/passes/instrumentation/TypeARTFunctions.h b/lib/passes/instrumentation/TypeARTFunctions.h index 3128d92e..e43a103c 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.h +++ b/lib/passes/instrumentation/TypeARTFunctions.h @@ -44,6 +44,8 @@ enum class IFunc : unsigned { scope_omp, heap_cuda, free_cuda, + heap_hip, + free_hip, type }; diff --git a/lib/passes/support/GpuUtil.h b/lib/passes/support/GpuUtil.h new file mode 100644 index 00000000..5ebcf653 --- /dev/null +++ b/lib/passes/support/GpuUtil.h @@ -0,0 +1,63 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. +// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_GPUUTIL_H +#define TYPEART_GPUUTIL_H + +#include "analysis/MemOpData.h" +#include "support/CudaUtil.h" +#include "support/HipUtil.h" + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" + +#include + +namespace typeart::gpu { + +inline std::optional bitcast_for(const llvm::CallBase& cb, MemOpKind kind) { + if (kind == MemOpKind::CudaMallocLike) { + return cuda::bitcast_for(cb); + } + if (kind == MemOpKind::HipMallocLike) { + return hip::bitcast_for(cb); + } + return std::nullopt; +} + +inline bool is_templated_malloc_like(llvm::StringRef name, MemOpKind kind) { + if (kind == MemOpKind::CudaMallocLike) { + return cuda::is_templated_malloc_like(name); + } + if (kind == MemOpKind::HipMallocLike) { + return hip::is_templated_malloc_like(name); + } + return false; +} + +inline bool is_templated_malloc_like(const llvm::Function& function, MemOpKind kind) { + if (kind == MemOpKind::CudaMallocLike) { + return cuda::is_templated_malloc_like(function); + } + if (kind == MemOpKind::HipMallocLike) { + return hip::is_templated_malloc_like(function); + } + return false; +} + +inline bool is_gpu_function(const llvm::Function& function) { + return cuda::is_cuda_function(function) || hip::is_hip_function(function); +} + +} // namespace typeart::gpu + +#endif // TYPEART_GPUUTIL_H diff --git a/lib/passes/support/HipUtil.h b/lib/passes/support/HipUtil.h new file mode 100644 index 00000000..1628db97 --- /dev/null +++ b/lib/passes/support/HipUtil.h @@ -0,0 +1,74 @@ +// TypeART library +// +// Copyright (c) 2017-2026 TypeART Authors +// Distributed under the BSD 3-Clause license. +// (See accompanying file LICENSE.txt or copy at +// https://opensource.org/licenses/BSD-3-Clause) +// +// Project home: https://github.com/tudasc/TypeART +// +// SPDX-License-Identifier: BSD-3-Clause +// + +#ifndef TYPEART_HIPUTIL_H +#define TYPEART_HIPUTIL_H + +#include "analysis/MemOpData.h" +#include "support/Util.h" + +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#include +#include +#include + +namespace typeart::hip { + +inline std::optional bitcast_for(llvm::Value* hip_ptr) { + std::optional fallback; + for (auto& use : hip_ptr->uses()) { + auto* use_value = use.get(); + auto* bitcast = llvm::dyn_cast(use_value); + if (bitcast == nullptr) { + continue; + } + + if (auto* primary_bitcast = llvm::dyn_cast(bitcast->getOperand(0))) { + return primary_bitcast; + } + + fallback = bitcast; + return fallback; + } + return fallback; +} + +inline std::optional bitcast_for(const llvm::CallBase& hip_call) { + return bitcast_for(hip_call.getArgOperand(0)); +} + +inline bool is_hip_function(const llvm::Function& function) { + const auto function_name = util::try_demangle(function); + return util::starts_with_any_of(function_name, "hip"); +} + +inline bool is_templated_malloc_like(llvm::StringRef name) { + const auto templ_start_pos = name.find_first_of('<'); + if (templ_start_pos == llvm::StringRef::npos) { + return false; + } + auto extracted_fn = name.substr(0, templ_start_pos); + MemOps ops; + return ops.allocKind(extracted_fn) == MemOpKind::HipMallocLike; +} + +inline bool is_templated_malloc_like(const llvm::Function& function) { + const std::string name = util::try_demangle(function); + return is_templated_malloc_like(name); +} + +} // namespace typeart::hip + +#endif // TYPEART_HIPUTIL_H diff --git a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp index 528bce9d..87b4002a 100644 --- a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp +++ b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp @@ -14,7 +14,7 @@ #include "Dimeta.h" #include "DimetaData.h" #include "analysis/MemOpData.h" -#include "support/CudaUtil.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "typegen/TypeGenerator.h" #include "typelib/TypeDatabase.h" @@ -495,13 +495,12 @@ class DimetaTypeManager final : public TypeIDGenerator { LOG_DEBUG("Registering malloc-like") const auto function_name = val->location.function; - if (call->getCalledFunction() != nullptr && cuda::is_templated_malloc_like(function_name)) { - MemOps mem_operations; - auto kind = mem_operations.kind(call->getCalledFunction()->getName()); - if (kind == MemOpKind::CudaMallocLike) { - LOG_DEBUG("Workaround for pointer level of call base " << function_name) - workaround::remove_pointer_level(call, val.value()); - } + MemOps mem_operations; + auto kind = call->getCalledFunction() != nullptr ? mem_operations.kind(call->getCalledFunction()->getName()) : std::nullopt; + + if (kind && is_kind(kind.value(), MemOpKind::GpuMallocLike) && gpu::is_templated_malloc_like(function_name, kind.value())) { + LOG_DEBUG("Workaround for pointer level of call base " << function_name) + workaround::remove_pointer_level(call, val.value()); } return {getOrRegister(val->type, true), array_size(val->type)}; diff --git a/lib/passes/typegen/ir/TypeManager.cpp b/lib/passes/typegen/ir/TypeManager.cpp index 1927f920..2c346700 100644 --- a/lib/passes/typegen/ir/TypeManager.cpp +++ b/lib/passes/typegen/ir/TypeManager.cpp @@ -15,7 +15,7 @@ #include "IRTypeGen.h" #include "StructTypeHandler.h" #include "VectorTypeHandler.h" -#include "support/CudaUtil.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "support/TypeUtil.h" #include "support/Util.h" @@ -280,13 +280,13 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { BitCastInst* primaryBitcast = mdata.primary; llvm::Type* allocation_type = nullptr; - if (mdata.kind == MemOpKind::CudaMallocLike && primaryBitcast == nullptr) { - if (auto bitcast = cuda::bitcast_for(*malloc_call); bitcast.has_value()) { + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike) && primaryBitcast == nullptr) { + if (auto bitcast = gpu::bitcast_for(*malloc_call, mdata.kind); bitcast.has_value()) { primaryBitcast = *bitcast; } } - if (mdata.kind == MemOpKind::CudaMallocLike) { + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike)) { allocation_type = llvm::Type::getInt8Ty(malloc_call->getContext()); } else { auto pointee_type = tu::getPointerElementType(malloc_call->getType()); @@ -295,7 +295,7 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { int typeId = getOrRegisterType(allocation_type, dl); // retrieveTypeID(tu::getVoidType(c)); - if (mdata.kind == MemOpKind::CudaMallocLike) { + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike)) { typeId = TYPEART_POINTER; } @@ -308,7 +308,7 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { // Number of bytes per element, 1 for void* unsigned typeSize = tu::getTypeSizeInBytes(allocation_type, dl); - if (mdata.kind == MemOpKind::CudaMallocLike) { + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike)) { typeSize = 1; } @@ -319,7 +319,7 @@ TypeIdentifier TypeManager::getOrRegisterType(const MallocData& mdata) { dstPtrType = *pointee_type; } // Basically: getSrcTy()->getPointerElementType()->getPointerElementType(): - if (mdata.kind == MemOpKind::CudaMallocLike && dstPtrType == nullptr) { + if (is_kind(mdata.kind, MemOpKind::GpuMallocLike) && dstPtrType == nullptr) { if (auto pointee_type = tu::getPointerElementType(primaryBitcast->getSrcTy()); pointee_type.has_value()) { dstPtrType = *pointee_type; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 361044d0..6c3f6c53 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,6 +45,37 @@ endfunction() cuda_runnable_detect(TYPEART_CUDA_RUNNABLE) +function(hip_runnable_detect hip_is_runnable) + set(${hip_is_runnable} 0 PARENT_SCOPE) + if(NOT hip_FOUND) + return() + endif() + + find_program(TYPEART_ROCM_SMI_EXEC rocm-smi) + mark_as_advanced(TYPEART_ROCM_SMI_EXEC) + if(NOT TYPEART_ROCM_SMI_EXEC) + return() + endif() + + execute_process( + COMMAND ${TYPEART_ROCM_SMI_EXEC} -L + RESULT_VARIABLE smi_result + OUTPUT_VARIABLE smi_out + ERROR_QUIET + ) + if(NOT smi_result EQUAL 0) + return() + endif() + + string(REGEX MATCHALL "GPU [0-9]+:" smi_gpu_list ${smi_out}) + list(LENGTH smi_gpu_list TYPEART_HIP_GPU_COUNT) + if(TYPEART_HIP_GPU_COUNT GREATER 0) + set(${hip_is_runnable} 1 PARENT_SCOPE) + endif() +endfunction() + +hip_runnable_detect(TYPEART_HIP_RUNNABLE) + macro(pythonize_bool truth_var var) if(${truth_var}) set(${var} True) @@ -76,6 +107,7 @@ function(typeart_configure_lit_site input output) set(TYPEARTPASS_CUDA_FLAGS "") set(TYPEARTPASS_CUDA_PATH "") + if(CUDAToolkit_FOUND) if(DEFINED CUDAToolkit_TARGET_DIR AND NOT "${CUDAToolkit_TARGET_DIR}" STREQUAL "") set(TYPEARTPASS_CUDA_PATH "${CUDAToolkit_TARGET_DIR}") @@ -97,9 +129,14 @@ function(typeart_configure_lit_site input output) endif() endif() - set(TYPEART_CUDA_STATIC_AVAILABLE FALSE) + set(TYPEART_CUDA_STATIC_AVAILABLE False) if(CUDAToolkit_FOUND OR NOT "${TYPEARTPASS_CUDA_PATH}" STREQUAL "") - set(TYPEART_CUDA_STATIC_AVAILABLE TRUE) + set(TYPEART_CUDA_STATIC_AVAILABLE True) + endif() + + set(TYPEART_HIP_STATIC_AVAILABLE False) + if(hip_FOUND) + set(TYPEART_HIP_STATIC_AVAILABLE True) endif() pythonize_bool(TYPEART_CUDA_STATIC_AVAILABLE TYPEARTPASS_CUDA_STATIC) @@ -206,6 +243,7 @@ set(TYPEART_SUITES all pass cuda + hip runtime script typemapping @@ -219,6 +257,7 @@ set(TYPEART_SUITES_WORKERS 1 1 1 + 1 ${NUM_CPU} 1 1 diff --git a/test/hip/pass/01_hipmalloc.c b/test/hip/pass/01_hipmalloc.c new file mode 100644 index 00000000..01833e53 --- /dev/null +++ b/test/hip/pass/01_hipmalloc.c @@ -0,0 +1,17 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check + +// REQUIRES: hip_static && !llvm-14 + +// LLVM: call i32 @hipMalloc(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], +// LLVM: [[HIP_LOAD:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] +// LLVM: call void @__typeart_alloc_gpu(ptr [[HIP_LOAD]], i32 23 + +#include +int main() { + const int N = 20; + float* d_x; + + hipMalloc(&d_x, N * sizeof(float)); + + return 0; +} diff --git a/test/hip/pass/02_hipfree.c b/test/hip/pass/02_hipfree.c new file mode 100644 index 00000000..b7efaa99 --- /dev/null +++ b/test/hip/pass/02_hipfree.c @@ -0,0 +1,15 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip_static && !llvm-14 + +// CHECK: call i32 @hipFree(ptr {{.*}}[[HIP_POINTER:%[0-9a-z]+]]) +// CHECK-NEXT: __typeart_free_gpu(ptr {{.*}}[[HIP_POINTER]]) + +#include +int main() { + float* d_x; + + hipFree(d_x); + + return 0; +} diff --git a/test/lit.cfg b/test/lit.cfg index d565c392..9ee0e6c2 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -63,6 +63,9 @@ if config.cuda_static: if config.cuda_runtime: config.available_features.add('cuda_runtime') +if config.has_hip: + config.available_features.add('hip_static') + profile_files = getattr(config, 'profile_file', None) typeart_base_lib_dir= getattr(config, 'typeart_base_lib_dir', None) typeart_lib_root = getattr(config, 'typeart_lib_dir', None) @@ -80,6 +83,8 @@ if cuda_path: cuda_host_args += ' --cuda-path={}'.format(cuda_path) cuda_flags = getattr(config, 'cuda_flags', '') cuda_link_flags = f"-L{config.cuda_lib} -Wl,-rpath,{config.cuda_lib} -lcudart" +hip_host_args = '-x hip --offload-host-only -nogpulib' +hip_flags = getattr(config, 'hip_flags', '') type_file = 'typeart-types.yaml' openmp_c_flags = getattr(config, 'openmp_c_flags', None) openmp_cxx_flags = getattr(config, 'openmp_cxx_flags', None) @@ -157,6 +162,8 @@ config.substitutions.append(('%c-to-llvm', '{} {}'.format(clang_cc, to_llvm_args config.substitutions.append(('%cpp-to-llvm', '{} {}'.format(clang_cpp, to_llvm_args))) config.substitutions.append(('%cuda-c-to-llvm', '{} {} {} {}'.format(clang_cc, cuda_host_args, cuda_flags, to_llvm_args))) config.substitutions.append(('%cuda-cpp-to-llvm', '{} {} {} {}'.format(clang_cpp, cuda_host_args, cuda_flags, to_llvm_args))) +config.substitutions.append(('%hip-c-to-llvm', '{} {} {} {}'.format(clang_cc, hip_host_args, hip_flags, to_llvm_args))) +config.substitutions.append(('%hip-cpp-to-llvm', '{} {} {} {}'.format(clang_cpp, hip_host_args, hip_flags, to_llvm_args))) config.substitutions.append(('%run', '{}/run.sh'.format(typeart_script_dir))) config.substitutions.append(('%apply', '{}/apply.sh'.format(typeart_script_dir))) diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index b399460c..8837a79c 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -45,6 +45,8 @@ config.cuda_flags="@TYPEARTPASS_CUDA_FLAGS@" config.cuda_path="@TYPEARTPASS_CUDA_PATH@" config.cuda_lib="@CUDAToolkit_LIBRARY_DIR@" +config.has_hip = "@TYPEART_HIP_STATIC_AVAILABLE@" + config.python_interp = "@Python3_EXECUTABLE@" config.llvm_version = @LLVM_VERSION_MAJOR@ From 780d892b01115a2dc88f3eb3cef77c848e87bfb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Wed, 29 Apr 2026 16:33:45 +0200 Subject: [PATCH 12/24] Simplify CMake GPU test setup --- lib/passes/TypeARTPass.cpp | 5 +- lib/passes/support/GpuUtil.h | 5 ++ lib/passes/support/HipUtil.h | 9 +++ test/CMakeLists.txt | 74 +++++++++++++++++------- test/cuda/pass/01_cudamalloc.c | 2 +- test/cuda/pass/02_cudafree.c | 2 +- test/cuda/pass/03_cudahostalloc.c | 2 +- test/cuda/pass/04_cudamalloc_host.c | 2 +- test/cuda/pass/05_cudamalloc_managed.c | 2 +- test/cuda/pass/06_cudamalloc_nonvoid.cpp | 2 +- test/cuda/pass/07_axpy.c | 2 +- test/cuda/pass/08_cudamalloc_async.c | 4 +- test/hip/pass/01_hipmalloc.c | 2 +- test/hip/pass/02_hipfree.c | 2 +- test/lit.cfg | 16 +++-- test/lit.site.cfg.in | 11 ++-- 16 files changed, 100 insertions(+), 42 deletions(-) diff --git a/lib/passes/TypeARTPass.cpp b/lib/passes/TypeARTPass.cpp index ba3d4b2f..19851132 100644 --- a/lib/passes/TypeARTPass.cpp +++ b/lib/passes/TypeARTPass.cpp @@ -26,6 +26,7 @@ #include "instrumentation/TypeIDProvider.h" #include "support/ConfigurationBase.h" #include "support/CudaUtil.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "support/ModuleDumper.h" #include "support/Table.h" @@ -270,8 +271,8 @@ class TypeArtPass : public llvm::PassInfoMixin { llvm::PreservedAnalyses run(llvm::Module& m, llvm::ModuleAnalysisManager&) { - if (cuda::is_device_module(m)) { - LOG_DEBUG("Skipping CUDA device module: " << m.getName()); + if (gpu::is_device_module(m)) { + LOG_DEBUG("Skipping GPU device module: " << m.getName()); return llvm::PreservedAnalyses::all(); } bool changed{false}; diff --git a/lib/passes/support/GpuUtil.h b/lib/passes/support/GpuUtil.h index 5ebcf653..f695a1a4 100644 --- a/lib/passes/support/GpuUtil.h +++ b/lib/passes/support/GpuUtil.h @@ -19,11 +19,16 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include namespace typeart::gpu { +inline bool is_device_module(const llvm::Module& module) { + return cuda::is_device_module(module) || hip::is_device_module(module); +} + inline std::optional bitcast_for(const llvm::CallBase& cb, MemOpKind kind) { if (kind == MemOpKind::CudaMallocLike) { return cuda::bitcast_for(cb); diff --git a/lib/passes/support/HipUtil.h b/lib/passes/support/HipUtil.h index 1628db97..360ad074 100644 --- a/lib/passes/support/HipUtil.h +++ b/lib/passes/support/HipUtil.h @@ -49,6 +49,15 @@ inline std::optional bitcast_for(const llvm::CallBase& hip_c return bitcast_for(hip_call.getArgOperand(0)); } +inline bool is_device_module(const llvm::Module& module) { +#if LLVM_VERSION_MAJOR >= 20 + const auto triple = module.getTargetTriple().str(); +#else + const auto triple = module.getTargetTriple(); +#endif + return llvm::StringRef{triple}.find("amdgcn") != llvm::StringRef::npos; +} + inline bool is_hip_function(const llvm::Function& function) { const auto function_name = util::try_demangle(function); return util::starts_with_any_of(function_name, "hip"); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6c3f6c53..28451681 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,10 @@ typeart_find_llvm_progs(TYPEART_FILECHECK_EXEC "FileCheck-${LLVM_VERSION_MAJOR}; find_package(Python3 COMPONENTS Interpreter) +set(TYPEART_HIP_ARCH_OVERRIDE "" CACHE STRING "Override HIP offload architecture for lit tests (e.g., gfx1031)") + +option(TYPEART_DEBUG_HIP_TARGETS "Print HIP target properties during configure" OFF) + if(LLVM_EXTERNAL_LIT) cmake_path(GET LLVM_EXTERNAL_LIT PARENT_PATH LLVM_EXTERNAL_LIT_DIR) endif() @@ -45,8 +49,9 @@ endfunction() cuda_runnable_detect(TYPEART_CUDA_RUNNABLE) -function(hip_runnable_detect hip_is_runnable) +function(hip_runnable_detect hip_is_runnable hip_arch) set(${hip_is_runnable} 0 PARENT_SCOPE) + set(${hip_arch} "" PARENT_SCOPE) if(NOT hip_FOUND) return() endif() @@ -58,7 +63,7 @@ function(hip_runnable_detect hip_is_runnable) endif() execute_process( - COMMAND ${TYPEART_ROCM_SMI_EXEC} -L + COMMAND ${TYPEART_ROCM_SMI_EXEC} --showproductname --showuniqueid --json RESULT_VARIABLE smi_result OUTPUT_VARIABLE smi_out ERROR_QUIET @@ -67,14 +72,25 @@ function(hip_runnable_detect hip_is_runnable) return() endif() - string(REGEX MATCHALL "GPU [0-9]+:" smi_gpu_list ${smi_out}) - list(LENGTH smi_gpu_list TYPEART_HIP_GPU_COUNT) + string(FIND "${smi_out}" "{" json_start) + if(json_start EQUAL -1) + return() + endif() + + string(SUBSTRING "${smi_out}" ${json_start} -1 clean_json) + string(JSON TYPEART_HIP_GPU_COUNT LENGTH "${clean_json}") + if(TYPEART_HIP_GPU_COUNT GREATER 0) set(${hip_is_runnable} 1 PARENT_SCOPE) + string(JSON TYPEART_HIP_ARCH GET "${clean_json}" "card0" "GFX Version") + set(${hip_arch} ${TYPEART_HIP_ARCH} PARENT_SCOPE) endif() endfunction() -hip_runnable_detect(TYPEART_HIP_RUNNABLE) +hip_runnable_detect(TYPEART_HIP_RUNNABLE TYPEART_HIP_ARCH) +if(TYPEART_HIP_ARCH_OVERRIDE) + set(TYPEART_HIP_ARCH ${TYPEART_HIP_ARCH_OVERRIDE}) +endif() macro(pythonize_bool truth_var var) if(${truth_var}) @@ -107,7 +123,9 @@ function(typeart_configure_lit_site input output) set(TYPEARTPASS_CUDA_FLAGS "") set(TYPEARTPASS_CUDA_PATH "") - + set(TYPEARTPASS_HIP_FLAGS "") + set(TYPEARTPASS_HIP_LINK_FLAGS "") + if(CUDAToolkit_FOUND) if(DEFINED CUDAToolkit_TARGET_DIR AND NOT "${CUDAToolkit_TARGET_DIR}" STREQUAL "") set(TYPEARTPASS_CUDA_PATH "${CUDAToolkit_TARGET_DIR}") @@ -117,30 +135,44 @@ function(typeart_configure_lit_site input output) cmake_path(GET CUDAToolkit_BIN_DIR PARENT_PATH TYPEARTPASS_CUDA_PATH) endif() - foreach(cuda_inc ${CUDAToolkit_INCLUDE_DIRS}) - string(APPEND TYPEARTPASS_CUDA_FLAGS " -isystem ${cuda_inc}") - endforeach() - else() - find_path(TYPEART_CUDA_RUNTIME_HEADER_DIR cuda_runtime_api.h - HINTS /usr/local/cuda/include /usr/include /opt/cuda/include - ) - if(TYPEART_CUDA_RUNTIME_HEADER_DIR) - cmake_path(GET TYPEART_CUDA_RUNTIME_HEADER_DIR PARENT_PATH TYPEARTPASS_CUDA_PATH) + if(CUDAToolkit_INCLUDE_DIRS) + list(TRANSFORM CUDAToolkit_INCLUDE_DIRS PREPEND "-isystem ") + list(JOIN CUDAToolkit_INCLUDE_DIRS " " TYPEARTPASS_CUDA_FLAGS) + string(PREPEND TYPEARTPASS_CUDA_FLAGS " ") endif() endif() - set(TYPEART_CUDA_STATIC_AVAILABLE False) - if(CUDAToolkit_FOUND OR NOT "${TYPEARTPASS_CUDA_PATH}" STREQUAL "") - set(TYPEART_CUDA_STATIC_AVAILABLE True) - endif() + set(TYPEART_CUDA_STATIC_AVAILABLE ${CUDAToolkit_FOUND}) set(TYPEART_HIP_STATIC_AVAILABLE False) if(hip_FOUND) set(TYPEART_HIP_STATIC_AVAILABLE True) + + set(TYPEART_HIP_INCLUDE_DIR_INPUT "") + if(hip_INCLUDE_DIRS) + set(TYPEART_HIP_INCLUDE_DIR_INPUT ${hip_INCLUDE_DIRS}) + elseif(hip_INCLUDE_DIR) + set(TYPEART_HIP_INCLUDE_DIR_INPUT ${hip_INCLUDE_DIR}) + endif() + + if(TYPEART_HIP_INCLUDE_DIR_INPUT) + list(TRANSFORM TYPEART_HIP_INCLUDE_DIR_INPUT PREPEND "-isystem ") + list(JOIN TYPEART_HIP_INCLUDE_DIR_INPUT " " TYPEARTPASS_HIP_FLAGS) + string(PREPEND TYPEARTPASS_HIP_FLAGS " ") + endif() + + set(TYPEART_HIP_LINK_FLAGS_MISC "--hip-link") + if(hip_LIB_INSTALL_DIR) + list(APPEND TYPEART_HIP_LINK_FLAGS_MISC "-L${hip_LIB_INSTALL_DIR}") + list(APPEND TYPEART_HIP_LINK_FLAGS_MISC "-Wl,-rpath,${hip_LIB_INSTALL_DIR}") + endif() + list(JOIN TYPEART_HIP_LINK_FLAGS_MISC " " TYPEARTPASS_HIP_LINK_FLAGS) endif() - pythonize_bool(TYPEART_CUDA_STATIC_AVAILABLE TYPEARTPASS_CUDA_STATIC) - pythonize_bool(TYPEART_CUDA_RUNNABLE TYPEARTPASS_CUDA_RUNTIME) + pythonize_bool(TYPEART_CUDA_STATIC_AVAILABLE TYPEARTPASS_HAS_CUDA) + pythonize_bool(TYPEART_CUDA_RUNNABLE TYPEARTPASS_CUDA_RUNNABLE) + pythonize_bool(TYPEART_HIP_STATIC_AVAILABLE TYPEARTPASS_HAS_HIP) + pythonize_bool(TYPEART_HIP_RUNNABLE TYPEARTPASS_HIP_RUNNABLE) pythonize_bool(MPI_C_FOUND TYPEARTPASS_MPI_C) pythonize_bool(MPI_CXX_FOUND TYPEARTPASS_MPI_CXX) diff --git a/test/cuda/pass/01_cudamalloc.c b/test/cuda/pass/01_cudamalloc.c index 0e180be3..4a63c9f0 100644 --- a/test/cuda/pass/01_cudamalloc.c +++ b/test/cuda/pass/01_cudamalloc.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: cuda_static +// REQUIRES: cuda // LLVM: call i32 @cudaMalloc(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], // LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] diff --git a/test/cuda/pass/02_cudafree.c b/test/cuda/pass/02_cudafree.c index 273b5674..a14e7198 100644 --- a/test/cuda/pass/02_cudafree.c +++ b/test/cuda/pass/02_cudafree.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s -// REQUIRES: cuda_static +// REQUIRES: cuda // CHECK: call i32 @cudaFree({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[0-9a-z]+]]) // CHECK-NEXT: __typeart_free_gpu({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) diff --git a/test/cuda/pass/03_cudahostalloc.c b/test/cuda/pass/03_cudahostalloc.c index 745b76ca..34ef38e2 100644 --- a/test/cuda/pass/03_cudahostalloc.c +++ b/test/cuda/pass/03_cudahostalloc.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: cuda_static +// REQUIRES: cuda // LLVM: call i32 @cudaHostAlloc(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], // LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER]] diff --git a/test/cuda/pass/04_cudamalloc_host.c b/test/cuda/pass/04_cudamalloc_host.c index 2930268f..11b1e998 100644 --- a/test/cuda/pass/04_cudamalloc_host.c +++ b/test/cuda/pass/04_cudamalloc_host.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: cuda_static +// REQUIRES: cuda // LLVM: call i32 @cudaMallocHost(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], // LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] diff --git a/test/cuda/pass/05_cudamalloc_managed.c b/test/cuda/pass/05_cudamalloc_managed.c index 74b69aa6..925a733e 100644 --- a/test/cuda/pass/05_cudamalloc_managed.c +++ b/test/cuda/pass/05_cudamalloc_managed.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: cuda_static +// REQUIRES: cuda // LLVM: call i32 @cudaMallocManaged(ptr {{.*}}[[CU_POINTER:%[_0-9a-z]+]], // LLVM-NEXT: [[CUDA_PTR:%[0-9a-z_]+]] = load {{.*}}, {{.*}}[[CU_POINTER]] diff --git a/test/cuda/pass/06_cudamalloc_nonvoid.cpp b/test/cuda/pass/06_cudamalloc_nonvoid.cpp index 8f310fce..57296074 100644 --- a/test/cuda/pass/06_cudamalloc_nonvoid.cpp +++ b/test/cuda/pass/06_cudamalloc_nonvoid.cpp @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: cuda_static +// REQUIRES: cuda // LLVM: __typeart_alloc_gpu(ptr %{{[0-9a-z_]+}}, i32 23, i64 {{.*}}) // LLVM: __typeart_alloc_gpu(ptr %{{[0-9a-z_]+}}, i32 24, i64 {{.*}}) diff --git a/test/cuda/pass/07_axpy.c b/test/cuda/pass/07_axpy.c index f60ae8cc..37be150d 100644 --- a/test/cuda/pass/07_axpy.c +++ b/test/cuda/pass/07_axpy.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s -// REQUIRES: cuda_static +// REQUIRES: cuda // CHECK: Malloc : 2 diff --git a/test/cuda/pass/08_cudamalloc_async.c b/test/cuda/pass/08_cudamalloc_async.c index 25c1a060..667b6840 100644 --- a/test/cuda/pass/08_cudamalloc_async.c +++ b/test/cuda/pass/08_cudamalloc_async.c @@ -1,6 +1,6 @@ // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: cuda_static +// REQUIRES: cuda // clang-format off // LLVM: call i32 @cudaMallocAsync(ptr {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64{{.*}} 80, ptr {{.*}}) @@ -38,4 +38,4 @@ int main() { cudaMallocFromPoolAsync((void**)&d_y, N * sizeof(float), pool, stream); return 0; -} \ No newline at end of file +} diff --git a/test/hip/pass/01_hipmalloc.c b/test/hip/pass/01_hipmalloc.c index 01833e53..ca7dd7c2 100644 --- a/test/hip/pass/01_hipmalloc.c +++ b/test/hip/pass/01_hipmalloc.c @@ -1,6 +1,6 @@ // RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check -// REQUIRES: hip_static && !llvm-14 +// REQUIRES: hip && !llvm-14 // LLVM: call i32 @hipMalloc(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], // LLVM: [[HIP_LOAD:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] diff --git a/test/hip/pass/02_hipfree.c b/test/hip/pass/02_hipfree.c index b7efaa99..24046408 100644 --- a/test/hip/pass/02_hipfree.c +++ b/test/hip/pass/02_hipfree.c @@ -1,6 +1,6 @@ // RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s -// REQUIRES: hip_static && !llvm-14 +// REQUIRES: hip && !llvm-14 // CHECK: call i32 @hipFree(ptr {{.*}}[[HIP_POINTER:%[0-9a-z]+]]) // CHECK-NEXT: __typeart_free_gpu(ptr {{.*}}[[HIP_POINTER]]) diff --git a/test/lit.cfg b/test/lit.cfg index 9ee0e6c2..46e787c7 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -58,13 +58,15 @@ if config.has_legacy_wrapper: if config.is_ci: config.available_features.add('ci') -if config.cuda_static: - config.available_features.add('cuda_static') -if config.cuda_runtime: +if config.has_cuda: + config.available_features.add('cuda') +if config.cuda_runnable: config.available_features.add('cuda_runtime') if config.has_hip: - config.available_features.add('hip_static') + config.available_features.add('hip') +if config.hip_runnable: + config.available_features.add('hip_runtime') profile_files = getattr(config, 'profile_file', None) typeart_base_lib_dir= getattr(config, 'typeart_base_lib_dir', None) @@ -85,6 +87,10 @@ cuda_flags = getattr(config, 'cuda_flags', '') cuda_link_flags = f"-L{config.cuda_lib} -Wl,-rpath,{config.cuda_lib} -lcudart" hip_host_args = '-x hip --offload-host-only -nogpulib' hip_flags = getattr(config, 'hip_flags', '') +hip_link_flags = getattr(config, 'hip_link', '') +hip_arch = getattr(config, 'hip_arch', '') +if not hip_arch: + hip_arch = 'native' type_file = 'typeart-types.yaml' openmp_c_flags = getattr(config, 'openmp_c_flags', None) openmp_cxx_flags = getattr(config, 'openmp_cxx_flags', None) @@ -151,6 +157,8 @@ config.substitutions.append(('%omp_c_flags', openmp_c_flags)) config.substitutions.append(('%omp_cpp_flags', openmp_cxx_flags)) config.substitutions.append(('%cuda_link', cuda_link_flags)) +config.substitutions.append(('%hip_link', hip_link_flags)) +config.substitutions.append(('%hip_arch', hip_arch)) # TODO refactor typeart arguments (and add args for enabling heap/stack/globals etc.) # config.substitutions.append(('%arg_stack', '-typeart-stack')) diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index 8837a79c..e1f852aa 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -39,13 +39,16 @@ config.tsan=@TYPEARTPASS_TSAN@ config.asan=@TYPEARTPASS_ASAN@ config.ubsan=@TYPEARTPASS_UBSAN@ config.coverage=@TYPEART_COVERAGE@ -config.cuda_static=@TYPEARTPASS_CUDA_STATIC@ -config.cuda_runtime=@TYPEARTPASS_CUDA_RUNTIME@ +config.has_cuda=@TYPEARTPASS_HAS_CUDA@ +config.cuda_runnable=@TYPEARTPASS_CUDA_RUNNABLE@ config.cuda_flags="@TYPEARTPASS_CUDA_FLAGS@" config.cuda_path="@TYPEARTPASS_CUDA_PATH@" config.cuda_lib="@CUDAToolkit_LIBRARY_DIR@" - -config.has_hip = "@TYPEART_HIP_STATIC_AVAILABLE@" +config.has_hip=@TYPEARTPASS_HAS_HIP@ +config.hip_runnable=@TYPEARTPASS_HIP_RUNNABLE@ +config.hip_flags="@TYPEARTPASS_HIP_FLAGS@" +config.hip_link="@TYPEARTPASS_HIP_LINK_FLAGS@" +config.hip_arch="@TYPEART_HIP_ARCH@" config.python_interp = "@Python3_EXECUTABLE@" From 5a9b5f95023d42541ce65ef2ef5b9836c615929d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Thu, 30 Apr 2026 15:56:40 +0200 Subject: [PATCH 13/24] More comprehensive testing --- externals/dimeta/CMakeLists.txt | 2 +- lib/passes/analysis/MemOpData.h | 1 + test/cuda/pass/07_axpy.c | 32 ----------------- ...damalloc_async.c => 07_cudamalloc_async.c} | 0 test/cuda/pass/08_cudamalloc_inline_types.c | 34 +++++++++++++++++++ test/cuda/pass/09_cudamalloc_async.c | 34 +++++++++++++++++++ test/cuda/pass/10_cudamalloc_gpu_disabled.c | 19 +++++++++++ test/cuda/runtime/07_axpy.c | 2 +- test/hip/pass/01_hipmalloc.c | 10 +++--- test/hip/pass/02_hipfree.c | 11 ++++++ test/hip/pass/03_hiphostalloc.c | 17 ++++++++++ test/hip/pass/04_hipmalloc_host.c | 17 ++++++++++ test/hip/pass/05_hipmalloc_managed.c | 17 ++++++++++ test/hip/pass/07_hipmalloc_async.c | 30 ++++++++++++++++ test/hip/pass/08_hipmalloc_inline_types.c | 34 +++++++++++++++++++ test/hip/pass/09_hipmalloc_async.c | 26 ++++++++++++++ test/hip/pass/10_hipmalloc_gpu_disabled.c | 21 ++++++++++++ 17 files changed, 268 insertions(+), 39 deletions(-) delete mode 100644 test/cuda/pass/07_axpy.c rename test/cuda/pass/{08_cudamalloc_async.c => 07_cudamalloc_async.c} (100%) create mode 100644 test/cuda/pass/08_cudamalloc_inline_types.c create mode 100644 test/cuda/pass/09_cudamalloc_async.c create mode 100644 test/cuda/pass/10_cudamalloc_gpu_disabled.c create mode 100644 test/hip/pass/03_hiphostalloc.c create mode 100644 test/hip/pass/04_hipmalloc_host.c create mode 100644 test/hip/pass/05_hipmalloc_managed.c create mode 100644 test/hip/pass/07_hipmalloc_async.c create mode 100644 test/hip/pass/08_hipmalloc_inline_types.c create mode 100644 test/hip/pass/09_hipmalloc_async.c create mode 100644 test/hip/pass/10_hipmalloc_gpu_disabled.c diff --git a/externals/dimeta/CMakeLists.txt b/externals/dimeta/CMakeLists.txt index d0a19ddc..40fd1525 100644 --- a/externals/dimeta/CMakeLists.txt +++ b/externals/dimeta/CMakeLists.txt @@ -1,7 +1,7 @@ FetchContent_Declare( llvm-dimeta GIT_REPOSITORY https://github.com/ahueck/llvm-dimeta - GIT_TAG feat/hip + GIT_TAG devel GIT_SHALLOW 1 ) diff --git a/lib/passes/analysis/MemOpData.h b/lib/passes/analysis/MemOpData.h index 8255eb01..2af24f4e 100644 --- a/lib/passes/analysis/MemOpData.h +++ b/lib/passes/analysis/MemOpData.h @@ -116,6 +116,7 @@ struct MemOps { {"cudaMallocAsync", MemOpKind::CudaMallocLike}, {"cudaMallocFromPoolAsync", MemOpKind::CudaMallocLike}, {"hipMalloc", MemOpKind::HipMallocLike}, + {"hipMallocHost", MemOpKind::HipMallocLike}, {"hipHostMalloc", MemOpKind::HipMallocLike}, {"hipMallocManaged", MemOpKind::HipMallocLike}, {"hipMallocAsync", MemOpKind::HipMallocLike}, diff --git a/test/cuda/pass/07_axpy.c b/test/cuda/pass/07_axpy.c deleted file mode 100644 index 37be150d..00000000 --- a/test/cuda/pass/07_axpy.c +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s - -// REQUIRES: cuda - -// CHECK: Malloc : 2 - -__global__ void axpy(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -int main(int argc, char* argv[]) { - const int kDataLen = 4; - - float a = 2.0f; - float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; - float host_y[kDataLen]; - - float* device_x; - float* device_y; - cudaMalloc((void**)&device_x, kDataLen * sizeof(float)); - cudaMalloc((void**)&device_y, kDataLen * sizeof(float)); - - cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); - - axpy<<<1, kDataLen>>>(a, device_x, device_y); - - cudaDeviceSynchronize(); - cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); - - cudaDeviceReset(); - return 0; -} diff --git a/test/cuda/pass/08_cudamalloc_async.c b/test/cuda/pass/07_cudamalloc_async.c similarity index 100% rename from test/cuda/pass/08_cudamalloc_async.c rename to test/cuda/pass/07_cudamalloc_async.c diff --git a/test/cuda/pass/08_cudamalloc_inline_types.c b/test/cuda/pass/08_cudamalloc_inline_types.c new file mode 100644 index 00000000..f006a72f --- /dev/null +++ b/test/cuda/pass/08_cudamalloc_inline_types.c @@ -0,0 +1,34 @@ +// clang-format off +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=true %apply-typeart -typeart-type-serialization=inline -S 2>&1 | %filecheck %s --check-prefix INLINE +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=true %apply-typeart -typeart-type-serialization=hybrid -S 2>&1 | %filecheck %s --check-prefix HYBRID +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=true %apply-typeart -typeart-type-serialization=file -S 2>&1 | %filecheck %s --check-prefix FILE +// clang-format on + +// REQUIRES: cuda +// REQUIRES: !llvm-14 + +// INLINE: %struct._typeart_struct_layout_t = type { i32, i32, ptr } +// INLINE: call void @__typeart_alloc_mty(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) +// INLINE: call void @__typeart_register_type(ptr @_typeart_{{.*}}) + +// HYBRID: call void @__typeart_alloc_cuda(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// HYBRID: call void @__typeart_alloc_mty(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) +// HYBRID: call void @__typeart_register_type(ptr @_typeart_{{.*}}) + +// FILE: call void @__typeart_alloc_cuda(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) + +typedef struct MyData { + int a; + double b; +} MyData; + +int main() { + const int N = 20; + float* d_x; + MyData* d_s; + + cudaMalloc((void**)&d_x, N * sizeof(float)); + cudaMalloc((void**)&d_s, N * sizeof(MyData)); + + return 0; +} diff --git a/test/cuda/pass/09_cudamalloc_async.c b/test/cuda/pass/09_cudamalloc_async.c new file mode 100644 index 00000000..f46d1996 --- /dev/null +++ b/test/cuda/pass/09_cudamalloc_async.c @@ -0,0 +1,34 @@ +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=true %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check + +// REQUIRES: cuda + +// clang-format off +// LLVM: call i32 @{{.*}}(ptr {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64 80, ptr {{.*}}) +// LLVM-NEXT: [[CUDA_PTR_X:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER_X]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr [[CUDA_PTR_X]], i32 23, i64 20) + +// LLVM: call i32 @{{.*}}(ptr {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64 80, ptr {{.*}}, ptr {{.*}}) +// LLVM-NEXT: [[CUDA_PTR_Y:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER_Y]] +// LLVM-NEXT: call void @__typeart_alloc_gpu(ptr [[CUDA_PTR_Y]], i32 23, i64 20) + +// LLVM_LEGACY: call i32 @{{.*}}({{(ptr|i8\*\*)}} {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64 80, {{(ptr|i8\*)}} {{.*}}) +// LLVM_LEGACY: [[CUDA_PTR_X:%[0-9a-z_]+]] = load i8*, i8** [[CU_POINTER_X]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[CUDA_PTR_X]], i32 23, i64 20) + +// LLVM_LEGACY: call i32 @{{.*}}({{(ptr|i8\*\*)}} {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64 80, {{(ptr|i8\*)}} {{.*}}, {{(ptr|i8\*)}} {{.*}}) +// LLVM_LEGACY: [[CUDA_PTR_Y:%[0-9a-z_]+]] = load i8*, i8** [[CU_POINTER_Y]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[CUDA_PTR_Y]], i32 23, i64 20) +// clang-format on + +int main() { + const int N = 20; + float* d_x; + float* d_y; + cudaStream_t stream = 0; + cudaMemPool_t pool = 0; + + cudaMallocAsync((void**)&d_x, N * sizeof(float), stream); + cudaMallocFromPoolAsync((void**)&d_y, N * sizeof(float), pool, stream); + + return 0; +} diff --git a/test/cuda/pass/10_cudamalloc_gpu_disabled.c b/test/cuda/pass/10_cudamalloc_gpu_disabled.c new file mode 100644 index 00000000..2c001879 --- /dev/null +++ b/test/cuda/pass/10_cudamalloc_gpu_disabled.c @@ -0,0 +1,19 @@ +// RUN: %cuda-c-to-llvm %s | %apply-typeart --typeart-gpu=false -S 2>&1 | %filecheck %s +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=false %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: cuda + +// CHECK: call i32 @{{.*}}({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// CHECK: call i32 @cudaFree({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) +// CHECK-NOT: call void @__typeart_alloc_gpu( +// CHECK-NOT: call void @__typeart_free_gpu( + +int main() { + const int N = 20; + float* d_x; + + cudaMalloc((void**)&d_x, N * sizeof(float)); + cudaFree(d_x); + + return 0; +} diff --git a/test/cuda/runtime/07_axpy.c b/test/cuda/runtime/07_axpy.c index 440d87c4..4cc61e02 100644 --- a/test/cuda/runtime/07_axpy.c +++ b/test/cuda/runtime/07_axpy.c @@ -1,7 +1,7 @@ // RUN: TYPEART_GPU=true %wrapper-cc -x cuda --cuda-gpu-arch=sm_50 %cuda_link %s -o %s.exe // RUN: %s.exe 2>&1 | %filecheck %s -// REQUIRES: cuda_runtime +// REQUIRES: cuda_runtime && softcounter // UNSUPPORTED: sanitizer // CHECK: [0]=2 [1]=4 [2]=6 [3]=8 diff --git a/test/hip/pass/01_hipmalloc.c b/test/hip/pass/01_hipmalloc.c index ca7dd7c2..ba4aeba3 100644 --- a/test/hip/pass/01_hipmalloc.c +++ b/test/hip/pass/01_hipmalloc.c @@ -1,17 +1,17 @@ -// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s // REQUIRES: hip && !llvm-14 -// LLVM: call i32 @hipMalloc(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], -// LLVM: [[HIP_LOAD:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] -// LLVM: call void @__typeart_alloc_gpu(ptr [[HIP_LOAD]], i32 23 +// CHECK: call i32 @hipMalloc(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], +// CHECK-NEXT: [[HIP_PTR:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR]], i32 23, i64 20) #include int main() { const int N = 20; float* d_x; - hipMalloc(&d_x, N * sizeof(float)); + hipMalloc((void**)&d_x, N * sizeof(float)); return 0; } diff --git a/test/hip/pass/02_hipfree.c b/test/hip/pass/02_hipfree.c index 24046408..5ff21f9f 100644 --- a/test/hip/pass/02_hipfree.c +++ b/test/hip/pass/02_hipfree.c @@ -5,11 +5,22 @@ // CHECK: call i32 @hipFree(ptr {{.*}}[[HIP_POINTER:%[0-9a-z]+]]) // CHECK-NEXT: __typeart_free_gpu(ptr {{.*}}[[HIP_POINTER]]) +// CHECK: call i32 @hipFreeHost(ptr {{.*}}[[HIP_POINTER:%[0-9a-z]+]]) +// CHECK-NEXT: __typeart_free_gpu(ptr {{.*}}[[HIP_POINTER]]) + +// CHECK: call i32 @hipFreeAsync(ptr {{.*}}[[HIP_POINTER:%[0-9a-z]+]], +// CHECK-NEXT: __typeart_free_gpu(ptr {{.*}}[[HIP_POINTER]]) + #include int main() { float* d_x; hipFree(d_x); + hipFreeHost(d_x); + + hipStream_t stream; + hipFreeAsync(d_x, stream); + return 0; } diff --git a/test/hip/pass/03_hiphostalloc.c b/test/hip/pass/03_hiphostalloc.c new file mode 100644 index 00000000..80a0079a --- /dev/null +++ b/test/hip/pass/03_hiphostalloc.c @@ -0,0 +1,17 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip && !llvm-14 + +// CHECK: call i32 @hipHostMalloc(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], +// CHECK-NEXT: [[HIP_PTR:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr {{.*}}[[HIP_PTR]], + +#include +int main() { + const int N = 20; + float* d_x; + + hipHostMalloc((void**)&d_x, N * sizeof(float), hipHostMallocDefault); + + return 0; +} diff --git a/test/hip/pass/04_hipmalloc_host.c b/test/hip/pass/04_hipmalloc_host.c new file mode 100644 index 00000000..cc5e4468 --- /dev/null +++ b/test/hip/pass/04_hipmalloc_host.c @@ -0,0 +1,17 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip && !llvm-14 + +// CHECK: call i32 @hipMallocHost(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], +// CHECK-NEXT: [[HIP_PTR:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR]], i32 23, i64 20) + +#include +int main() { + const int N = 20; + float* d_x; + + hipMallocHost((void**)&d_x, N * sizeof(float)); + + return 0; +} diff --git a/test/hip/pass/05_hipmalloc_managed.c b/test/hip/pass/05_hipmalloc_managed.c new file mode 100644 index 00000000..861367fc --- /dev/null +++ b/test/hip/pass/05_hipmalloc_managed.c @@ -0,0 +1,17 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip && !llvm-14 + +// CHECK: call i32 @hipMallocManaged(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], +// CHECK-NEXT: [[HIP_PTR:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR]], i32 23, i64 20) + +#include +int main() { + const int N = 20; + float* d_x; + + hipMallocManaged((void**)&d_x, N * sizeof(float)); + + return 0; +} diff --git a/test/hip/pass/07_hipmalloc_async.c b/test/hip/pass/07_hipmalloc_async.c new file mode 100644 index 00000000..43547b5f --- /dev/null +++ b/test/hip/pass/07_hipmalloc_async.c @@ -0,0 +1,30 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip && !llvm-14 + +// clang-format off +// CHECK: call i32 @hipMallocAsync(ptr {{.*}}[[HIP_POINTER_X:%[_0-9a-z]+]], i64{{.*}} 80, ptr {{.*}}) +// CHECK-NEXT: [[HIP_PTR_X:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER_X]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR_X]], i32 23, i64 20) + +// CHECK: call i32 @hipMallocFromPoolAsync(ptr {{.*}}[[HIP_POINTER_Y:%[_0-9a-z]+]], i64{{.*}} 80, ptr {{.*}}, ptr {{.*}}) +// CHECK-NEXT: [[HIP_PTR_Y:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER_Y]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR_Y]], i32 23, i64 20) +// clang-format on + +#include +int main() { + const int N = 20; + float* d_x; + float* d_y; + + hipStream_t stream; + hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); + + hipMallocAsync((void**)&d_x, N * sizeof(float), stream); + + hipMemPool_t pool; + hipMallocFromPoolAsync((void**)&d_y, N * sizeof(float), pool, stream); + + return 0; +} diff --git a/test/hip/pass/08_hipmalloc_inline_types.c b/test/hip/pass/08_hipmalloc_inline_types.c new file mode 100644 index 00000000..417acc5a --- /dev/null +++ b/test/hip/pass/08_hipmalloc_inline_types.c @@ -0,0 +1,34 @@ +// clang-format off +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=true %apply-typeart -typeart-type-serialization=inline -S 2>&1 | %filecheck %s --check-prefix INLINE +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=true %apply-typeart -typeart-type-serialization=hybrid -S 2>&1 | %filecheck %s --check-prefix HYBRID +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=true %apply-typeart -typeart-type-serialization=file -S 2>&1 | %filecheck %s --check-prefix FILE +// clang-format on + +// REQUIRES: hip && !llvm-14 + +// INLINE: %struct._typeart_struct_layout_t = type { i32, i32, ptr } +// INLINE: call void @__typeart_alloc_mty_gpu(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) +// INLINE: call void @__typeart_register_type(ptr @_typeart_{{.*}}) + +// HYBRID: call void @__typeart_alloc_gpu(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// HYBRID: call void @__typeart_alloc_mty_gpu(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) +// HYBRID: call void @__typeart_register_type(ptr @_typeart_{{.*}}) + +// FILE: call void @__typeart_alloc_gpu(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) + +#include +typedef struct MyData { + int a; + double b; +} MyData; + +int main() { + const int N = 20; + float* d_x; + MyData* d_s; + + hipMalloc((void**)&d_x, N * sizeof(float)); + hipMalloc((void**)&d_s, N * sizeof(MyData)); + + return 0; +} diff --git a/test/hip/pass/09_hipmalloc_async.c b/test/hip/pass/09_hipmalloc_async.c new file mode 100644 index 00000000..7217efb6 --- /dev/null +++ b/test/hip/pass/09_hipmalloc_async.c @@ -0,0 +1,26 @@ +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip && !llvm-14 + +// CHECK: call i32 @{{.*}}(ptr {{.*}}[[HIP_POINTER_X:%[_0-9a-z]+]], i64 {{.*}}80, ptr {{.*}}) +// CHECK-NEXT: [[HIP_PTR_X:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER_X]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR_X]], i32 23, i64 20) + +// CHECK: call i32 @{{.*}}(ptr {{.*}}[[HIP_POINTER_Y:%[_0-9a-z]+]], i64 {{.*}}80, ptr {{.*}}, ptr {{.*}}) +// CHECK-NEXT: [[HIP_PTR_Y:%[0-9a-z_]+]] = load ptr, ptr [[HIP_POINTER_Y]] +// CHECK-NEXT: call void @__typeart_alloc_gpu(ptr [[HIP_PTR_Y]], i32 23, i64 20) + +#include + +int main() { + const int N = 20; + float* d_x; + float* d_y; + hipStream_t stream = 0; + hipMemPool_t pool = 0; + + hipMallocAsync((void**)&d_x, N * sizeof(float), stream); + hipMallocFromPoolAsync((void**)&d_y, N * sizeof(float), pool, stream); + + return 0; +} diff --git a/test/hip/pass/10_hipmalloc_gpu_disabled.c b/test/hip/pass/10_hipmalloc_gpu_disabled.c new file mode 100644 index 00000000..14e97ea9 --- /dev/null +++ b/test/hip/pass/10_hipmalloc_gpu_disabled.c @@ -0,0 +1,21 @@ +// RUN: %hip-cpp-to-llvm %s | %apply-typeart --typeart-gpu=false -S 2>&1 | %filecheck %s +// RUN: %hip-cpp-to-llvm %s | TYPEART_GPU=0 %apply-typeart -S 2>&1 | %filecheck %s + +// REQUIRES: hip && !llvm-14 + +// CHECK: call i32 @{{.*}}(ptr {{.*}}[[HIP_POINTER:%[_0-9a-z]+]], +// CHECK: [[HIP_PTR:%[_0-9a-z]+]] = load ptr, ptr [[HIP_POINTER]] +// CHECK: call i32 @hipFree(ptr {{.*}}[[HIP_PTR]]) +// CHECK-NOT: call void @__typeart_alloc_gpu( +// CHECK-NOT: call void @__typeart_free_gpu( + +#include +int main() { + const int N = 20; + float* d_x; + + hipMalloc((void**)&d_x, N * sizeof(float)); + hipFree(d_x); + + return 0; +} From c76761920e88c78c60e129126354271129878758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Thu, 30 Apr 2026 16:49:04 +0200 Subject: [PATCH 14/24] CUDA test fixes --- test/cuda/pass/08_cudamalloc_inline_types.c | 8 +++---- test/cuda/pass/09_cudamalloc_async.c | 24 ++++++++++++--------- test/cuda/pass/10_cudamalloc_gpu_disabled.c | 6 +++--- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/test/cuda/pass/08_cudamalloc_inline_types.c b/test/cuda/pass/08_cudamalloc_inline_types.c index f006a72f..29953c15 100644 --- a/test/cuda/pass/08_cudamalloc_inline_types.c +++ b/test/cuda/pass/08_cudamalloc_inline_types.c @@ -8,14 +8,14 @@ // REQUIRES: !llvm-14 // INLINE: %struct._typeart_struct_layout_t = type { i32, i32, ptr } -// INLINE: call void @__typeart_alloc_mty(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) +// INLINE: call void @__typeart_alloc_mty_gpu(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) // INLINE: call void @__typeart_register_type(ptr @_typeart_{{.*}}) -// HYBRID: call void @__typeart_alloc_cuda(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) -// HYBRID: call void @__typeart_alloc_mty(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) +// HYBRID: call void @__typeart_alloc_gpu(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// HYBRID: call void @__typeart_alloc_mty_gpu(ptr %{{[0-9a-z]+}}, ptr @_typeart_{{.*}}, i64 {{.*}}) // HYBRID: call void @__typeart_register_type(ptr @_typeart_{{.*}}) -// FILE: call void @__typeart_alloc_cuda(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) +// FILE: call void @__typeart_alloc_gpu(ptr %{{[0-9a-z]+}}, i32 {{[0-9]+}}, i64 {{.*}}) typedef struct MyData { int a; diff --git a/test/cuda/pass/09_cudamalloc_async.c b/test/cuda/pass/09_cudamalloc_async.c index f46d1996..b5c4967d 100644 --- a/test/cuda/pass/09_cudamalloc_async.c +++ b/test/cuda/pass/09_cudamalloc_async.c @@ -1,23 +1,27 @@ -// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=true %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check +// RUN: %cuda-c-to-llvm %s | TYPEART_GPU=1 %apply-typeart -S 2>&1 | %filecheck %s --check-prefix=%llvm-version-check // REQUIRES: cuda // clang-format off -// LLVM: call i32 @{{.*}}(ptr {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64 80, ptr {{.*}}) +// LLVM: call i32 @cudaMallocAsync(ptr {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64 {{.*}}80, ptr {{.*}}) // LLVM-NEXT: [[CUDA_PTR_X:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER_X]] // LLVM-NEXT: call void @__typeart_alloc_gpu(ptr [[CUDA_PTR_X]], i32 23, i64 20) -// LLVM: call i32 @{{.*}}(ptr {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64 80, ptr {{.*}}, ptr {{.*}}) +// LLVM: call i32 @cudaMallocFromPoolAsync(ptr {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64 {{.*}}80, ptr {{.*}}, ptr {{.*}}) // LLVM-NEXT: [[CUDA_PTR_Y:%[0-9a-z_]+]] = load ptr, ptr [[CU_POINTER_Y]] // LLVM-NEXT: call void @__typeart_alloc_gpu(ptr [[CUDA_PTR_Y]], i32 23, i64 20) -// LLVM_LEGACY: call i32 @{{.*}}({{(ptr|i8\*\*)}} {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64 80, {{(ptr|i8\*)}} {{.*}}) -// LLVM_LEGACY: [[CUDA_PTR_X:%[0-9a-z_]+]] = load i8*, i8** [[CU_POINTER_X]] -// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[CUDA_PTR_X]], i32 23, i64 20) - -// LLVM_LEGACY: call i32 @{{.*}}({{(ptr|i8\*\*)}} {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64 80, {{(ptr|i8\*)}} {{.*}}, {{(ptr|i8\*)}} {{.*}}) -// LLVM_LEGACY: [[CUDA_PTR_Y:%[0-9a-z_]+]] = load i8*, i8** [[CU_POINTER_Y]] -// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[CUDA_PTR_Y]], i32 23, i64 20) +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMallocAsync(i8** {{.*}}[[CU_POINTER_X:%[_0-9a-z]+]], i64 {{.*}}80, +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) + +// LLVM_LEGACY: [[CAST1:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR:%[0-9a-zA-Z_]+]] to i8** +// LLVM_LEGACY: call i32 @cudaMallocFromPoolAsync(i8** {{.*}}[[CU_POINTER_Y:%[_0-9a-z]+]], i64 {{.*}}80, +// LLVM_LEGACY: [[CAST2:%[0-9a-z_]+]] = bitcast float** [[SRC_VAR]] to i8** +// LLVM_LEGACY: [[LOADED_PTR:%[0-9a-z_]+]] = load i8*, i8** [[CAST2]] +// LLVM_LEGACY: call void @__typeart_alloc_gpu(i8* [[LOADED_PTR]], i32 23, i64 20) // clang-format on int main() { diff --git a/test/cuda/pass/10_cudamalloc_gpu_disabled.c b/test/cuda/pass/10_cudamalloc_gpu_disabled.c index 2c001879..47f0a662 100644 --- a/test/cuda/pass/10_cudamalloc_gpu_disabled.c +++ b/test/cuda/pass/10_cudamalloc_gpu_disabled.c @@ -1,10 +1,10 @@ // RUN: %cuda-c-to-llvm %s | %apply-typeart --typeart-gpu=false -S 2>&1 | %filecheck %s // RUN: %cuda-c-to-llvm %s | TYPEART_GPU=false %apply-typeart -S 2>&1 | %filecheck %s -// REQUIRES: cuda +// REQUIRES: cuda && !llvm-14 -// CHECK: call i32 @{{.*}}({{(ptr|i8\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], -// CHECK: call i32 @cudaFree({{(ptr|i8\*)}} {{.*}}[[CU_POINTER]]) +// CHECK: call i32 @cudaMalloc({{(ptr|i8\*\*)}} {{.*}}[[CU_POINTER:%[_0-9a-z]+]], +// CHECK: call i32 @cudaFree({{(ptr|i8\*)}} // CHECK-NOT: call void @__typeart_alloc_gpu( // CHECK-NOT: call void @__typeart_free_gpu( From 3a66c4782d73c8613552522f10c163d2f2f5c31e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Thu, 30 Apr 2026 16:56:39 +0200 Subject: [PATCH 15/24] HIP runtime test --- test/hip/runtime/01_axpy.c | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 test/hip/runtime/01_axpy.c diff --git a/test/hip/runtime/01_axpy.c b/test/hip/runtime/01_axpy.c new file mode 100644 index 00000000..89f61eb8 --- /dev/null +++ b/test/hip/runtime/01_axpy.c @@ -0,0 +1,43 @@ +// RUN: TYPEART_GPU=true %wrapper-cc -x hip --offload-arch=%hip_arch %hip_link %s -o %s.exe +// RUN: %s.exe 2>&1 | %filecheck %s + +// REQUIRES: hip_runtime && !llvm-14 && softcounter +// UNSUPPORTED: sanitizer + +// CHECK: [0]=2 [1]=4 [2]=6 [3]=8 +// CHECK: Total heap{{[ ]*}}: 2 , 2 , - + +#include +#include +__global__ void axpy(float a, float* x, float* y) { + y[threadIdx.x] = a * x[threadIdx.x]; +} + +int main(int argc, char* argv[]) { + const int kDataLen = 4; + + float a = 2.0f; + float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; + float host_y[kDataLen]; + + float* device_x; + float* device_y; + hipMalloc((void**)&device_x, kDataLen * sizeof(float)); + hipMalloc((void**)&device_y, kDataLen * sizeof(float)); + + hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice); + + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + hipDeviceSynchronize(); + hipMemcpy(host_y, device_y, kDataLen * sizeof(float), hipMemcpyDeviceToHost); + + hipDeviceReset(); + + for (int i = 0; i < kDataLen; ++i) { + printf("[%i]=%.0f ", i, host_y[i]); + } + printf("\n"); + + return 0; +} From 61645e210dbde5328c08f2f0601055b8addbc8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Thu, 30 Apr 2026 18:02:27 +0200 Subject: [PATCH 16/24] Enable CUDA support in CI --- .github/actions/setup-typeart/action.yml | 82 +++++++++++++++++ .github/workflows/basic-ci.yml | 109 ++++++++++++++--------- 2 files changed, 150 insertions(+), 41 deletions(-) create mode 100644 .github/actions/setup-typeart/action.yml diff --git a/.github/actions/setup-typeart/action.yml b/.github/actions/setup-typeart/action.yml new file mode 100644 index 00000000..cbe854a1 --- /dev/null +++ b/.github/actions/setup-typeart/action.yml @@ -0,0 +1,82 @@ +name: 'Setup TypeART Environment' +description: 'Sets up LLVM, Clang, OpenMPI, and other dependencies for TypeART CI' + +inputs: + llvm-version: + description: 'LLVM version to install' + required: true + typeart-typegen-legacy: + description: 'Set TYPEART_TYPEGEN_IR env variable' + required: false + install-libcxx: + description: 'Install libc++' + required: false + default: 'false' + install-lcov: + description: 'Install lcov' + required: false + default: 'false' + install-omp: + description: 'Install LLVM OpenMP runtime' + required: false + default: 'true' + setup-mold: + description: 'Setup Mold Linker' + required: false + default: 'false' + +runs: + using: "composite" + steps: + - name: LLVM apt + if: ${{ inputs.llvm-version >= 19 }} + shell: bash + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-${{ inputs.llvm-version }} main" | sudo tee /etc/apt/sources.list.d/llvm-${{ inputs.llvm-version }}.list + + - name: Update apt + shell: bash + run: sudo apt-get update + + - name: Install LLVM + shell: bash + run: sudo apt-get install -y libllvm${{ inputs.llvm-version }} llvm-${{ inputs.llvm-version }} llvm-${{ inputs.llvm-version }}-dev + + - name: Install LLVM OpenMP runtime + if: ${{ inputs.install-omp == 'true' }} + shell: bash + run: sudo apt-get install -y libomp-${{ inputs.llvm-version }}-dev + + - name: Install Clang + shell: bash + run: sudo apt-get install -y clang-${{ inputs.llvm-version }} clang-tidy-${{ inputs.llvm-version }} + + - name: Install libc++ + if: ${{ inputs.install-libcxx == 'true' }} + shell: bash + run: sudo apt-get install -y --no-install-recommends libc++-${{ inputs.llvm-version }}-dev libc++abi-${{ inputs.llvm-version }}-dev + + - name: Install OpenMPI + shell: bash + run: sudo apt-get install -y libopenmpi-dev openmpi-bin + + - name: Install lcov + if: ${{ inputs.install-lcov == 'true' }} + shell: bash + run: sudo apt-get install -y lcov + + - name: Setup Mold Linker + if: ${{ inputs.setup-mold == 'true' && inputs.llvm-version > 14 }} + uses: rui314/setup-mold@v1 + + - name: Setup env + shell: bash + run: | + sudo ln -f -s /usr/bin/clang-${{ inputs.llvm-version }} /usr/bin/clang + sudo ln -f -s /usr/bin/clang++-${{ inputs.llvm-version }} /usr/bin/clang++ + echo "LLVM_CMAKE_DIR=/usr/lib/llvm-${{ inputs.llvm-version }}/cmake" >> $GITHUB_ENV + echo "EXTERNAL_LIT=/usr/lib/llvm-${{ inputs.llvm-version >= 20 && 18 || inputs.llvm-version }}/build/utils/lit/lit.py" >> $GITHUB_ENV + if [ -n "${{ inputs.typeart-typegen-legacy }}" ]; then + echo "TYPEART_TYPEGEN_IR=${{ inputs.typeart-typegen-legacy }}" >> $GITHUB_ENV + fi diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index 8c420922..d7936d6a 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -81,80 +81,107 @@ jobs: steps: - uses: actions/checkout@v6 - - name: LLVM apt - if: ${{ matrix.platform.llvm-version >= 19 }} - run: | - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-${{ matrix.platform.llvm-version }} main" | sudo tee /etc/apt/sources.list.d/llvm-${{ matrix.platform.llvm-version }}.list - - - name: Update apt - run: sudo apt-get update + - name: Setup TypeART Environment + uses: ./.github/actions/setup-typeart + with: + llvm-version: ${{ matrix.platform.llvm-version }} + typeart-typegen-legacy: ${{ matrix.platform.typeart-typegen-legacy }} + install-libcxx: ${{ matrix.preset.libcxx || 'false' }} + install-lcov: ${{ matrix.preset.coverage || 'false' }} + install-omp: 'true' + setup-mold: 'true' - - name: Install LLVM - run: sudo apt-get install libllvm${{ matrix.platform.llvm-version }} llvm-${{ matrix.platform.llvm-version }} llvm-${{ matrix.platform.llvm-version }}-dev + - name: Configure TypeART + run: cmake -B build --preset ${{ matrix.preset.name }} -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} - - name: Install LLVM OpenMP runtime - run: sudo apt-get install libomp-${{ matrix.platform.llvm-version }}-dev + - name: Build TypeART + run: cmake --build build --parallel 2 - - name: Install Clang - run: sudo apt-get install clang-${{ matrix.platform.llvm-version }} clang-tidy-${{ matrix.platform.llvm-version }} + - name: Prepare TypeART coverage + if: matrix.preset.coverage + run: cmake --build build --target typeart-lcov-clean - - name: Install libc++ - if: matrix.preset.libcxx - run: sudo apt-get install --no-install-recommends libc++-${{ matrix.platform.llvm-version }}-dev libc++abi-${{ matrix.platform.llvm-version }}-dev + - name: Test TypeART lit-suite + if: matrix.preset.skip_test == false + run: cmake --build build --target check-typeart - - name: Install OpenMPI - run: sudo apt-get install libopenmpi-dev openmpi-bin + - name: Build coverage report + if: matrix.preset.coverage + run: cmake --build build --target typeart-lcov-html - - name: Install lcov + - name: Coveralls (parallel) if: matrix.preset.coverage - run: sudo apt-get install lcov + uses: coverallsapp/github-action@v2.3.6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: build/typeart.coverage + flag-name: ${{ matrix.preset.name }}-${{ matrix.platform.llvm-version }}-${{ matrix.platform.typeart-typegen-legacy }} + parallel: true - - name: Setup Mold Linker - if: ${{ matrix.platform.llvm-version > 14 }} - uses: rui314/setup-mold@v1 + cuda-suite: + strategy: + fail-fast: false + matrix: + include: + - llvm-version: 14 + os: ubuntu-22.04 + cuda: 11.8.0 + - llvm-version: 22 + os: ubuntu-24.04 + cuda: 12.6.0 - - name: Setup env - run: | - sudo ln -f -s /usr/bin/clang-${{ matrix.platform.llvm-version }} /usr/bin/clang - sudo ln -f -s /usr/bin/clang++-${{ matrix.platform.llvm-version }} /usr/bin/clang++ - echo "LLVM_CMAKE_DIR=/usr/lib/llvm-${{ matrix.platform.llvm-version }}/cmake" >> $GITHUB_ENV - echo "EXTERNAL_LIT=/usr/lib/llvm-${{ matrix.platform.llvm-version >= 20 && 18 || matrix.platform.llvm-version }}/build/utils/lit/lit.py" >> $GITHUB_ENV - echo "TYPEART_TYPEGEN_IR=${{ matrix.platform.typeart-typegen-legacy }}" >> $GITHUB_ENV + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v6 + + - uses: Jimver/cuda-toolkit@v0.2.35 + id: cuda-toolkit + with: + cuda: "${{ matrix.cuda }}" + method: network + sub-packages: '["nvcc", "cudart", "cudart-dev"]' + non-cuda-sub-packages: '["libcurand", "libcurand-dev"]' + + - name: Setup TypeART Environment + uses: ./.github/actions/setup-typeart + with: + llvm-version: ${{ matrix.llvm-version }} + typeart-typegen-legacy: 0 + install-lcov: 'true' + install-omp: 'true' + setup-mold: 'false' - name: Configure TypeART - run: cmake -B build --preset ${{ matrix.preset.name }} -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} + run: cmake -B build --preset ci-cov-thread-safe -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} - name: Build TypeART run: cmake --build build --parallel 2 - name: Prepare TypeART coverage - if: matrix.preset.coverage run: cmake --build build --target typeart-lcov-clean - - name: Test TypeART lit-suite - if: matrix.preset.skip_test == false - run: cmake --build build --target check-typeart + - name: Test TypeART cuda-suite + run: cmake --build build --target check-typeart-cuda - name: Build coverage report - if: matrix.preset.coverage run: cmake --build build --target typeart-lcov-html - name: Coveralls (parallel) - if: matrix.preset.coverage uses: coverallsapp/github-action@v2.3.6 with: github-token: ${{ secrets.GITHUB_TOKEN }} path-to-lcov: build/typeart.coverage - flag-name: ${{ matrix.preset.name }}-${{ matrix.platform.llvm-version }}-${{ matrix.platform.typeart-typegen-legacy }} + flag-name: cuda-suite-${{ matrix.llvm-version }}-${{ matrix.cuda }} parallel: true finish-coverage: - needs: lit-suite + if: ${{ always() }} + needs: [lit-suite, cuda-suite] runs-on: ubuntu-24.04 steps: - name: Coveralls Finished uses: coverallsapp/github-action@v2.3.6 with: github-token: ${{ secrets.GITHUB_TOKEN }} - parallel-finished: true \ No newline at end of file + parallel-finished: true From 88918912c3284267a5f037f33cb3e5d8332658e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Thu, 30 Apr 2026 18:23:32 +0200 Subject: [PATCH 17/24] CI error fixes --- .github/workflows/basic-ci.yml | 3 ++- cmake/modules/coverage-lcov.cmake | 18 ++++++++++++++++-- lib/passes/Commandline.cpp | 12 ++++++------ lib/passes/analysis/MemOpVisitor.cpp | 2 +- .../configuration/EnvironmentConfiguration.cpp | 3 +-- .../instrumentation/TypeARTFunctions.cpp | 2 +- lib/passes/support/CudaUtil.h | 2 +- lib/passes/support/HipUtil.h | 2 +- lib/passes/typegen/dimeta/DimetaTypeGen.cpp | 6 ++++-- 9 files changed, 33 insertions(+), 17 deletions(-) diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index d7936d6a..77dd7684 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -162,7 +162,7 @@ jobs: run: cmake --build build --target typeart-lcov-clean - name: Test TypeART cuda-suite - run: cmake --build build --target check-typeart-cuda + run: cmake --build build --target check-typeart - name: Build coverage report run: cmake --build build --target typeart-lcov-html @@ -181,6 +181,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Coveralls Finished + uses: coverallsapp/github-action@v2.3.6 with: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/cmake/modules/coverage-lcov.cmake b/cmake/modules/coverage-lcov.cmake index c97a985b..0b836576 100644 --- a/cmake/modules/coverage-lcov.cmake +++ b/cmake/modules/coverage-lcov.cmake @@ -5,6 +5,20 @@ if(TYPEART_LCOV_EXEC-NOTFOUND OR TYPEART_GENHTML_EXEC-NOTFOUND) message(WARNING "lcov and genhtml command needed for coverage.") endif() +# Detect whether lcov supports: --ignore-errors unused +set(TYPEART_LCOV_IGNORE_UNUSED) +if(TYPEART_LCOV_EXEC) + execute_process( + COMMAND ${TYPEART_LCOV_EXEC} --ignore-errors unused --version + RESULT_VARIABLE TYPEART_LCOV_IGNORE_UNUSED_RESULT + OUTPUT_QUIET + ERROR_QUIET + ) + if(TYPEART_LCOV_IGNORE_UNUSED_RESULT EQUAL 0) + set(TYPEART_LCOV_IGNORE_UNUSED --ignore-errors unused) + endif() +endif() + add_custom_target( typeart-lcov-clean COMMAND ${TYPEART_LCOV_EXEC} -d ${CMAKE_BINARY_DIR} -z @@ -27,7 +41,7 @@ endif() add_custom_target( typeart-lcov-make - COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} + COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} ${TYPEART_LCOV_IGNORE_UNUSED} --no-external -c -d ${CMAKE_BINARY_DIR} -b ${CMAKE_SOURCE_DIR} -o typeart.coverage COMMAND ${TYPEART_LCOV_EXEC} --rc derive_function_end_line=0 --remove typeart.coverage '${CMAKE_BINARY_DIR}/*' -o typeart.coverage ) @@ -50,7 +64,7 @@ function(typeart_target_lcov target) add_custom_target( typeart-lcov-make-${target} - COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} + COMMAND ${TYPEART_LCOV_EXEC} ${GCOV_TOOL} ${GCOV_WORKAROUND} ${TYPEART_LCOV_IGNORE_UNUSED} --no-external -c -d ${CMAKE_BINARY_DIR} -b ${LCOV_TARGET_SOURCE_DIR} -o counter-${target}.pro COMMAND ${TYPEART_LCOV_EXEC} --remove counter-${target}.pro '${CMAKE_BINARY_DIR}/*' diff --git a/lib/passes/Commandline.cpp b/lib/passes/Commandline.cpp index 8e4100ee..b4171625 100644 --- a/lib/passes/Commandline.cpp +++ b/lib/passes/Commandline.cpp @@ -70,14 +70,14 @@ static cl::opt cl_typeart_stats(CommandlineStdArgs: cl::cat(typeart_category)); static cl::opt cl_typeart_instrument_heap(CommandlineStdArgs::heap, - cl::desc(ConfigStdArgDescriptions::heap), - cl::init(ConfigStdArgValues::heap), - cl::cat(typeart_category)); + cl::desc(ConfigStdArgDescriptions::heap), + cl::init(ConfigStdArgValues::heap), + cl::cat(typeart_category)); static cl::opt cl_typeart_instrument_gpu(CommandlineStdArgs::gpu, - cl::desc(ConfigStdArgDescriptions::gpu), - cl::init(ConfigStdArgValues::gpu), - cl::cat(typeart_category)); + cl::desc(ConfigStdArgDescriptions::gpu), + cl::init(ConfigStdArgValues::gpu), + cl::cat(typeart_category)); static cl::opt cl_typeart_instrument_global(CommandlineStdArgs::global, cl::desc(ConfigStdArgDescriptions::global), diff --git a/lib/passes/analysis/MemOpVisitor.cpp b/lib/passes/analysis/MemOpVisitor.cpp index e77762cd..57a81d61 100644 --- a/lib/passes/analysis/MemOpVisitor.cpp +++ b/lib/passes/analysis/MemOpVisitor.cpp @@ -16,8 +16,8 @@ #include "compat/CallSite.h" #include "configuration/Configuration.h" #include "support/ConfigurationBase.h" -#include "support/GpuUtil.h" #include "support/Error.h" +#include "support/GpuUtil.h" #include "support/Logger.h" #include "support/TypeUtil.h" #include "support/Util.h" diff --git a/lib/passes/configuration/EnvironmentConfiguration.cpp b/lib/passes/configuration/EnvironmentConfiguration.cpp index 6eec7254..c36eb9c4 100644 --- a/lib/passes/configuration/EnvironmentConfiguration.cpp +++ b/lib/passes/configuration/EnvironmentConfiguration.cpp @@ -110,8 +110,7 @@ EnvironmentFlagsOptions::EnvironmentFlagsOptions() { EnvironmentStdArgsValues::stats), make_entry(ConfigStdArgs::heap, EnvironmentStdArgs::heap, EnvironmentStdArgsValues::heap), - make_entry(ConfigStdArgs::gpu, EnvironmentStdArgs::gpu, - EnvironmentStdArgsValues::gpu), + make_entry(ConfigStdArgs::gpu, EnvironmentStdArgs::gpu, EnvironmentStdArgsValues::gpu), make_entry(ConfigStdArgs::global, EnvironmentStdArgs::global, EnvironmentStdArgsValues::global), make_entry(ConfigStdArgs::stack, EnvironmentStdArgs::stack, diff --git a/lib/passes/instrumentation/TypeARTFunctions.cpp b/lib/passes/instrumentation/TypeARTFunctions.cpp index b2bc1cea..0828ca00 100644 --- a/lib/passes/instrumentation/TypeARTFunctions.cpp +++ b/lib/passes/instrumentation/TypeARTFunctions.cpp @@ -100,7 +100,7 @@ IFunc ifunc_for_function(IFunc general_type, llvm::Value* value) { } else if (llvm::isa(value)) { type = detail::ifunc_type_for(nullptr); } else if (auto callbase = llvm::dyn_cast(value)) { - type = detail::ifunc_type_for(callbase->getFunction()); + type = detail::ifunc_type_for(callbase->getFunction()); auto called_context = detail::ifunc_type_for(callbase->getCalledFunction()); if (called_context == detail::IFuncType::cuda || called_context == detail::IFuncType::hip) { type = called_context; diff --git a/lib/passes/support/CudaUtil.h b/lib/passes/support/CudaUtil.h index af4add08..bc1ae58b 100644 --- a/lib/passes/support/CudaUtil.h +++ b/lib/passes/support/CudaUtil.h @@ -50,7 +50,7 @@ inline std::optional bitcast_for(const llvm::CallBase& cuda_ } inline bool is_device_module(const llvm::Module& module) { -#if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 21 const auto triple = module.getTargetTriple().str(); #else const auto triple = module.getTargetTriple(); diff --git a/lib/passes/support/HipUtil.h b/lib/passes/support/HipUtil.h index 360ad074..e22d967d 100644 --- a/lib/passes/support/HipUtil.h +++ b/lib/passes/support/HipUtil.h @@ -50,7 +50,7 @@ inline std::optional bitcast_for(const llvm::CallBase& hip_c } inline bool is_device_module(const llvm::Module& module) { -#if LLVM_VERSION_MAJOR >= 20 +#if LLVM_VERSION_MAJOR >= 21 const auto triple = module.getTargetTriple().str(); #else const auto triple = module.getTargetTriple(); diff --git a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp index 87b4002a..5c2feb32 100644 --- a/lib/passes/typegen/dimeta/DimetaTypeGen.cpp +++ b/lib/passes/typegen/dimeta/DimetaTypeGen.cpp @@ -496,9 +496,11 @@ class DimetaTypeManager final : public TypeIDGenerator { const auto function_name = val->location.function; MemOps mem_operations; - auto kind = call->getCalledFunction() != nullptr ? mem_operations.kind(call->getCalledFunction()->getName()) : std::nullopt; + auto kind = call->getCalledFunction() != nullptr ? mem_operations.kind(call->getCalledFunction()->getName()) + : std::nullopt; - if (kind && is_kind(kind.value(), MemOpKind::GpuMallocLike) && gpu::is_templated_malloc_like(function_name, kind.value())) { + if (kind && is_kind(kind.value(), MemOpKind::GpuMallocLike) && + gpu::is_templated_malloc_like(function_name, kind.value())) { LOG_DEBUG("Workaround for pointer level of call base " << function_name) workaround::remove_pointer_level(call, val.value()); } From f285f0e8f79c5b3cbc630b0ec42ad171b740304c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Fri, 1 May 2026 13:37:10 +0200 Subject: [PATCH 18/24] Fix lcov issue and remove CUDA linking --- cmake/modules/coverage-lcov.cmake | 17 +++++++----- lib/runtime/CMakeLists.txt | 21 --------------- lib/runtime/CudaRuntimeInterface.h | 36 ------------------------- lib/runtime/CudaSupport.cpp | 43 ------------------------------ 4 files changed, 10 insertions(+), 107 deletions(-) delete mode 100644 lib/runtime/CudaRuntimeInterface.h delete mode 100644 lib/runtime/CudaSupport.cpp diff --git a/cmake/modules/coverage-lcov.cmake b/cmake/modules/coverage-lcov.cmake index 0b836576..61893485 100644 --- a/cmake/modules/coverage-lcov.cmake +++ b/cmake/modules/coverage-lcov.cmake @@ -5,17 +5,20 @@ if(TYPEART_LCOV_EXEC-NOTFOUND OR TYPEART_GENHTML_EXEC-NOTFOUND) message(WARNING "lcov and genhtml command needed for coverage.") endif() -# Detect whether lcov supports: --ignore-errors unused +# Detect whether lcov supports: --ignore-errors unused: +# - avoids CUDA error "geninfo: ERROR: 'exclude' pattern '*/Version.cpp' is unused" set(TYPEART_LCOV_IGNORE_UNUSED) if(TYPEART_LCOV_EXEC) execute_process( - COMMAND ${TYPEART_LCOV_EXEC} --ignore-errors unused --version - RESULT_VARIABLE TYPEART_LCOV_IGNORE_UNUSED_RESULT - OUTPUT_QUIET - ERROR_QUIET + COMMAND ${TYPEART_LCOV_EXEC} --version + OUTPUT_VARIABLE TYPEART_LCOV_VERSION_STRING + OUTPUT_STRIP_TRAILING_WHITESPACE ) - if(TYPEART_LCOV_IGNORE_UNUSED_RESULT EQUAL 0) - set(TYPEART_LCOV_IGNORE_UNUSED --ignore-errors unused) + if(TYPEART_LCOV_VERSION_STRING MATCHES "LCOV version ([0-9]+)\\.([0-9]+)") + set(TYPEART_LCOV_VERSION_MAJOR ${CMAKE_MATCH_1}) + if(TYPEART_LCOV_VERSION_MAJOR GREATER_EQUAL 2) + set(TYPEART_LCOV_IGNORE_UNUSED --ignore-errors unused) + endif() endif() endif() diff --git a/lib/runtime/CMakeLists.txt b/lib/runtime/CMakeLists.txt index 9e2c0e46..c889e7d7 100644 --- a/lib/runtime/CMakeLists.txt +++ b/lib/runtime/CMakeLists.txt @@ -18,7 +18,6 @@ add_custom_command( set(RUNTIME_LIB_SOURCES AccessCounter.h CallbackInterface.h - CudaRuntimeInterface.h RuntimeData.h RuntimeInterface.h TypeResolution.cpp @@ -100,10 +99,6 @@ target_include_directories( target_include_directories(${TYPEART_PREFIX}_Runtime SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) -if(CUDAToolkit_FOUND) - target_include_directories(${TYPEART_PREFIX}_Runtime SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) -endif() - target_compile_definitions( ${TYPEART_PREFIX}_Runtime PRIVATE TYPEART_LOG_LEVEL=${TYPEART_LOG_LEVEL_RT} @@ -113,18 +108,8 @@ target_compile_definitions( $<$:TYPEART_ABSEIL> $<$:USE_SAFEPTR> $<$:TYPEART_DISABLE_THREAD_SAFETY> - $<$:TYPEART_HAS_CUDA=1> ) -if(CUDAToolkit_FOUND) - if(TARGET CUDA::cudart) - target_link_libraries(${TYPEART_PREFIX}_Runtime PRIVATE CUDA::cudart) - endif() - if(TARGET CUDA::cuda_driver) - target_link_libraries(${TYPEART_PREFIX}_Runtime PRIVATE CUDA::cuda_driver) - endif() -endif() - typeart_target_compile_options(${TYPEART_PREFIX}_Runtime) typeart_target_define_file_basename(${TYPEART_PREFIX}_Runtime) typeart_target_coverage_options(${TYPEART_PREFIX}_Runtime) @@ -152,12 +137,6 @@ install(FILES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} ) -if(CUDAToolkit_FOUND) - install(FILES CudaRuntimeInterface.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} - ) -endif() - install( TARGETS ${TYPEART_PREFIX}_Runtime EXPORT ${TARGETS_EXPORT_NAME} diff --git a/lib/runtime/CudaRuntimeInterface.h b/lib/runtime/CudaRuntimeInterface.h deleted file mode 100644 index 2b42ee96..00000000 --- a/lib/runtime/CudaRuntimeInterface.h +++ /dev/null @@ -1,36 +0,0 @@ -// TypeART library -// -// Copyright (c) 2017-2026 TypeART Authors -// Distributed under the BSD 3-Clause license. -// (See accompanying file LICENSE.txt or copy at -// https://opensource.org/licenses/BSD-3-Clause) -// -// Project home: https://github.com/tudasc/TypeART -// -// SPDX-License-Identifier: BSD-3-Clause -// - -#ifndef TYPEART_CUDARUNTIMEINTERFACE_H -#define TYPEART_CUDARUNTIMEINTERFACE_H - -#include "RuntimeExport.h" -#include "RuntimeInterface.h" - -#ifdef __cplusplus -#include -#else -#include -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -TYPEART_EXPORT typeart_status typeart_cuda_is_device_ptr(const void* addr, bool* is_device_ptr_flag); - -#ifdef __cplusplus -} -#endif - -#endif // TYPEART_CUDARUNTIMEINTERFACE_H diff --git a/lib/runtime/CudaSupport.cpp b/lib/runtime/CudaSupport.cpp deleted file mode 100644 index 3f358bc5..00000000 --- a/lib/runtime/CudaSupport.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// TypeART library -// -// Copyright (c) 2017-2026 TypeART Authors -// Distributed under the BSD 3-Clause license. -// (See accompanying file LICENSE.txt or copy at -// https://opensource.org/licenses/BSD-3-Clause) -// -// Project home: https://github.com/tudasc/TypeART -// -// SPDX-License-Identifier: BSD-3-Clause -// - -#include "CudaRuntimeInterface.h" - -#ifdef TYPEART_HAS_CUDA -#include -#endif - -typeart_status typeart_cuda_is_device_ptr(const void* addr, bool* is_device_ptr_flag) { - if (is_device_ptr_flag == nullptr) { - return TYPEART_ERROR; - } - -#ifdef TYPEART_HAS_CUDA - CUmemorytype mem_type; - CUresult status = - cuPointerGetAttribute(&mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, reinterpret_cast(addr)); - if (status != CUDA_SUCCESS) { - *is_device_ptr_flag = false; - if (status == CUDA_ERROR_INVALID_VALUE) { - return TYPEART_OK; - } - return TYPEART_ERROR; - } - - *is_device_ptr_flag = (mem_type == CU_MEMORYTYPE_DEVICE); - return TYPEART_OK; -#else - (void)addr; - *is_device_ptr_flag = false; - return TYPEART_OK; -#endif -} From f3181ef51396506688dd25ccd8240785ea079584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Fri, 1 May 2026 14:52:40 +0200 Subject: [PATCH 19/24] HIP testing --- .github/workflows/basic-ci.yml | 70 +++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index 77dd7684..2cff501c 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -150,7 +150,7 @@ jobs: typeart-typegen-legacy: 0 install-lcov: 'true' install-omp: 'true' - setup-mold: 'false' + setup-mold: 'true' - name: Configure TypeART run: cmake -B build --preset ci-cov-thread-safe -DLLVM_DIR=${LLVM_CMAKE_DIR} -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} @@ -175,9 +175,75 @@ jobs: flag-name: cuda-suite-${{ matrix.llvm-version }}-${{ matrix.cuda }} parallel: true + hip-suite: + runs-on: ubuntu-24.04 + + env: + ROCM_PATH: /opt/rocm + ROCM_VERSION: 6.4.4 + + steps: + - uses: actions/checkout@v6 + + - name: Setup ROCm Toolchain + run: | + sudo mkdir --parents --mode=0755 /etc/apt/keyrings + wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/rocm.gpg + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" | sudo tee /etc/apt/sources.list.d/rocm.list + echo -e "Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600" | sudo tee /etc/apt/preferences.d/rocm-pin-600 + + - name: Install ROCm ${{ env.ROCM_VERSION }} + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + rocm-llvm \ + rocm-llvm-dev \ + rocm-dev + + - name: Setup ROCm Environment + run: | + sudo ln -sfn /opt/rocm-${ROCM_VERSION} /opt/rocm + echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH + echo "/opt/rocm/bin" >> $GITHUB_PATH + + - name: Setup TypeART Environment + uses: ./.github/actions/setup-typeart + with: + llvm-version: 18 + typeart-typegen-legacy: 0 + install-lcov: 'true' + install-omp: 'true' + setup-mold: 'true' + + - name: Configure TypeART + run: | + cmake -B build --preset ci-cov-thread-safe \ + -DCMAKE_C_COMPILER=amdclang -DCMAKE_CXX_COMPILER=amdclang++ \ + -DLLVM_DIR=${ROCM_PATH}/llvm/lib/cmake/llvm -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} + + - name: Build TypeART + run: cmake --build build --parallel 2 + + - name: Prepare TypeART coverage + run: cmake --build build --target typeart-lcov-clean + + - name: Test TypeART hip-suite + run: cmake --build build --target check-typeart + + - name: Build coverage report + run: cmake --build build --target typeart-lcov-html + + - name: Coveralls (parallel) + uses: coverallsapp/github-action@v2.3.6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: build/typeart.coverage + flag-name: hip-suite-rocm-${{ env.ROCM_VERSION }} + parallel: true + finish-coverage: if: ${{ always() }} - needs: [lit-suite, cuda-suite] + needs: [lit-suite, cuda-suite, hip-suite] runs-on: ubuntu-24.04 steps: - name: Coveralls Finished From 673e5f9e554efe346bf1c89a73070a4510ba51f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Fri, 1 May 2026 15:16:44 +0200 Subject: [PATCH 20/24] Try fix HIP omp issue --- .github/workflows/basic-ci.yml | 1 + test/runtime/21_runtime_manual.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index 2cff501c..b2e96cc0 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -205,6 +205,7 @@ jobs: sudo ln -sfn /opt/rocm-${ROCM_VERSION} /opt/rocm echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH echo "/opt/rocm/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=/opt/rocm/llvm/lib:/opt/rocm/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - name: Setup TypeART Environment uses: ./.github/actions/setup-typeart diff --git a/test/runtime/21_runtime_manual.cpp b/test/runtime/21_runtime_manual.cpp index 5b81bb9a..784f1e55 100644 --- a/test/runtime/21_runtime_manual.cpp +++ b/test/runtime/21_runtime_manual.cpp @@ -1,5 +1,5 @@ // clang-format off -// RUN: clang++ -std=c++17 -I%S/../../ -I%S/../../lib/typelib -I%S/../../lib %s -o %s.exe +// RUN: %clang-cpp -std=c++17 -I%S/../../ -I%S/../../lib/typelib -I%S/../../lib %s -o %s.exe // RUN: %s.exe 2>&1 | %filecheck %s // clang-format on // FIXME this test doesn't add to the coverage data. From c465c8fb5947aead5b441a515df0a4f8c40f86b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Fri, 1 May 2026 16:00:30 +0200 Subject: [PATCH 21/24] Set vars explicitly --- .github/workflows/basic-ci.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index b2e96cc0..822146f1 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -213,14 +213,20 @@ jobs: llvm-version: 18 typeart-typegen-legacy: 0 install-lcov: 'true' - install-omp: 'true' + install-omp: 'false' setup-mold: 'true' - name: Configure TypeART run: | cmake -B build --preset ci-cov-thread-safe \ - -DCMAKE_C_COMPILER=amdclang -DCMAKE_CXX_COMPILER=amdclang++ \ - -DLLVM_DIR=${ROCM_PATH}/llvm/lib/cmake/llvm -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} + -DCMAKE_C_COMPILER=/opt/rocm/bin/amdclang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/bin/amdclang++ \ + -DLLVM_DIR=${ROCM_PATH}/llvm/lib/cmake/llvm -DLLVM_EXTERNAL_LIT=${EXTERNAL_LIT} \ + -DTYPEART_CLANG_EXEC=/opt/rocm/bin/amdclang \ + -DTYPEART_CLANGCXX_EXEC=/opt/rocm/bin/amdclang++ \ + -DTYPEART_OPT_EXEC=/opt/rocm/llvm/bin/opt \ + -DTYPEART_LLC_EXEC=/opt/rocm/llvm/bin/llc \ + -DTYPEART_LLVMCONFIG_COMMAND=/opt/rocm/llvm/bin/llvm-config - name: Build TypeART run: cmake --build build --parallel 2 From 54cee42456e417584969764479de55973c1b12d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Fri, 1 May 2026 16:13:36 +0200 Subject: [PATCH 22/24] Disable OpenMP in HIP tests --- .github/workflows/basic-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/basic-ci.yml b/.github/workflows/basic-ci.yml index 822146f1..5f511500 100644 --- a/.github/workflows/basic-ci.yml +++ b/.github/workflows/basic-ci.yml @@ -226,7 +226,8 @@ jobs: -DTYPEART_CLANGCXX_EXEC=/opt/rocm/bin/amdclang++ \ -DTYPEART_OPT_EXEC=/opt/rocm/llvm/bin/opt \ -DTYPEART_LLC_EXEC=/opt/rocm/llvm/bin/llc \ - -DTYPEART_LLVMCONFIG_COMMAND=/opt/rocm/llvm/bin/llvm-config + -DTYPEART_LLVMCONFIG_COMMAND=/opt/rocm/llvm/bin/llvm-config \ + -DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=ON - name: Build TypeART run: cmake --build build --parallel 2 From 9ca0e8ae2f6808af8db98a7968aec6739694f8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20H=C3=BCck?= Date: Tue, 5 May 2026 15:17:12 +0200 Subject: [PATCH 23/24] Wrapper parses typeart-related flags --- scripts/typeart-wrapperv2.in | 74 ++++++++++++++++--- .../cuda/pass/11_wrapper_gpu_wrapper_option.c | 32 ++++++++ test/hip/pass/11_wrapper_gpu_wrapper_option.c | 34 +++++++++ test/script/16_wrapper_parse_options.c | 21 ++++++ 4 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 test/cuda/pass/11_wrapper_gpu_wrapper_option.c create mode 100644 test/hip/pass/11_wrapper_gpu_wrapper_option.c create mode 100644 test/script/16_wrapper_parse_options.c diff --git a/scripts/typeart-wrapperv2.in b/scripts/typeart-wrapperv2.in index 494cb4b9..dfea2317 100644 --- a/scripts/typeart-wrapperv2.in +++ b/scripts/typeart-wrapperv2.in @@ -69,15 +69,64 @@ function typeart_is_linking_fn() { return 1 } +function typeart_parse_typeart_option_fn() { + local typeart_option="$1" + local option_payload="${typeart_option#--typeart-}" + local option_name="${option_payload%%=*}" + local option_value="${option_payload#*=}" + + if [ -z "$option_name" ] || [ "$option_payload" = "$option_value" ]; then + echo "TypeART wrapper option requires --typeart-