diff --git a/buildbot/configure.py b/buildbot/configure.py index 4874c0a44557a..ed0e9c946a1c3 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -258,6 +258,9 @@ def do_configure(args, passthrough_args): if args.add_security_flags: cmake_cmd.extend(["-DEXTRA_SECURITY_FLAGS={}".format(args.add_security_flags)]) + if args.static_opencl_adapter: + cmake_cmd.extend(["-DUR_STATIC_ADAPTER_OPENCL=ON"]) + # Add path to root CMakeLists.txt cmake_cmd.append(llvm_dir) @@ -439,6 +442,11 @@ def main(): parser.add_argument( "--use-zstd", action="store_true", help="Force zstd linkage while building." ) + parser.add_argument( + "--static-opencl-adapter", + action="store_true", + help="Build OpenCL adapter as static library with dynamic OpenCL loading (no OpenCL dependency at link time).", + ) parser.add_argument( "--print-cmake-flags", action="store_true", diff --git a/unified-runtime/CMakeLists.txt b/unified-runtime/CMakeLists.txt index 00e73e9d26ed2..3f1fdc0126bee 100644 --- a/unified-runtime/CMakeLists.txt +++ b/unified-runtime/CMakeLists.txt @@ -48,6 +48,7 @@ option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF) option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF) option(UR_BUILD_ADAPTER_OFFLOAD "Build the experimental Offload adapter" OFF) option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF) +option(UR_STATIC_ADAPTER_OPENCL "Build the OpenCL adapter as static library with dynamic OpenCL loading" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF) diff --git a/unified-runtime/source/adapters/opencl/CMakeLists.txt b/unified-runtime/source/adapters/opencl/CMakeLists.txt index fec5b83709566..470bbcd85b1db 100644 --- a/unified-runtime/source/adapters/opencl/CMakeLists.txt +++ b/unified-runtime/source/adapters/opencl/CMakeLists.txt @@ -10,7 +10,13 @@ find_package(Threads REQUIRED) set(TARGET_NAME ur_adapter_opencl) -add_ur_adapter(${TARGET_NAME} SHARED +# Determine adapter library type (SHARED or STATIC) +set(ADAPTER_LIB_TYPE SHARED) +if(UR_STATIC_ADAPTER_OPENCL) + set(ADAPTER_LIB_TYPE STATIC) +endif() + +add_ur_adapter(${TARGET_NAME} ${ADAPTER_LIB_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp @@ -61,11 +67,32 @@ target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../" ) -target_link_libraries(${TARGET_NAME} PRIVATE - ${PROJECT_NAME}::headers - ${PROJECT_NAME}::common - ${PROJECT_NAME}::umf - Threads::Threads - OpenCL-Headers - ${OpenCL_LIBRARY} -) +if(UR_STATIC_ADAPTER_OPENCL) + # When building as static library, add dynamic loading source files + # and don't link against OpenCL library + target_sources(${TARGET_NAME} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/ocl_dynamic_lib.cpp + ) + target_compile_definitions(${TARGET_NAME} PRIVATE UR_STATIC_ADAPTER_OPENCL) + target_link_libraries(${TARGET_NAME} PRIVATE + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::common + ${PROJECT_NAME}::umf + Threads::Threads + OpenCL-Headers + ) + # Add dl library for dynamic loading on Unix systems + if(UNIX) + target_link_libraries(${TARGET_NAME} PRIVATE ${CMAKE_DL_LIBS}) + endif() +else() + # Standard shared library build - link against OpenCL library + target_link_libraries(${TARGET_NAME} PRIVATE + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::common + ${PROJECT_NAME}::umf + Threads::Threads + OpenCL-Headers + ${OpenCL_LIBRARY} + ) +endif() diff --git a/unified-runtime/source/adapters/opencl/adapter.cpp b/unified-runtime/source/adapters/opencl/adapter.cpp index 031829d7fb6cd..ca77bf3fe2dd5 100644 --- a/unified-runtime/source/adapters/opencl/adapter.cpp +++ b/unified-runtime/source/adapters/opencl/adapter.cpp @@ -11,11 +11,15 @@ #include "common.hpp" #include "ur/ur.hpp" +#ifdef UR_STATIC_ADAPTER_OPENCL +#include "ocl_dynamic_lib.hpp" +#else #ifdef _MSC_VER #include #else #include #endif +#endif // There can only be one OpenCL adapter alive at a time. // If it is alive (more get/retains than releases called), this is a pointer to @@ -23,6 +27,16 @@ static ur_adapter_handle_t liveAdapter = nullptr; ur_adapter_handle_t_::ur_adapter_handle_t_() : handle_base() { +#ifdef UR_STATIC_ADAPTER_OPENCL + if (!ocl::loadOCLLibrary()) { + return; + } + // Mirror the symbols already resolved by ocl_dynamic_lib into the per-adapter + // struct fields, so call sites like `getAdapter()->FUNC(...)` work. +#define CL_CORE_FUNCTION(FUNC) FUNC = ocl::FUNC##_ptr; +#include "core_functions.def" +#undef CL_CORE_FUNCTION +#else #ifdef _MSC_VER // Retrieving handle of an already linked OpenCL.dll library doesn't increase @@ -44,13 +58,23 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() : handle_base() { #undef CL_CORE_FUNCTION #endif // _MSC_VER +#endif // UR_STATIC_ADAPTER_OPENCL assert(!liveAdapter); liveAdapter = this; } ur_adapter_handle_t_::~ur_adapter_handle_t_() { +#ifdef UR_STATIC_ADAPTER_OPENCL + // Constructor may have returned early (load failure) without setting + // liveAdapter, so only clean up if this adapter was fully initialized. + if (liveAdapter == this) { + liveAdapter = nullptr; + ocl::unloadOCLLibrary(); + } +#else assert(liveAdapter == this); liveAdapter = nullptr; +#endif } ur_adapter_handle_t ur::cl::getAdapter() { @@ -69,17 +93,28 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, std::lock_guard Lock{AdapterConstructionMutex}; if (!liveAdapter) { - *phAdapters = new ur_adapter_handle_t_(); - } else { - *phAdapters = liveAdapter; + ur_adapter_handle_t_ *newAdapter = new ur_adapter_handle_t_(); + if (!liveAdapter) { + delete newAdapter; + if (pNumAdapters) { + *pNumAdapters = 0; + } + return UR_RESULT_ERROR_UNINITIALIZED; + } } - auto &adapter = *phAdapters; - adapter->RefCount.retain(); + *phAdapters = liveAdapter; + liveAdapter->RefCount.retain(); } if (pNumAdapters) { +#ifdef UR_STATIC_ADAPTER_OPENCL + // Probe libOpenCL for the count-only query pattern (NumEntries == 0); + // loadOCLLibrary() is idempotent. + *pNumAdapters = (liveAdapter || ocl::loadOCLLibrary()) ? 1 : 0; +#else *pNumAdapters = 1; +#endif } return UR_RESULT_SUCCESS; diff --git a/unified-runtime/source/adapters/opencl/common.hpp b/unified-runtime/source/adapters/opencl/common.hpp index aefea1f6c55a0..0ea435d804201 100644 --- a/unified-runtime/source/adapters/opencl/common.hpp +++ b/unified-runtime/source/adapters/opencl/common.hpp @@ -14,6 +14,13 @@ #include #include + +#ifdef UR_STATIC_ADAPTER_OPENCL +// Include dynamic loading header which will redirect all OpenCL function calls +// to our dynamically loaded function pointers +#include "ocl_dynamic_lib.hpp" +#endif + #include #include #include diff --git a/unified-runtime/source/adapters/opencl/ocl_dynamic_lib.cpp b/unified-runtime/source/adapters/opencl/ocl_dynamic_lib.cpp new file mode 100644 index 0000000000000..14babcc45416e --- /dev/null +++ b/unified-runtime/source/adapters/opencl/ocl_dynamic_lib.cpp @@ -0,0 +1,143 @@ +//===------- ocl_dynamic_lib.cpp - OpenCL Dynamic Loading -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#ifdef UR_STATIC_ADAPTER_OPENCL + +// Define this before including the header to prevent macro redefinitions +#define OCL_DYNAMIC_LIB_IMPL +#include "ocl_dynamic_lib.hpp" + +#include "logger/ur_logger.hpp" + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include + +namespace ocl { + +// Define storage for all function pointers using X-macros +#define OCL_FUNC(name) decltype(::name) *name##_ptr = nullptr; +#include "ocl_functions.def" +#undef OCL_FUNC + +static void *OCLLibHandle = nullptr; +static std::mutex OCLLoadMutex; +static bool OCLLoaded = false; +static bool OCLLoadSuccess = false; + +template +static bool getSymbolAddr(void *handle, const char *name, T *funcPtr) { +#ifdef _WIN32 + *funcPtr = reinterpret_cast(GetProcAddress((HMODULE)handle, name)); +#else + *funcPtr = reinterpret_cast(dlsym(handle, name)); +#endif + return *funcPtr != nullptr; +} + +static void loadOCLLibraryImpl() { +#ifdef _WIN32 + OCLLibHandle = + LoadLibraryExA("OpenCL.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + if (!OCLLibHandle) { + DWORD error = GetLastError(); + UR_LOG(ERR, + "Failed to load OpenCL.dll from system directory (error code: {})", + error); + return; + } + UR_LOG(DEBUG, "Successfully loaded OpenCL.dll"); +#else + OCLLibHandle = dlopen("libOpenCL.so.1", RTLD_NOW | RTLD_LOCAL); + if (!OCLLibHandle) { + const char *error1 = dlerror(); + UR_LOG(DEBUG, "Failed to load libOpenCL.so.1: {}", + error1 ? error1 : "unknown error"); + + OCLLibHandle = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL); + if (!OCLLibHandle) { + const char *error2 = dlerror(); + UR_LOG(ERR, + "Failed to load OpenCL library. Tried libOpenCL.so.1 and " + "libOpenCL.so: {}", + error2 ? error2 : "unknown error"); + return; + } + UR_LOG(DEBUG, "Successfully loaded libOpenCL.so"); + } else { + UR_LOG(DEBUG, "Successfully loaded libOpenCL.so.1"); + } +#endif + + bool success = true; + int missing = 0; + +#define OCL_FUNC(name) \ + do { \ + if (!getSymbolAddr(OCLLibHandle, #name, &name##_ptr)) { \ + UR_LOG(ERR, "Required OpenCL function not found: {}", #name); \ + missing++; \ + success = false; \ + } \ + } while (0); + +#include "ocl_functions.def" +#undef OCL_FUNC + + if (!success) { + UR_LOG(ERR, "Failed to load {} required OpenCL function(s)", missing); + // Required symbols missing — close the handle we opened to avoid a leak. +#ifdef _WIN32 + FreeLibrary((HMODULE)OCLLibHandle); +#else + dlclose(OCLLibHandle); +#endif + OCLLibHandle = nullptr; +#define OCL_FUNC(name) name##_ptr = nullptr; +#include "ocl_functions.def" +#undef OCL_FUNC + return; + } + + OCLLoadSuccess = true; +} + +bool loadOCLLibrary() { + std::lock_guard Lock{OCLLoadMutex}; + if (!OCLLoaded) { + loadOCLLibraryImpl(); + OCLLoaded = true; + } + return OCLLoadSuccess; +} + +void unloadOCLLibrary() { + std::lock_guard Lock{OCLLoadMutex}; + if (OCLLibHandle) { +#ifdef _WIN32 + FreeLibrary((HMODULE)OCLLibHandle); +#else + dlclose(OCLLibHandle); +#endif + OCLLibHandle = nullptr; + +#define OCL_FUNC(name) name##_ptr = nullptr; +#include "ocl_functions.def" +#undef OCL_FUNC + } + OCLLoaded = false; + OCLLoadSuccess = false; +} + +} // namespace ocl + +#endif // UR_STATIC_ADAPTER_OPENCL diff --git a/unified-runtime/source/adapters/opencl/ocl_dynamic_lib.hpp b/unified-runtime/source/adapters/opencl/ocl_dynamic_lib.hpp new file mode 100644 index 0000000000000..b2ae46f5de064 --- /dev/null +++ b/unified-runtime/source/adapters/opencl/ocl_dynamic_lib.hpp @@ -0,0 +1,108 @@ +//===------- ocl_dynamic_lib.hpp - OpenCL Dynamic Loading ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#ifdef UR_STATIC_ADAPTER_OPENCL + +// Include OpenCL headers BEFORE any redirection +#include +#include + +namespace ocl { + +// Declare function pointers for all OpenCL functions using X-macros +#define OCL_FUNC(name) extern decltype(::name) *name##_ptr; +#include "ocl_functions.def" +#undef OCL_FUNC + +bool loadOCLLibrary(); +void unloadOCLLibrary(); + +} // namespace ocl + +// Only define the redirection macros if we're NOT in the implementation file +// The implementation file needs the original function names for decltype +#ifndef OCL_DYNAMIC_LIB_IMPL + +// Redirect all OpenCL function calls to our dynamically loaded pointers +#define clBuildProgram ocl::clBuildProgram_ptr +#define clCompileProgram ocl::clCompileProgram_ptr +#define clCreateBuffer ocl::clCreateBuffer_ptr +#define clCreateCommandQueue ocl::clCreateCommandQueue_ptr +#define clCreateCommandQueueWithProperties \ + ocl::clCreateCommandQueueWithProperties_ptr +#define clCreateContext ocl::clCreateContext_ptr +#define clCreateImage ocl::clCreateImage_ptr +#define clCreateKernel ocl::clCreateKernel_ptr +#define clCreateProgramWithBinary ocl::clCreateProgramWithBinary_ptr +#define clCreateProgramWithIL ocl::clCreateProgramWithIL_ptr +#define clCreateSampler ocl::clCreateSampler_ptr +#define clCreateSubBuffer ocl::clCreateSubBuffer_ptr +#define clCreateSubDevices ocl::clCreateSubDevices_ptr +#define clEnqueueBarrierWithWaitList ocl::clEnqueueBarrierWithWaitList_ptr +#define clEnqueueCopyBuffer ocl::clEnqueueCopyBuffer_ptr +#define clEnqueueCopyBufferRect ocl::clEnqueueCopyBufferRect_ptr +#define clEnqueueCopyImage ocl::clEnqueueCopyImage_ptr +#define clEnqueueFillBuffer ocl::clEnqueueFillBuffer_ptr +#define clEnqueueMapBuffer ocl::clEnqueueMapBuffer_ptr +#define clEnqueueMarkerWithWaitList ocl::clEnqueueMarkerWithWaitList_ptr +#define clEnqueueNDRangeKernel ocl::clEnqueueNDRangeKernel_ptr +#define clEnqueueReadBuffer ocl::clEnqueueReadBuffer_ptr +#define clEnqueueReadBufferRect ocl::clEnqueueReadBufferRect_ptr +#define clEnqueueReadImage ocl::clEnqueueReadImage_ptr +#define clEnqueueUnmapMemObject ocl::clEnqueueUnmapMemObject_ptr +#define clEnqueueWriteBuffer ocl::clEnqueueWriteBuffer_ptr +#define clEnqueueWriteBufferRect ocl::clEnqueueWriteBufferRect_ptr +#define clEnqueueWriteImage ocl::clEnqueueWriteImage_ptr +#define clFinish ocl::clFinish_ptr +#define clGetCommandQueueInfo ocl::clGetCommandQueueInfo_ptr +#define clGetContextInfo ocl::clGetContextInfo_ptr +#define clGetDeviceAndHostTimer ocl::clGetDeviceAndHostTimer_ptr +#define clGetDeviceIDs ocl::clGetDeviceIDs_ptr +#define clGetDeviceInfo ocl::clGetDeviceInfo_ptr +#define clGetEventInfo ocl::clGetEventInfo_ptr +#define clGetEventProfilingInfo ocl::clGetEventProfilingInfo_ptr +#define clGetExtensionFunctionAddressForPlatform \ + ocl::clGetExtensionFunctionAddressForPlatform_ptr +#define clGetHostTimer ocl::clGetHostTimer_ptr +#define clGetImageInfo ocl::clGetImageInfo_ptr +#define clGetKernelInfo ocl::clGetKernelInfo_ptr +#define clGetKernelSubGroupInfo ocl::clGetKernelSubGroupInfo_ptr +#define clGetKernelWorkGroupInfo ocl::clGetKernelWorkGroupInfo_ptr +#define clGetMemObjectInfo ocl::clGetMemObjectInfo_ptr +#define clGetPlatformIDs ocl::clGetPlatformIDs_ptr +#define clGetPlatformInfo ocl::clGetPlatformInfo_ptr +#define clGetProgramBuildInfo ocl::clGetProgramBuildInfo_ptr +#define clGetProgramInfo ocl::clGetProgramInfo_ptr +#define clGetSamplerInfo ocl::clGetSamplerInfo_ptr +#define clLinkProgram ocl::clLinkProgram_ptr +#define clReleaseCommandQueue ocl::clReleaseCommandQueue_ptr +#define clReleaseContext ocl::clReleaseContext_ptr +#define clReleaseDevice ocl::clReleaseDevice_ptr +#define clReleaseEvent ocl::clReleaseEvent_ptr +#define clReleaseKernel ocl::clReleaseKernel_ptr +#define clReleaseMemObject ocl::clReleaseMemObject_ptr +#define clReleaseProgram ocl::clReleaseProgram_ptr +#define clReleaseSampler ocl::clReleaseSampler_ptr +#define clRetainContext ocl::clRetainContext_ptr +#define clRetainDevice ocl::clRetainDevice_ptr +#define clRetainEvent ocl::clRetainEvent_ptr +#define clSetEventCallback ocl::clSetEventCallback_ptr +#define clSetKernelArg ocl::clSetKernelArg_ptr +#define clSetKernelExecInfo ocl::clSetKernelExecInfo_ptr +#define clWaitForEvents ocl::clWaitForEvents_ptr +// Intentionally NOT redirected: clSetProgramSpecializationConstant, +// clSetContextDestructorCallback. These names are also struct fields of +// ur_adapter_handle_t_ (see core_functions.def), and the redirect macro would +// rewrite member-access expressions like `adapter->clSetProgram...` into +// invalid syntax. In static mode those fields are populated from +// ocl::*_ptr by the adapter constructor instead. + +#endif // OCL_DYNAMIC_LIB_IMPL + +#endif // UR_STATIC_ADAPTER_OPENCL diff --git a/unified-runtime/source/adapters/opencl/ocl_functions.def b/unified-runtime/source/adapters/opencl/ocl_functions.def new file mode 100644 index 0000000000000..c43d3a4590a37 --- /dev/null +++ b/unified-runtime/source/adapters/opencl/ocl_functions.def @@ -0,0 +1,72 @@ +// OpenCL function definitions for dynamic loading. +// Format: OCL_FUNC(name) +// Every symbol the static OpenCL adapter references at runtime must appear +// here, and only those symbols. A missing symbol at load time aborts adapter +// initialization. + +OCL_FUNC(clBuildProgram) +OCL_FUNC(clCompileProgram) +OCL_FUNC(clCreateBuffer) +OCL_FUNC(clCreateCommandQueue) +OCL_FUNC(clCreateCommandQueueWithProperties) +OCL_FUNC(clCreateContext) +OCL_FUNC(clCreateImage) +OCL_FUNC(clCreateKernel) +OCL_FUNC(clCreateProgramWithBinary) +OCL_FUNC(clCreateProgramWithIL) +OCL_FUNC(clCreateSampler) +OCL_FUNC(clCreateSubBuffer) +OCL_FUNC(clCreateSubDevices) +OCL_FUNC(clEnqueueBarrierWithWaitList) +OCL_FUNC(clEnqueueCopyBuffer) +OCL_FUNC(clEnqueueCopyBufferRect) +OCL_FUNC(clEnqueueCopyImage) +OCL_FUNC(clEnqueueFillBuffer) +OCL_FUNC(clEnqueueMapBuffer) +OCL_FUNC(clEnqueueMarkerWithWaitList) +OCL_FUNC(clEnqueueNDRangeKernel) +OCL_FUNC(clEnqueueReadBuffer) +OCL_FUNC(clEnqueueReadBufferRect) +OCL_FUNC(clEnqueueReadImage) +OCL_FUNC(clEnqueueUnmapMemObject) +OCL_FUNC(clEnqueueWriteBuffer) +OCL_FUNC(clEnqueueWriteBufferRect) +OCL_FUNC(clEnqueueWriteImage) +OCL_FUNC(clFinish) +OCL_FUNC(clGetCommandQueueInfo) +OCL_FUNC(clGetContextInfo) +OCL_FUNC(clGetDeviceAndHostTimer) +OCL_FUNC(clGetDeviceIDs) +OCL_FUNC(clGetDeviceInfo) +OCL_FUNC(clGetEventInfo) +OCL_FUNC(clGetEventProfilingInfo) +OCL_FUNC(clGetExtensionFunctionAddressForPlatform) +OCL_FUNC(clGetHostTimer) +OCL_FUNC(clGetImageInfo) +OCL_FUNC(clGetKernelInfo) +OCL_FUNC(clGetKernelSubGroupInfo) +OCL_FUNC(clGetKernelWorkGroupInfo) +OCL_FUNC(clGetMemObjectInfo) +OCL_FUNC(clGetPlatformIDs) +OCL_FUNC(clGetPlatformInfo) +OCL_FUNC(clGetProgramBuildInfo) +OCL_FUNC(clGetProgramInfo) +OCL_FUNC(clGetSamplerInfo) +OCL_FUNC(clLinkProgram) +OCL_FUNC(clReleaseCommandQueue) +OCL_FUNC(clReleaseContext) +OCL_FUNC(clReleaseDevice) +OCL_FUNC(clReleaseEvent) +OCL_FUNC(clReleaseKernel) +OCL_FUNC(clReleaseMemObject) +OCL_FUNC(clReleaseProgram) +OCL_FUNC(clReleaseSampler) +OCL_FUNC(clRetainContext) +OCL_FUNC(clRetainDevice) +OCL_FUNC(clRetainEvent) +OCL_FUNC(clSetContextDestructorCallback) +OCL_FUNC(clSetEventCallback) +OCL_FUNC(clSetKernelArg) +OCL_FUNC(clSetKernelExecInfo) +OCL_FUNC(clSetProgramSpecializationConstant) +OCL_FUNC(clWaitForEvents) diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp index 8e1c9ee94b8c3..bcaa886d33e48 100644 --- a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp @@ -28,7 +28,11 @@ ur_result_t validateProcInputs(ur_api_version_t Version, void *pDdiTable) { } } // namespace +#ifdef UR_STATIC_ADAPTER_OPENCL +namespace ur::opencl { +#else extern "C" { +#endif UR_DLLEXPORT ur_result_t UR_APICALL urGetAdapterProcAddrTable( ur_api_version_t version, ur_adapter_dditable_t *pDdiTable) { @@ -509,6 +513,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetGraphExpProcAddrTable( return UR_RESULT_SUCCESS; } +#ifndef UR_STATIC_ADAPTER_OPENCL UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version, ur_dditable_t *pDdiTable) { urGetAdapterProcAddrTable(version, &pDdiTable->Adapter); @@ -539,14 +544,147 @@ UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version, return UR_RESULT_SUCCESS; } +#endif // UR_STATIC_ADAPTER_OPENCL +#ifdef UR_STATIC_ADAPTER_OPENCL +} // namespace ur::opencl +#else } // extern "C" +#endif -const ur_dditable_t *ur::opencl::ddi_getter::value() { +namespace { +ur_result_t populateDdiTable(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + +#ifdef UR_STATIC_ADAPTER_OPENCL +#define ADAPTER_CALL ::ur::opencl +#else +#define ADAPTER_CALL +#endif + + result = ADAPTER_CALL::urGetAdapterProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Adapter); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetBindlessImagesExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetCommandBufferExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Event); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetGraphExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->GraphExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetIPCExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->IPCExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ADAPTER_CALL::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetMemoryExportExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->MemoryExportExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Queue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetQueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->QueueExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ADAPTER_CALL::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); + if (result != UR_RESULT_SUCCESS) + return result; + result = ADAPTER_CALL::urGetDeviceExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->DeviceExp); + if (result != UR_RESULT_SUCCESS) + return result; + +#undef ADAPTER_CALL + + return result; +} +} // namespace + +namespace ur::opencl { +const ur_dditable_t *ddi_getter::value() { static std::once_flag flag; static ur_dditable_t table; - std::call_once(flag, - []() { urAllAddrTable(UR_API_VERSION_CURRENT, &table); }); + std::call_once(flag, []() { populateDdiTable(&table); }); return &table; } + +#ifdef UR_STATIC_ADAPTER_OPENCL +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + return populateDdiTable(ddi); +} +#endif +} // namespace ur::opencl diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.hpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.hpp new file mode 100644 index 0000000000000..6d50a456668c5 --- /dev/null +++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.hpp @@ -0,0 +1,24 @@ +//===--------- ur_interface_loader.hpp - OpenCL Adapter ---------------===// +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +namespace ur::opencl { + +struct ddi_getter { + static const ur_dditable_t *value(); +}; + +#ifdef UR_STATIC_ADAPTER_OPENCL +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif + +} // namespace ur::opencl diff --git a/unified-runtime/source/loader/CMakeLists.txt b/unified-runtime/source/loader/CMakeLists.txt index 1b7244714edc2..ec17cb352c1d2 100644 --- a/unified-runtime/source/loader/CMakeLists.txt +++ b/unified-runtime/source/loader/CMakeLists.txt @@ -75,6 +75,11 @@ if(UR_STATIC_ADAPTER_L0) target_compile_definitions(ur_loader PRIVATE UR_STATIC_ADAPTER_LEVEL_ZERO) endif() +if(UR_STATIC_ADAPTER_OPENCL) + target_link_libraries(ur_loader PRIVATE ur_adapter_opencl) + target_compile_definitions(ur_loader PRIVATE UR_STATIC_ADAPTER_OPENCL) +endif() + if(UR_ENABLE_TRACING) target_link_libraries(ur_loader PRIVATE ${TARGET_XPTI}) target_include_directories(ur_loader PRIVATE ${xpti_SOURCE_DIR}/include) diff --git a/unified-runtime/source/loader/ur_loader.cpp b/unified-runtime/source/loader/ur_loader.cpp index 7e3412b65784b..c13b88fd2ce96 100644 --- a/unified-runtime/source/loader/ur_loader.cpp +++ b/unified-runtime/source/loader/ur_loader.cpp @@ -11,6 +11,9 @@ #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO #include "adapters/level_zero/ur_interface_loader.hpp" #endif +#ifdef UR_STATIC_ADAPTER_OPENCL +#include "adapters/opencl/ur_interface_loader.hpp" +#endif namespace ur_loader { /////////////////////////////////////////////////////////////////////////////// @@ -31,12 +34,18 @@ ur_result_t context_t::init() { UINT SavedMode = SetErrorMode(SEM_FAILCRITICALERRORS); #endif -#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#if defined(UR_STATIC_ADAPTER_LEVEL_ZERO) || defined(UR_STATIC_ADAPTER_OPENCL) // If the adapters were force loaded, it means the user wants to use // a specific adapter library. Don't load any static adapters. if (!adapter_registry.adaptersForceLoaded()) { +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO auto &level_zero = platforms.emplace_back(nullptr); ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable); +#endif +#ifdef UR_STATIC_ADAPTER_OPENCL + auto &opencl = platforms.emplace_back(nullptr); + ur::opencl::urAdapterGetDdiTables(&opencl.dditable); +#endif } #endif