diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 2720e35e4b42c..54626c614614b 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -286,6 +286,8 @@ void *MemoryManager::allocate(context_impl *TargetContext, SYCLMemObjI *MemObj, waitForEvents(DepEvents); OutEvent = nullptr; + MemObj->prepareForAllocation(TargetContext); + return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr, OutEvent); } diff --git a/sycl/source/detail/sycl_mem_obj_i.hpp b/sycl/source/detail/sycl_mem_obj_i.hpp index d2e37075b3ea8..3f325c7653cbf 100644 --- a/sycl/source/detail/sycl_mem_obj_i.hpp +++ b/sycl/source/detail/sycl_mem_obj_i.hpp @@ -46,6 +46,10 @@ class SYCLMemObjI { virtual void *allocateMem(context_impl *Context, bool InitFromUserData, void *HostPtr, ur_event_handle_t &InteropEvent) = 0; + // Optional hook executed right before allocateMem(). Memory objects can use + // it to resolve context/backend-dependent allocation policy. + virtual void prepareForAllocation(context_impl *Context) { (void)Context; } + // Should be used for memory object created without use_host_ptr property. virtual void *allocateHostMem() = 0; diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index bae53031930de..be4b0e46ffff2 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -13,10 +13,32 @@ #include #include +#include + namespace sycl { inline namespace _V1 { namespace detail { +namespace { + +size_t getBackendShadowCopyAlignment(context_impl *Context) { + size_t RequiredAlign = 1; + for (const auto &Device : Context->getDevices()) { + const uint32_t AlignBits = + Device.get_info(); + if (AlignBits == 0) + continue; + + // UR reports MEM_BASE_ADDR_ALIGN in bits. + const size_t AlignBytes = (static_cast(AlignBits) + 7) / 8; + if (AlignBytes > RequiredAlign) + RequiredAlign = AlignBytes; + } + return RequiredAlign; +} + +} // namespace + SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject, const context &SyclContext, const size_t, event AvailableEvent, @@ -143,7 +165,7 @@ void SYCLMemObjT::updateHostMemory(void *const Ptr) { void SYCLMemObjT::updateHostMemory() { // Don't try updating host memory when shutting down. if ((MUploadDataFunctor != nullptr) && MNeedWriteBack && - GlobalHandler::instance().isOkToDefer()) + !MBackendOwnsWriteBack && GlobalHandler::instance().isOkToDefer()) MUploadDataFunctor(); // If we're attached to a memory record, process the deletion of the memory @@ -162,12 +184,70 @@ void SYCLMemObjT::updateHostMemory() { (Result || !GlobalHandler::instance().isOkToDefer()) && "removeMemoryObject should not return false in mem object destructor"); } - releaseHostMem(MShadowCopy); + detail::OSUtil::alignedFree(MShadowCopy); if (MOpenCLInterop) { getAdapter().call(MInteropMemObject); } } + +void SYCLMemObjT::materializeShadowCopy(const void *SourcePtr, + size_t RequiredAlign) { + if (MPendingShadowCopyAlignment > RequiredAlign) + RequiredAlign = MPendingShadowCopyAlignment; + + if (RequiredAlign == 0) + RequiredAlign = 1; + + MPendingShadowCopyAlignment = RequiredAlign; + + void *OldUserPtr = MUserPtr; + void *OldShadowCopy = MShadowCopy; + const void *CopySource = SourcePtr; + if (OldShadowCopy) { + if ((reinterpret_cast(OldShadowCopy) % RequiredAlign) == + 0) { + MUserPtr = OldShadowCopy; + return; + } + CopySource = OldShadowCopy; + } + + assert(CopySource != nullptr && + "Cannot materialize a shadow copy without source data"); + + // Allocate the shadow copy via the platform-aligned allocator directly, + // bypassing the user-provided allocator. Shadow copies are an internal + // runtime detail; the user allocator cannot be relied upon to satisfy + // backend alignment requirements (e.g. CL_DEVICE_MEM_BASE_ADDR_ALIGN). + const size_t AllocBytes = + MSizeInBytes == 0 + ? RequiredAlign + : MSizeInBytes + + (RequiredAlign - (MSizeInBytes % RequiredAlign)) % RequiredAlign; + void *NewShadowCopy = detail::OSUtil::alignedAlloc(RequiredAlign, AllocBytes); + if (!NewShadowCopy) + throw std::bad_alloc(); + if (MSizeInBytes != 0) + std::memcpy(NewShadowCopy, CopySource, MSizeInBytes); + + MShadowCopy = NewShadowCopy; + MUserPtr = NewShadowCopy; + updateRecordedMemAllocation(OldUserPtr, NewShadowCopy); + + detail::OSUtil::alignedFree(OldShadowCopy); +} + +void SYCLMemObjT::updateRecordedMemAllocation(void *OldPtr, void *NewPtr) { + if (MRecord == nullptr || OldPtr == nullptr || OldPtr == NewPtr) + return; + + for (auto *AllocaCmd : MRecord->MAllocaCommands) { + if (AllocaCmd->MMemAllocation == OldPtr) + AllocaCmd->MMemAllocation = NewPtr; + } +} + adapter_impl &SYCLMemObjT::getAdapter() const { assert((MInteropContext != nullptr) && "Trying to get Adapter from SYCLMemObjT with nullptr ContextImpl."); @@ -176,6 +256,69 @@ adapter_impl &SYCLMemObjT::getAdapter() const { bool SYCLMemObjT::isInterop() const { return MOpenCLInterop; } +void SYCLMemObjT::prepareForAllocation(context_impl *Context) { + // Context may be null for host allocations; nothing backend-specific to do. + if (!Context) + return; + + if (!MHasPendingAlignedShadowCopy) + return; + + bool SkipShadowCopy = false; + backend Backend = Context->getPlatformImpl().getBackend(); + auto Devices = Context->getDevices(); + if (Devices.size() != 0) + Backend = Devices.front().getBackend(); + + const size_t BackendRequiredAlign = getBackendShadowCopyAlignment(Context); + + switch (Backend) { + case backend::ext_oneapi_level_zero: + SkipShadowCopy = true; + break; + case backend::ext_oneapi_cuda: + case backend::ext_oneapi_hip: + case backend::opencl: + case backend::ext_oneapi_native_cpu: + case backend::ext_oneapi_offload: + SkipShadowCopy = false; + break; + case backend::all: + default: + assert(false && "Unexpected SYCL backend"); + break; + } + + std::lock_guard Lock(MCreateShadowCopyMtx); + if (!MHasPendingAlignedShadowCopy) + return; + if (BackendRequiredAlign > MPendingShadowCopyAlignment) + MPendingShadowCopyAlignment = BackendRequiredAlign; + if (SkipShadowCopy) { + if (MShadowCopy != nullptr) { + // A writable host accessor already forced a SYCL shadow copy. Keep using + // that path so the final copy-back still targets the original user ptr. + MBackendOwnsWriteBack = false; + MHasPendingAlignedShadowCopy = false; + return; + } + + // Backend (UR) will manage the misaligned host pointer through its own + // internal staging buffer and owns the final copy-back to the original ptr. + MCreateShadowCopy = []() -> void {}; + MBackendOwnsWriteBack = true; + if (!MHostPtrReadOnly) + MUploadDataFunctor = nullptr; + MHasPendingAlignedShadowCopy = false; + return; + } + + materializeShadowCopy(MUserPtr, BackendRequiredAlign); + MCreateShadowCopy = []() -> void {}; + MBackendOwnsWriteBack = false; + MHasPendingAlignedShadowCopy = false; +} + void SYCLMemObjT::determineHostPtr(context_impl *Context, bool InitFromUserData, void *&HostPtr, bool &HostPtrReadOnly) { // The data for the allocation can be provided via either the user pointer @@ -232,13 +375,7 @@ void SYCLMemObjT::handleWriteAccessorCreation() { MCreateShadowCopy(); MCreateShadowCopy = []() -> void {}; } - if (MRecord != nullptr && MUserPtr != InitialUserPtr) { - for (auto &it : MRecord->MAllocaCommands) { - if (it->MMemAllocation == InitialUserPtr) { - it->MMemAllocation = MUserPtr; - } - } - } + updateRecordedMemAllocation(InitialUserPtr, MUserPtr); } } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index 4893d154a7566..c26490e2c8954 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -151,6 +152,8 @@ class SYCLMemObjT : public SYCLMemObjI { protected: void updateHostMemory(void *const Ptr); + void materializeShadowCopy(const void *SourcePtr, size_t RequiredAlign); + void updateRecordedMemAllocation(void *OldPtr, void *NewPtr); // Update host with the latest data + notify scheduler that the memory object // is going to die. After this method is finished no further operations with @@ -190,16 +193,17 @@ class SYCLMemObjT : public SYCLMemObjI { MUserPtr = HostPtr; std::lock_guard Lock(MCreateShadowCopyMtx); MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void { - setAlign(RequiredAlign); - MShadowCopy = allocateHostMem(); - MUserPtr = MShadowCopy; - std::memcpy(MUserPtr, HostPtr, MSizeInBytes); + materializeShadowCopy(HostPtr, RequiredAlign); }; } else { - setAlign(RequiredAlign); - MShadowCopy = allocateHostMem(); - MUserPtr = MShadowCopy; - std::memcpy(MUserPtr, HostPtr, MSizeInBytes); + MUserPtr = HostPtr; + if (RequiredAlign > MPendingShadowCopyAlignment) + MPendingShadowCopyAlignment = RequiredAlign; + MHasPendingAlignedShadowCopy = true; + std::lock_guard Lock(MCreateShadowCopyMtx); + MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void { + materializeShadowCopy(HostPtr, RequiredAlign); + }; } } } @@ -224,16 +228,17 @@ class SYCLMemObjT : public SYCLMemObjI { MUserPtr = HostPtr.get(); std::lock_guard Lock(MCreateShadowCopyMtx); MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void { - setAlign(RequiredAlign); - MShadowCopy = allocateHostMem(); - MUserPtr = MShadowCopy; - std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes); + materializeShadowCopy(HostPtr.get(), RequiredAlign); }; } else { - setAlign(RequiredAlign); - MShadowCopy = allocateHostMem(); - MUserPtr = MShadowCopy; - std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes); + MUserPtr = HostPtr.get(); + if (RequiredAlign > MPendingShadowCopyAlignment) + MPendingShadowCopyAlignment = RequiredAlign; + MHasPendingAlignedShadowCopy = true; + std::lock_guard Lock(MCreateShadowCopyMtx); + MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void { + materializeShadowCopy(HostPtr.get(), RequiredAlign); + }; } } } @@ -247,8 +252,13 @@ class SYCLMemObjT : public SYCLMemObjI { "Buffer constructor from a pair of iterator values does " "not support use_host_ptr property."); - setAlign(RequiredAlign); - MShadowCopy = allocateHostMem(); + // Shadow copies are an internal runtime detail; always allocate via the + // platform-aligned allocator so all MShadowCopy frees are uniform. + MShadowCopy = detail::OSUtil::alignedAlloc( + RequiredAlign, std::max(MSizeInBytes, RequiredAlign)); + if (!MShadowCopy) + throw exception(make_error_code(errc::runtime), + "Failed to allocate shadow copy"); MUserPtr = MShadowCopy; CopyFromInput(MUserPtr); @@ -260,6 +270,8 @@ class SYCLMemObjT : public SYCLMemObjI { void handleWriteAccessorCreation(); + void prepareForAllocation(context_impl *Context) override; + void *allocateMem(context_impl *Context, bool InitFromUserData, void *HostPtr, ur_event_handle_t &InteropEvent) override { (void)Context; @@ -291,7 +303,9 @@ class SYCLMemObjT : public SYCLMemObjI { void markAsInternal() { MIsInternal = true; } /// Returns true if this memory object requires a write_back on destruction. - bool needsWriteBack() const { return MNeedWriteBack && MUploadDataFunctor; } + bool needsWriteBack() const { + return MNeedWriteBack && MUploadDataFunctor && !MBackendOwnsWriteBack; + } /// Increment an internal counter for how many graphs are currently using this /// memory object. @@ -315,9 +329,9 @@ class SYCLMemObjT : public SYCLMemObjI { /// Returns true if any graphs are currently using this memory object. bool isUsedInGraph() const { return MGraphUseCount > 0; } - + const property_list &getPropList() const { return MProps; } - + protected: // An allocateMem helper that determines which host ptr to use void determineHostPtr(context_impl *Context, bool InitFromUserData, @@ -346,7 +360,8 @@ class SYCLMemObjT : public SYCLMemObjI { size_t MSizeInBytes = 0; // User's pointer passed to constructor. void *MUserPtr; - // Copy of memory passed by user to constructor. + // Copy of memory passed by user to constructor. Always allocated via + // OSUtil::alignedAlloc (never via MAllocator) so teardown is uniform. void *MShadowCopy; // Function which update host with final data on memory object destruction. std::function MUploadDataFunctor; @@ -369,6 +384,15 @@ class SYCLMemObjT : public SYCLMemObjI { // accessor is created. std::function MCreateShadowCopy = []() -> void {}; std::mutex MCreateShadowCopyMtx; + // The strongest backend alignment requirement observed so far. Deferred + // shadow-copy materialization uses this to upgrade from frontend alignment + // (e.g. alignof(T)) to the backend host-pointer requirement. + size_t MPendingShadowCopyAlignment = 0; + // Set when misaligned input data cannot be used directly and the shadow-copy + // decision is deferred until backend/platform is known. + bool MHasPendingAlignedShadowCopy = false; + // True when backend/adapter is responsible for final host copy-back. + bool MBackendOwnsWriteBack = false; bool MOwnNativeHandle = true; }; } // namespace detail diff --git a/sycl/test-e2e/Regression/buffer_shadow_copy_platform_policy.cpp b/sycl/test-e2e/Regression/buffer_shadow_copy_platform_policy.cpp new file mode 100644 index 0000000000000..669174d98d1f3 --- /dev/null +++ b/sycl/test-e2e/Regression/buffer_shadow_copy_platform_policy.cpp @@ -0,0 +1,119 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Regression test for the triple-buffer issue: when a SYCL buffer is +// constructed with a misaligned or non-USM host pointer, the runtime must +// ensure that: +// 1) (read path) the kernel observes the original host data correctly; +// 2) (write path) kernel-side modifications are written back to the original +// host pointer once the buffer goes out of scope. +// +// These two invariants must hold regardless of whether the SYCL runtime or the +// UR adapter is responsible for the internal copy/write-back. The test is +// intentionally backend-agnostic. + +#include + +#include +#include +#include +#include + +// Read-only kernel: sum all elements and return the result. +static int runReadOnlySumKernel(sycl::queue &Q, const int *HostPtr, size_t N) { + sycl::buffer Buf(HostPtr, sycl::range<1>(N)); + sycl::buffer SumBuf(1); + + Q.submit([&](sycl::handler &CGH) { + auto InAcc = Buf.get_access(CGH); + auto SumAcc = SumBuf.get_access(CGH); + CGH.single_task([=]() { + int Sum = 0; + for (size_t I = 0; I < N; ++I) + Sum += InAcc[I]; + SumAcc[0] = Sum; + }); + }); + Q.wait_and_throw(); + + auto SumHostAcc = SumBuf.get_host_access(); + return SumHostAcc[0]; +} + +// Writable kernel path; buffer destruction happens at scope exit. +static void runWriteKernel(sycl::queue &Q, int *HostPtr, size_t N) { + sycl::buffer Buf(HostPtr, sycl::range<1>(N)); + + Q.submit([&](sycl::handler &CGH) { + auto OutAcc = Buf.get_access(CGH); + CGH.single_task([=]() { + for (size_t I = 0; I < N; ++I) + OutAcc[I] = static_cast(I * 3 + 7); + }); + }); + Q.wait_and_throw(); +} + +// Verifies host-side result after writable-buffer destruction. +static bool checkExpectedPattern(const int *Ptr, size_t N) { + for (size_t I = 0; I < N; ++I) { + if (Ptr[I] != static_cast(I * 3 + 7)) + return false; + } + return true; +} + +int main() { + constexpr size_t N = 32; + sycl::queue Q; + + // Build aligned reference data. + std::vector AlignedInput(N); + for (size_t I = 0; I < N; ++I) + AlignedInput[I] = static_cast(I); + + // Build a deliberately misaligned copy: offset by 1 byte so that the int* + // is not naturally aligned. + std::vector Storage(sizeof(int) * N + 1); + int *UnalignedPtr = reinterpret_cast(Storage.data() + 1); + std::memcpy(UnalignedPtr, AlignedInput.data(), sizeof(int) * N); + const int *ReadOnlyUnalignedPtr = UnalignedPtr; + + const int ExpectedSum = static_cast((N - 1) * N / 2); + + // --- Read path correctness --- + // Both aligned and misaligned host pointers must produce the correct sum. + const int AlignedSum = runReadOnlySumKernel(Q, AlignedInput.data(), N); + if (AlignedSum != ExpectedSum) { + std::cerr << "Unexpected aligned sum: " << AlignedSum << "\n"; + return 1; + } + + const int MisalignedSum = runReadOnlySumKernel(Q, ReadOnlyUnalignedPtr, N); + if (MisalignedSum != ExpectedSum) { + std::cerr << "Unexpected misaligned sum: " << MisalignedSum << "\n"; + return 1; + } + + // --- Write-back correctness --- + // After the buffer goes out of scope the kernel-written pattern must be + // visible at the original host pointer, even when that pointer is misaligned. + std::vector AlignedWritable(N, 0); + std::vector WritableStorage(sizeof(int) * N + 1, 0); + int *UnalignedWritablePtr = + reinterpret_cast(WritableStorage.data() + 1); + + runWriteKernel(Q, AlignedWritable.data(), N); + runWriteKernel(Q, UnalignedWritablePtr, N); + + if (!checkExpectedPattern(AlignedWritable.data(), N)) { + std::cerr << "Unexpected data in aligned writable buffer\n"; + return 1; + } + if (!checkExpectedPattern(UnalignedWritablePtr, N)) { + std::cerr << "Unexpected data in misaligned writable buffer\n"; + return 1; + } + + return 0; +} diff --git a/unified-runtime/source/adapters/level_zero/v2/memory.cpp b/unified-runtime/source/adapters/level_zero/v2/memory.cpp index e91348ad257ce..185693f0d059b 100644 --- a/unified-runtime/source/adapters/level_zero/v2/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/memory.cpp @@ -82,6 +82,19 @@ static ur_result_t synchronousZeCopy(ur_context_handle_t hContext, return exceptionToResult(std::current_exception()); } +static ur_result_t +initializeBufferFromHost(ur_context_handle_t hContext, ur_mem_handle_t hBuffer, + ur_mem_buffer_t::device_access_mode_t accessMode, + const void *hostPtr, size_t size) try { + wait_list_view waitList(nullptr, 0); + void *dst = hBuffer->getBuffer()->getDevicePtr(nullptr, accessMode, 0, size, + nullptr, waitList); + auto hDevice = hContext->getDevices()[0]; + return synchronousZeCopy(hContext, hDevice, dst, hostPtr, size); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t( ur_context_handle_t hContext, void *hostPtr, size_t size, device_access_mode_t accessMode) @@ -109,12 +122,13 @@ ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t( }); if (hostPtr) { - // Initial copy using Level Zero for USM HOST memory auto hDevice = hContext->getDevices()[0]; UR_CALL_THROWS( synchronousZeCopy(hContext, hDevice, this->ptr.get(), hostPtr, size)); - // Store writeBackPtr for copy-back - needed when original pointer - // cannot be imported (e.g., misaligned, wrong allocation type) + mapToPtr = hostPtr; + // Mirror mapToPtr so that ~ur_integrated_buffer_handle_t copies device + // updates back to the original host pointer even without an explicit + // map/unmap. writeBackPtr = hostPtr; } } @@ -142,9 +156,9 @@ void *ur_integrated_buffer_handle_t::getDevicePtr( void *ur_integrated_buffer_handle_t::mapHostPtr( ur_map_flags_t flags, size_t offset, size_t mapSize, ze_command_list_handle_t cmdList, wait_list_view & /*waitListView*/) { - if (writeBackPtr) { - // Copy-back path: user gets back their original pointer - void *mappedPtr = ur_cast(writeBackPtr) + offset; + if (mapToPtr) { + // Staging path: map through the host pointer associated with this buffer. + void *mappedPtr = ur_cast(mapToPtr) + offset; if (flags & UR_MAP_FLAG_READ) { // Use Level Zero copy for USM HOST memory to ensure GPU visibility @@ -167,7 +181,7 @@ void *ur_integrated_buffer_handle_t::mapHostPtr( void ur_integrated_buffer_handle_t::unmapHostPtr( void *pMappedPtr, ze_command_list_handle_t cmdList, wait_list_view & /*waitListView*/) { - if (writeBackPtr) { + if (mapToPtr) { // Copy-back path: find the mapped region and copy data back if needed auto mappedRegion = std::find_if(mappedRegions.begin(), mappedRegions.end(), @@ -196,31 +210,17 @@ void ur_integrated_buffer_handle_t::unmapHostPtr( } void ur_integrated_buffer_handle_t::copyBackToHostIfNeeded() { - if (writeBackPtr) { - // Validate that the pointer is still valid before copy-back. - // SYCL might already do its own copy-back and free it. - ZeStruct memProps; - ze_device_handle_t device; - auto result = ZE_CALL_NOCHECK( - zeMemGetAllocProperties, - (hContext->getZeHandle(), writeBackPtr, &memProps, &device)); - - // If pointer is not a valid allocation (SYCL freed it), skip copy-back - if (result != ZE_RESULT_SUCCESS || - memProps.type == ZE_MEMORY_TYPE_UNKNOWN) { - writeBackPtr = nullptr; - return; - } + if (!writeBackPtr) { + return; + } - // Pointer is valid, perform copy-back - auto hDevice = hContext->getDevices()[0]; - auto result2 = synchronousZeCopy(hContext, hDevice, writeBackPtr, - this->ptr.get(), size); - if (result2 == UR_RESULT_SUCCESS) { - writeBackPtr = nullptr; - } else { - UR_LOG_SAFE(ERR, "Failed to copy-back buffer data: {}", result2); - } + auto hDevice = hContext->getDevices()[0]; + auto result = + synchronousZeCopy(hContext, hDevice, writeBackPtr, this->ptr.get(), size); + if (result == UR_RESULT_SUCCESS) { + writeBackPtr = nullptr; + } else { + UR_LOG_SAFE(ERR, "Failed to copy-back buffer data: {}", result); } } @@ -637,6 +637,8 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext, } void *hostPtr = pProperties ? pProperties->pHost : nullptr; + const bool useHostPtr = (flags & UR_MEM_FLAG_USE_HOST_POINTER) != 0; + const bool copyHostPtr = (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) != 0; auto accessMode = ur_mem_buffer_t::getDeviceAccessMode(flags); // For integrated devices, use zero-copy host buffers. The integrated buffer @@ -646,11 +648,44 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext, // 3. Host pointer can be imported - import it // 4. Otherwise - allocate USM and copy-back through map/unmap operations if (useHostBuffer(hContext)) { - *phBuffer = ur_mem_handle_t_::create( - hContext, hostPtr, size, accessMode); + if (useHostPtr) { + *phBuffer = ur_mem_handle_t_::create( + hContext, hostPtr, size, accessMode); + } else if (copyHostPtr && hostPtr) { + // Not used by SYCL today. Kept for direct UR calls to + // urMemBufferCreate(..., UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER, ...) + // without UR_MEM_FLAG_USE_HOST_POINTER. + auto buffer = std::unique_ptr( + ur_mem_handle_t_::create( + hContext, nullptr, size, accessMode)); + UR_CALL(initializeBufferFromHost(hContext, buffer.get(), accessMode, + hostPtr, size)); + *phBuffer = buffer.release(); + } else { + *phBuffer = ur_mem_handle_t_::create( + hContext, hostPtr, size, accessMode); + } } else { - *phBuffer = ur_mem_handle_t_::create( - hContext, hostPtr, size, accessMode); + if (useHostPtr && copyHostPtr && hostPtr) { + *phBuffer = ur_mem_handle_t_::create( + hContext, nullptr, nullptr, size, accessMode, hostPtr, false); + } else if (useHostPtr) { + *phBuffer = ur_mem_handle_t_::create( + hContext, hostPtr, size, accessMode); + } else if (copyHostPtr && hostPtr) { + // Not used by SYCL today. Kept for direct UR calls to + // urMemBufferCreate(..., UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER, ...) + // without UR_MEM_FLAG_USE_HOST_POINTER. + auto buffer = std::unique_ptr( + ur_mem_handle_t_::create( + hContext, nullptr, size, accessMode)); + UR_CALL(initializeBufferFromHost(hContext, buffer.get(), accessMode, + hostPtr, size)); + *phBuffer = buffer.release(); + } else { + *phBuffer = ur_mem_handle_t_::create( + hContext, hostPtr, size, accessMode); + } } return UR_RESULT_SUCCESS; diff --git a/unified-runtime/source/adapters/level_zero/v2/memory.hpp b/unified-runtime/source/adapters/level_zero/v2/memory.hpp index c793cf97d79eb..702b91811419d 100644 --- a/unified-runtime/source/adapters/level_zero/v2/memory.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/memory.hpp @@ -125,6 +125,7 @@ struct ur_integrated_buffer_handle_t : ur_mem_buffer_t { private: usm_unique_ptr_t ptr; + void *mapToPtr = nullptr; void *writeBackPtr = nullptr; std::vector mappedRegions; };