Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ void *MemoryManager::allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
waitForEvents(DepEvents);
OutEvent = nullptr;

MemObj->prepareForAllocation(TargetContext);

return MemObj->allocateMem(TargetContext, InitFromUserData, HostPtr,
OutEvent);
}
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/sycl_mem_obj_i.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ class SYCLMemObjI {
virtual void *allocateMem(context_impl *Context, bool InitFromUserData,
void *HostPtr, ur_event_handle_t &InteropEvent) = 0;

// Optional hook executed right before allocateMem(). Memory objects can use
// it to resolve context/backend-dependent allocation policy.
virtual void prepareForAllocation(context_impl *Context) { (void)Context; }

// Should be used for memory object created without use_host_ptr property.
virtual void *allocateHostMem() = 0;

Expand Down
155 changes: 146 additions & 9 deletions sycl/source/detail/sycl_mem_obj_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,32 @@
#include <detail/scheduler/scheduler.hpp>
#include <detail/sycl_mem_obj_t.hpp>

#include <cstdint>

namespace sycl {
inline namespace _V1 {
namespace detail {

namespace {

/// Computes the host-pointer alignment (in bytes) to use for a shadow copy in
/// \p Context.
///
/// Every device of the context is queried for
/// info::device::mem_base_addr_align; the largest value, converted from bits
/// to bytes (rounded up), is returned. Devices that report 0 are ignored.
///
/// \param Context context whose devices are inspected; must not be null.
/// \return the largest per-device alignment in bytes, never less than 1.
size_t getBackendShadowCopyAlignment(context_impl *Context) {
  size_t StrictestAlignBytes = 1;
  for (const auto &Dev : Context->getDevices()) {
    const uint32_t BaseAlignBits =
        Dev.get_info<info::device::mem_base_addr_align>();
    if (BaseAlignBits != 0) {
      // UR reports MEM_BASE_ADDR_ALIGN in bits; round up to whole bytes.
      const size_t BaseAlignBytes =
          (static_cast<size_t>(BaseAlignBits) + 7) / 8;
      StrictestAlignBytes = BaseAlignBytes > StrictestAlignBytes
                                ? BaseAlignBytes
                                : StrictestAlignBytes;
    }
  }
  return StrictestAlignBytes;
}

} // namespace

SYCLMemObjT::SYCLMemObjT(ur_native_handle_t MemObject,
const context &SyclContext, const size_t,
event AvailableEvent,
Expand Down Expand Up @@ -143,7 +165,7 @@ void SYCLMemObjT::updateHostMemory(void *const Ptr) {
void SYCLMemObjT::updateHostMemory() {
// Don't try updating host memory when shutting down.
if ((MUploadDataFunctor != nullptr) && MNeedWriteBack &&
GlobalHandler::instance().isOkToDefer())
!MBackendOwnsWriteBack && GlobalHandler::instance().isOkToDefer())
MUploadDataFunctor();

// If we're attached to a memory record, process the deletion of the memory
Expand All @@ -162,12 +184,70 @@ void SYCLMemObjT::updateHostMemory() {
(Result || !GlobalHandler::instance().isOkToDefer()) &&
"removeMemoryObject should not return false in mem object destructor");
}
releaseHostMem(MShadowCopy);
detail::OSUtil::alignedFree(MShadowCopy);

if (MOpenCLInterop) {
getAdapter().call<UrApiKind::urMemRelease>(MInteropMemObject);
}
}

void SYCLMemObjT::materializeShadowCopy(const void *SourcePtr,
size_t RequiredAlign) {
if (MPendingShadowCopyAlignment > RequiredAlign)
RequiredAlign = MPendingShadowCopyAlignment;

if (RequiredAlign == 0)
RequiredAlign = 1;

MPendingShadowCopyAlignment = RequiredAlign;

void *OldUserPtr = MUserPtr;
void *OldShadowCopy = MShadowCopy;
const void *CopySource = SourcePtr;
if (OldShadowCopy) {
if ((reinterpret_cast<std::uintptr_t>(OldShadowCopy) % RequiredAlign) ==
0) {
MUserPtr = OldShadowCopy;
return;
}
CopySource = OldShadowCopy;
}

assert(CopySource != nullptr &&
"Cannot materialize a shadow copy without source data");

// Allocate the shadow copy via the platform-aligned allocator directly,
// bypassing the user-provided allocator. Shadow copies are an internal
// runtime detail; the user allocator cannot be relied upon to satisfy
// backend alignment requirements (e.g. CL_DEVICE_MEM_BASE_ADDR_ALIGN).
const size_t AllocBytes =
MSizeInBytes == 0
? RequiredAlign
: MSizeInBytes +
(RequiredAlign - (MSizeInBytes % RequiredAlign)) % RequiredAlign;
void *NewShadowCopy = detail::OSUtil::alignedAlloc(RequiredAlign, AllocBytes);
if (!NewShadowCopy)
throw std::bad_alloc();
if (MSizeInBytes != 0)
std::memcpy(NewShadowCopy, CopySource, MSizeInBytes);

MShadowCopy = NewShadowCopy;
MUserPtr = NewShadowCopy;
updateRecordedMemAllocation(OldUserPtr, NewShadowCopy);

detail::OSUtil::alignedFree(OldShadowCopy);
}

/// Retargets recorded alloca commands from \p OldPtr to \p NewPtr.
///
/// Every entry of MRecord->MAllocaCommands whose MMemAllocation equals
/// \p OldPtr is rewritten to \p NewPtr. The call does nothing when there is no
/// memory record, when \p OldPtr is null, or when both pointers are equal.
void SYCLMemObjT::updateRecordedMemAllocation(void *OldPtr, void *NewPtr) {
  const bool NothingToRetarget =
      MRecord == nullptr || OldPtr == nullptr || OldPtr == NewPtr;
  if (NothingToRetarget)
    return;

  for (auto *Cmd : MRecord->MAllocaCommands) {
    if (Cmd->MMemAllocation != OldPtr)
      continue;
    Cmd->MMemAllocation = NewPtr;
  }
}

adapter_impl &SYCLMemObjT::getAdapter() const {
assert((MInteropContext != nullptr) &&
"Trying to get Adapter from SYCLMemObjT with nullptr ContextImpl.");
Expand All @@ -176,6 +256,69 @@ adapter_impl &SYCLMemObjT::getAdapter() const {

// Returns the value of the MOpenCLInterop flag for this memory object.
bool SYCLMemObjT::isInterop() const { return MOpenCLInterop; }

/// Resolves a deferred shadow-copy decision for an allocation in \p Context.
///
/// Does nothing when \p Context is null or when no aligned shadow copy is
/// pending. Otherwise the backend is taken from the context's first device
/// (falling back to the platform's backend when the context has no devices)
/// and the backend alignment is computed. Under MCreateShadowCopyMtx the
/// method then either
///  - leaves the user pointer in place and hands the final copy-back to the
///    backend (Level Zero, when no shadow copy exists yet), or
///  - materializes an aligned shadow copy (all other handled backends).
/// In every resolved case MHasPendingAlignedShadowCopy is cleared.
void SYCLMemObjT::prepareForAllocation(context_impl *Context) {
  // A null context denotes a host allocation: nothing backend-specific to do.
  // Likewise there is nothing to resolve without a pending decision.
  if (Context == nullptr || !MHasPendingAlignedShadowCopy)
    return;

  backend ActiveBackend = Context->getPlatformImpl().getBackend();
  auto ContextDevices = Context->getDevices();
  if (ContextDevices.size() != 0)
    ActiveBackend = ContextDevices.front().getBackend();

  const size_t BackendRequiredAlign = getBackendShadowCopyAlignment(Context);

  // Only Level Zero skips the SYCL-side shadow copy; every other known
  // backend keeps the default of materializing one.
  bool SkipShadowCopy = false;
  switch (ActiveBackend) {
  case backend::ext_oneapi_level_zero:
    SkipShadowCopy = true;
    break;
  case backend::ext_oneapi_cuda:
  case backend::ext_oneapi_hip:
  case backend::opencl:
  case backend::ext_oneapi_native_cpu:
  case backend::ext_oneapi_offload:
    break;
  case backend::all:
  default:
    assert(false && "Unexpected SYCL backend");
    break;
  }

  std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
  // Re-check under the lock: the decision may have been resolved meanwhile.
  if (!MHasPendingAlignedShadowCopy)
    return;
  if (MPendingShadowCopyAlignment < BackendRequiredAlign)
    MPendingShadowCopyAlignment = BackendRequiredAlign;

  if (!SkipShadowCopy) {
    // The SYCL runtime owns an aligned copy and the final write-back.
    materializeShadowCopy(MUserPtr, BackendRequiredAlign);
    MCreateShadowCopy = []() -> void {};
    MBackendOwnsWriteBack = false;
    MHasPendingAlignedShadowCopy = false;
    return;
  }

  if (MShadowCopy != nullptr) {
    // A writable host accessor already forced a SYCL shadow copy. Stay on
    // that path so the final copy-back still targets the original user ptr.
    MBackendOwnsWriteBack = false;
    MHasPendingAlignedShadowCopy = false;
    return;
  }

  // The backend (UR) handles the misaligned host pointer through its own
  // internal staging buffer and owns the final copy-back to the original
  // pointer, so the deferred shadow-copy creation is disarmed.
  MCreateShadowCopy = []() -> void {};
  MBackendOwnsWriteBack = true;
  if (!MHostPtrReadOnly)
    MUploadDataFunctor = nullptr;
  MHasPendingAlignedShadowCopy = false;
}

void SYCLMemObjT::determineHostPtr(context_impl *Context, bool InitFromUserData,
void *&HostPtr, bool &HostPtrReadOnly) {
// The data for the allocation can be provided via either the user pointer
Expand Down Expand Up @@ -232,13 +375,7 @@ void SYCLMemObjT::handleWriteAccessorCreation() {
MCreateShadowCopy();
MCreateShadowCopy = []() -> void {};
}
if (MRecord != nullptr && MUserPtr != InitialUserPtr) {
for (auto &it : MRecord->MAllocaCommands) {
if (it->MMemAllocation == InitialUserPtr) {
it->MMemAllocation = MUserPtr;
}
}
}
updateRecordedMemAllocation(InitialUserPtr, MUserPtr);
}

} // namespace detail
Expand Down
68 changes: 46 additions & 22 deletions sycl/source/detail/sycl_mem_obj_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <detail/sycl_mem_obj_i.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/export.hpp>
#include <sycl/detail/os_util.hpp>
#include <sycl/detail/sycl_mem_obj_allocator.hpp>
#include <sycl/detail/type_traits.hpp>
#include <sycl/detail/ur.hpp>
Expand Down Expand Up @@ -151,6 +152,8 @@ class SYCLMemObjT : public SYCLMemObjI {

protected:
void updateHostMemory(void *const Ptr);
void materializeShadowCopy(const void *SourcePtr, size_t RequiredAlign);
void updateRecordedMemAllocation(void *OldPtr, void *NewPtr);

// Update host with the latest data + notify scheduler that the memory object
// is going to die. After this method is finished no further operations with
Expand Down Expand Up @@ -190,16 +193,17 @@ class SYCLMemObjT : public SYCLMemObjI {
MUserPtr = HostPtr;
std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr, MSizeInBytes);
materializeShadowCopy(HostPtr, RequiredAlign);
};
} else {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr, MSizeInBytes);
MUserPtr = HostPtr;
if (RequiredAlign > MPendingShadowCopyAlignment)
MPendingShadowCopyAlignment = RequiredAlign;
MHasPendingAlignedShadowCopy = true;
std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
materializeShadowCopy(HostPtr, RequiredAlign);
};
}
}
}
Expand All @@ -224,16 +228,17 @@ class SYCLMemObjT : public SYCLMemObjI {
MUserPtr = HostPtr.get();
std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes);
materializeShadowCopy(HostPtr.get(), RequiredAlign);
};
} else {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes);
MUserPtr = HostPtr.get();
if (RequiredAlign > MPendingShadowCopyAlignment)
MPendingShadowCopyAlignment = RequiredAlign;
MHasPendingAlignedShadowCopy = true;
std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
materializeShadowCopy(HostPtr.get(), RequiredAlign);
};
}
}
}
Expand All @@ -247,8 +252,13 @@ class SYCLMemObjT : public SYCLMemObjI {
"Buffer constructor from a pair of iterator values does "
"not support use_host_ptr property.");

setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
// Shadow copies are an internal runtime detail; always allocate via the
// platform-aligned allocator so all MShadowCopy frees are uniform.
MShadowCopy = detail::OSUtil::alignedAlloc(
RequiredAlign, std::max(MSizeInBytes, RequiredAlign));
if (!MShadowCopy)
throw exception(make_error_code(errc::runtime),
"Failed to allocate shadow copy");
MUserPtr = MShadowCopy;

CopyFromInput(MUserPtr);
Expand All @@ -260,6 +270,8 @@ class SYCLMemObjT : public SYCLMemObjI {

void handleWriteAccessorCreation();

void prepareForAllocation(context_impl *Context) override;

void *allocateMem(context_impl *Context, bool InitFromUserData, void *HostPtr,
ur_event_handle_t &InteropEvent) override {
(void)Context;
Expand Down Expand Up @@ -291,7 +303,9 @@ class SYCLMemObjT : public SYCLMemObjI {
/// Flags this memory object as internal by setting MIsInternal to true.
void markAsInternal() { MIsInternal = true; }

/// Returns true if this memory object requires a write_back on destruction.
bool needsWriteBack() const { return MNeedWriteBack && MUploadDataFunctor; }
bool needsWriteBack() const {
  // No runtime-side write-back when it was never requested.
  if (!MNeedWriteBack)
    return false;
  // Without an upload functor there is nothing to run.
  if (!MUploadDataFunctor)
    return false;
  // When the backend owns the final copy-back the runtime must not repeat it.
  return !MBackendOwnsWriteBack;
}

/// Increment an internal counter for how many graphs are currently using this
/// memory object.
Expand All @@ -315,9 +329,9 @@ class SYCLMemObjT : public SYCLMemObjI {

/// Returns true if any graphs are currently using this memory object, i.e.
/// when MGraphUseCount is greater than zero.
bool isUsedInGraph() const { return MGraphUseCount > 0; }

/// Returns a const reference to the property list stored in MProps.
const property_list &getPropList() const { return MProps; }

protected:
// An allocateMem helper that determines which host ptr to use
void determineHostPtr(context_impl *Context, bool InitFromUserData,
Expand Down Expand Up @@ -346,7 +360,8 @@ class SYCLMemObjT : public SYCLMemObjI {
size_t MSizeInBytes = 0;
// User's pointer passed to constructor.
void *MUserPtr;
// Copy of memory passed by user to constructor.
// Copy of memory passed by user to constructor. Always allocated via
// OSUtil::alignedAlloc (never via MAllocator) so teardown is uniform.
void *MShadowCopy;
// Function which update host with final data on memory object destruction.
std::function<void(void)> MUploadDataFunctor;
Expand All @@ -369,6 +384,15 @@ class SYCLMemObjT : public SYCLMemObjI {
// accessor is created.
std::function<void(void)> MCreateShadowCopy = []() -> void {};
std::mutex MCreateShadowCopyMtx;
// The strongest backend alignment requirement observed so far. Deferred
// shadow-copy materialization uses this to upgrade from frontend alignment
// (e.g. alignof(T)) to the backend host-pointer requirement.
size_t MPendingShadowCopyAlignment = 0;
// Set when misaligned input data cannot be used directly and the shadow-copy
// decision is deferred until backend/platform is known.
bool MHasPendingAlignedShadowCopy = false;
// True when backend/adapter is responsible for final host copy-back.
bool MBackendOwnsWriteBack = false;
bool MOwnNativeHandle = true;
};
} // namespace detail
Expand Down
Loading
Loading