Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
5b2c398
[UR][L0] Restrict USM residency to peers with enabled P2P access
ldorau Apr 28, 2026
28d3621
[UR][L0] Extend memory residency tests with P2P checks
ldorau Apr 28, 2026
5cf0199
[SYCL] Refactor P2P peer access helpers to avoid duplication
ldorau May 11, 2026
b074b41
[UR][L0] Fix flaky disablePeerAccess test by removing free-memory check
ldorau May 11, 2026
d4674a7
[UR][L0] Add two new tests: allocAfterEnablingPeerAccess and allocBef…
ldorau May 12, 2026
70cfe66
[UR][L0] Fix P2P internal helper to operate on peerDevice
ldorau May 12, 2026
840f6a2
[UR][L0] Enforce P2P access check in urEnqueueUSMMemcpy
ldorau May 12, 2026
0452f83
[SYCL][E2E][USM] Add P2P USM residency test for L0 v2 adapter
ldorau May 6, 2026
f5f9e96
[UR][L0] Remove flaky free-memory assertion from enablePeerAccess test
ldorau May 13, 2026
fc17fed
[SYCL][E2E] Fix P2P cleanup order in p2p_usm_residency test
ldorau May 14, 2026
27d19f9
[UR][L0v2] Extract checkP2PAccess static helper from appendUSMMemcpy
ldorau May 14, 2026
d6050ea
[UR][L0v2] Fix inaccurate comment in p2pReadSucceedsWithPeerAccessEna…
ldorau May 14, 2026
481c108
[UR][L0v2] Remove redundant p2pReadFailsWithoutPeerAccessDisabled test
ldorau May 14, 2026
0134ef2
[SYCL][E2E] Fix P2P direction and cleanup in p2p_copy e2e test
ldorau May 14, 2026
6b03122
[UR][L0v2] Add p2pReadFailsAfterRevokingAccess test
ldorau May 15, 2026
234aad4
[UR][L0v2] Add p2pReadSucceedsAfterEnablingAccess test
ldorau May 15, 2026
247ac7f
[SYCL][E2E] Add Phase 4 to p2p_usm_residency: memcpy fails without P2P
ldorau May 15, 2026
64f238d
[SYCL][E2E] Add Phase 5 to p2p_usm_residency: fail then succeed after…
ldorau May 15, 2026
a74d1d6
[UR] Fix isPVC() to handle unsupported UR_DEVICE_INFO_DEVICE_ID
ldorau May 13, 2026
002f408
[UR][L0] Fix event_pool_test to properly release events through DDI t…
ldorau May 13, 2026
edc38fc
[UR][L0] Mark deferred_kernel_memcheck test as XFAIL
ldorau May 13, 2026
78e6552
[UR][L0] Migrate discrete buffer through host when P2P is not accessible
ldorau May 13, 2026
39a3ba8
[UR][L0v2] Improve discrete buffer host-migration when P2P is not acc…
ldorau May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions sycl/source/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,22 +209,32 @@ ur_native_handle_t device::getNative() const { return impl->getNative(); }

bool device::has(aspect Aspect) const { return impl->has(Aspect); }

template <detail::UrApiKind ApiKind>
static void p2pAccessHelper(const device &self, const device &peer,
ur_device_handle_t Device, ur_device_handle_t Peer,
detail::adapter_impl &Adapter,
const char *errorMsg) {
if (Device == Peer)
return;

if (peer.get_platform() != self.get_platform())
throw exception(errc::invalid, errorMsg);

Adapter.call<ApiKind>(Device, Peer);
}

void device::ext_oneapi_enable_peer_access(const device &peer) {
ur_device_handle_t Device = impl->getHandleRef();
ur_device_handle_t Peer = peer.impl->getHandleRef();
if (Device != Peer) {
detail::adapter_impl &Adapter = impl->getAdapter();
Adapter.call<detail::UrApiKind::urUsmP2PEnablePeerAccessExp>(Device, Peer);
}
p2pAccessHelper<detail::UrApiKind::urUsmP2PEnablePeerAccessExp>(
*this, peer, impl->getHandleRef(), peer.impl->getHandleRef(),
impl->getAdapter(),
"Cannot enable peer access between different platforms");
}

void device::ext_oneapi_disable_peer_access(const device &peer) {
ur_device_handle_t Device = impl->getHandleRef();
ur_device_handle_t Peer = peer.impl->getHandleRef();
if (Device != Peer) {
detail::adapter_impl &Adapter = impl->getAdapter();
Adapter.call<detail::UrApiKind::urUsmP2PDisablePeerAccessExp>(Device, Peer);
}
p2pAccessHelper<detail::UrApiKind::urUsmP2PDisablePeerAccessExp>(
*this, peer, impl->getHandleRef(), peer.impl->getHandleRef(),
impl->getAdapter(),
"Cannot disable peer access between different platforms");
}

bool device::ext_oneapi_can_access_peer(const device &peer,
Expand Down
7 changes: 5 additions & 2 deletions sycl/test-e2e/USM/P2P/p2p_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ int main() {
return 0;
}

// Enables Devs[0] to access Devs[1] memory.
Devs[0].ext_oneapi_enable_peer_access(Devs[1]);
// Enables Devs[1] to access Devs[0] memory (Devs[1]'s queue will read
// from arr0 which lives on Devs[0]).
Devs[1].ext_oneapi_enable_peer_access(Devs[0]);

std::vector<int> input(N);
std::iota(input.begin(), input.end(), 0);
Expand All @@ -53,6 +54,8 @@ int main() {
int out[N];
Queues[1].copy(arr1, out, N).wait();

// Disable P2P before releasing the allocation it was guarding.
Devs[1].ext_oneapi_disable_peer_access(Devs[0]);
sycl::free(arr0, Queues[0]);
sycl::free(arr1, Queues[1]);

Expand Down
327 changes: 327 additions & 0 deletions sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
//==-- p2p_usm_residency.cpp - P2P USM residency test ---------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Verify that the Level Zero v2 adapter correctly makes USM device memory
// resident on peer devices when P2P access is enabled.
//
// Phase 1: Allocates memory on dev0, fills it with a known pattern, enables
// P2P access from dev1 to dev0, then uses dev1's queue to copy the data to
// the host and verifies all values match the fill pattern.
//
// Phase 2 (opposite direction): Allocates memory on dev1, fills it with a
// different pattern, enables P2P access from dev0 to dev1, then uses dev0's
// queue to copy the data to the host and verifies correctness.
//
// Phase 3 (negative): Allocates memory on dev0, enables then disables P2P
// access from dev1, and verifies that a subsequent device-to-device memcpy
// via dev1's queue throws an exception.
//
// REQUIRES: level_zero && two-or-more-gpu-devices
// UNSUPPORTED: level_zero_v1_adapter
// UNSUPPORTED-INTENDED: Test is specific to the Level Zero v2 adapter.
//
// RUN: %{build} -o %t.out
// RUN: env UR_LOADER_USE_LEVEL_ZERO_V2=1 %{run} %t.out

#include <iostream>
#include <vector>

#include <sycl/detail/core.hpp>
#include <sycl/platform.hpp>
#include <sycl/usm.hpp>

using namespace sycl;

// Allocate N ints on srcQueue's device, fill with fillVal, enable P2P so that
// dstDev can access srcDev's allocations, copy to host via dstQueue, verify
// all values, then clean up. Returns false on failure.
static bool testP2PRead(context &ctx, queue &srcQueue, device &srcDev,
queue &dstQueue, device &dstDev, size_t N, int fillVal,
const char *label) {
int *src = sycl::malloc_device<int>(N, srcQueue);
if (!src) {
std::cout << label << ": device alloc failed. Skipping.\n";
return true; // not a test failure
}
srcQueue.fill(src, fillVal, N).wait();

// Enable P2P: dstDev may now access allocations on srcDev. Under the
// Level Zero v2 adapter this also makes the srcDev allocation resident
// on dstDev.
std::cout << "Enabling P2P: dstDev may now access allocations on srcDev.\n";
dstDev.ext_oneapi_enable_peer_access(srcDev);

std::vector<int> result(N, 0);
dstQueue.memcpy(result.data(), src, N * sizeof(int)).wait();

std::cout
<< "Disabling P2P: dstDev may no longer access allocations on srcDev.\n";
dstDev.ext_oneapi_disable_peer_access(srcDev);
sycl::free(src, ctx);

for (size_t i = 0; i < N; ++i) {
if (result[i] != fillVal) {
std::cout << label << ": FAIL at index " << i << ": got " << result[i]
<< ", expected " << fillVal << "\n";
return false;
}
}
std::cout << label << ": OK\n";
return true;
}

// Allocate N ints on srcQueue's device, fill with fillVal, enable P2P, then
// disable P2P, and verify that a device-to-device memcpy from dstQueue fails
// (since dstDev should no longer be able to access srcDev's allocations after
// P2P is disabled).
static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue,
device &srcDev, queue &dstQueue,
device &dstDev, size_t N, int fillVal,
const char *label) {
int *src = sycl::malloc_device<int>(N, srcQueue);
if (!src) {
std::cout << label << ": device alloc failed (src). Skipping.\n";
return true;
}

int *dst = sycl::malloc_device<int>(N, dstQueue);
if (!dst) {
std::cout << label << ": device alloc failed (dst). Skipping.\n";
sycl::free(src, ctx);
return true;
}

srcQueue.fill(src, fillVal, N).wait();

// Enable then disable P2P: dstDev should no longer be able to access
// allocations on srcDev.
std::cout << "Enabling P2P (temporarily).\n";
dstDev.ext_oneapi_enable_peer_access(srcDev);
std::cout << "Disabling P2P: dstDev should no longer access srcDev.\n";
dstDev.ext_oneapi_disable_peer_access(srcDev);

// Attempt a device-to-device memcpy from src (on srcDev) to dst (on dstDev)
// via dstQueue after P2P has been revoked — this should fail.
bool gotException = false;
try {
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
} catch (sycl::exception &e) {
std::cout << label << ": memcpy threw exception: " << e.what() << "\n";
gotException = true;
}

sycl::free(dst, ctx);
sycl::free(src, ctx);

if (!gotException) {
std::cout << label
<< ": FAIL — device-to-device memcpy succeeded after P2P was "
"disabled\n";
return false;
}
std::cout << label << ": OK (memcpy correctly failed after P2P disable)\n";
return true;
}

// Allocate N ints on srcQueue's device, fill with fillVal, and verify that a
// device-to-device memcpy from dstQueue fails without ever enabling P2P (since
// dstDev must not access srcDev's allocations when P2P has never been enabled).
static bool testP2PReadFailsWithoutEnable(context &ctx, queue &srcQueue,
device &srcDev, queue &dstQueue,
device &dstDev, size_t N, int fillVal,
const char *label) {
(void)srcDev;
(void)dstDev;

int *src = sycl::malloc_device<int>(N, srcQueue);
if (!src) {
std::cout << label << ": device alloc failed (src). Skipping.\n";
return true;
}

int *dst = sycl::malloc_device<int>(N, dstQueue);
if (!dst) {
std::cout << label << ": device alloc failed (dst). Skipping.\n";
sycl::free(src, ctx);
return true;
}

srcQueue.fill(src, fillVal, N).wait();

// Attempt a device-to-device memcpy without ever enabling P2P — must fail.
bool gotException = false;
try {
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
} catch (sycl::exception &e) {
std::cout << label << ": memcpy threw exception: " << e.what() << "\n";
gotException = true;
}

sycl::free(dst, ctx);
sycl::free(src, ctx);

if (!gotException) {
std::cout << label
<< ": FAIL — device-to-device memcpy succeeded without P2P\n";
return false;
}
std::cout << label << ": OK (memcpy correctly failed without P2P)\n";
return true;
}

// Verify the transition from blocked to permitted using the same allocation:
// first attempt a device-to-device memcpy from dstQueue without P2P (must
// fail), then enable P2P and retry the copy (must succeed with correct data).
static bool testP2PReadFailsThenSucceedsAfterEnable(
context &ctx, queue &srcQueue, device &srcDev, queue &dstQueue,
device &dstDev, size_t N, int fillVal, const char *label) {
int *src = sycl::malloc_device<int>(N, srcQueue);
if (!src) {
std::cout << label << ": device alloc failed (src). Skipping.\n";
return true;
}

int *dst = sycl::malloc_device<int>(N, dstQueue);
if (!dst) {
std::cout << label << ": device alloc failed (dst). Skipping.\n";
sycl::free(src, ctx);
return true;
}

srcQueue.fill(src, fillVal, N).wait();

// Without P2P the copy must fail.
bool gotException = false;
try {
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
} catch (sycl::exception &e) {
std::cout << label << ": first memcpy (no P2P) threw: " << e.what() << "\n";
gotException = true;
}

if (!gotException) {
std::cout << label << ": FAIL — first memcpy succeeded without P2P\n";
sycl::free(dst, ctx);
sycl::free(src, ctx);
return false;
}

// Enable P2P: dstDev may now access allocations on srcDev.
std::cout << label << ": enabling P2P.\n";
dstDev.ext_oneapi_enable_peer_access(srcDev);

// Retry — must succeed now.
bool copyOk = true;
std::vector<int> result(N, 0);
try {
dstQueue.memcpy(dst, src, N * sizeof(int)).wait();
// Read back to host for verification.
dstQueue.memcpy(result.data(), dst, N * sizeof(int)).wait();
} catch (sycl::exception &e) {
std::cout << label << ": second memcpy (P2P enabled) threw: " << e.what()
<< "\n";
copyOk = false;
}

std::cout << label << ": disabling P2P.\n";
dstDev.ext_oneapi_disable_peer_access(srcDev);
sycl::free(dst, ctx);
sycl::free(src, ctx);

if (!copyOk)
return false;

for (size_t i = 0; i < N; ++i) {
if (result[i] != fillVal) {
std::cout << label << ": FAIL at index " << i << ": got " << result[i]
<< ", expected " << fillVal << "\n";
return false;
}
}
std::cout << label << ": OK (failed without P2P, succeeded after enable)\n";
return true;
}

int main() {
// Find a platform with at least two GPU devices.
std::vector<device> gpus;
for (auto &plat : platform::get_platforms()) {
gpus = plat.get_devices(info::device_type::gpu);
if (gpus.size() >= 2)
break;
}

if (gpus.size() < 2) {
std::cout << "Test requires at least two GPU devices on the same platform. "
"Skipping.\n";
return 0;
}

device &dev0 = gpus[0];
device &dev1 = gpus[1];

std::cout << "Device 0: " << dev0.get_info<info::device::name>() << "\n";
std::cout << "Device 1: " << dev1.get_info<info::device::name>() << "\n";

// Both devices share a single context for cross-device USM.
context ctx({dev0, dev1});
queue q0(ctx, dev0);
queue q1(ctx, dev1);

// Allocation size must exceed the disjoint pool's MaxPoolableSize (4 MB for
// device memory) so that the allocation goes directly to the memory provider
// where residency is established.
constexpr size_t N = 2 * 1024 * 1024; // 2M ints = 8 MB

// Phase 1: dev1 reads dev0's memory (P2P: dev1 -> dev0).
std::cout << "Phase 1: dev1 reads dev0's memory (P2P: dev1 -> dev0).\n";
if (!dev1.ext_oneapi_can_access_peer(
dev0, ext::oneapi::peer_access::access_supported)) {
std::cout << "No hardware P2P support (dev1->dev0). Skipping.\n";
return 0;
}
if (!testP2PRead(ctx, q0, dev0, q1, dev1, N, 0x42,
"Phase 1 (dev1 reads dev0)"))
return 1;

// Phase 2 (opposite): dev0 reads dev1's memory (P2P: dev0 -> dev1).
std::cout
<< "Phase 2 (opposite): dev0 reads dev1's memory (P2P: dev0 -> dev1).\n";
if (!dev0.ext_oneapi_can_access_peer(
dev1, ext::oneapi::peer_access::access_supported)) {
std::cout << "No hardware P2P support (dev0->dev1). Skipping phase 2.\n";
std::cout << "PASS\n";
return 0;
}
if (!testP2PRead(ctx, q1, dev1, q0, dev0, N, 0x55,
"Phase 2 (dev0 reads dev1)"))
return 1;

// Phase 3: verify that memcpy fails after P2P is disabled.
std::cout << "Phase 3: verify memcpy fails after P2P is disabled.\n";
if (!testP2PReadFailsAfterDisable(ctx, q0, dev0, q1, dev1, N, 0x77,
"Phase 3 (dev1 reads dev0 after disable)"))
return 1;

// Phase 4: verify that memcpy fails without ever enabling P2P.
std::cout << "Phase 4: verify memcpy fails without ever enabling P2P.\n";
if (!testP2PReadFailsWithoutEnable(ctx, q0, dev0, q1, dev1, N, 0x99,
"Phase 4 (dev1 reads dev0 without P2P)"))
return 1;

// Phase 5: verify the transition from blocked to permitted.
std::cout << "Phase 5: verify memcpy fails without P2P then succeeds after "
"enabling it.\n";
if (!testP2PReadFailsThenSucceedsAfterEnable(
ctx, q0, dev0, q1, dev1, N, 0xAA,
"Phase 5 (dev1 reads dev0: fail then succeed)"))
return 1;

std::cout << "PASS\n";
return 0;
}
Loading
Loading