Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 74 additions & 74 deletions .github/workflows/ur-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,39 +56,39 @@ jobs:
# Extra native CPU jobs are here to force the loader to be used.
# UR will not use the loader if there is only one target.
include:
- name: L0
runner: UR_L0
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
- name: L0_V2
runner: UR_L0
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
- name: L0
runner: UR_L0
static: ON
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
- name: L0
runner: UR_L0
other_adapter: NATIVE_CPU
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
- name: L0_V2
runner: UR_L0_BMG
image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# - name: L0
# runner: UR_L0
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# - name: L0_V2
# runner: UR_L0
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# - name: L0
# runner: UR_L0
# static: ON
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# - name: L0
# runner: UR_L0
# other_adapter: NATIVE_CPU
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
# - name: L0_V2
# runner: UR_L0_BMG
# image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
- name: CUDA
runner: UR_CUDA
image_options: -u 1001 --privileged --cap-add SYS_ADMIN --gpus all
- name: OPENCL
runner: UR_OPENCL
docker_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest"
image_options: -u 1001 --device=/dev/dri --device=/dev/kfd --privileged --cap-add SYS_ADMIN
- name: OPENCL
runner: UR_OPENCL
other_adapter: NATIVE_CPU
docker_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest"
image_options: -u 1001 --device=/dev/dri --device=/dev/kfd --privileged --cap-add SYS_ADMIN
- name: NATIVE_CPU
runner: UR_NATIVE_CPU
docker_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest"
image_options: -u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN
# - name: OPENCL
# runner: UR_OPENCL
# docker_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest"
# image_options: -u 1001 --device=/dev/dri --device=/dev/kfd --privileged --cap-add SYS_ADMIN
# - name: OPENCL
# runner: UR_OPENCL
# other_adapter: NATIVE_CPU
# docker_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest"
# image_options: -u 1001 --device=/dev/dri --device=/dev/kfd --privileged --cap-add SYS_ADMIN
# - name: NATIVE_CPU
# runner: UR_NATIVE_CPU
# docker_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest"
# image_options: -u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN
uses: ./.github/workflows/ur-build-hw.yml
with:
adapter_name: ${{ matrix.name }}
Expand All @@ -100,48 +100,48 @@ jobs:
image_options: ${{ matrix.image_options || '' }}
install_igc_driver: ${{ contains(needs.detect_changes.outputs.filters, 'drivers') }}

offload_build:
name: Adapters (Offload)
needs: [detect_changes, source_checks]
if: ${{ !cancelled() && contains(needs.detect_changes.outputs.filters, 'ur_offload_adapter') }}
uses: ./.github/workflows/ur-build-offload.yml

macos:
name: MacOS build only
needs: [detect_changes, source_checks]
if: ${{ !cancelled() && contains(needs.detect_changes.outputs.filters, 'ur') }}
strategy:
matrix:
os: ['macos-latest']
runs-on: ${{matrix.os}}

steps:
- name: Checkout LLVM
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.1.0
with:
python-version: "3.10"

- name: Install prerequisites
working-directory: ${{github.workspace}}/unified-runtime
run: |
python3 -m pip install -r third_party/requirements.txt
python3 -m pip install -r third_party/requirements_testing.txt

- name: Install hwloc
run: brew install hwloc

- name: Configure Unified Runtime project
working-directory: ${{github.workspace}}/unified-runtime
run: >
cmake
-B${{github.workspace}}/build
-DUR_ENABLE_TRACING=ON
-DUR_DEVELOPER_MODE=ON
-DCMAKE_BUILD_TYPE=Release
-DUR_BUILD_TESTS=ON
-DUR_FORMAT_CPP_STYLE=ON
# offload_build:
# name: Adapters (Offload)
# needs: [detect_changes, source_checks]
# if: ${{ !cancelled() && contains(needs.detect_changes.outputs.filters, 'ur_offload_adapter') }}
# uses: ./.github/workflows/ur-build-offload.yml

- name: Build
run: cmake --build ${{github.workspace}}/build -j $(sysctl -n hw.logicalcpu)
# macos:
# name: MacOS build only
# needs: [detect_changes, source_checks]
# if: ${{ !cancelled() && contains(needs.detect_changes.outputs.filters, 'ur') }}
# strategy:
# matrix:
# os: ['macos-latest']
# runs-on: ${{matrix.os}}
#
# steps:
# - name: Checkout LLVM
# uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
#
# - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.1.0
# with:
# python-version: "3.10"
#
# - name: Install prerequisites
# working-directory: ${{github.workspace}}/unified-runtime
# run: |
# python3 -m pip install -r third_party/requirements.txt
# python3 -m pip install -r third_party/requirements_testing.txt
#
# - name: Install hwloc
# run: brew install hwloc
#
# - name: Configure Unified Runtime project
# working-directory: ${{github.workspace}}/unified-runtime
# run: >
# cmake
# -B${{github.workspace}}/build
# -DUR_ENABLE_TRACING=ON
# -DUR_DEVELOPER_MODE=ON
# -DCMAKE_BUILD_TYPE=Release
# -DUR_BUILD_TESTS=ON
# -DUR_FORMAT_CPP_STYLE=ON
#
# - name: Build
# run: cmake --build ${{github.workspace}}/build -j $(sysctl -n hw.logicalcpu)
29 changes: 24 additions & 5 deletions unified-runtime/source/adapters/cuda/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,11 +573,30 @@ urUSMPoolTrimToExp(ur_context_handle_t hContext, ur_device_handle_t hDevice,
return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t,
void *pDst,
const void *pSrc,
size_t Size) {
UR_CHECK_ERROR(cuMemcpy((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size));
// Experimental USM entry point: copies Size bytes from pSrc to pDst and only
// returns after the copy has been waited on.
//
// Parameters:
//   hContext - context handle; its first device (getDevices().front()) is
//              used to construct the ScopedContext that sets the context
//              for the duration of the call.
//   pDst     - destination address, passed to the driver as a CUdeviceptr.
//   pSrc     - source address, passed to the driver as a CUdeviceptr.
//   Size     - number of bytes to copy.
//
// Returns UR_RESULT_SUCCESS when every driver call below succeeds. Failures
// are handled by UR_CHECK_ERROR, which is defined outside this block.
UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(
ur_context_handle_t hContext, void *pDst, const void *pSrc, size_t Size) {
// Why not a plain cuMemcpy: for device-to-device transfers cuMemcpy may not
// synchronize with the host or with other streams, so the call can return
// before the copy is done (see the CUDA driver API documentation on
// synchronization behavior).
//
// The sequence below is therefore:
//   1. cuCtxSynchronize    - wait for operations already pending in the
//                            context.
//   2. cuMemcpyAsync       - enqueue the copy on the default stream (0).
//   3. cuStreamSynchronize - wait for that stream so the copy has completed
//                            before returning.
//
// NOTE(review): only the first device of the context is used here; confirm
// this is sufficient when the context spans several devices.
ScopedContext Active(hContext->getDevices().front());

// Step 1: ensure all pending operations in all streams have completed.
UR_CHECK_ERROR(cuCtxSynchronize());

// Step 2: issue the copy through the async API on the default stream.
UR_CHECK_ERROR(
cuMemcpyAsync((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size, 0));

// Step 3: block until the default stream, and thus the copy, has completed.
UR_CHECK_ERROR(cuStreamSynchronize(0));

return UR_RESULT_SUCCESS;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_MULTI_QUEUE(urUSMContextMemcpyExpTestDevice);

TEST_P(urUSMContextMemcpyExpTestDevice, Success) {
// https://github.com/intel/llvm/issues/19688
UUR_KNOWN_FAILURE_ON(uur::CUDA{});
// Fixed in the CUDA adapter by synchronizing the context before the copy and
// the default stream after it.
ASSERT_SUCCESS(
urUSMContextMemcpyExp(context, dst_ptr, src_ptr, allocation_size));
verifyData();
Expand Down
Loading