From 3d67c198c4671cba5099c985fe7998f8062e54e5 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 23 Apr 2026 15:03:03 +0200 Subject: [PATCH 1/8] [SYCL] Embed fsycl-id-queries-range in device image as property. --- clang/lib/Driver/ToolChains/Clang.cpp | 7 + .../SYCLPostLink/ComputeModuleRuntimeInfo.h | 1 + .../SYCLPostLink/ComputeModuleRuntimeInfo.cpp | 4 + llvm/tools/sycl-post-link/sycl-post-link.cpp | 23 ++- .../sycl/detail/id_queries_fit_in_int.hpp | 173 ------------------ sycl/include/sycl/handler.hpp | 7 - sycl/include/sycl/queue.hpp | 16 -- sycl/source/detail/device_binary_image.cpp | 4 +- sycl/source/detail/ndrange_desc.hpp | 40 ++++ sycl/source/detail/scheduler/commands.cpp | 60 +++++- .../include_deps/sycl_detail_core.hpp.cpp | 1 - .../sycl_khr_includes_handler.hpp.cpp | 1 - .../sycl_khr_includes_kernel_bundle.hpp.cpp | 1 - .../sycl_khr_includes_queue.hpp.cpp | 1 - .../sycl_khr_includes_reduction.hpp.cpp | 1 - .../sycl_khr_includes_stream.hpp.cpp | 1 - .../sycl_khr_includes_usm.hpp.cpp | 1 - sycl/unittests/CMakeLists.txt | 1 - sycl/unittests/range/CMakeLists.txt | 3 - sycl/unittests/range/int/CMakeLists.txt | 11 -- .../range/int/IdQueriesRangeValidation.cpp | 83 --------- sycl/unittests/range/size_t/CMakeLists.txt | 4 - .../range/size_t/IdQueriesRangeValidation.cpp | 51 ------ sycl/unittests/range/uint/CMakeLists.txt | 11 -- .../range/uint/IdQueriesRangeValidation.cpp | 82 --------- 25 files changed, 134 insertions(+), 454 deletions(-) delete mode 100644 sycl/include/sycl/detail/id_queries_fit_in_int.hpp delete mode 100644 sycl/unittests/range/CMakeLists.txt delete mode 100644 sycl/unittests/range/int/CMakeLists.txt delete mode 100644 sycl/unittests/range/int/IdQueriesRangeValidation.cpp delete mode 100644 sycl/unittests/range/size_t/CMakeLists.txt delete mode 100644 sycl/unittests/range/size_t/IdQueriesRangeValidation.cpp delete mode 100644 sycl/unittests/range/uint/CMakeLists.txt delete mode 100644 sycl/unittests/range/uint/IdQueriesRangeValidation.cpp diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2ebb3541b43c5..ebda5960ce431 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -11147,6 +11147,13 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (allowDeviceImageDependencies(TCArgs)) addArgs(PostLinkArgs, TCArgs, {"-allow-device-image-dependencies"}); + // Forward -fsycl-id-queries-range= to sycl-post-link. + if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_id_queries_range_EQ)) { + SmallString<64> IdQueriesRangeOpt("-id-queries-range="); + IdQueriesRangeOpt += A->getValue(); + addArgs(PostLinkArgs, TCArgs, {IdQueriesRangeOpt.str()}); + } + // For bfloat16 conversions LLVM IR devicelib, we only need to embed it // when non-AOT compilation is used. if (TC.getTriple().isSPIROrSPIRV() && !TC.getTriple().isSPIRAOT()) { diff --git a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h index df95979c4d7ee..e25c688cf2554 100644 --- a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h @@ -27,6 +27,7 @@ struct GlobalBinImageProps { bool EmitExportedSymbols; bool EmitImportedSymbols; bool EmitDeviceGlobalPropSet; + int IdQueriesRange; // 0 = int, 1 = uint, 2 = size_t (default) }; bool isModuleUsingAsan(const Module &M); bool isModuleUsingMsan(const Module &M); diff --git a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp index 3a517814a9020..aa82392d6c98b 100644 --- a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp @@ -377,6 +377,10 @@ PropSetRegTy computeModuleProperties(const Module &M, if (OptLevel != -1) PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "optLevel", OptLevel); } + { + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "idQueriesRange", + GlobProps.IdQueriesRange); + } { std::vector> ArgPos = getKernelNamesUsingImplicitLocalMem(M); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index fe4c215c33bbe..add1bb4bee9c0 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -241,6 +241,19 @@ cl::opt AllowDeviceImageDependencies{ cl::desc("Allow dependencies between device images"), cl::cat(PostLinkCat), cl::init(false)}; +enum IdQueriesRangeMode { IDQR_INT = 0, IDQR_UINT = 1, IDQR_SIZE_T = 2 }; + +cl::opt IdQueriesRange{ + "id-queries-range", + cl::desc("Specify the assumption about SYCL ID query value ranges"), + cl::Optional, + cl::init(IDQR_INT), + cl::values( + clEnumValN(IDQR_INT, "int", "ID query values fit within MAX_INT"), + clEnumValN(IDQR_UINT, "uint", "ID query values fit within MAX_UINT"), + clEnumValN(IDQR_SIZE_T, "size_t", "No restriction on ID query values")), + cl::cat(PostLinkCat)}; + struct IrPropSymFilenameTriple { std::string Ir; std::string Prop; @@ -311,9 +324,13 @@ Error saveModule( continue; auto CopyTriple = BaseTriple; if (DoPropGen) { - GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, - EmitKernelNames, EmitExportedSymbols, - EmitImportedSymbols, DeviceGlobals}; + GlobalBinImageProps Props = {EmitKernelParamInfo, + EmitProgramMetadata, + EmitKernelNames, + EmitExportedSymbols, + EmitImportedSymbols, + DeviceGlobals, + static_cast(IdQueriesRange)}; StringRef Target = OutputFile.Target; std::string NewSuff = Suffix.str(); if (!Target.empty()) diff --git a/sycl/include/sycl/detail/id_queries_fit_in_int.hpp b/sycl/include/sycl/detail/id_queries_fit_in_int.hpp deleted file mode 100644 index 47281b173149d..0000000000000 --- a/sycl/include/sycl/detail/id_queries_fit_in_int.hpp +++ /dev/null @@ -1,173 +0,0 @@ -//==-------------------- id_queries_fit_in_int.hpp -------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Our SYCL implementation has a special mode (introduced for performance -// reasons) in which it assume that all result of all id queries (i.e. global -// sizes, work-group sizes, local id, global id, etc.) fit within MAX_INT. -// -// This header contains corresponding helper functions related to this mode. -// -//===----------------------------------------------------------------------===// - -#pragma once - -// We only use those helpers to throw an exception if user selected a range that -// would violate the assumption. That can only happen on host and therefore to -// optimize our headers, the helpers below are only available for host -// compilation. -#ifndef __SYCL_DEVICE_ONLY__ - -#include -#include -#include - -#include -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - -#if __SYCL_ID_QUERIES_FIT_IN_INT__ || __SYCL_ID_QUERIES_FIT_IN_UINT__ -#if __SYCL_ID_QUERIES_FIT_IN_INT__ -constexpr static const char *Msg = - "Provided range and/or offset does not fit in int. Pass " - "`-fsycl-id-queries-range=size_t' to remove this limit."; -#else // __SYCL_ID_QUERIES_FIT_IN_UINT__ -constexpr static const char *Msg = - "Provided range and/or offset does not fit in unsigned int. Pass " - "`-fsycl-id-queries-range=size_t' to remove this limit."; -#endif - -template -typename std::enable_if_t::value || - std::is_same::value> -checkValueRangeImpl(ValT V) { -#if __SYCL_ID_QUERIES_FIT_IN_INT__ - static constexpr size_t Limit = - static_cast((std::numeric_limits::max)()); -#else // __SYCL_ID_QUERIES_FIT_IN_UINT__ - static constexpr size_t Limit = - static_cast((std::numeric_limits::max)()); -#endif - if (V > Limit) - throw sycl::exception(make_error_code(errc::nd_range), Msg); -} - -inline void checkMulOverflow(size_t a, size_t b) { -#ifndef _MSC_VER -#if __SYCL_ID_QUERIES_FIT_IN_INT__ - int Product; -#elif __SYCL_ID_QUERIES_FIT_IN_UINT__ - unsigned int Product; -#endif - if (__builtin_mul_overflow(a, b, &Product)) { - throw sycl::exception(make_error_code(errc::nd_range), Msg); - } -#else - checkValueRangeImpl(a); - checkValueRangeImpl(b); - size_t Product = a * b; - checkValueRangeImpl(Product); -#endif -} - -inline void checkMulOverflow(size_t a, size_t b, size_t c) { -#ifndef _MSC_VER -#if __SYCL_ID_QUERIES_FIT_IN_INT__ - int Product; -#elif __SYCL_ID_QUERIES_FIT_IN_UINT__ - unsigned int Product; -#endif - if (__builtin_mul_overflow(a, b, &Product) || - __builtin_mul_overflow(Product, c, &Product)) { - throw sycl::exception(make_error_code(errc::nd_range), Msg); - } -#else - checkValueRangeImpl(a); - checkValueRangeImpl(b); - size_t Product = a * b; - checkValueRangeImpl(Product); - - checkValueRangeImpl(c); - Product *= c; - checkValueRangeImpl(Product); -#endif -} - -// TODO: Remove this function when offsets are removed. -template -inline bool hasNonZeroOffset(const sycl::nd_range &V) { - size_t Product = 1; - for (int Dim = 0; Dim < Dims; ++Dim) { - Product *= V.get_offset()[Dim]; - } - return (Product != 0); -} -#endif // __SYCL_ID_QUERIES_FIT_IN_INT__ || __SYCL_ID_QUERIES_FIT_IN_UINT__ - -template -void checkValueRange([[maybe_unused]] const sycl::range &V) { -#if __SYCL_ID_QUERIES_FIT_IN_INT__ || __SYCL_ID_QUERIES_FIT_IN_UINT__ - if constexpr (Dims == 1) { - // For 1D range, just check the value against MAX_INT. - checkValueRangeImpl(V[0]); - } else if constexpr (Dims == 2) { - // For 2D range, check if computing the linear range overflows. - checkMulOverflow(V[0], V[1]); - } else if constexpr (Dims == 3) { - // For 3D range, check if computing the linear range overflows. - checkMulOverflow(V[0], V[1], V[2]); - } -#endif -} - -template -void checkValueRange([[maybe_unused]] const sycl::id &V) { -#if __SYCL_ID_QUERIES_FIT_IN_INT__ || __SYCL_ID_QUERIES_FIT_IN_UINT__ - // An id cannot be linearized without a range, so check each component. - for (int Dim = 0; Dim < Dims; ++Dim) { - checkValueRangeImpl(V[Dim]); - } -#endif -} - -template -void checkValueRange([[maybe_unused]] const range &R, - [[maybe_unused]] const id &O) { -#if __SYCL_ID_QUERIES_FIT_IN_INT__ || __SYCL_ID_QUERIES_FIT_IN_UINT__ - checkValueRange(R); - checkValueRange(O); - - for (size_t Dim = 0; Dim < Dims; ++Dim) { - unsigned long long Sum = R[Dim] + O[Dim]; - checkValueRangeImpl(Sum); - } -#endif -} - -template -void checkValueRange([[maybe_unused]] const sycl::nd_range &V) { -#if __SYCL_ID_QUERIES_FIT_IN_INT__ || __SYCL_ID_QUERIES_FIT_IN_UINT__ - // In an ND-range, we only need to check the global linear size, because: - // - The linear size must be greater than any of the dimensions. - // - Each dimension of the global range is larger than the local range. - // TODO: Remove this branch when offsets are removed. - if (hasNonZeroOffset(V)) /*[[unlikely]]*/ { - checkValueRange(V.get_global_range(), V.get_offset()); - } else { - checkValueRange(V.get_global_range()); - } -#endif -} - -} // namespace detail -} // namespace _V1 -} // namespace sycl - -#endif diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 3e9a4125987f9..c29f0b46ea2d0 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -875,7 +874,6 @@ class __SYCL_EXPORT handler { // kernel use items/ids in the user range, which means that // ID range assumptions can still be violated. So check the bounds // of the user range, instead of the rounded range. - detail::checkValueRange(UserRange); convertToRangeViewAndSetDescriptor(RoundedRange); StoreLambda( std::move(Wrapper)); @@ -903,7 +901,6 @@ class __SYCL_EXPORT handler { verifyUsedKernelBundleInternal(Info.Name); setKernelLaunchProperties( detail::extractKernelProperties(Props)); - detail::checkValueRange(UserRange); convertToRangeViewAndSetDescriptor(std::move(UserRange)); StoreLambda( std::move(KernelFunc)); @@ -929,7 +926,6 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); setDeviceKernelInfo(std::move(Kernel)); - detail::checkValueRange(NumWorkItems); convertToRangeViewAndSetDescriptor(std::move(NumWorkItems)); setKernelLaunchProperties(detail::extractKernelProperties(Props)); extractArgsAndReqs(); @@ -952,7 +948,6 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); setDeviceKernelInfo(std::move(Kernel)); - detail::checkValueRange(NDRange); convertToRangeViewAndSetDescriptor(std::move(NDRange)); setKernelLaunchProperties(detail::extractKernelProperties(Props)); extractArgsAndReqs(); @@ -988,7 +983,6 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); verifyUsedKernelBundleInternal(Info.Name); - detail::checkValueRange(params...); if constexpr (SetNumWorkGroups) { convertToRangeViewAndSetDescriptor(std::move(params)..., /*SetNumWorkGroups=*/true); @@ -1457,7 +1451,6 @@ class __SYCL_EXPORT handler { #ifndef __SYCL_DEVICE_ONLY__ throwIfActionIsCreated(); setDeviceKernelInfo(std::move(Kernel)); - detail::checkValueRange(NumWorkItems, WorkItemOffset); setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset)); extractArgsAndReqs(); #endif diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index e2ca6b9e8786a..8dcc007659048 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -18,7 +18,6 @@ #include // for code_location #include // for __SYCL2020_DEP... #include // for __SYCL_EXPORT -#include // for checkValueRange #include // for is_queue_info_... #include // for KernelInfo #include @@ -3988,10 +3987,6 @@ auto submit_kernel_direct_parallel_for(const queue &Queue, nd_range Range, "must be either sycl::nd_item or be convertible from sycl::nd_item"); using TransformedArgType = sycl::nd_item; -#ifndef __SYCL_DEVICE_ONLY__ - detail::checkValueRange(Range); -#endif - return submit_kernel_direct( @@ -4058,14 +4053,6 @@ auto submit_kernel_direct_parallel_for(const queue &Queue, range Range, using KTypeWrapper = decltype(Wrapper); using KName = std::conditional_t::value, KTypeWrapper, NameWT>; -#ifndef __SYCL_DEVICE_ONLY__ - // We are executing over the rounded range, but there are still - // items/ids that are constructed in the range rounded - // kernel, use items/ids in the user range, which means that - // ID range assumptions can still be violated. So check the bounds - // of the user range, instead of the rounded range. - detail::checkValueRange(Range); -#endif return submit_kernel_direct( @@ -4076,9 +4063,6 @@ auto submit_kernel_direct_parallel_for(const queue &Queue, range Range, // SYCL_LANGUAGE_VERSION >= 202012L { #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ -#ifndef __SYCL_DEVICE_ONLY__ - detail::checkValueRange(Range); -#endif return submit_kernel_direct( diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 1d38a2ee6e9a4..9653f2030b598 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -592,7 +592,9 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getMiscProperties(); }, /*IgnoreDuplicates=*/true, /*DropProperty=*/ - [](std::string_view PropertyName) { return PropertyName == "optLevel"; }); + [](std::string_view PropertyName) { + return PropertyName == "optLevel" || PropertyName == "idQueriesRange"; + }); std::array *, diff --git a/sycl/source/detail/ndrange_desc.hpp b/sycl/source/detail/ndrange_desc.hpp index 0361de581cde7..4c1f383c39c72 100644 --- a/sycl/source/detail/ndrange_desc.hpp +++ b/sycl/source/detail/ndrange_desc.hpp @@ -13,6 +13,7 @@ #include #include +#include namespace sycl { inline namespace _V1 { @@ -99,6 +100,45 @@ class NDRDescT { std::array ClusterDimensions{1, 1, 1}; size_t Dims = 0; + // Returns the total number of global workgroups for the kernel execution + // along all dimensions, or the maximum value of size_t if overflow occurs. + uint64_t getNumGlobalWorkGroups() const { + auto getProductAndCheckForOverflow = [](const size_t &a, const size_t &b, + const size_t &c) -> uint64_t { + uint64_t Product = 0; + uint64_t MaxSizeTVal = std::numeric_limits::max(); + if (a == 0 || b == 0 || c == 0) { + return 0; + } + +#ifndef _MSC_VER + if (__builtin_mul_overflow(a, b, &Product) || + __builtin_mul_overflow(Product, c, &Product)) { + return MaxSizeTVal; // Overflow occurred, return max possible value. + } +#else + if (b > MaxSizeTVal / a) { + return MaxSizeTVal; // Overflow occurred, return max possible value. + } + Product = a * b; + + if (c > MaxSizeTVal / Product) { + return MaxSizeTVal; // Overflow occurred, return max possible value. + } + Product *= c; +#endif + + return Product; + }; + + if (NumWorkGroups[0] != 0) + return getProductAndCheckForOverflow(NumWorkGroups[0], NumWorkGroups[1], + NumWorkGroups[2]); + else + return getProductAndCheckForOverflow(GlobalSize[0], GlobalSize[1], + GlobalSize[2]); + } + private: void init(const size_t *N, bool SetNumWorkGroups) { if (SetNumWorkGroups) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 447ff0225eb03..29c0fe392e765 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2757,6 +2757,7 @@ void enqueueImpKernel( std::shared_ptr SyclKernelImpl; device_image_impl *DeviceImageImpl = nullptr; FastKernelCacheValPtr KernelCacheVal; + sycl_device_binary_property IdQueryRangeProp; if (nullptr != MSyclKernel) { assert(MSyclKernel->get_info() == @@ -2772,17 +2773,25 @@ void enqueueImpKernel( // their duplication in such cases. KernelMutex = &MSyclKernel->getNoncacheableEnqueueMutex(); EliminatedArgMask = MSyclKernel->getKernelArgMask(); + + if (!MSyclKernel->isInteropOrSourceBased()) { + auto &DeviceImage = detail::ProgramManager::getInstance().getDeviceImage( + DeviceKernelInfo.Name, ContextImpl, DeviceImpl); + IdQueryRangeProp = DeviceImage.getProperty("idQueriesRange"); + } } else if ((SyclKernelImpl = KernelBundleImplPtr ? KernelBundleImplPtr->tryGetKernel(DeviceKernelInfo.Name) : std::shared_ptr{nullptr})) { Kernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); - Program = DeviceImageImpl->get_ur_program(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); KernelMutex = SyclKernelImpl->getCacheMutex(); + + IdQueryRangeProp = + DeviceImageImpl->get_bin_image_ref()->getProperty("idQueriesRange"); } else { KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, DeviceKernelInfo, NDRDesc); @@ -2790,6 +2799,11 @@ void enqueueImpKernel( KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; EliminatedArgMask = KernelCacheVal->MKernelArgMask; + + const RTDeviceBinaryImage &DeviceImage = + detail::ProgramManager::getInstance().getDeviceImage( + DeviceKernelInfo.Name, ContextImpl, DeviceImpl); + IdQueryRangeProp = DeviceImage.getProperty("idQueriesRange"); } // We may need more events for the launch, so we make another reference. @@ -2810,6 +2824,50 @@ void enqueueImpKernel( EventsWaitList = std::move(EventsWithDeviceGlobalInits); } + // Get Max number of work groups that this kernel can accept. + { + // Skip the check for interop kernels. + if (!(MSyclKernel && MSyclKernel->isInterop())) { + uint64_t MaxRange; + string ErrMsg; + uint32_t IdQueriesRange = + IdQueryRangeProp ? DeviceBinaryProperty(IdQueryRangeProp).asUint32() + : 0; + switch (IdQueriesRange) { + case 1: + MaxRange = static_cast(std::numeric_limits::max()); + ErrMsg = + "The kernel was compiled with -fsycl-id-queries-range=uint, but " + "the " + "provided " + "range/offset exceeds the maximum value storable in a uint32_t. " + "Either reduce the range/offset or " + "recompile the kernel with -fsycl-id-queries-range=size_t."; + break; + case 2: + MaxRange = static_cast(std::numeric_limits::max()); + ErrMsg = "The provided range/offset exceeds the maximum " + "value storable in a size_t, " + "which is the maximum value supported by DPCPP."; + break; + case 0: + default: + MaxRange = static_cast(std::numeric_limits::max()); + ErrMsg = + "The kernel was compiled with -fsycl-id-queries-range=int, but the " + "provided " + "range/offset exceeds the maximum value storable in a int. Either " + "reduce the range/offset or " + "recompile the kernel with -fsycl-id-queries-range=[uint|size_t]."; + } + + if (NDRDesc.getNumGlobalWorkGroups() > MaxRange) { + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + ErrMsg.c_str()); + } + } + } + ur_result_t Error = UR_RESULT_SUCCESS; { // When KernelMutex is null, this means that in-memory caching is diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 4b2ed7b5f557c..92e4f03883da3 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -104,7 +104,6 @@ // CHECK-NEXT: kernel_handler.hpp // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: detail/range_rounding.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp index 41a55bb6bc72e..5804c1d6ca9c5 100644 --- a/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_handler.hpp.cpp @@ -103,7 +103,6 @@ // CHECK-NEXT: detail/compile_time_kernel_info.hpp // CHECK-NEXT: detail/kernel_desc.hpp // CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: detail/cg_types.hpp // CHECK-NEXT: detail/host_profiling_info.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp index 6ca81fb4900ad..2c8ec7fa398e7 100644 --- a/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_kernel_bundle.hpp.cpp @@ -111,7 +111,6 @@ // CHECK-NEXT: CL/cl_ext.h // CHECK-NEXT: detail/get_device_kernel_info.hpp // CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: detail/cg_types.hpp // CHECK-NEXT: detail/host_profiling_info.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index d55ffd18b5702..c33a38cc7eeec 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -108,7 +108,6 @@ // CHECK-NEXT: kernel_handler.hpp // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: detail/range_rounding.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index ad9af2fa578be..af3e769096090 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -149,7 +149,6 @@ // CHECK-NEXT: detail/get_device_kernel_info.hpp // CHECK-NEXT: detail/compile_time_kernel_info.hpp // CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: detail/cg_types.hpp // CHECK-NEXT: detail/host_profiling_info.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp index 8ff935e78abf4..7134bf4a9a9b2 100644 --- a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp @@ -129,7 +129,6 @@ // CHECK-NEXT: detail/compile_time_kernel_info.hpp // CHECK-NEXT: detail/kernel_desc.hpp // CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: detail/cg_types.hpp // CHECK-NEXT: detail/host_profiling_info.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index 019772ccdbba9..7c5602c7dacb5 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -128,7 +128,6 @@ // CHECK-NEXT: kernel_handler.hpp // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: detail/range_rounding.hpp diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 0370caa29e4ee..9a322ae24fbaa 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -63,5 +63,4 @@ if (SYCL_ENABLE_XPTI_TRACING AND NOT WIN32) endif() add_subdirectory(sampler) add_subdirectory(reduction) -add_subdirectory(range) add_subdirectory(OneAPIDeviceSelector) diff --git a/sycl/unittests/range/CMakeLists.txt b/sycl/unittests/range/CMakeLists.txt deleted file mode 100644 index 4aa62ee3ef5e5..0000000000000 --- a/sycl/unittests/range/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_subdirectory(int) -add_subdirectory(uint) -add_subdirectory(size_t) diff --git a/sycl/unittests/range/int/CMakeLists.txt b/sycl/unittests/range/int/CMakeLists.txt deleted file mode 100644 index 257ae2ea8e2de..0000000000000 --- a/sycl/unittests/range/int/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_sycl_unittest(IdQueriesRangeValidationInt SHARED - IdQueriesRangeValidation.cpp -) -target_compile_definitions(IdQueriesRangeValidationInt-Non_Preview_Tests PRIVATE - __SYCL_ID_QUERIES_FIT_IN_INT__=1 -) -if(SYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB) - target_compile_definitions(IdQueriesRangeValidationInt-Preview_Tests PRIVATE - __SYCL_ID_QUERIES_FIT_IN_INT__=1 - ) -endif() diff --git a/sycl/unittests/range/int/IdQueriesRangeValidation.cpp b/sycl/unittests/range/int/IdQueriesRangeValidation.cpp deleted file mode 100644 index cfec17bf833d4..0000000000000 --- a/sycl/unittests/range/int/IdQueriesRangeValidation.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//==---- IdQueriesRangeValidation.cpp - Range validation unit tests -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// These tests validate the range checking logic for INT mode by directly -// calling the checkValueRange functions from -// sycl/detail/id_queries_fit_in_int.hpp. The __SYCL_ID_QUERIES_FIT_IN_INT__ -// macro is defined via target_compile_definitions in the CMakeLists.txt file. - -#include - -#include - -#include - -using namespace sycl; - -TEST(IdQueriesRangeValidation, Int_Range1D_AtLimit) { - range<1> r(INT_MAX); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, Int_Range1D_ExceedsLimit) { - // Skip if size_t can't hold values larger than INT_MAX - if constexpr (sizeof(size_t) <= sizeof(int)) { - GTEST_SKIP() << "size_t too small to test overflow beyond INT_MAX"; - } - range<1> r(static_cast(INT_MAX) + 1); - EXPECT_THROW(detail::checkValueRange(r), exception); -} - -TEST(IdQueriesRangeValidation, Int_Range2D_ProductExceedsLimit) { - // 46341 * 46341 = 2147488281 > INT_MAX (2147483647) - range<2> r(46341, 46341); - EXPECT_THROW(detail::checkValueRange(r), exception); -} - -TEST(IdQueriesRangeValidation, Int_Range2D_ProductAtLimit) { - // 46340 * 46340 = 2147395600 < INT_MAX - range<2> r(46340, 46340); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, Int_Range3D_ProductExceedsLimit) { - // 1290 * 1290 * 1290 = 2146689000 < INT_MAX, but 1291^3 > INT_MAX - range<3> r(1291, 1291, 1291); - EXPECT_THROW(detail::checkValueRange(r), exception); -} - -TEST(IdQueriesRangeValidation, Int_Range3D_ProductAtLimit) { - range<3> r(1290, 1290, 1290); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, Int_Id_ComponentExceedsLimit) { - // Skip if size_t can't hold values larger than INT_MAX - if constexpr (sizeof(size_t) <= sizeof(int)) { - GTEST_SKIP() << "size_t too small to test overflow beyond INT_MAX"; - } - id<3> offset(1, static_cast(INT_MAX) + 1, 1); - EXPECT_THROW(detail::checkValueRange(offset), exception); -} - -TEST(IdQueriesRangeValidation, Int_Id_ComponentAtLimit) { - id<3> offset(1, INT_MAX, 1); - EXPECT_NO_THROW(detail::checkValueRange(offset)); -} - -TEST(IdQueriesRangeValidation, Int_RangeWithOffset_SumExceedsLimit) { - range<1> r(INT_MAX); - id<1> offset(1); - EXPECT_THROW(detail::checkValueRange(r, offset), exception); -} - -TEST(IdQueriesRangeValidation, Int_RangeWithOffset_SumAtLimit) { - range<1> r(INT_MAX - 1); - id<1> offset(1); - EXPECT_NO_THROW(detail::checkValueRange(r, offset)); -} diff --git a/sycl/unittests/range/size_t/CMakeLists.txt b/sycl/unittests/range/size_t/CMakeLists.txt deleted file mode 100644 index 4cec8c9a6ec9a..0000000000000 --- a/sycl/unittests/range/size_t/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_sycl_unittest(IdQueriesRangeValidationSizeT SHARED - IdQueriesRangeValidation.cpp -) -# No compile definitions needed - size_t mode is the default when neither macro is defined diff --git a/sycl/unittests/range/size_t/IdQueriesRangeValidation.cpp b/sycl/unittests/range/size_t/IdQueriesRangeValidation.cpp deleted file mode 100644 index 23eb4ffa69cf8..0000000000000 --- a/sycl/unittests/range/size_t/IdQueriesRangeValidation.cpp +++ /dev/null @@ -1,51 +0,0 @@ -//==---- IdQueriesRangeValidation.cpp - Range validation unit tests -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// These tests validate the range checking logic for size_t mode by directly -// calling the checkValueRange functions from -// sycl/detail/id_queries_fit_in_int.hpp. In size_t mode, neither -// __SYCL_ID_QUERIES_FIT_IN_INT__ nor -// __SYCL_ID_QUERIES_FIT_IN_UINT__ is defined (no validation occurs). - -#include - -#include - -#include - -using namespace sycl; - -// Tests for size_t mode (no validation macros defined) -TEST(IdQueriesRangeValidation, SizeT_NoValidation_LargeRange) { - // Skip if size_t can't hold values larger than UINT_MAX - if constexpr (sizeof(size_t) <= sizeof(unsigned int)) { - GTEST_SKIP() << "size_t too small to test values beyond UINT_MAX"; - } - // In size_t mode, no validation occurs, so even huge values should not throw - range<1> r(static_cast(UINT_MAX) + 1); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, SizeT_NoValidation_LargeId) { - // Skip if size_t can't hold values larger than UINT_MAX - if constexpr (sizeof(size_t) <= sizeof(unsigned int)) { - GTEST_SKIP() << "size_t too small to test values beyond UINT_MAX"; - } - id<3> offset(1, static_cast(UINT_MAX) + 1, 1); - EXPECT_NO_THROW(detail::checkValueRange(offset)); -} - -TEST(IdQueriesRangeValidation, SizeT_NoValidation_LargeRangeWithOffset) { - // Skip if size_t can't hold values larger than UINT_MAX - if constexpr (sizeof(size_t) <= sizeof(unsigned int)) { - GTEST_SKIP() << "size_t too small to test values beyond UINT_MAX"; - } - range<1> r(static_cast(UINT_MAX) + 1); - id<1> offset(1); - EXPECT_NO_THROW(detail::checkValueRange(r, offset)); -} diff --git a/sycl/unittests/range/uint/CMakeLists.txt b/sycl/unittests/range/uint/CMakeLists.txt deleted file mode 100644 index bfa58a63d9db2..0000000000000 --- a/sycl/unittests/range/uint/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_sycl_unittest(IdQueriesRangeValidationUInt SHARED - IdQueriesRangeValidation.cpp -) -target_compile_definitions(IdQueriesRangeValidationUInt-Non_Preview_Tests PRIVATE - __SYCL_ID_QUERIES_FIT_IN_UINT__=1 -) -if(SYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB) - target_compile_definitions(IdQueriesRangeValidationUInt-Preview_Tests PRIVATE - __SYCL_ID_QUERIES_FIT_IN_UINT__=1 - ) -endif() diff --git a/sycl/unittests/range/uint/IdQueriesRangeValidation.cpp b/sycl/unittests/range/uint/IdQueriesRangeValidation.cpp deleted file mode 100644 index 9cdcfd8027cf8..0000000000000 --- a/sycl/unittests/range/uint/IdQueriesRangeValidation.cpp +++ /dev/null @@ -1,82 +0,0 @@ -//==---- IdQueriesRangeValidation.cpp - Range validation unit tests -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// These tests validate the range checking logic for UINT mode by directly -// calling the checkValueRange functions from -// sycl/detail/id_queries_fit_in_int.hpp. The __SYCL_ID_QUERIES_FIT_IN_UINT__ -// macro is defined via target_compile_definitions in the CMakeLists.txt file. - -#include - -#include - -#include - -using namespace sycl; - -TEST(IdQueriesRangeValidation, UInt_Range1D_AtLimit) { - range<1> r(UINT_MAX); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, UInt_Range1D_ExceedsLimit) { - // Skip if size_t can't hold values larger than UINT_MAX - if constexpr (sizeof(size_t) <= sizeof(unsigned int)) { - GTEST_SKIP() << "size_t too small to test overflow beyond UINT_MAX"; - } - range<1> r(static_cast(UINT_MAX) + 1); - EXPECT_THROW(detail::checkValueRange(r), exception); -} - -TEST(IdQueriesRangeValidation, UInt_Range1D_AboveIntMax) { - // Skip if size_t can't hold values larger than INT_MAX - if constexpr (sizeof(size_t) <= sizeof(int)) { - GTEST_SKIP() << "size_t too small to test values above INT_MAX"; - } - // This should succeed in UINT mode but would fail in INT mode - range<1> r(static_cast(INT_MAX) + 1); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, UInt_Range2D_ProductExceedsLimit) { - // 65536 * 65536 = 4294967296 > UINT_MAX (4294967295) - range<2> r(65536, 65536); - EXPECT_THROW(detail::checkValueRange(r), exception); -} - -TEST(IdQueriesRangeValidation, UInt_Range2D_ProductAtLimit) { - // 65535 * 65535 = 4294836225 < UINT_MAX - range<2> r(65535, 65535); - EXPECT_NO_THROW(detail::checkValueRange(r)); -} - -TEST(IdQueriesRangeValidation, UInt_Id_ComponentExceedsLimit) { - // Skip if size_t can't hold values larger than UINT_MAX - if constexpr (sizeof(size_t) <= sizeof(unsigned int)) { - GTEST_SKIP() << "size_t too small to test overflow beyond UINT_MAX"; - } - id<3> offset(1, static_cast(UINT_MAX) + 1, 1); - EXPECT_THROW(detail::checkValueRange(offset), exception); -} - -TEST(IdQueriesRangeValidation, UInt_Id_ComponentAtLimit) { - id<3> offset(1, UINT_MAX, 1); - EXPECT_NO_THROW(detail::checkValueRange(offset)); -} - -TEST(IdQueriesRangeValidation, UInt_RangeWithOffset_SumExceedsLimit) { - range<1> r(UINT_MAX); - id<1> offset(1); - EXPECT_THROW(detail::checkValueRange(r, offset), exception); -} - -TEST(IdQueriesRangeValidation, UInt_RangeWithOffset_SumAtLimit) { - range<1> r(UINT_MAX - 1); - id<1> offset(1); - EXPECT_NO_THROW(detail::checkValueRange(r, offset)); -} From f90f6c46f1fade94b001a4bd9217df283e76d2ff Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 11 May 2026 10:49:27 -0700 Subject: [PATCH 2/8] Fix calculation of work group numbers. Add E2E tests. --- sycl/source/detail/device_binary_image.cpp | 4 +- .../detail/error_handling/error_handling.cpp | 18 +++++ sycl/source/detail/ndrange_desc.hpp | 57 +++++++-------- sycl/source/detail/scheduler/commands.cpp | 13 ++-- sycl/test-e2e/Basic/large_range_error.cpp | 72 +++++++++++++++++++ .../level_zero/helpers/kernel_helpers.cpp | 45 +++++------- 6 files changed, 144 insertions(+), 65 deletions(-) create mode 100644 sycl/test-e2e/Basic/large_range_error.cpp diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 9653f2030b598..1d38a2ee6e9a4 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -592,9 +592,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getMiscProperties(); }, /*IgnoreDuplicates=*/true, /*DropProperty=*/ - [](std::string_view PropertyName) { - return PropertyName == "optLevel" || PropertyName == "idQueriesRange"; - }); + [](std::string_view PropertyName) { return PropertyName == "optLevel"; }); std::array *, diff --git a/sycl/source/detail/error_handling/error_handling.cpp b/sycl/source/detail/error_handling/error_handling.cpp index 53cd2eb84969a..34a622bc9e013 100644 --- a/sycl/source/detail/error_handling/error_handling.cpp +++ b/sycl/source/detail/error_handling/error_handling.cpp @@ -332,6 +332,24 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, // else unknown. fallback (below) } } + } else if (IsLevelZero) { + // Make sure maximum number of work groups in each dimension does not + // exceed uint32_t. + uint64_t NumGlobalWorkGroups = NDRDesc.getNumGlobalWorkGroups(); + uint64_t MaxUint = + static_cast(std::numeric_limits::max()); + // Split the max work groups across all dimensions and check if + // any dimension exceeds uint32_t. + uint64_t NumGlobalWorkGroupsPerDim = NumGlobalWorkGroups / NDRDesc.Dims; + if (NumGlobalWorkGroupsPerDim > MaxUint) { + std::string ErrorMessage = + "Number of global work groups in 1st dimension " + + std::to_string(NumGlobalWorkGroups) + + " exceeds the maximum " + "supported value of " + + std::to_string(MaxUint) + "."; + throw sycl::exception(make_error_code(errc::nd_range), ErrorMessage); + } } else { // TODO: Decide what checks (if any) we need for the other backends } diff --git a/sycl/source/detail/ndrange_desc.hpp b/sycl/source/detail/ndrange_desc.hpp index 4c1f383c39c72..2ce7ad8212122 100644 --- a/sycl/source/detail/ndrange_desc.hpp +++ b/sycl/source/detail/ndrange_desc.hpp @@ -101,42 +101,43 @@ class NDRDescT { size_t Dims = 0; // Returns the total number of global workgroups for the kernel execution - // along all dimensions, or the maximum value of size_t if overflow occurs. + // along all dimensions, or the maximum value of uint64_t if overflow occurs. uint64_t getNumGlobalWorkGroups() const { - auto getProductAndCheckForOverflow = [](const size_t &a, const size_t &b, - const size_t &c) -> uint64_t { - uint64_t Product = 0; - uint64_t MaxSizeTVal = std::numeric_limits::max(); - if (a == 0 || b == 0 || c == 0) { - return 0; - } - + if (Dims == 0) + return 0; + + auto getProductAndCheckForOverflow = [](const size_t *Vals, + size_t NumDims) -> uint64_t { + uint64_t Product = 1; + uint64_t MaxVal = std::numeric_limits::max(); + for (size_t I = 0; I < NumDims; ++I) { + if (Vals[I] == 0) + return 0; #ifndef _MSC_VER - if (__builtin_mul_overflow(a, b, &Product) || - __builtin_mul_overflow(Product, c, &Product)) { - return MaxSizeTVal; // Overflow occurred, return max possible value. - } + if (__builtin_mul_overflow(Product, static_cast(Vals[I]), + &Product)) + return MaxVal; #else - if (b > MaxSizeTVal / a) { - return MaxSizeTVal; // Overflow occurred, return max possible value. - } - Product = a * b; - - if (c > MaxSizeTVal / Product) { - return MaxSizeTVal; // Overflow occurred, return max possible value. - } - Product *= c; + if (Vals[I] > MaxVal / Product) + return MaxVal; + Product *= Vals[I]; #endif - + } return Product; }; if (NumWorkGroups[0] != 0) - return getProductAndCheckForOverflow(NumWorkGroups[0], NumWorkGroups[1], - NumWorkGroups[2]); - else - return getProductAndCheckForOverflow(GlobalSize[0], GlobalSize[1], - GlobalSize[2]); + return getProductAndCheckForOverflow(NumWorkGroups.data(), Dims); + + // TODO: Can we have a case where only GFlobalSize and GlobalOffset are + // are set, and not LocalSize? If so, we need to handle that case as well. + uint64_t GlobalProduct = + getProductAndCheckForOverflow(GlobalSize.data(), Dims); + uint64_t LocalProduct = + getProductAndCheckForOverflow(LocalSize.data(), Dims); + if (LocalProduct == 0) + return 0; + return GlobalProduct / LocalProduct; } private: diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 29c0fe392e765..3320da3e3fccc 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2757,7 +2757,7 @@ void enqueueImpKernel( std::shared_ptr SyclKernelImpl; device_image_impl *DeviceImageImpl = nullptr; FastKernelCacheValPtr KernelCacheVal; - sycl_device_binary_property IdQueryRangeProp; + sycl_device_binary_property IdQueryRangeProp = nullptr; if (nullptr != MSyclKernel) { assert(MSyclKernel->get_info() == @@ -2838,10 +2838,8 @@ void enqueueImpKernel( MaxRange = static_cast(std::numeric_limits::max()); ErrMsg = "The kernel was compiled with -fsycl-id-queries-range=uint, but " - "the " - "provided " - "range/offset exceeds the maximum value storable in a uint32_t. " - "Either reduce the range/offset or " + "the provided range/offset exceeds the maximum value storable in " + "an uint32_t. Either reduce the range/offset or " "recompile the kernel with -fsycl-id-queries-range=size_t."; break; case 2: @@ -2855,9 +2853,8 @@ void enqueueImpKernel( MaxRange = static_cast(std::numeric_limits::max()); ErrMsg = "The kernel was compiled with -fsycl-id-queries-range=int, but the " - "provided " - "range/offset exceeds the maximum value storable in a int. Either " - "reduce the range/offset or " + "provided range/offset exceeds the maximum value storable in an " + "int. Either reduce the range/offset or " "recompile the kernel with -fsycl-id-queries-range=[uint|size_t]."; } diff --git a/sycl/test-e2e/Basic/large_range_error.cpp b/sycl/test-e2e/Basic/large_range_error.cpp new file mode 100644 index 0000000000000..bbb318afaf246 --- /dev/null +++ b/sycl/test-e2e/Basic/large_range_error.cpp @@ -0,0 +1,72 @@ +// Complile the kernel with different -fsycl-id-queries-range values +// to test overflow detection and Level Zero work-group limits. + +// REQUIRES: level_zero + +// RUN: %{build} -o %t_int.out -fsycl-id-queries-range=int +// RUN: %{build} -o %t_uint.out -fsycl-id-queries-range=uint +// RUN: %{build} -o %t_size.out -fsycl-id-queries-range=size_t + +// RUN: %{run} %t_int.out 17179869184 16 2>&1 | FileCheck --check-prefix=CHECK-PASS %s +// RUN: %{run} %t_int.out 17179869184 8 2>&1 | FileCheck --check-prefix=CHECK-INT-EXCEEDS %s + +// RUN: %{run} %t_uint.out 17179869184 16 2>&1 | FileCheck --check-prefix=CHECK-PASS %s +// RUN: %{run} %t_uint.out 17179869184 4 2>&1 | FileCheck --check-prefix=CHECK-UINT-EXCEEDS %s + +// RUN: %{run} %t_size.out 17179869184 4 2>&1 | FileCheck --check-prefix=CHECK-SIZE-PER-DIM-EXCEEDS %s + +// Tests that launching kernels with large ranges produces proper error +// messages. Validates overflow detection and Level Zero work-group limits + +#include + +using namespace sycl; + +// CHECK-PASS: PASS + +// CHECK-INT-EXCEEDS: FAIL: The kernel was compiled with -fsycl-id-queries-range=int, +// CHECK-INT-EXCEEDS-SAME: but the provided range/offset exceeds the maximum value +// CHECK-INT-EXCEEDS-SAME: storable in an int. Either reduce the range/offset or +// CHECK-INT-EXCEEDS-SAME: recompile the kernel with -fsycl-id-queries-range=[uint|size_t]. + +// CHECK-UINT-EXCEEDS: FAIL: The kernel was compiled with -fsycl-id-queries-range=uint, +// CHECK-UINT-EXCEEDS-SAME: but the provided range/offset exceeds the maximum value +// CHECK-UINT-EXCEEDS-SAME: storable in an uint32_t. Either reduce the range/offset or +// CHECK-UINT-EXCEEDS-SAME: recompile the kernel with -fsycl-id-queries-range=size_t. + +// CHECK-SIZE-PER-DIM-EXCEEDS: FAIL: Number of global work groups in 1st dimension +// CHECK-SIZE-PER-DIM-EXCEEDS-SAME: 4294967296 exceeds the maximum supported value of +// CHECK-SIZE-PER-DIM-EXCEEDS-SAME: 4294967295. +void test_nd_range_large_workgroups(queue &q, size_t GlobalSize, + size_t LocalSize) { + try { + q.parallel_for(nd_range<1>(range<1>(GlobalSize), range<1>(LocalSize)), + [](nd_item<1>) {}); + q.wait_and_throw(); + std::cout << "PASS\n"; + } catch (const sycl::exception &e) { + std::cout << "FAIL: " << e.what() << std::endl; + } +} + +int main(int argc, char *argv[]) { + size_t GlobalSize = 17179869184; + size_t LocalSize = 8; + + // Accept Global and local size as arguments. + if (argc == 3) { + GlobalSize = std::stoull(argv[1]); + LocalSize = std::stoull(argv[2]); + } else { + std::cout << "Usage: " << argv[0] << " \n"; + return 1; + } + + queue q; + std::cout << "Device: " << q.get_device().get_info() + << "\n"; + + test_nd_range_large_workgroups(q, GlobalSize, LocalSize); + + return 0; +} diff --git a/unified-runtime/source/adapters/level_zero/helpers/kernel_helpers.cpp b/unified-runtime/source/adapters/level_zero/helpers/kernel_helpers.cpp index 38f5139f755ba..52e897b3fa7d5 100644 --- a/unified-runtime/source/adapters/level_zero/helpers/kernel_helpers.cpp +++ b/unified-runtime/source/adapters/level_zero/helpers/kernel_helpers.cpp @@ -91,6 +91,7 @@ ur_result_t calculateKernelWorkDimensions( // If LocalWorkSize is not provided then Kernel must be provided to query // suggested group size. UR_ASSERT(LocalWorkSize || Kernel, UR_RESULT_ERROR_INVALID_VALUE); + UR_ASSERT(WorkDim > 0 && WorkDim < 4, UR_RESULT_ERROR_INVALID_VALUE); // New variable needed because GlobalWorkSize parameter might not be of size // 3 @@ -105,34 +106,26 @@ ur_result_t calculateKernelWorkDimensions( UR_CALL(getSuggestedLocalWorkSize(Device, Kernel, GlobalWorkSize3D, WG)); } - // TODO: assert if sizes do not fit into 32-bit? - switch (WorkDim) { - case 3: - ZeThreadGroupDimensions.groupCountX = - ur_cast(GlobalWorkSize3D[0] / WG[0]); - ZeThreadGroupDimensions.groupCountY = - ur_cast(GlobalWorkSize3D[1] / WG[1]); - ZeThreadGroupDimensions.groupCountZ = - ur_cast(GlobalWorkSize3D[2] / WG[2]); - break; - case 2: - ZeThreadGroupDimensions.groupCountX = - ur_cast(GlobalWorkSize3D[0] / WG[0]); - ZeThreadGroupDimensions.groupCountY = - ur_cast(GlobalWorkSize3D[1] / WG[1]); - WG[2] = 1; - break; - case 1: - ZeThreadGroupDimensions.groupCountX = - ur_cast(GlobalWorkSize3D[0] / WG[0]); - WG[1] = WG[2] = 1; - break; - - default: - UR_LOG(ERR, "calculateKernelWorkDimensions: unsupported work_dim"); - return UR_RESULT_ERROR_INVALID_VALUE; + uint64_t GroupCountPerDimension[3] = {1, 1, 1}; + for (uint32_t I = 0; I < WorkDim; I++) { + GroupCountPerDimension[I] = GlobalWorkSize3D[I] / WG[I]; + // Ensure that the number of groups in each dimension fits within uint32_t. + if (GroupCountPerDimension[I] > UINT32_MAX) { + UR_LOG(ERR, + "Number of work groups in dimension {} exceeds the maximum " + "supported value of {}.", + I, UINT32_MAX); + return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; + } } + ZeThreadGroupDimensions.groupCountX = + ur_cast(GroupCountPerDimension[0]); + ZeThreadGroupDimensions.groupCountY = + ur_cast(GroupCountPerDimension[1]); + ZeThreadGroupDimensions.groupCountZ = + ur_cast(GroupCountPerDimension[2]); + // Error handling for non-uniform group size case if (GlobalWorkSize3D[0] != size_t(ZeThreadGroupDimensions.groupCountX) * WG[0]) { From ea2329e73379b1e736f8b357958a5bb5f24a0a10 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 11 May 2026 12:56:58 -0700 Subject: [PATCH 3/8] Don't pass IdQueriesRange in GlobalBinImageProps --- .../SYCLPostLink/ComputeModuleRuntimeInfo.h | 4 ++-- llvm/include/llvm/SYCLPostLink/Utils.h | 4 +++- .../SYCLPostLink/ComputeModuleRuntimeInfo.cpp | 6 ++--- llvm/lib/SYCLPostLink/Utils.cpp | 10 ++++---- llvm/tools/sycl-post-link/sycl-post-link.cpp | 12 ++++------ .../lib/rtc/DeviceCompilation.cpp | 23 ++++++++++++++++++- 6 files changed, 40 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h index e25c688cf2554..1821b605b36eb 100644 --- a/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLPostLink/ComputeModuleRuntimeInfo.h @@ -27,7 +27,6 @@ struct GlobalBinImageProps { bool EmitExportedSymbols; bool EmitImportedSymbols; bool EmitDeviceGlobalPropSet; - int IdQueriesRange; // 0 = int, 1 = uint, 2 = size_t (default) }; bool isModuleUsingAsan(const Module &M); bool isModuleUsingMsan(const Module &M); @@ -41,7 +40,8 @@ PropSetRegTy computeDeviceLibProperties(const Module &M, PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, const GlobalBinImageProps &GlobProps, - bool AllowDeviceImageDependencies); + bool AllowDeviceImageDependencies, + int IdQueriesRange); std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/include/llvm/SYCLPostLink/Utils.h b/llvm/include/llvm/SYCLPostLink/Utils.h index 42eddf9b3af2d..f337e32bf723f 100644 --- a/llvm/include/llvm/SYCLPostLink/Utils.h +++ b/llvm/include/llvm/SYCLPostLink/Utils.h @@ -55,13 +55,15 @@ bool isTargetCompatibleWithModule(const std::string &Target, /// \param AllowDeviceImageDependencies If true, preserves inter-module /// dependencies /// \param SplitMode The module splitting mode used +/// \param IdQueriesRange SYCL id queries range. /// /// \return Error::success() on success, or error details on failure llvm::Error saveModuleProperties(const module_split::ModuleDesc &MD, const sycl::GlobalBinImageProps &GlobProps, StringRef Filename, StringRef Target, bool AllowDeviceImageDependencies, - module_split::IRSplitMode SplitMode); + module_split::IRSplitMode SplitMode, + int IdQueriesRange); /// \brief Saves the symbol table (entry point names) for a module to a file. /// diff --git a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp index aa82392d6c98b..189ad6eab4251 100644 --- a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp @@ -118,7 +118,8 @@ PropSetRegTy computeDeviceLibProperties(const Module &M, PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, const GlobalBinImageProps &GlobProps, - bool AllowDeviceImageDependencies) { + bool AllowDeviceImageDependencies, + int IdQueriesRange) { PropSetRegTy PropSet; { @@ -378,8 +379,7 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "optLevel", OptLevel); } { - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "idQueriesRange", - GlobProps.IdQueriesRange); + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "idQueriesRange", IdQueriesRange); } { std::vector> ArgPos = diff --git a/llvm/lib/SYCLPostLink/Utils.cpp b/llvm/lib/SYCLPostLink/Utils.cpp index e98429b376d81..e26fead62c779 100644 --- a/llvm/lib/SYCLPostLink/Utils.cpp +++ b/llvm/lib/SYCLPostLink/Utils.cpp @@ -27,7 +27,8 @@ PropSetRegTy computeModulePropertiesHelper(const module_split::ModuleDesc &MD, const sycl::GlobalBinImageProps &GlobProps, bool AllowDeviceImageDependencies, - module_split::IRSplitMode SplitMode) { + module_split::IRSplitMode SplitMode, + int IdQueriesRange) { PropSetRegTy PropSet; // For bf16 devicelib module, no kernel included and no specialization // constant used, skip regular Prop emit. However, we have fallback and @@ -35,7 +36,8 @@ computeModulePropertiesHelper(const module_split::ModuleDesc &MD, // indicate all exported function. if (!MD.isSYCLDeviceLib()) PropSet = sycl::computeModuleProperties( - MD.getModule(), MD.entries(), GlobProps, AllowDeviceImageDependencies); + MD.getModule(), MD.entries(), GlobProps, AllowDeviceImageDependencies, + IdQueriesRange); else PropSet = sycl::computeDeviceLibProperties(MD.getModule(), MD.Name); @@ -116,9 +118,9 @@ Error llvm::sycl_post_link::saveModuleProperties( const module_split::ModuleDesc &MD, const sycl::GlobalBinImageProps &GlobProps, StringRef Filename, StringRef Target, bool AllowDeviceImageDependencies, - module_split::IRSplitMode SplitMode) { + module_split::IRSplitMode SplitMode, int IdQueriesRange) { PropSetRegTy PropSet = computeModulePropertiesHelper( - MD, GlobProps, AllowDeviceImageDependencies, SplitMode); + MD, GlobProps, AllowDeviceImageDependencies, SplitMode, IdQueriesRange); if (!Target.empty()) PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, "compile_target", diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index add1bb4bee9c0..3e1cc3a1c8bf9 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -324,13 +324,9 @@ Error saveModule( continue; auto CopyTriple = BaseTriple; if (DoPropGen) { - GlobalBinImageProps Props = {EmitKernelParamInfo, - EmitProgramMetadata, - EmitKernelNames, - EmitExportedSymbols, - EmitImportedSymbols, - DeviceGlobals, - static_cast(IdQueriesRange)}; + GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, + EmitKernelNames, EmitExportedSymbols, + EmitImportedSymbols, DeviceGlobals}; StringRef Target = OutputFile.Target; std::string NewSuff = Suffix.str(); if (!Target.empty()) @@ -339,7 +335,7 @@ Error saveModule( CopyTriple.Prop = (OutputPrefix + NewSuff + ".prop").str(); if (Error E = sycl_post_link::saveModuleProperties( MD, Props, CopyTriple.Prop, Target, AllowDeviceImageDependencies, - SplitMode)) + SplitMode, static_cast(IdQueriesRange))) return E; } addTableRow(*Table, CopyTriple); diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index bcbfde50313c4..b34a5c18f5bf6 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -954,6 +954,25 @@ static IRSplitMode getDeviceCodeSplitMode(const InputArgList &UserArgList) { return SPLIT_AUTO; } +// Parse and return the value of `-fsycl-id-queries-range=` +// option. Return 0 if option is not specified (default value) or if value +// specified is int, Return 1 if value specified is uint, and 2 if value +// specified is size_t. +static int getSYCLIdMaxRange(const InputArgList &UserArgList) { + int MaxRange = 0; + if (auto *Arg = UserArgList.getLastArg(OPT_fsycl_id_queries_range_EQ)) { + StringRef ArgVal{Arg->getValue()}; + if (ArgVal == "int") { + MaxRange = 0; + } else if (ArgVal == "uint") { + MaxRange = 1; + } else if (ArgVal == "size_t") { + MaxRange = 2; + } + } + return MaxRange; +} + static void encodeProperties(PropertySetRegistry &Properties, RTCDevImgInfo &DevImgInfo) { const auto &PropertySets = Properties.getPropSets(); @@ -994,6 +1013,8 @@ jit_compiler::performPostLink(ModuleUPtr Module, options::OPT_fsycl_allow_device_image_dependencies, options::OPT_fno_sycl_allow_device_image_dependencies, false); + const int MaxIdRange = getSYCLIdMaxRange(UserArgList); + // TODO: EmitOnlyKernelsAsEntryPoints is controlled by // `shouldEmitOnlyKernelsAsEntryPoints` in // `clang/lib/Driver/ToolChains/Clang.cpp`. @@ -1085,7 +1106,7 @@ jit_compiler::performPostLink(ModuleUPtr Module, /*DeviceGlobals=*/true}; PropertySetRegistry Properties = computeModuleProperties(MDesc->getModule(), MDesc->entries(), PropReq, - AllowDeviceImageDependencies); + AllowDeviceImageDependencies, MaxIdRange); // When the split mode is none, the required work group size will be added // to the whole module, which will make the runtime unable to launch the From 65cfa3f970cb9544947fe0c7482d03db0f8c84a0 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 11 May 2026 15:10:06 -0700 Subject: [PATCH 4/8] Fix retrieving device images --- llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp | 6 +++++- sycl/source/detail/scheduler/commands.cpp | 9 ++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp index 189ad6eab4251..f08bf1a848604 100644 --- a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp @@ -379,7 +379,11 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "optLevel", OptLevel); } { - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "idQueriesRange", IdQueriesRange); + // Add device image property only if the image has a non-default + // SYCL Id range. The default range is 0 (signed int). + if (IdQueriesRange != 0) + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "idQueriesRange", + IdQueriesRange); } { std::vector> ArgPos = diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3320da3e3fccc..76140ab974c16 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2775,9 +2775,9 @@ void enqueueImpKernel( EliminatedArgMask = MSyclKernel->getKernelArgMask(); if (!MSyclKernel->isInteropOrSourceBased()) { - auto &DeviceImage = detail::ProgramManager::getInstance().getDeviceImage( - DeviceKernelInfo.Name, ContextImpl, DeviceImpl); - IdQueryRangeProp = DeviceImage.getProperty("idQueriesRange"); + auto &DeviceImageImpl = MSyclKernel->getDeviceImage(); + IdQueryRangeProp = + DeviceImageImpl.get_bin_image_ref()->getProperty("idQueriesRange"); } } else if ((SyclKernelImpl = KernelBundleImplPtr @@ -2830,6 +2830,9 @@ void enqueueImpKernel( if (!(MSyclKernel && MSyclKernel->isInterop())) { uint64_t MaxRange; string ErrMsg; + // If IdQueryRangeProp property is not present in the device image, + // it means that the kernel was compiled without -fsycl-id-queries-range + // option, so use the default range type of `int`. uint32_t IdQueriesRange = IdQueryRangeProp ? DeviceBinaryProperty(IdQueryRangeProp).asUint32() : 0; From 25c6c0315564fe97583560a52877a8789efc3eff Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 12 May 2026 15:53:54 +0200 Subject: [PATCH 5/8] Fix range_offset_fit_in_int test --- sycl/test-e2e/Basic/range_offset_fit_in_int.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp b/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp index fc1bf875353cf..b31bd20504f5c 100644 --- a/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp +++ b/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp @@ -11,9 +11,11 @@ namespace S = sycl; -constexpr char Msg[] = "Provided range and/or offset does not fit in int. " - "Pass `-fsycl-id-queries-range=size_t' to " - "remove this limit."; +constexpr char Msg[] = + "The kernel was compiled with -fsycl-id-queries-range=int, but the " + "provided range/offset exceeds the maximum value storable in a int. Either " + "reduce the range/offset or recompile the kernel with " + "-fsycl-id-queries-range=[uint|size_t]."; void checkRangeException(S::exception &E) { std::cerr << E.what() << std::endl; @@ -84,7 +86,8 @@ void test() { CGH.parallel_for(RangeInLimits, [Acc](S::id<2> Id) { Acc[0] += 1; }); }); - } catch (...) { + } catch (S::exception &E) { + std::cout << "Unexpected exception: " << E.what() << std::endl; assert(false && "Unexpected exception catched"); } From 73bc43dd310462d78b4c5ceff49f7811900243d5 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 13 May 2026 03:56:49 +0200 Subject: [PATCH 6/8] Fix test --- sycl/source/detail/ndrange_desc.hpp | 7 +- sycl/source/detail/scheduler/commands.cpp | 110 +++++++++++------- sycl/source/detail/scheduler/commands.hpp | 4 + .../Basic/range_offset_fit_in_int.cpp | 19 ++- 4 files changed, 92 insertions(+), 48 deletions(-) diff --git a/sycl/source/detail/ndrange_desc.hpp b/sycl/source/detail/ndrange_desc.hpp index 2ce7ad8212122..69214464fa171 100644 --- a/sycl/source/detail/ndrange_desc.hpp +++ b/sycl/source/detail/ndrange_desc.hpp @@ -135,8 +135,13 @@ class NDRDescT { getProductAndCheckForOverflow(GlobalSize.data(), Dims); uint64_t LocalProduct = getProductAndCheckForOverflow(LocalSize.data(), Dims); + + // Localproduct equals to zero means user has not specified local size + // and backend is free to choose it. In this case, the maximum number of + // workgroups is equal to the total global size, assuming local size to + // be 1. if (LocalProduct == 0) - return 0; + return GlobalProduct; return GlobalProduct / LocalProduct; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 76140ab974c16..726f602b0aeac 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -2598,6 +2599,63 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, return std::make_tuple(UrKernel, DeviceImageImpl, EliminatedArgMask); } +void checkNDRangeBoundsAndThrow(const NDRDescT &NDRDesc, + uint32_t IdQueriesRange) { + uint64_t MaxRange = 0; + string ErrMsg; + switch (IdQueriesRange) { + case 1: + MaxRange = static_cast(std::numeric_limits::max()); + ErrMsg = "The kernel was compiled with -fsycl-id-queries-range=uint, but " + "the provided range/offset exceeds the maximum value storable in " + "an uint32_t. Either reduce the range/offset or " + "recompile the kernel with -fsycl-id-queries-range=size_t."; + break; + case 2: + MaxRange = static_cast(std::numeric_limits::max()); + ErrMsg = "The provided range/offset exceeds the maximum " + "value storable in a size_t, " + "which is the maximum value supported by DPCPP."; + break; + case 0: + default: + MaxRange = static_cast(std::numeric_limits::max()); + ErrMsg = + "The kernel was compiled with -fsycl-id-queries-range=int, but the " + "provided range/offset exceeds the maximum value storable in an " + "int. Either reduce the range/offset or " + "recompile the kernel with -fsycl-id-queries-range=[uint|size_t]."; + } + + bool ExceedsMaxRange = NDRDesc.getNumGlobalWorkGroups() > MaxRange; + if (!ExceedsMaxRange) { + for (size_t I = 0; I < NDRDesc.Dims; ++I) { + const uint64_t GlobalSize = static_cast(NDRDesc.GlobalSize[I]); + const uint64_t GlobalOffset = + static_cast(NDRDesc.GlobalOffset[I]); + const uint64_t LocalSize = static_cast(NDRDesc.LocalSize[I]); + // Validate the maximum generated global id in each dimension: + // GlobalOffset + GlobalSize - 1 <= MaxRange. + // Use overflow-safe arithmetic instead of forming the sum directly. + if (GlobalSize != 0 && (GlobalOffset > MaxRange || + (GlobalSize - 1) > (MaxRange - GlobalOffset))) { + ExceedsMaxRange = true; + break; + } + if (GlobalSize > MaxRange - LocalSize) { + ExceedsMaxRange = true; + break; + } + } + } + if (ExceedsMaxRange) { + throw detail::set_ur_error( + sycl::exception(sycl::make_error_code(sycl::errc::invalid), + ErrMsg.c_str()), + UR_RESULT_ERROR_INVALID_VALUE); + } +} + ur_result_t enqueueImpCommandBufferKernel( const context &Ctx, device_impl &DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, @@ -2824,48 +2882,16 @@ void enqueueImpKernel( EventsWaitList = std::move(EventsWithDeviceGlobalInits); } - // Get Max number of work groups that this kernel can accept. - { - // Skip the check for interop kernels. - if (!(MSyclKernel && MSyclKernel->isInterop())) { - uint64_t MaxRange; - string ErrMsg; - // If IdQueryRangeProp property is not present in the device image, - // it means that the kernel was compiled without -fsycl-id-queries-range - // option, so use the default range type of `int`. - uint32_t IdQueriesRange = - IdQueryRangeProp ? DeviceBinaryProperty(IdQueryRangeProp).asUint32() - : 0; - switch (IdQueriesRange) { - case 1: - MaxRange = static_cast(std::numeric_limits::max()); - ErrMsg = - "The kernel was compiled with -fsycl-id-queries-range=uint, but " - "the provided range/offset exceeds the maximum value storable in " - "an uint32_t. Either reduce the range/offset or " - "recompile the kernel with -fsycl-id-queries-range=size_t."; - break; - case 2: - MaxRange = static_cast(std::numeric_limits::max()); - ErrMsg = "The provided range/offset exceeds the maximum " - "value storable in a size_t, " - "which is the maximum value supported by DPCPP."; - break; - case 0: - default: - MaxRange = static_cast(std::numeric_limits::max()); - ErrMsg = - "The kernel was compiled with -fsycl-id-queries-range=int, but the " - "provided range/offset exceeds the maximum value storable in an " - "int. Either reduce the range/offset or " - "recompile the kernel with -fsycl-id-queries-range=[uint|size_t]."; - } - - if (NDRDesc.getNumGlobalWorkGroups() > MaxRange) { - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - ErrMsg.c_str()); - } - } + // Get Max number of work groups and linear id range that this kernel can + // accept. Skip the check for interop kernels. + if (!(MSyclKernel->isInterop())) { + // If IdQueryRangeProp property is not present in the device image, + // it means that the kernel was compiled without -fsycl-id-queries-range + // option, so use the default range type of `int`. + uint32_t IdQueriesRange = + IdQueryRangeProp ? DeviceBinaryProperty(IdQueryRangeProp).asUint32() + : 0; + checkNDRangeBoundsAndThrow(NDRDesc, IdQueriesRange); } ur_result_t Error = UR_RESULT_SUCCESS; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index c3afe69568924..eee8e38ce774f 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -719,6 +719,10 @@ class UpdateCommandBufferCommand : public Command { MNodes; }; +void checkNDRangeBoundsAndThrow(const NDRDescT &NDRDesc, + const uint64_t MaxRange, + const std::string ErrMsg); + // Enqueues a given kernel to a ur_exp_command_buffer_handle_t ur_result_t enqueueImpCommandBufferKernel( const context &Ctx, device_impl &DeviceImpl, diff --git a/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp b/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp index b31bd20504f5c..12821742f97cb 100644 --- a/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp +++ b/sycl/test-e2e/Basic/range_offset_fit_in_int.cpp @@ -13,8 +13,8 @@ namespace S = sycl; constexpr char Msg[] = "The kernel was compiled with -fsycl-id-queries-range=int, but the " - "provided range/offset exceeds the maximum value storable in a int. Either " - "reduce the range/offset or recompile the kernel with " + "provided range/offset exceeds the maximum value storable in an int. " + "Either reduce the range/offset or recompile the kernel with " "-fsycl-id-queries-range=[uint|size_t]."; void checkRangeException(S::exception &E) { @@ -60,10 +60,9 @@ void test() { S::id<2>{(OutOfLimitsSize / 4) * 3, (OutOfLimitsSize / 4) * 3}); int Data = 0; - S::buffer Buf{&Data, 1}; - // no offset, either dim of range exceeds limit try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -80,6 +79,7 @@ void test() { // no offset, all dims of range are in limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -87,12 +87,12 @@ void test() { [Acc](S::id<2> Id) { Acc[0] += 1; }); }); } catch (S::exception &E) { - std::cout << "Unexpected exception: " << E.what() << std::endl; assert(false && "Unexpected exception catched"); } // no offset, all dims of range are in limits, linear id exceeds limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -109,6 +109,7 @@ void test() { // small offset, either dim of range exceeds limit try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -125,6 +126,7 @@ void test() { // large offset, neither dim of range exceeds limit, offset + range > limit try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -142,6 +144,7 @@ void test() { // large offset, neither dim of range exceeds limit try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -158,6 +161,7 @@ void test() { // small offset, neither range dim exceeds limit try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -170,6 +174,7 @@ void test() { // small offset, global range's dim is out of limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -186,6 +191,7 @@ void test() { // large offset, ranges are in limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -202,6 +208,7 @@ void test() { // small offset, ranges are in limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -214,6 +221,7 @@ void test() { // small offset, ranges are in limits, linear id out of limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); @@ -230,6 +238,7 @@ void test() { // small offset, ranges are in limits, range + offset exceeds limits try { + S::buffer Buf{&Data, 1}; Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); From 30939ee2f27d41ff2cce8f5378705b608a960f0b Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 13 May 2026 09:13:33 -0700 Subject: [PATCH 7/8] fix test failure --- sycl/source/detail/scheduler/commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 726f602b0aeac..a0322de253a6e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2884,7 +2884,7 @@ void enqueueImpKernel( // Get Max number of work groups and linear id range that this kernel can // accept. Skip the check for interop kernels. - if (!(MSyclKernel->isInterop())) { + { // If IdQueryRangeProp property is not present in the device image, // it means that the kernel was compiled without -fsycl-id-queries-range // option, so use the default range type of `int`. From e51fe6309b0163994c8a19d204df37509f55cd56 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 13 May 2026 12:50:01 -0700 Subject: [PATCH 8/8] Fix test failure --- .../sycl-post-link/sycl-id-queries-range.ll | 35 +++++++++++++++++++ sycl/source/detail/scheduler/commands.cpp | 10 +++--- sycl/test-e2e/Basic/large_range_error.cpp | 6 ++-- sycl/test/abi/sycl_symbols_linux.dump | 2 ++ 4 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 llvm/test/tools/sycl-post-link/sycl-id-queries-range.ll diff --git a/llvm/test/tools/sycl-post-link/sycl-id-queries-range.ll b/llvm/test/tools/sycl-post-link/sycl-id-queries-range.ll new file mode 100644 index 0000000000000..2ce17ba1a5775 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-id-queries-range.ll @@ -0,0 +1,35 @@ +; This test checks that the sycl-post-link tool correctly handles the +; -id-queries-range option, which adds an 'idQueriesRange' property to the +; device binary image properties. + +; Default (int) mode: property should NOT be emitted +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t_default.table +; RUN: FileCheck %s -input-file=%t_default_0.prop --check-prefix CHECK-DEFAULT + +; uint mode: property should be emitted with value 1 +; RUN: sycl-post-link -properties -split=auto -symbols -S -id-queries-range=uint < %s -o %t_uint.table +; RUN: FileCheck %s -input-file=%t_uint_0.prop --check-prefix CHECK-UINT + +; size_t mode: property should be emitted with value 2 +; RUN: sycl-post-link -properties -split=auto -symbols -S -id-queries-range=size_t < %s -o %t_sizet.table +; RUN: FileCheck %s -input-file=%t_sizet_0.prop --check-prefix CHECK-SIZET + +; By default, the 'idQueriesRange' property should not be emitted. +; CHECK-DEFAULT-NOT: idQueriesRange + +; CHECK-UINT: [SYCL/misc properties] +; CHECK-UINT: idQueriesRange=1|1 + +; CHECK-SIZET: [SYCL/misc properties] +; CHECK-SIZET: idQueriesRange=1|2 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define dso_local spir_func noundef i32 @_Z3fooii(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 { +entry: + %sub = sub nsw i32 %a, %b + ret i32 %sub +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "sycl-module-id"="test.cpp" } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a0322de253a6e..352a23617669f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2629,20 +2629,22 @@ void checkNDRangeBoundsAndThrow(const NDRDescT &NDRDesc, bool ExceedsMaxRange = NDRDesc.getNumGlobalWorkGroups() > MaxRange; if (!ExceedsMaxRange) { + uint64_t TotalGlobalSize = 1; for (size_t I = 0; I < NDRDesc.Dims; ++I) { const uint64_t GlobalSize = static_cast(NDRDesc.GlobalSize[I]); const uint64_t GlobalOffset = static_cast(NDRDesc.GlobalOffset[I]); - const uint64_t LocalSize = static_cast(NDRDesc.LocalSize[I]); // Validate the maximum generated global id in each dimension: // GlobalOffset + GlobalSize - 1 <= MaxRange. // Use overflow-safe arithmetic instead of forming the sum directly. - if (GlobalSize != 0 && (GlobalOffset > MaxRange || - (GlobalSize - 1) > (MaxRange - GlobalOffset))) { + if (GlobalSize != 0 && GlobalOffset != 0 && + (GlobalOffset > MaxRange || + (GlobalSize - 1) > (MaxRange - GlobalOffset))) { ExceedsMaxRange = true; break; } - if (GlobalSize > MaxRange - LocalSize) { + TotalGlobalSize *= GlobalSize; + if (TotalGlobalSize > MaxRange) { ExceedsMaxRange = true; break; } diff --git a/sycl/test-e2e/Basic/large_range_error.cpp b/sycl/test-e2e/Basic/large_range_error.cpp index bbb318afaf246..24542a29bdc2a 100644 --- a/sycl/test-e2e/Basic/large_range_error.cpp +++ b/sycl/test-e2e/Basic/large_range_error.cpp @@ -7,12 +7,10 @@ // RUN: %{build} -o %t_uint.out -fsycl-id-queries-range=uint // RUN: %{build} -o %t_size.out -fsycl-id-queries-range=size_t -// RUN: %{run} %t_int.out 17179869184 16 2>&1 | FileCheck --check-prefix=CHECK-PASS %s -// RUN: %{run} %t_int.out 17179869184 8 2>&1 | FileCheck --check-prefix=CHECK-INT-EXCEEDS %s +// RUN: %{run} %t_size.out 17179869184 16 2>&1 | FileCheck --check-prefix=CHECK-PASS %s -// RUN: %{run} %t_uint.out 17179869184 16 2>&1 | FileCheck --check-prefix=CHECK-PASS %s +// RUN: %{run} %t_int.out 17179869184 8 2>&1 | FileCheck --check-prefix=CHECK-INT-EXCEEDS %s // RUN: %{run} %t_uint.out 17179869184 4 2>&1 | FileCheck --check-prefix=CHECK-UINT-EXCEEDS %s - // RUN: %{run} %t_size.out 17179869184 4 2>&1 | FileCheck --check-prefix=CHECK-SIZE-PER-DIM-EXCEEDS %s // Tests that launching kernels with large ranges produces proper error diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index e38af1dcf1e1f..84d4ee9432f87 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3557,6 +3557,8 @@ _ZN4sycl3_V17handler22ext_oneapi_fill2d_implEPvmPKvmmm _ZN4sycl3_V17handler22memcpyFromDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler22setDeviceKernelInfoPtrEPNS0_6detail16DeviceKernelInfoE _ZN4sycl3_V17handler22setHandlerKernelBundleENS0_6kernelE +_ZN4sycl3_V17handler22setHandlerKernelBundleIRKSt10shared_ptrINS0_6detail18kernel_bundle_implEEEEvOT_ +_ZN4sycl3_V17handler22setHandlerKernelBundleISt10shared_ptrINS0_6detail18kernel_bundle_implEEEEvOT_ _ZN4sycl3_V17handler23instantiateKernelOnHostEPv _ZN4sycl3_V17handler24ext_oneapi_memcpy2d_implEPvmPKvmmm _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm