From 2d0fc8f129bec42cf8c0ee2968fc1e7b42508b91 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 30 Mar 2023 11:15:51 -0400 Subject: [PATCH 01/23] device allocated USM seamless host access Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/memory.h | 78 +++++++++--- .../include/dpl_extras/vector.h | 111 +++++++++++++++++- 2 files changed, 170 insertions(+), 19 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 945795b9c659..602394a5f933 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -64,26 +64,30 @@ template struct device_reference { device_reference(value_type &input) : value(input) {} template device_reference &operator=(const device_reference &input) { - value = input; + __assign_from(input.__get_value()); return *this; }; device_reference &operator=(const device_reference &input) { - T val = input.value; - value = val; +#ifdef __SYCL_DEVICE_ONLY__ + value = input.value; +#else + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.copy(&value, input.value, sizeof(value_type)).wait(); +#endif return *this; }; device_reference &operator=(const value_type &x) { - value = x; + __assign_from(x); return *this; }; pointer operator&() const { return pointer(&value); }; - operator value_type() const { return T(value); } + operator value_type() const { return __get_value(); } device_reference &operator++() { - ++value; + __assign_from(__get_value()+1); return *this; }; device_reference &operator--() { - --value; + __assign_from(__get_value()+1); return *this; }; device_reference operator++(int) { @@ -97,51 +101,89 @@ template struct device_reference { return ref; }; device_reference &operator+=(const T &input) { - value += input; - return *this; + __assign_from(__get_value() + input); + return *this; }; device_reference &operator-=(const T &input) { - value -= input; + __assign_from(__get_value() - input); return *this; }; device_reference &operator*=(const T &input) { - value *= input; + __assign_from(__get_value() * input); return *this; }; device_reference &operator/=(const T &input) { - value /= input; + __assign_from(__get_value() / input); return *this; }; device_reference &operator%=(const T &input) { - value %= input; + __assign_from(__get_value() % input); return *this; }; device_reference &operator&=(const T &input) { - value &= input; + __assign_from(__get_value() & input); return *this; }; device_reference &operator|=(const T &input) { - value |= input; + __assign_from(__get_value() | input); return *this; }; device_reference &operator^=(const T &input) { - value ^= input; + __assign_from(__get_value() ^ input); return *this; }; device_reference &operator<<=(const T &input) { - value <<= input; + __assign_from(__get_value() << input); return *this; }; device_reference &operator>>=(const T &input) { - value >>= input; + __assign_from(__get_value() >> input); return *this; }; void swap(device_reference &input) { +#ifdef __SYCL_DEVICE_ONLY__ T tmp = (*this); *this = (input); input = (tmp); +#else + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task([=]() { + this->swap(input); + }).wait(); + }); +#endif } T &value; +private: +#ifdef __SYCL_DEVICE_ONLY //call from the device + device_reference &__assign_from(const value_type& from) + { + value = from; + return *this; + } + + value_type __get_value() const + { + return T(value); + } +#else // call from the host + device_reference &__assign_from(const value_type& from) + { + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.fill(&value, from, 1); + return *this; + } + + value_type __get_value() const + { + T tmp; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.memcpy(&tmp, &value, sizeof(T)*1).wait(); + return T(tmp); + } + +#endif }; template diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 4480a060774e..0d96662f53c1 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -25,6 +25,115 @@ namespace dpct { namespace internal { + + +template +class usm_device_allocator { +public: + using value_type = T; + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + +public: + template struct rebind { + typedef usm_device_allocator other; + }; + + + usm_device_allocator() = delete; + usm_device_allocator(const sycl::context &Ctxt, const sycl::device &Dev, + const sycl::property_list &PropList = {}) + : MContext(Ctxt), MDevice(Dev), MPropList(PropList) {} + usm_device_allocator(const sycl::queue &Q, const sycl::property_list &PropList = {}) + : MContext(Q.get_context()), MDevice(Q.get_device()), + MPropList(PropList) {} + usm_device_allocator(const usm_device_allocator &) = default; + usm_device_allocator(usm_device_allocator &&) noexcept = default; + usm_device_allocator &operator=(const usm_device_allocator &Other) { + MContext = Other.MContext; + MDevice = Other.MDevice; + MPropList = Other.MPropList; + return *this; + } + usm_device_allocator &operator=(usm_device_allocator &&Other) { + MContext = std::move(Other.MContext); + MDevice = std::move(Other.MDevice); + MPropList = std::move(Other.MPropList); + return *this; + } + + template + usm_device_allocator(const usm_device_allocator &Other) noexcept + : MContext(Other.MContext), MDevice(Other.MDevice), + MPropList(Other.MPropList) {} + + /// Allocates memory. + /// + /// \param NumberOfElements is a count of elements to allocate memory for. + T *allocate(size_t NumberOfElements, const sycl::detail::code_location CodeLoc = + sycl::detail::code_location::current()) { + + auto Result = reinterpret_cast( + aligned_alloc(getAlignment(), NumberOfElements * sizeof(value_type), + MDevice, MContext, sycl::usm::alloc::device, MPropList, CodeLoc)); + if (!Result) { + throw sycl::exception(sycl::errc::memory_allocation); + } + return Result; + } + + /// Deallocates memory. + /// + /// \param Ptr is a pointer to memory being deallocated. + /// \param Size is a number of elements previously passed to allocate. + void deallocate( + T *Ptr, size_t, + const sycl::detail::code_location CodeLoc = sycl::detail::code_location::current()) { + if (Ptr) { + free(Ptr, MContext, CodeLoc); + } + } + + template + friend bool operator==(const usm_device_allocator &One, + const usm_device_allocator &Two) { + return ((One.MContext == Two.MContext) && + (One.MDevice == Two.MDevice)); + } + + template + friend bool operator!=(const usm_device_allocator &One, + const usm_device_allocator &Two) { + return !((One.MContext == Two.MContext) && + (One.MDevice == Two.MDevice)); + } + + template bool has_property() const noexcept { + return MPropList.has_property(); + } + + template Property get_property() const { + return MPropList.get_property(); + } + +private: + constexpr size_t getAlignment() const { return sycl::max(alignof(T), Alignment); } + + template + friend class usm_device_allocator; + + sycl::context MContext; + sycl::device MDevice; + sycl::property_list MPropList; +}; + + + +//taken from libc++ + +// __has_construct + template // for non-iterators struct is_iterator : std::false_type {}; @@ -42,7 +151,7 @@ struct is_iterator : std::true_type {}; #ifndef DPCT_USM_LEVEL_NONE template > + typename Allocator = dpct::internal::usm_device_allocator> class device_vector { public: using iterator = device_iterator; From 26347b48b98e65381926401ceb501e3b00a4f157 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 30 Mar 2023 11:16:36 -0400 Subject: [PATCH 02/23] construct allocator aware (BROKEN: cant pass allocator to kernel) Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 238 ++++++++++++------ 1 file changed, 156 insertions(+), 82 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 0d96662f53c1..70355247e929 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -175,7 +175,124 @@ class device_vector { void _set_capacity_and_alloc() { _capacity = ::std::max(_size * 2, _min_capacity()); - _storage = _alloc.allocate(_capacity); + _storage = ::std::allocator_traits::allocate(_alloc, _capacity); + } + + + + template + struct __has_construct_impl : ::std::false_type { }; + + template + struct __has_construct_impl().construct(std::declval<_Args>()...) + ), _Alloc, _Args...> : ::std::true_type { }; + + template + struct __has_construct : __has_construct_impl { }; + + // __has_destroy + template + struct __has_destroy : ::std::false_type { }; + + template + struct __has_destroy<_Alloc, _Pointer, decltype( + (void)std::declval<_Alloc>().destroy(std::declval<_Pointer>()) + )> : ::std::true_type { }; + + // end of taken from libc++ + + //apply default constructor if no override is provided + template + typename ::std::enable_if_t::value, void> + device_allocator_construct(_Allocator alloc, DataT* p) + { + ::new((void*)p) T(); + } + + //use provided default construct call if it exists + template + typename ::std::enable_if_t<__has_construct<_Allocator, T*>::value, void> + device_allocator_construct(_Allocator alloc, DataT* p) + { + alloc.construct(p); + } + + //apply constructor if no override is provided + template + typename ::std::enable_if_t::value, void> + device_allocator_construct(_Allocator alloc, DataT* p, T_in arg) + { + ::new((void*)p) T(arg); + } + + //use provided construct call if it exists + template + typename ::std::enable_if_t<__has_construct<_Allocator, DataT*, T_in>::value, void> + device_allocator_construct(_Allocator alloc, DataT* p, T_in arg) + { + alloc.construct(p, arg); + } + + + //apply default destructor if no destroy override is provided + template + typename ::std::enable_if_t::value, void> + device_allocator_destroy(_Allocator alloc, DataT* p) + { + p->~T(); + } + + //use provided destroy call if it exists + template + typename ::std::enable_if_t<__has_destroy<_Allocator, DataT*>::value, void> + device_allocator_destroy(_Allocator alloc, DataT* p) + { + alloc.destroy(p); + } + + void _construct(size_type n, size_type start_idx = 0){ + if (n > 0) + { + get_default_queue().submit([&](sycl::handler &cgh) { + cgh.parallel_for(n, [=](sycl::id<1> i) { + device_allocator_construct(_alloc, _storage + start_idx + i); + }); + }); + } + } + + void _construct(size_type n, const T &value, size_type start_idx = 0){ + if (n > 0) + { + get_default_queue().submit([&](sycl::handler &cgh) { + cgh.parallel_for(n, [=, _alloc](sycl::id<1> i) { + device_allocator_construct(_alloc, _storage + start_idx + i, value); + }); + }); + } + } + + template + void _construct(DevIter first, DevIter last, size_type start_idx = 0){ + int num_eles = ::std::distance(first,last); + if (num_eles > 0) + { + get_default_queue().submit([&](sycl::handler &cgh) { + cgh.parallel_for(num_eles, [=, _alloc](sycl::id<1> i) { + device_allocator_construct(_alloc, _storage + start_idx + i, first + i); + }); + }); + } + } + + + template + void _construct_from_host(HostIter first, HostIter last, size_type start_idx = 0){ + sycl::buffer buf(first, last); + auto buf_first = oneapi::dpl::begin(buf); + auto buf_last = oneapi::dpl::end(buf); + _construct(buf_first, buf_last, start_idx); } public: @@ -189,23 +306,14 @@ class device_vector { : _alloc(get_default_queue()), _size(0), _capacity(_min_capacity()) { _set_capacity_and_alloc(); } - ~device_vector() /*= default*/ { _alloc.deallocate(_storage, _capacity); }; + ~device_vector() /*= default*/ { ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); }; explicit device_vector(size_type n) : device_vector(n, T()) {} explicit device_vector(size_type n, const T &value) : _alloc(get_default_queue()), _size(n) { _set_capacity_and_alloc(); - if (_size > 0) { - ::std::fill(oneapi::dpl::execution::make_device_policy(get_default_queue()), - begin(), end(), T(value)); - } - } - device_vector(const device_vector &other) : _alloc(get_default_queue()) { - _size = other.size(); - _capacity = other.capacity(); - _storage = _alloc.allocate(_capacity); - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - other.begin(), other.end(), begin()); + _construct(n, value); } + device_vector(device_vector &&other) : _alloc(get_default_queue()), _size(other.size()), _capacity(other.capacity()), _storage(other._storage) { @@ -214,22 +322,17 @@ class device_vector { other._storage = nullptr; } +// TODO: check if this is ok for both random_access_iterators and non random_access_iterators template device_vector(InputIterator first, typename ::std::enable_if< internal::is_iterator::value && - !::std::is_pointer::value && - ::std::is_same::iterator_category, - ::std::random_access_iterator_tag>::value, + !::std::is_pointer::value, InputIterator>::type last) : _alloc(get_default_queue()) { _size = ::std::distance(first, last); _set_capacity_and_alloc(); - if (_size > 0) { - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - first, last, begin()); - } + _construct(first, last); } template @@ -243,69 +346,39 @@ class device_vector { auto ptr_type = sycl::get_pointer_type(first, get_default_context()); if (ptr_type != sycl::usm::alloc::host && ptr_type != sycl::usm::alloc::unknown) { - ::std::copy( - oneapi::dpl::execution::make_device_policy(get_default_queue()), - first, last, begin()); + _construct(first, last); } else { - sycl::buffer buf(first, last); - auto buf_first = oneapi::dpl::begin(buf); - auto buf_last = oneapi::dpl::end(buf); - ::std::copy( - oneapi::dpl::execution::make_device_policy(get_default_queue()), - buf_first, buf_last, begin()); + _construct_from_host(first, last); } } } - template - device_vector(InputIterator first, - typename ::std::enable_if< - internal::is_iterator::value && - !::std::is_pointer::value && - !::std::is_same::iterator_category, - ::std::random_access_iterator_tag>::value, - InputIterator>::type last) - : _alloc(get_default_queue()), _size(::std::distance(first, last)) { - _set_capacity_and_alloc(); - ::std::vector _tmp(first, last); - if (_size > 0) { - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - _tmp.begin(), _tmp.end(), this->begin()); - } - } - template - device_vector(const device_vector &v) - : _alloc(get_default_queue()), _storage(v.real_begin()), _size(v.size()), - _capacity(v.capacity()) {} + device_vector(const device_vector &other) + : _alloc(get_default_queue()) { + _size = other.size(); + _capacity = other.capacity(); + _storage = ::std::allocator_traits::allocate(_alloc, _capacity); + _construct(other.begin(), other.end()); + } template device_vector(::std::vector &v) : _alloc(get_default_queue()), _size(v.size()) { _set_capacity_and_alloc(); - if (_size > 0) { - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - v.begin(), v.end(), this->begin()); - } + _construct_from_host(v.begin(), v.end()); } template device_vector &operator=(const ::std::vector &v) { resize(v.size()); - if (_size > 0) { - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - v.begin(), v.end(), begin()); - } + _construct_from_host(v.begin(), v.end()); return *this; } device_vector &operator=(const device_vector &other) { // Copy assignment operator: resize(other.size()); - if (_size > 0) { - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - other.begin(), other.end(), begin()); - } + _construct(other.begin(), other.end()); return *this; } device_vector &operator=(device_vector &&other) { @@ -336,21 +409,21 @@ class device_vector { void reserve(size_type n) { if (n > capacity()) { // allocate buffer for new size - auto tmp = _alloc.allocate(2 * n); + auto tmp = ::std::allocator_traits::allocate(_alloc, 2 * n); // copy content (old buffer to new buffer) ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), begin(), end(), tmp); // deallocate old memory - _alloc.deallocate(_storage, _capacity); + ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); _storage = tmp; _capacity = 2 * n; } } void resize(size_type new_size, const T &x = T()) { reserve(new_size); - if (_size < new_size) { - ::std::fill(oneapi::dpl::execution::make_device_policy(get_default_queue()), - begin() + _size, begin() + new_size, x); + if (new_size > size()) + { + _construct(new_size - size(), x, size()); } _size = new_size; } @@ -367,13 +440,13 @@ class device_vector { void shrink_to_fit(void) { if (_size != capacity()) { size_type tmp_capacity = ::std::max(_size, _min_capacity()); - auto tmp = _alloc.allocate(tmp_capacity); + auto tmp = ::std::allocator_traits::allocate(_alloc,tmp_capacity); if (_size > 0) { ::std::copy( oneapi::dpl::execution::make_device_policy(get_default_queue()), begin(), end(), tmp); } - _alloc.deallocate(_storage, _capacity); + ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); _storage = tmp; _capacity = tmp_capacity; } @@ -414,14 +487,14 @@ class device_vector { if (m <= 0) { return end(); } - auto tmp = _alloc.allocate(m); + auto tmp = ::std::allocator_traits::allocate(_alloc, m); // copy remainder to temporary buffer. ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), last, end(), tmp); // override (erase) subsequence in storage. ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), tmp, tmp + m, first); - _alloc.deallocate(tmp, m); + ::std::allocator_traits::deallocate(_alloc, tmp, m); _size -= n; return begin() + first.get_idx() + n; } @@ -434,14 +507,13 @@ class device_vector { void insert(iterator position, size_type n, const T &x) { if (position == end()) { resize(size() + n); - ::std::fill(oneapi::dpl::execution::make_device_policy(get_default_queue()), - end() - n, end(), x); + _construct(n, x, size() - n); } else { auto i_n = ::std::distance(begin(), position); // allocate temporary storage auto m = ::std::distance(position, end()); // will throw if position is not inside active vector - auto tmp = _alloc.allocate(m); + auto tmp = ::std::allocator_traits::allocate(_alloc, m); // copy remainder ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), position, end(), tmp); @@ -450,12 +522,13 @@ class device_vector { // resizing might invalidate position position = begin() + position.get_idx(); - ::std::fill(oneapi::dpl::execution::make_device_policy(get_default_queue()), - position, position + n, x); + //TODO: Is this OK? can we assume that the above and below copy is equivalent to std::move? + // otherwise we are constructing on top of copied out data, and will be destructing data which was copied + _construct(n, x, position.get_idx()); ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), tmp, tmp + m, position + n); - _alloc.deallocate(tmp, m); + ::std::allocator_traits::deallocate(_alloc, tmp, m); } } template @@ -466,12 +539,13 @@ class device_vector { auto n = ::std::distance(first, last); if (position == end()) { resize(size() + n); + _construct(first, last, size() - n); ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), first, last, end()); } else { auto m = ::std::distance(position, end()); // will throw if position is not inside active vector - auto tmp = _alloc.allocate(m); + auto tmp = ::std::allocator_traits::allocate(_alloc,m); ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), position, end(), tmp); @@ -480,11 +554,11 @@ class device_vector { // resizing might invalidate position position = begin() + position.get_idx(); - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - first, last, position); + _construct(first, last, position.get_idx()); + ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), tmp, tmp + m, position + n); - _alloc.deallocate(tmp, m); + ::std::allocator_traits::deallocate(_alloc, tmp, m); } } Allocator get_allocator() const { return _alloc; } From d59369c256c9d5ee747f74179cdfd8a6cc44411d Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 30 Mar 2023 12:24:09 -0400 Subject: [PATCH 03/23] working with static construct function Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 133 +++++++++--------- 1 file changed, 66 insertions(+), 67 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 70355247e929..61767dd4b37f 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -131,55 +131,6 @@ class usm_device_allocator { //taken from libc++ - -// __has_construct - -template // for non-iterators -struct is_iterator : std::false_type {}; - -template // For iterators -struct is_iterator< - Iter, - typename std::enable_if< - !std::is_void::value, void>::type> - : std::true_type {}; - -template // For pointers -struct is_iterator : std::true_type {}; -} // end namespace internal - -#ifndef DPCT_USM_LEVEL_NONE - -template > -class device_vector { -public: - using iterator = device_iterator; - using const_iterator = const iterator; - using reference = device_reference; - using const_reference = const reference; - using value_type = T; - using pointer = T *; - using const_pointer = const T *; - using difference_type = - typename ::std::iterator_traits::difference_type; - using size_type = ::std::size_t; - -private: - Allocator _alloc; - size_type _size; - size_type _capacity; - pointer _storage; - - size_type _min_capacity() const { return size_type(1); } - - void _set_capacity_and_alloc() { - _capacity = ::std::max(_size * 2, _min_capacity()); - _storage = ::std::allocator_traits::allocate(_alloc, _capacity); - } - - - template struct __has_construct_impl : ::std::false_type { }; @@ -202,28 +153,29 @@ class device_vector { // end of taken from libc++ + //apply default constructor if no override is provided template - typename ::std::enable_if_t::value, void> - device_allocator_construct(_Allocator alloc, DataT* p) + typename ::std::enable_if_t::value, void> + device_allocator_construct(DataT* p) { - ::new((void*)p) T(); + ::new((void*)p) DataT(); } //use provided default construct call if it exists template - typename ::std::enable_if_t<__has_construct<_Allocator, T*>::value, void> - device_allocator_construct(_Allocator alloc, DataT* p) + typename ::std::enable_if_t<__has_construct<_Allocator, DataT*>::value, void> + device_allocator_construct(DataT* p) { - alloc.construct(p); + _Allocator::construct(p); } //apply constructor if no override is provided template typename ::std::enable_if_t::value, void> - device_allocator_construct(_Allocator alloc, DataT* p, T_in arg) + device_allocator_construct(DataT* p, T_in arg) { - ::new((void*)p) T(arg); + ::new((void*)p) DataT(arg); } //use provided construct call if it exists @@ -231,32 +183,77 @@ class device_vector { typename ::std::enable_if_t<__has_construct<_Allocator, DataT*, T_in>::value, void> device_allocator_construct(_Allocator alloc, DataT* p, T_in arg) { - alloc.construct(p, arg); + _Allocator::construct(p, arg); } //apply default destructor if no destroy override is provided template typename ::std::enable_if_t::value, void> - device_allocator_destroy(_Allocator alloc, DataT* p) + device_allocator_destroy(DataT* p) { - p->~T(); + p->~DataT(); } //use provided destroy call if it exists template typename ::std::enable_if_t<__has_destroy<_Allocator, DataT*>::value, void> - device_allocator_destroy(_Allocator alloc, DataT* p) + device_allocator_destroy(DataT* p) { - alloc.destroy(p); + _Allocator::destroy(p); + } + +template // for non-iterators +struct is_iterator : std::false_type {}; + +template // For iterators +struct is_iterator< + Iter, + typename std::enable_if< + !std::is_void::value, void>::type> + : std::true_type {}; + +template // For pointers +struct is_iterator : std::true_type {}; +} // end namespace internal + +#ifndef DPCT_USM_LEVEL_NONE + +template > +class device_vector { +public: + using iterator = device_iterator; + using const_iterator = const iterator; + using reference = device_reference; + using const_reference = const reference; + using value_type = T; + using pointer = T *; + using const_pointer = const T *; + using difference_type = + typename ::std::iterator_traits::difference_type; + using size_type = ::std::size_t; + +private: + Allocator _alloc; + size_type _size; + size_type _capacity; + pointer _storage; + + size_type _min_capacity() const { return size_type(1); } + + void _set_capacity_and_alloc() { + _capacity = ::std::max(_size * 2, _min_capacity()); + _storage = ::std::allocator_traits::allocate(_alloc, _capacity); } void _construct(size_type n, size_type start_idx = 0){ if (n > 0) { + pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { cgh.parallel_for(n, [=](sycl::id<1> i) { - device_allocator_construct(_alloc, _storage + start_idx + i); + ::dpct::internal::device_allocator_construct(p + start_idx + i); }); }); } @@ -265,9 +262,10 @@ class device_vector { void _construct(size_type n, const T &value, size_type start_idx = 0){ if (n > 0) { + pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { - cgh.parallel_for(n, [=, _alloc](sycl::id<1> i) { - device_allocator_construct(_alloc, _storage + start_idx + i, value); + cgh.parallel_for(n, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_construct(p + start_idx + i, value); }); }); } @@ -278,9 +276,10 @@ class device_vector { int num_eles = ::std::distance(first,last); if (num_eles > 0) { + pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { - cgh.parallel_for(num_eles, [=, _alloc](sycl::id<1> i) { - device_allocator_construct(_alloc, _storage + start_idx + i, first + i); + cgh.parallel_for(num_eles, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_construct::value_type>(p + start_idx + i, first + i); }); }); } From a519a8446aa183461433d2a8b5618b781baa9e66 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 30 Mar 2023 13:37:54 -0400 Subject: [PATCH 04/23] Fix treatment of default constructor Signed-off-by: Dan Hoeflinger --- clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 61767dd4b37f..17056879973a 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -306,7 +306,10 @@ class device_vector { _set_capacity_and_alloc(); } ~device_vector() /*= default*/ { ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); }; - explicit device_vector(size_type n) : device_vector(n, T()) {} + explicit device_vector(size_type n) : _alloc(get_default_queue()), _size(n) { + _set_capacity_and_alloc(); + _construct(n); + } explicit device_vector(size_type n, const T &value) : _alloc(get_default_queue()), _size(n) { _set_capacity_and_alloc(); From 8bb63dbf4caa5030d99565f58336ae8c4fe1cbfa Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 30 Mar 2023 16:23:35 -0400 Subject: [PATCH 05/23] adding destroy functionality Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 17056879973a..9c4645d287b3 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -294,6 +294,23 @@ class device_vector { _construct(buf_first, buf_last, start_idx); } + void _destroy(size_type n, size_type start_idx = 0){ + //only call destroy kernel *only* if custom destroy function is provided + if constexpr(::dpct::internal::__has_destroy::value) + { + if (n > 0) + { + pointer p = _storage; + get_default_queue().submit([&](sycl::handler &cgh) { + cgh.parallel_for(n, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_traits::destroy(p + start_idx + i); + }); + }); + } + } + } + + public: template operator ::std::vector() const { auto __tmp = ::std::vector(this->size()); @@ -305,7 +322,10 @@ class device_vector { : _alloc(get_default_queue()), _size(0), _capacity(_min_capacity()) { _set_capacity_and_alloc(); } - ~device_vector() /*= default*/ { ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); }; + ~device_vector() /*= default*/ { + _destroy(size()); + ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); + } explicit device_vector(size_type n) : _alloc(get_default_queue()), _size(n) { _set_capacity_and_alloc(); _construct(n); @@ -493,6 +513,10 @@ class device_vector { // copy remainder to temporary buffer. ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), last, end(), tmp); + + auto position = ::std::distance(begin(), first); + _destroy(n, position); + // override (erase) subsequence in storage. ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), tmp, tmp + m, first); From c77643d01a2b113905a038ce96b9063b6e86c945 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 30 Mar 2023 16:25:50 -0400 Subject: [PATCH 06/23] making device_allocator_traits similar to std Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 9c4645d287b3..04fe6180bbee 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -128,8 +128,6 @@ class usm_device_allocator { sycl::property_list MPropList; }; - - //taken from libc++ template struct __has_construct_impl : ::std::false_type { }; @@ -153,55 +151,65 @@ class usm_device_allocator { // end of taken from libc++ +template +struct device_allocator_traits{ + //apply default constructor if no override is provided - template + template + static typename ::std::enable_if_t::value, void> - device_allocator_construct(DataT* p) + construct(DataT* p) { ::new((void*)p) DataT(); } //use provided default construct call if it exists - template + template + static typename ::std::enable_if_t<__has_construct<_Allocator, DataT*>::value, void> - device_allocator_construct(DataT* p) + construct(DataT* p) { _Allocator::construct(p); } //apply constructor if no override is provided - template + template + static typename ::std::enable_if_t::value, void> - device_allocator_construct(DataT* p, T_in arg) + construct(DataT* p, T_in arg) { ::new((void*)p) DataT(arg); } //use provided construct call if it exists - template + template + static typename ::std::enable_if_t<__has_construct<_Allocator, DataT*, T_in>::value, void> - device_allocator_construct(_Allocator alloc, DataT* p, T_in arg) + construct(_Allocator alloc, DataT* p, T_in arg) { _Allocator::construct(p, arg); } //apply default destructor if no destroy override is provided - template + template + static typename ::std::enable_if_t::value, void> - device_allocator_destroy(DataT* p) + destroy(DataT* p) { p->~DataT(); } //use provided destroy call if it exists - template + template + static typename ::std::enable_if_t<__has_destroy<_Allocator, DataT*>::value, void> - device_allocator_destroy(DataT* p) + destroy(DataT* p) { _Allocator::destroy(p); } +}; template // for non-iterators struct is_iterator : std::false_type {}; @@ -253,7 +261,7 @@ class device_vector { pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { cgh.parallel_for(n, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_construct(p + start_idx + i); + ::dpct::internal::device_allocator_traits::construct(p + start_idx + i); }); }); } @@ -265,7 +273,7 @@ class device_vector { pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { cgh.parallel_for(n, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_construct(p + start_idx + i, value); + ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, value); }); }); } @@ -279,7 +287,7 @@ class device_vector { pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { cgh.parallel_for(num_eles, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_construct::value_type>(p + start_idx + i, first + i); + ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, first + i); }); }); } From 7894c6da60e91ed41c153dffd301b9228f0fdca8 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 11:15:05 -0400 Subject: [PATCH 07/23] switch to using oneDPL range functionality; misc. Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 82 ++++++++----------- 1 file changed, 35 insertions(+), 47 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 04fe6180bbee..979c24a6ff59 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -186,7 +186,7 @@ struct device_allocator_traits{ template static typename ::std::enable_if_t<__has_construct<_Allocator, DataT*, T_in>::value, void> - construct(_Allocator alloc, DataT* p, T_in arg) + construct(DataT* p, T_in arg) { _Allocator::construct(p, arg); } @@ -263,7 +263,7 @@ class device_vector { cgh.parallel_for(n, [=](sycl::id<1> i) { ::dpct::internal::device_allocator_traits::construct(p + start_idx + i); }); - }); + }).wait(); } } @@ -275,35 +275,31 @@ class device_vector { cgh.parallel_for(n, [=](sycl::id<1> i) { ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, value); }); - }); + }).wait(); } } - template - void _construct(DevIter first, DevIter last, size_type start_idx = 0){ + template + void _construct(Iter first, Iter last, size_type start_idx = 0){ int num_eles = ::std::distance(first,last); if (num_eles > 0) { + //this should properly handle host or device input data + auto read_input = oneapi::dpl::__ranges::__get_sycl_range(); + auto input_rng = read_input(first, last).all_view(); pointer p = _storage; get_default_queue().submit([&](sycl::handler &cgh) { + oneapi::dpl::__ranges::__require_access(cgh, input_rng); cgh.parallel_for(num_eles, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, first + i); + ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, input_rng[i]); }); - }); + }).wait(); } } - - template - void _construct_from_host(HostIter first, HostIter last, size_type start_idx = 0){ - sycl::buffer buf(first, last); - auto buf_first = oneapi::dpl::begin(buf); - auto buf_last = oneapi::dpl::end(buf); - _construct(buf_first, buf_last, start_idx); - } - void _destroy(size_type n, size_type start_idx = 0){ - //only call destroy kernel *only* if custom destroy function is provided + //only call destroy kernel *only* if custom destroy function is provided to + // prevent extra unnecessary kernel call if constexpr(::dpct::internal::__has_destroy::value) { if (n > 0) @@ -313,7 +309,7 @@ class device_vector { cgh.parallel_for(n, [=](sycl::id<1> i) { ::dpct::internal::device_allocator_traits::destroy(p + start_idx + i); }); - }); + }).wait(); } } } @@ -352,36 +348,33 @@ class device_vector { other._storage = nullptr; } -// TODO: check if this is ok for both random_access_iterators and non random_access_iterators template device_vector(InputIterator first, - typename ::std::enable_if< - internal::is_iterator::value && - !::std::is_pointer::value, - InputIterator>::type last) + InputIterator last) : _alloc(get_default_queue()) { _size = ::std::distance(first, last); _set_capacity_and_alloc(); _construct(first, last); } - template - device_vector(InputIterator first, - typename ::std::enable_if<::std::is_pointer::value, - InputIterator>::type last) - : _alloc(get_default_queue()) { - _size = ::std::distance(first, last); - _set_capacity_and_alloc(); - if (_size > 0) { - auto ptr_type = sycl::get_pointer_type(first, get_default_context()); - if (ptr_type != sycl::usm::alloc::host && - ptr_type != sycl::usm::alloc::unknown) { - _construct(first, last); - } else { - _construct_from_host(first, last); - } - } - } + // template + // device_vector(InputIterator first, + // typename ::std::enable_if<::std::is_pointer::value, + // InputIterator>::type last) + // : _alloc(get_default_queue()) { + // _size = ::std::distance(first, last); + // _set_capacity_and_alloc(); + // if (_size > 0) { + // auto ptr_type = sycl::get_pointer_type(first, get_default_context()); + // _construct(first, last); + // // if (ptr_type != sycl::usm::alloc::host && + // // ptr_type != sycl::usm::alloc::unknown) { + // // _construct(first, last); + // // } else { + // // _construct_from_host(first, last); + // // } + // } + // } template device_vector(const device_vector &other) @@ -393,16 +386,11 @@ class device_vector { } template - device_vector(::std::vector &v) - : _alloc(get_default_queue()), _size(v.size()) { - _set_capacity_and_alloc(); - _construct_from_host(v.begin(), v.end()); - } - + device_vector(::std::vector &v):device_vector(v.begin(), v.end()){} template device_vector &operator=(const ::std::vector &v) { resize(v.size()); - _construct_from_host(v.begin(), v.end()); + _construct(v.begin(), v.end()); return *this; } device_vector &operator=(const device_vector &other) { From e1da65ae11a4c7a6142bdc8bf7762711d85d0ab7 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 11:20:47 -0400 Subject: [PATCH 08/23] Comments Signed-off-by: Dan Hoeflinger --- .../test/dpct/helper_files_ref/include/dpl_extras/vector.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 979c24a6ff59..f5f438666f3e 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -151,10 +151,14 @@ class usm_device_allocator { // end of taken from libc++ + +//device_allocator_traits is a device-friendly subset of the functionality of +// std::allocator_traits which uses static construct and destroy functions +// and is usable inside of sycl kernels without passing the allocator to the +// kernel. template struct device_allocator_traits{ - //apply default constructor if no override is provided template static @@ -191,7 +195,6 @@ struct device_allocator_traits{ _Allocator::construct(p, arg); } - //apply default destructor if no destroy override is provided template static From 4040b6a27ee6aec480e1172e931ba27380862a56 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 11:22:58 -0400 Subject: [PATCH 09/23] removing unnecessary TODO Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index f5f438666f3e..68864c2c3ab3 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -360,25 +360,6 @@ class device_vector { _construct(first, last); } - // template - // device_vector(InputIterator first, - // typename ::std::enable_if<::std::is_pointer::value, - // InputIterator>::type last) - // : _alloc(get_default_queue()) { - // _size = ::std::distance(first, last); - // _set_capacity_and_alloc(); - // if (_size > 0) { - // auto ptr_type = sycl::get_pointer_type(first, get_default_context()); - // _construct(first, last); - // // if (ptr_type != sycl::usm::alloc::host && - // // ptr_type != sycl::usm::alloc::unknown) { - // // _construct(first, last); - // // } else { - // // _construct_from_host(first, last); - // // } - // } - // } - template device_vector(const device_vector &other) : _alloc(get_default_queue()) { @@ -547,8 +528,6 @@ class device_vector { // resizing might invalidate position position = begin() + position.get_idx(); - //TODO: Is this OK? can we assume that the above and below copy is equivalent to std::move? - // otherwise we are constructing on top of copied out data, and will be destructing data which was copied _construct(n, x, position.get_idx()); ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), From 54861b3013fb7eeb8e5194dcee4a46d1cc3c5b59 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 11:47:42 -0400 Subject: [PATCH 10/23] Adding warning to usm_device_allocator Signed-off-by: Dan Hoeflinger --- .../helper_files_ref/include/dpl_extras/vector.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 68864c2c3ab3..6c52c8600b3f 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -27,6 +27,13 @@ namespace dpct { namespace internal { + +//usm_device_allocator is provided here specifically for dpct::device_vector. +// Warning: It may be dangerous to use usm_device_allocator in other settings, +// because containers may use the supplied allocator to allocate side +// information which needs to be available on the host. Data allocated with +// this allocator is by definition not available on the host, and would result +// in an error if accessed from the host without proper handling. template class usm_device_allocator { public: @@ -128,7 +135,6 @@ class usm_device_allocator { sycl::property_list MPropList; }; -//taken from libc++ template struct __has_construct_impl : ::std::false_type { }; @@ -137,10 +143,11 @@ class usm_device_allocator { (void)std::declval<_Alloc>().construct(std::declval<_Args>()...) ), _Alloc, _Args...> : ::std::true_type { }; + //check if the provided allocator has a construct() member function template struct __has_construct : __has_construct_impl { }; - // __has_destroy + //check if the provided allocator has a destroy() member function template struct __has_destroy : ::std::false_type { }; @@ -149,7 +156,6 @@ class usm_device_allocator { (void)std::declval<_Alloc>().destroy(std::declval<_Pointer>()) )> : ::std::true_type { }; - // end of taken from libc++ //device_allocator_traits is a device-friendly subset of the functionality of From c93b74359e74982a12fb4da3e58f723483c6f51d Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 11:52:05 -0400 Subject: [PATCH 11/23] clang-format Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 322 +++++++++--------- 1 file changed, 165 insertions(+), 157 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 6c52c8600b3f..4d17ba8f7fdb 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -26,16 +26,13 @@ namespace dpct { namespace internal { - - -//usm_device_allocator is provided here specifically for dpct::device_vector. -// Warning: It may be dangerous to use usm_device_allocator in other settings, -// because containers may use the supplied allocator to allocate side -// information which needs to be available on the host. Data allocated with -// this allocator is by definition not available on the host, and would result -// in an error if accessed from the host without proper handling. -template -class usm_device_allocator { +// usm_device_allocator is provided here specifically for dpct::device_vector. +// Warning: It may be dangerous to use usm_device_allocator in other settings, +// because containers may use the supplied allocator to allocate side +// information which needs to be available on the host. Data allocated with +// this allocator is by definition not available on the host, and would result +// in an error if accessed from the host without proper handling. +template class usm_device_allocator { public: using value_type = T; using propagate_on_container_copy_assignment = std::true_type; @@ -47,12 +44,12 @@ class usm_device_allocator { typedef usm_device_allocator other; }; - usm_device_allocator() = delete; usm_device_allocator(const sycl::context &Ctxt, const sycl::device &Dev, - const sycl::property_list &PropList = {}) + const sycl::property_list &PropList = {}) : MContext(Ctxt), MDevice(Dev), MPropList(PropList) {} - usm_device_allocator(const sycl::queue &Q, const sycl::property_list &PropList = {}) + usm_device_allocator(const sycl::queue &Q, + const sycl::property_list &PropList = {}) : MContext(Q.get_context()), MDevice(Q.get_device()), MPropList(PropList) {} usm_device_allocator(const usm_device_allocator &) = default; @@ -78,12 +75,13 @@ class usm_device_allocator { /// Allocates memory. /// /// \param NumberOfElements is a count of elements to allocate memory for. - T *allocate(size_t NumberOfElements, const sycl::detail::code_location CodeLoc = - sycl::detail::code_location::current()) { + T *allocate(size_t NumberOfElements, + const sycl::detail::code_location CodeLoc = + sycl::detail::code_location::current()) { - auto Result = reinterpret_cast( - aligned_alloc(getAlignment(), NumberOfElements * sizeof(value_type), - MDevice, MContext, sycl::usm::alloc::device, MPropList, CodeLoc)); + auto Result = reinterpret_cast(aligned_alloc( + getAlignment(), NumberOfElements * sizeof(value_type), MDevice, + MContext, sycl::usm::alloc::device, MPropList, CodeLoc)); if (!Result) { throw sycl::exception(sycl::errc::memory_allocation); } @@ -94,9 +92,9 @@ class usm_device_allocator { /// /// \param Ptr is a pointer to memory being deallocated. /// \param Size is a number of elements previously passed to allocate. - void deallocate( - T *Ptr, size_t, - const sycl::detail::code_location CodeLoc = sycl::detail::code_location::current()) { + void deallocate(T *Ptr, size_t, + const sycl::detail::code_location CodeLoc = + sycl::detail::code_location::current()) { if (Ptr) { free(Ptr, MContext, CodeLoc); } @@ -105,15 +103,13 @@ class usm_device_allocator { template friend bool operator==(const usm_device_allocator &One, const usm_device_allocator &Two) { - return ((One.MContext == Two.MContext) && - (One.MDevice == Two.MDevice)); + return ((One.MContext == Two.MContext) && (One.MDevice == Two.MDevice)); } template friend bool operator!=(const usm_device_allocator &One, const usm_device_allocator &Two) { - return !((One.MContext == Two.MContext) && - (One.MDevice == Two.MDevice)); + return !((One.MContext == Two.MContext) && (One.MDevice == Two.MDevice)); } template bool has_property() const noexcept { @@ -125,97 +121,91 @@ class usm_device_allocator { } private: - constexpr size_t getAlignment() const { return sycl::max(alignof(T), Alignment); } + constexpr size_t getAlignment() const { + return sycl::max(alignof(T), Alignment); + } - template - friend class usm_device_allocator; + template friend class usm_device_allocator; sycl::context MContext; sycl::device MDevice; sycl::property_list MPropList; }; - template - struct __has_construct_impl : ::std::false_type { }; - - template - struct __has_construct_impl().construct(std::declval<_Args>()...) - ), _Alloc, _Args...> : ::std::true_type { }; +template +struct __has_construct_impl : ::std::false_type {}; - //check if the provided allocator has a construct() member function - template - struct __has_construct : __has_construct_impl { }; +template +struct __has_construct_impl().construct( + std::declval<_Args>()...)), + _Alloc, _Args...> : ::std::true_type {}; - //check if the provided allocator has a destroy() member function - template - struct __has_destroy : ::std::false_type { }; +// check if the provided allocator has a construct() member function +template +struct __has_construct : __has_construct_impl {}; - template - struct __has_destroy<_Alloc, _Pointer, decltype( - (void)std::declval<_Alloc>().destroy(std::declval<_Pointer>()) - )> : ::std::true_type { }; +// check if the provided allocator has a destroy() member function +template +struct __has_destroy : ::std::false_type {}; +template +struct __has_destroy<_Alloc, _Pointer, + decltype((void)std::declval<_Alloc>().destroy( + std::declval<_Pointer>()))> : ::std::true_type {}; +// device_allocator_traits is a device-friendly subset of the functionality of +// std::allocator_traits which uses static construct and destroy functions +// and is usable inside of sycl kernels without passing the allocator to the +// kernel. +template struct device_allocator_traits { -//device_allocator_traits is a device-friendly subset of the functionality of -// std::allocator_traits which uses static construct and destroy functions -// and is usable inside of sycl kernels without passing the allocator to the -// kernel. -template -struct device_allocator_traits{ - - //apply default constructor if no override is provided + // apply default constructor if no override is provided template static - typename ::std::enable_if_t::value, void> - construct(DataT* p) - { - ::new((void*)p) DataT(); + typename ::std::enable_if_t::value, + void> + construct(DataT *p) { + ::new ((void *)p) DataT(); } - //use provided default construct call if it exists + // use provided default construct call if it exists template static - typename ::std::enable_if_t<__has_construct<_Allocator, DataT*>::value, void> - construct(DataT* p) - { + typename ::std::enable_if_t<__has_construct<_Allocator, DataT *>::value, + void> + construct(DataT *p) { _Allocator::construct(p); } - //apply constructor if no override is provided + // apply constructor if no override is provided template - static - typename ::std::enable_if_t::value, void> - construct(DataT* p, T_in arg) - { - ::new((void*)p) DataT(arg); + static typename ::std::enable_if_t< + !__has_construct<_Allocator, DataT *, T_in>::value, void> + construct(DataT *p, T_in arg) { + ::new ((void *)p) DataT(arg); } - //use provided construct call if it exists + // use provided construct call if it exists template - static - typename ::std::enable_if_t<__has_construct<_Allocator, DataT*, T_in>::value, void> - construct(DataT* p, T_in arg) - { + static typename ::std::enable_if_t< + __has_construct<_Allocator, DataT *, T_in>::value, void> + construct(DataT *p, T_in arg) { _Allocator::construct(p, arg); } - //apply default destructor if no destroy override is provided + // apply default destructor if no destroy override is provided template - static - typename ::std::enable_if_t::value, void> - destroy(DataT* p) - { + static typename ::std::enable_if_t::value, + void> + destroy(DataT *p) { p->~DataT(); } - //use provided destroy call if it exists + // use provided destroy call if it exists template - static - typename ::std::enable_if_t<__has_destroy<_Allocator, DataT*>::value, void> - destroy(DataT* p) - { + static typename ::std::enable_if_t<__has_destroy<_Allocator, DataT *>::value, + void> + destroy(DataT *p) { _Allocator::destroy(p); } }; @@ -264,80 +254,88 @@ class device_vector { _storage = ::std::allocator_traits::allocate(_alloc, _capacity); } - void _construct(size_type n, size_type start_idx = 0){ - if (n > 0) - { + void _construct(size_type n, size_type start_idx = 0) { + if (n > 0) { pointer p = _storage; - get_default_queue().submit([&](sycl::handler &cgh) { - cgh.parallel_for(n, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_traits::construct(p + start_idx + i); - }); - }).wait(); + get_default_queue() + .submit([&](sycl::handler &cgh) { + cgh.parallel_for(n, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_traits::construct( + p + start_idx + i); + }); + }) + .wait(); } } - void _construct(size_type n, const T &value, size_type start_idx = 0){ - if (n > 0) - { + void _construct(size_type n, const T &value, size_type start_idx = 0) { + if (n > 0) { pointer p = _storage; - get_default_queue().submit([&](sycl::handler &cgh) { - cgh.parallel_for(n, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, value); - }); - }).wait(); + get_default_queue() + .submit([&](sycl::handler &cgh) { + cgh.parallel_for(n, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_traits::construct( + p + start_idx + i, value); + }); + }) + .wait(); } } template - void _construct(Iter first, Iter last, size_type start_idx = 0){ - int num_eles = ::std::distance(first,last); - if (num_eles > 0) - { - //this should properly handle host or device input data - auto read_input = oneapi::dpl::__ranges::__get_sycl_range(); + void _construct(Iter first, Iter last, size_type start_idx = 0) { + int num_eles = ::std::distance(first, last); + if (num_eles > 0) { + // this should properly handle host or device input data + auto read_input = + oneapi::dpl::__ranges::__get_sycl_range(); auto input_rng = read_input(first, last).all_view(); pointer p = _storage; - get_default_queue().submit([&](sycl::handler &cgh) { - oneapi::dpl::__ranges::__require_access(cgh, input_rng); - cgh.parallel_for(num_eles, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_traits::construct(p + start_idx + i, input_rng[i]); - }); - }).wait(); + get_default_queue() + .submit([&](sycl::handler &cgh) { + oneapi::dpl::__ranges::__require_access(cgh, input_rng); + cgh.parallel_for(num_eles, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_traits::construct( + p + start_idx + i, input_rng[i]); + }); + }) + .wait(); } } - void _destroy(size_type n, size_type start_idx = 0){ - //only call destroy kernel *only* if custom destroy function is provided to - // prevent extra unnecessary kernel call - if constexpr(::dpct::internal::__has_destroy::value) - { - if (n > 0) - { + void _destroy(size_type n, size_type start_idx = 0) { + // only call destroy kernel *only* if custom destroy function is provided to + // prevent extra unnecessary kernel call + if constexpr (::dpct::internal::__has_destroy::value) { + if (n > 0) { pointer p = _storage; - get_default_queue().submit([&](sycl::handler &cgh) { - cgh.parallel_for(n, [=](sycl::id<1> i) { - ::dpct::internal::device_allocator_traits::destroy(p + start_idx + i); - }); - }).wait(); + get_default_queue() + .submit([&](sycl::handler &cgh) { + cgh.parallel_for(n, [=](sycl::id<1> i) { + ::dpct::internal::device_allocator_traits::destroy( + p + start_idx + i); + }); + }) + .wait(); } - } + } } - public: template operator ::std::vector() const { auto __tmp = ::std::vector(this->size()); ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - this->begin(), this->end(), __tmp.begin()); + this->begin(), this->end(), __tmp.begin()); return __tmp; } device_vector() : _alloc(get_default_queue()), _size(0), _capacity(_min_capacity()) { _set_capacity_and_alloc(); } - ~device_vector() /*= default*/ { + ~device_vector() /*= default*/ { _destroy(size()); - ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); + ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); } explicit device_vector(size_type n) : _alloc(get_default_queue()), _size(n) { _set_capacity_and_alloc(); @@ -353,13 +351,12 @@ class device_vector { : _alloc(get_default_queue()), _size(other.size()), _capacity(other.capacity()), _storage(other._storage) { other._size = 0; - other._capacity = 0; + other._capacity = 0; other._storage = nullptr; } template - device_vector(InputIterator first, - InputIterator last) + device_vector(InputIterator first, InputIterator last) : _alloc(get_default_queue()) { _size = ::std::distance(first, last); _set_capacity_and_alloc(); @@ -376,7 +373,8 @@ class device_vector { } template - device_vector(::std::vector &v):device_vector(v.begin(), v.end()){} + device_vector(::std::vector &v) + : device_vector(v.begin(), v.end()) {} template device_vector &operator=(const ::std::vector &v) { resize(v.size()); @@ -419,18 +417,19 @@ class device_vector { // allocate buffer for new size auto tmp = ::std::allocator_traits::allocate(_alloc, 2 * n); // copy content (old buffer to new buffer) - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - begin(), end(), tmp); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), + begin(), end(), tmp); // deallocate old memory - ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); + ::std::allocator_traits::deallocate(_alloc, _storage, + _capacity); _storage = tmp; _capacity = 2 * n; } } void resize(size_type new_size, const T &x = T()) { reserve(new_size); - if (new_size > size()) - { + if (new_size > size()) { _construct(new_size - size(), x, size()); } _size = new_size; @@ -448,13 +447,15 @@ class device_vector { void shrink_to_fit(void) { if (_size != capacity()) { size_type tmp_capacity = ::std::max(_size, _min_capacity()); - auto tmp = ::std::allocator_traits::allocate(_alloc,tmp_capacity); + auto tmp = + ::std::allocator_traits::allocate(_alloc, tmp_capacity); if (_size > 0) { ::std::copy( oneapi::dpl::execution::make_device_policy(get_default_queue()), begin(), end(), tmp); } - ::std::allocator_traits::deallocate(_alloc, _storage, _capacity); + ::std::allocator_traits::deallocate(_alloc, _storage, + _capacity); _storage = tmp; _capacity = tmp_capacity; } @@ -462,20 +463,22 @@ class device_vector { void assign(size_type n, const T &x) { resize(n); if (_size > 0) { - ::std::fill(oneapi::dpl::execution::make_device_policy(get_default_queue()), - begin(), begin() + n, x); + ::std::fill( + oneapi::dpl::execution::make_device_policy(get_default_queue()), + begin(), begin() + n, x); } } template void assign(InputIterator first, typename ::std::enable_if::value, - InputIterator>::type last) { + InputIterator>::type last) { auto n = ::std::distance(first, last); resize(n); if (_size > 0) { - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - first, last, begin()); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), + first, last, begin()); } } void clear(void) { _size = 0; } @@ -498,14 +501,14 @@ class device_vector { auto tmp = ::std::allocator_traits::allocate(_alloc, m); // copy remainder to temporary buffer. ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - last, end(), tmp); + last, end(), tmp); auto position = ::std::distance(begin(), first); _destroy(n, position); // override (erase) subsequence in storage. ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - tmp, tmp + m, first); + tmp, tmp + m, first); ::std::allocator_traits::deallocate(_alloc, tmp, m); _size -= n; return begin() + first.get_idx() + n; @@ -527,8 +530,9 @@ class device_vector { // will throw if position is not inside active vector auto tmp = ::std::allocator_traits::allocate(_alloc, m); // copy remainder - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - position, end(), tmp); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), + position, end(), tmp); resize(size() + n); // resizing might invalidate position @@ -536,8 +540,9 @@ class device_vector { _construct(n, x, position.get_idx()); - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - tmp, tmp + m, position + n); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), tmp, + tmp + m, position + n); ::std::allocator_traits::deallocate(_alloc, tmp, m); } } @@ -545,20 +550,22 @@ class device_vector { void insert(iterator position, InputIterator first, typename ::std::enable_if::value, - InputIterator>::type last) { + InputIterator>::type last) { auto n = ::std::distance(first, last); if (position == end()) { resize(size() + n); _construct(first, last, size() - n); - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - first, last, end()); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), + first, last, end()); } else { auto m = ::std::distance(position, end()); // will throw if position is not inside active vector - auto tmp = ::std::allocator_traits::allocate(_alloc,m); + auto tmp = ::std::allocator_traits::allocate(_alloc, m); - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - position, end(), tmp); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), + position, end(), tmp); resize(size() + n); // resizing might invalidate position @@ -566,8 +573,9 @@ class device_vector { _construct(first, last, position.get_idx()); - ::std::copy(oneapi::dpl::execution::make_device_policy(get_default_queue()), - tmp, tmp + m, position + n); + ::std::copy( + oneapi::dpl::execution::make_device_policy(get_default_queue()), tmp, + tmp + m, position + n); ::std::allocator_traits::deallocate(_alloc, tmp, m); } } From 9297bc12674e6378208fccfb5452064d7fddca38 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 12:08:37 -0400 Subject: [PATCH 12/23] provide explicit copy constructor Signed-off-by: Dan Hoeflinger --- .../dpct/helper_files_ref/include/dpl_extras/vector.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 4d17ba8f7fdb..e64e226fbf05 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -363,6 +363,14 @@ class device_vector { _construct(first, last); } + device_vector(const device_vector &other) + : _alloc(get_default_queue()) { + _size = other.size(); + _capacity = other.capacity(); + _storage = ::std::allocator_traits::allocate(_alloc, _capacity); + _construct(other.begin(), other.end()); + } + template device_vector(const device_vector &other) : _alloc(get_default_queue()) { From f55a86b6658b77bc53809310375f7dd1b5558c03 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 12:09:02 -0400 Subject: [PATCH 13/23] remove ambiguous constructor Signed-off-by: Dan Hoeflinger --- clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index e64e226fbf05..fa14cde23555 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -356,7 +356,8 @@ class device_vector { } template - device_vector(InputIterator first, InputIterator last) + device_vector(InputIterator first, + typename ::std::enable_if_t::value, InputIterator> last) : _alloc(get_default_queue()) { _size = ::std::distance(first, last); _set_capacity_and_alloc(); From bcc65074976300468dbb2e9a6c0905b2de3f34cb Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Fri, 31 Mar 2023 15:40:58 -0400 Subject: [PATCH 14/23] device_vector AllocatorAwareContainer requirements Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/vector.h | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index fa14cde23555..93054acd4291 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -240,6 +240,7 @@ class device_vector { using difference_type = typename ::std::iterator_traits::difference_type; using size_type = ::std::size_t; + using allocator_type = Allocator; private: Allocator _alloc; @@ -348,7 +349,7 @@ class device_vector { } device_vector(device_vector &&other) - : _alloc(get_default_queue()), _size(other.size()), + : _alloc(std::move(other._alloc)), _size(other.size()), _capacity(other.capacity()), _storage(other._storage) { other._size = 0; other._capacity = 0; @@ -365,7 +366,7 @@ class device_vector { } device_vector(const device_vector &other) - : _alloc(get_default_queue()) { + : _alloc(std::allocator_traits::select_on_container_copy_construction(other._alloc)) { _size = other.size(); _capacity = other.capacity(); _storage = ::std::allocator_traits::allocate(_alloc, _capacity); @@ -392,14 +393,23 @@ class device_vector { } device_vector &operator=(const device_vector &other) { // Copy assignment operator: + if constexpr(::std::allocator_traits::propagate_on_container_copy_assignment::value) + { + _alloc = other._alloc; + } resize(other.size()); _construct(other.begin(), other.end()); return *this; } device_vector &operator=(device_vector &&other) { // Move assignment operator: - device_vector dummy(::std::move(other)); - this->swap(dummy); + if constexpr(::std::allocator_traits::propagate_on_container_move_assignment::value) + { + _alloc = std::move(other._alloc); + } + _size = std::move(other._size); + _capacity = std::move(other._capacity); + _storage = std::move(other._storage); return *this; } size_type size() const { return _size; } @@ -417,7 +427,10 @@ class device_vector { ::std::swap(_size, v._size); ::std::swap(_capacity, v._capacity); ::std::swap(_storage, v._storage); - ::std::swap(_alloc, v._alloc); + if constexpr (::std::allocator_traits::propagate_on_container_swap::value) + { + ::std::swap(_alloc, v._alloc); + } } reference operator[](size_type n) { return _storage[n]; } const_reference operator[](size_type n) const { return _storage[n]; } From 9309b931c390a353ac44648900e7ead787da0ace Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 3 Apr 2023 11:17:01 -0400 Subject: [PATCH 15/23] Fix bug with operator-- Signed-off-by: Dan Hoeflinger --- clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 602394a5f933..932dc852404f 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -87,7 +87,7 @@ template struct device_reference { return *this; }; device_reference &operator--() { - __assign_from(__get_value()+1); + __assign_from(__get_value()-1); return *this; }; device_reference operator++(int) { From 504e0ffbb889482f7403ad4a1b43b9b9d946f066 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 3 Apr 2023 21:31:55 -0400 Subject: [PATCH 16/23] removing custom swap kernel Signed-off-by: Dan Hoeflinger --- .../dpct/helper_files_ref/include/dpl_extras/memory.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 932dc852404f..0b70a3ebc3ba 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -146,12 +146,9 @@ template struct device_reference { *this = (input); input = (tmp); #else - sycl::queue default_queue = dpct::get_default_queue(); - default_queue.submit([&](sycl::handler& h) { - h.single_task([=]() { - this->swap(input); - }).wait(); - }); + T tmp = __get_value(); + __assign_from(input.__get_value()); + input.__assign_from(tmp); #endif } T &value; From 3220e78ab50c8660f020fce6d5323687f6b867a4 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Tue, 4 Apr 2023 11:01:41 -0400 Subject: [PATCH 17/23] adjusting device reference to hold ptr internally Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/memory.h | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 0b70a3ebc3ba..15fd2facf2d4 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -60,8 +60,9 @@ template struct device_reference { template device_reference(const device_reference &input) : value(input.value) {} - device_reference(const pointer &input) : value((*input).value) {} - device_reference(value_type &input) : value(input) {} + device_reference(const pointer &input) : value(input.get()) {} + device_reference(value_type *input) : value(input) {} + device_reference(value_type &input) : value(&input) {} template device_reference &operator=(const device_reference &input) { __assign_from(input.__get_value()); @@ -72,7 +73,7 @@ template struct device_reference { value = input.value; #else sycl::queue default_queue = dpct::get_default_queue(); - default_queue.copy(&value, input.value, sizeof(value_type)).wait(); + default_queue.copy(value, input.value, sizeof(value_type)).wait(); #endif return *this; }; @@ -80,8 +81,10 @@ template struct device_reference { __assign_from(x); return *this; }; - pointer operator&() const { return pointer(&value); }; + pointer operator&() const { return pointer(value); }; + operator value_type() const { return __get_value(); } + device_reference &operator++() { __assign_from(__get_value()+1); return *this; @@ -151,33 +154,33 @@ template struct device_reference { input.__assign_from(tmp); #endif } - T &value; + value_type *value; private: #ifdef __SYCL_DEVICE_ONLY //call from the device device_reference &__assign_from(const value_type& from) { - value = from; + *value = from; return *this; } value_type __get_value() const { - return T(value); + return *value; } #else // call from the host device_reference &__assign_from(const value_type& from) { sycl::queue default_queue = dpct::get_default_queue(); - default_queue.fill(&value, from, 1); + default_queue.fill(value, from, 1); return *this; } value_type __get_value() const { - T tmp; + value_type tmp; sycl::queue default_queue = dpct::get_default_queue(); - default_queue.memcpy(&tmp, &value, sizeof(T)*1).wait(); - return T(tmp); + default_queue.memcpy(&tmp, value, sizeof(value_type)*1).wait(); + return value_type(tmp); } #endif From 7069a8dcc5bf41f1d03294e87974b75cb00efeca Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Tue, 4 Apr 2023 11:52:17 -0400 Subject: [PATCH 18/23] passing pointer, not reference to device_reference Signed-off-by: Dan Hoeflinger --- .../helper_files_ref/include/dpl_extras/vector.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h index 93054acd4291..53aa0ce1c123 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/vector.h @@ -432,8 +432,8 @@ class device_vector { ::std::swap(_alloc, v._alloc); } } - reference operator[](size_type n) { return _storage[n]; } - const_reference operator[](size_type n) const { return _storage[n]; } + reference operator[](size_type n) { return reference(_storage + n); } + const_reference operator[](size_type n) const { return reference(_storage + n); } void reserve(size_type n) { if (n > capacity()) { // allocate buffer for new size @@ -460,10 +460,10 @@ class device_vector { return ::std::numeric_limits::max() / sizeof(T); } size_type capacity() const { return _capacity; } - const_reference front() const { return *begin(); } - reference front() { return *begin(); } - const_reference back(void) const { return *(end() - 1); } - reference back(void) { return *(end() - 1); } + const_reference front() const { return reference(begin()); } + reference front() { return reference(begin()); } + const_reference back(void) const { return reference(end() - 1); } + reference back(void) { return reference(end() - 1); } pointer data(void) { return _storage; } const_pointer data(void) const { return _storage; } void shrink_to_fit(void) { From 3d43770bcf5288a647a92d7b6ce322dbd1b95926 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Tue, 4 Apr 2023 14:47:03 -0400 Subject: [PATCH 19/23] (breaking) bug fix Signed-off-by: Dan Hoeflinger --- clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 15fd2facf2d4..42631022e2c7 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -156,7 +156,7 @@ template struct device_reference { } value_type *value; private: -#ifdef __SYCL_DEVICE_ONLY //call from the device +#ifdef __SYCL_DEVICE_ONLY__ //call from the device device_reference &__assign_from(const value_type& from) { *value = from; From 438616db56f2f4e182bd3e35d4a7ab38985b89b9 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 6 Apr 2023 11:01:05 -0400 Subject: [PATCH 20/23] resolves issue of missing kernel compilation Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/memory.h | 75 ++++++++++++------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 42631022e2c7..b8938a2c5088 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -68,15 +68,6 @@ template struct device_reference { __assign_from(input.__get_value()); return *this; }; - device_reference &operator=(const device_reference &input) { -#ifdef __SYCL_DEVICE_ONLY__ - value = input.value; -#else - sycl::queue default_queue = dpct::get_default_queue(); - default_queue.copy(value, input.value, sizeof(value_type)).wait(); -#endif - return *this; - }; device_reference &operator=(const value_type &x) { __assign_from(x); return *this; @@ -143,47 +134,75 @@ template struct device_reference { __assign_from(__get_value() >> input); return *this; }; + + virtual void + swap_helper(device_reference &input) + { + T tmp = __get_value(); + __assign_from(input.__get_value()); + input.__assign_from(tmp); + } + void swap(device_reference &input) { #ifdef __SYCL_DEVICE_ONLY__ T tmp = (*this); *this = (input); input = (tmp); #else - T tmp = __get_value(); - __assign_from(input.__get_value()); - input.__assign_from(tmp); + swap_helper(input); #endif } - value_type *value; -private: -#ifdef __SYCL_DEVICE_ONLY__ //call from the device - device_reference &__assign_from(const value_type& from) + + virtual void operator_equal_helper(const device_reference &input) { - *value = from; - return *this; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.copy(value, input.value, sizeof(value_type)).wait(); } - value_type __get_value() const + device_reference &operator=(const device_reference &input) { +#ifdef __SYCL_DEVICE_ONLY__ + value = input.value; +#else + operator_equal_helper(input); +#endif + return *this; + }; + + value_type *value; +private: + + virtual void assign_from_helper(const value_type& from) { - return *value; + dpct::get_default_queue().fill(value, from, 1); } -#else // call from the host + device_reference &__assign_from(const value_type& from) { - sycl::queue default_queue = dpct::get_default_queue(); - default_queue.fill(value, from, 1); +#ifdef __SYCL_DEVICE_ONLY__ //call from the device + *value = from; +#else + assign_from_helper(from); +#endif return *this; } + + virtual value_type get_value_helper() const + { + value_type tmp; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.memcpy(&tmp, value, sizeof(value_type)*1).wait(); + return value_type(tmp); + } value_type __get_value() const { - value_type tmp; - sycl::queue default_queue = dpct::get_default_queue(); - default_queue.memcpy(&tmp, value, sizeof(value_type)*1).wait(); - return value_type(tmp); +#ifdef __SYCL_DEVICE_ONLY__ //call from the device + return *value; +#else // call from the host + return get_value_helper(); +#endif } -#endif }; template From 4afce04dcc8af04508fd8eafda01880ee6008244 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 6 Apr 2023 14:04:19 -0400 Subject: [PATCH 21/23] launching single kernel per operator Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/memory.h | 290 ++++++++++++++++-- 1 file changed, 256 insertions(+), 34 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index b8938a2c5088..54db59c2775a 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -76,12 +76,47 @@ template struct device_reference { operator value_type() const { return __get_value(); } + virtual void PlusPlus_helper() + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)++; + }); + }) + .wait(); + } + device_reference &operator++() { - __assign_from(__get_value()+1); +#if __SYCL_DEVICE_ONLY__ + (*value)++; +#else + PlusPlus_helper(); +#endif return *this; }; + + + virtual void MinusMinus_helper() + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)--; + }); + }) + .wait(); + } + + device_reference &operator--() { - __assign_from(__get_value()-1); +#if __SYCL_DEVICE_ONLY__ + (*value)--; +#else + MinusMinus_helper(); +#endif return *this; }; device_reference operator++(int) { @@ -94,60 +129,241 @@ template struct device_reference { --(*this); return ref; }; - device_reference &operator+=(const T &input) { - __assign_from(__get_value() + input); + + virtual void PlusEqual_helper(const value_type &input) + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)+=input; + }); + }) + .wait(); + } + + device_reference &operator+=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)+=input; +#else + PlusEqual_helper(input); +#endif return *this; }; - device_reference &operator-=(const T &input) { - __assign_from(__get_value() - input); + + virtual void MinusEqual_helper(const value_type &input) + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)-=input; + }); + }) + .wait(); + } + + device_reference &operator-=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)-=input; +#else + MinusEqual_helper(input); +#endif return *this; }; - device_reference &operator*=(const T &input) { - __assign_from(__get_value() * input); + + virtual void TimesEqual_helper(const value_type &input) + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp) *= input; + }); + }) + .wait(); + } + + device_reference &operator*=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value) *= input; +#else + TimesEqual_helper(input); +#endif return *this; - }; - device_reference &operator/=(const T &input) { - __assign_from(__get_value() / input); + } + + virtual void DivideEqual_helper(const value_type &input) + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp) /= input; + }); + }) + .wait(); + } + + device_reference &operator/=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)/=input; +#else + DivideEqual_helper(input); +#endif return *this; - }; - device_reference &operator%=(const T &input) { - __assign_from(__get_value() % input); + } + + virtual void ModEqual_helper(const value_type &input) + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp) %= input; + }); + }) + .wait(); + } + + device_reference &operator%=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value) %= input; +#else + ModEqual_helper(input); +#endif return *this; - }; - device_reference &operator&=(const T &input) { - __assign_from(__get_value() & input); + } + + virtual void AndEqual_helper(const value_type &input) + { + value_type *tmp = value; + + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp) &= input; + }); + }) + .wait(); + } + + device_reference &operator&=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value) &= input; +#else + AndEqual_helper(input); +#endif return *this; - }; - device_reference &operator|=(const T &input) { - __assign_from(__get_value() | input); + } + + virtual void OrEqual_helper(const value_type &input) + { + value_type *tmp = value; + + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)|=input; + }); + }) + .wait(); + } + + device_reference &operator|=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)|=input; +#else + OrEqual_helper(input); +#endif return *this; }; - device_reference &operator^=(const T &input) { - __assign_from(__get_value() ^ input); + + virtual void CrossEqual_helper(const value_type &input) + { + value_type *tmp = value; + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)^=input; + }); + }) + .wait(); + } + device_reference &operator^=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)^=input; +#else + CrossEqual_helper(input); +#endif return *this; }; - device_reference &operator<<=(const T &input) { - __assign_from(__get_value() << input); + + virtual void ShiftLeftEqual_helper(const int &input) + { + value_type *tmp = value; + + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)<<=input; + }); + }) + .wait(); + } + device_reference &operator<<=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)<<=input; +#else + ShiftLeftEqual_helper(input); +#endif + __apply_binary([=] (const value_type& a, const value_type& b){return a << b;}, &value, input); return *this; }; - device_reference &operator>>=(const T &input) { - __assign_from(__get_value() >> input); + + virtual void ShiftRightEqual_helper(const int &input) + { + value_type *tmp = value; + + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=] () { + (*tmp)>>=input; + }); + }) + .wait(); + } + + device_reference &operator>>=(const value_type &input) { +#if __SYCL_DEVICE_ONLY__ + (*value)>>=input; +#else + ShiftRightEqual_helper(input); +#endif return *this; }; virtual void swap_helper(device_reference &input) { - T tmp = __get_value(); - __assign_from(input.__get_value()); - input.__assign_from(tmp); + value_type *my_val = value; + value_type *input_val = input.value; + + sycl::queue default_queue = dpct::get_default_queue(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=]() { + T tmp = *my_val; + *my_val = *(input_val); + *(input_val) = tmp; + }); + }).wait(); } void swap(device_reference &input) { #ifdef __SYCL_DEVICE_ONLY__ - T tmp = (*this); - *this = (input); - input = (tmp); + T tmp = *value; + *value = *(input.value); + *(input.value) = tmp; #else swap_helper(input); #endif @@ -155,13 +371,19 @@ template struct device_reference { virtual void operator_equal_helper(const device_reference &input) { + value_type *tmp = value; + value_type *input_val = input.value; sycl::queue default_queue = dpct::get_default_queue(); - default_queue.copy(value, input.value, sizeof(value_type)).wait(); + default_queue.submit([&](sycl::handler& h) { + h.single_task>([=]() { + *tmp = *(input_val); + }); + }).wait(); } device_reference &operator=(const device_reference &input) { #ifdef __SYCL_DEVICE_ONLY__ - value = input.value; + *value = *(input.value); #else operator_equal_helper(input); #endif From 7a77d30a0a26a91c9a28e667ef15a4bf269158b0 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Thu, 6 Apr 2023 14:41:41 -0400 Subject: [PATCH 22/23] __attribute__((__used__)) instead of virtual Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/memory.h | 52 ++++++++++++------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index 54db59c2775a..c2434347e2eb 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -76,7 +76,8 @@ template struct device_reference { operator value_type() const { return __get_value(); } - virtual void PlusPlus_helper() + __attribute__((__used__)) + void PlusPlus_helper() { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -98,7 +99,8 @@ template struct device_reference { }; - virtual void MinusMinus_helper() + __attribute__((__used__)) + void MinusMinus_helper() { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -130,7 +132,8 @@ template struct device_reference { return ref; }; - virtual void PlusEqual_helper(const value_type &input) + __attribute__((__used__)) + void PlusEqual_helper(const value_type &input) { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -151,7 +154,8 @@ template struct device_reference { return *this; }; - virtual void MinusEqual_helper(const value_type &input) + __attribute__((__used__)) + void MinusEqual_helper(const value_type &input) { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -172,7 +176,8 @@ template struct device_reference { return *this; }; - virtual void TimesEqual_helper(const value_type &input) + __attribute__((__used__)) + void TimesEqual_helper(const value_type &input) { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -193,7 +198,8 @@ template struct device_reference { return *this; } - virtual void DivideEqual_helper(const value_type &input) + __attribute__((__used__)) + void DivideEqual_helper(const value_type &input) { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -214,7 +220,8 @@ template struct device_reference { return *this; } - virtual void ModEqual_helper(const value_type &input) + __attribute__((__used__)) + void ModEqual_helper(const value_type &input) { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -234,8 +241,9 @@ template struct device_reference { #endif return *this; } - - virtual void AndEqual_helper(const value_type &input) + + __attribute__((__used__)) + void AndEqual_helper(const value_type &input) { value_type *tmp = value; @@ -257,7 +265,8 @@ template struct device_reference { return *this; } - virtual void OrEqual_helper(const value_type &input) + __attribute__((__used__)) + void OrEqual_helper(const value_type &input) { value_type *tmp = value; @@ -279,7 +288,8 @@ template struct device_reference { return *this; }; - virtual void CrossEqual_helper(const value_type &input) + __attribute__((__used__)) + void CrossEqual_helper(const value_type &input) { value_type *tmp = value; sycl::queue default_queue = dpct::get_default_queue(); @@ -299,7 +309,8 @@ template struct device_reference { return *this; }; - virtual void ShiftLeftEqual_helper(const int &input) + __attribute__((__used__)) + void ShiftLeftEqual_helper(const int &input) { value_type *tmp = value; @@ -317,11 +328,11 @@ template struct device_reference { #else ShiftLeftEqual_helper(input); #endif - __apply_binary([=] (const value_type& a, const value_type& b){return a << b;}, &value, input); return *this; }; - virtual void ShiftRightEqual_helper(const int &input) + __attribute__((__used__)) + void ShiftRightEqual_helper(const int &input) { value_type *tmp = value; @@ -343,7 +354,8 @@ template struct device_reference { return *this; }; - virtual void + __attribute__((__used__)) + void swap_helper(device_reference &input) { value_type *my_val = value; @@ -369,7 +381,8 @@ template struct device_reference { #endif } - virtual void operator_equal_helper(const device_reference &input) + __attribute__((__used__)) + void operator_equal_helper(const device_reference &input) { value_type *tmp = value; value_type *input_val = input.value; @@ -393,7 +406,8 @@ template struct device_reference { value_type *value; private: - virtual void assign_from_helper(const value_type& from) + __attribute__((__used__)) + void assign_from_helper(const value_type& from) { dpct::get_default_queue().fill(value, from, 1); } @@ -408,8 +422,8 @@ template struct device_reference { return *this; } - - virtual value_type get_value_helper() const + __attribute__((__used__)) + value_type get_value_helper() const { value_type tmp; sycl::queue default_queue = dpct::get_default_queue(); From 87a173ed9c18ed776d90aaa3e717fa1cb6380ac9 Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 10 Apr 2023 16:05:48 -0400 Subject: [PATCH 23/23] Adding device_ref to be ref of device_pointer Signed-off-by: Dan Hoeflinger --- .../include/dpl_extras/memory.h | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h index c2434347e2eb..b57a225dd268 100644 --- a/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h +++ b/clang/test/dpct/helper_files_ref/include/dpl_extras/memory.h @@ -662,9 +662,6 @@ template class device_pointer_base { operator ValueType *() { return ptr; } operator ValueType *() const { return ptr; } - ValueType &operator[](difference_type idx) { return ptr[idx]; } - ValueType &operator[](difference_type idx) const { return ptr[idx]; } - Derived operator+(difference_type forward) const { return Derived{ptr + forward}; } @@ -693,7 +690,7 @@ class device_pointer : public device_pointer_base> { using value_type = T; using difference_type = std::make_signed::type; using pointer = T *; - using reference = T &; + using reference = device_reference; using const_reference = const T &; using iterator_category = std::random_access_iterator_tag; using is_hetero = std::false_type; // required @@ -708,6 +705,9 @@ class device_pointer : public device_pointer_base> { return *this; } + reference operator[](difference_type idx) { return reference(this->ptr + idx); } + reference operator[](difference_type idx) const { return reference(this->ptr + idx); } + // include operators from base class using base_type::operator++; using base_type::operator--; @@ -739,7 +739,7 @@ class device_pointer using value_type = dpct::byte_t; using difference_type = std::make_signed::type; using pointer = void *; - using reference = value_type &; + using reference = device_reference; using const_reference = const value_type &; using iterator_category = std::random_access_iterator_tag; using is_hetero = std::false_type; // required @@ -753,6 +753,9 @@ class device_pointer operator void *() { return this->ptr; } operator void *() const { return this->ptr; } + reference operator[](difference_type idx) { return reference(this->ptr + idx); } + reference operator[](difference_type idx) const { return reference(this->ptr + idx); } + // include operators from base class using base_type::operator++; using base_type::operator--; @@ -773,6 +776,8 @@ class device_pointer return *this; } }; + + #endif #ifdef DPCT_USM_LEVEL_NONE @@ -897,10 +902,10 @@ template class device_iterator : public device_pointer { return *this; } - reference operator*() const { return *(Base::ptr + idx); } + reference operator*() const { return reference(Base::ptr + idx); } - reference operator[](difference_type i) { return Base::ptr[idx + i]; } - reference operator[](difference_type i) const { return Base::ptr[idx + i]; } + reference operator[](difference_type i) { return reference(Base::ptr + idx + i); } + reference operator[](difference_type i) const { return reference(Base::ptr + idx + i); } device_iterator &operator++() { ++idx; return *this; @@ -1035,4 +1040,10 @@ template T &get_raw_reference(T &ref) { } // namespace dpct +template +struct ::sycl::is_device_copyable> : std::true_type {}; +template +struct ::sycl::is_device_copyable> : std::true_type {}; + + #endif