diff --git a/sycl/include/sycl/nd_item.hpp b/sycl/include/sycl/nd_item.hpp index 26d1f4351c915..1382a12040b66 100644 --- a/sycl/include/sycl/nd_item.hpp +++ b/sycl/include/sycl/nd_item.hpp @@ -10,10 +10,13 @@ #include // for __spirv_ControlBarrier #include // for Scope +#include // for initLocalInvocationId #include // for mode, fence_space #include // for __SYCL_ASSUME_INT #include // for __SYCL2020_DEPRECATED, __SY... +#include // for ConvertToOpenCLType_t #include // for getSPIRVMemorySemanticsMask +#include // for is_bool, change_base_... #include // for device_event #include // for make_error_code, errc, exce... #include // for group @@ -38,6 +41,502 @@ namespace ext::oneapi::experimental { template class root_group; } +#if __INTEL_PREVIEW_BREAKING_CHANGES +/// Identifies an instance of the function object executing at each point in an +/// nd_range. +/// +/// \ingroup sycl_api +template class nd_item { +public: + static constexpr int dimensions = Dimensions; + + id get_global_id() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initGlobalInvocationId>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } + + size_t __SYCL_ALWAYS_INLINE get_global_id(int Dimension) const { + size_t Id = get_global_id()[Dimension]; + __SYCL_ASSUME_INT(Id); + return Id; + } + + size_t __SYCL_ALWAYS_INLINE get_global_linear_id() const { + size_t LinId = 0; + id Index = get_global_id(); + range Extent = get_global_range(); + id Offset = get_offset(); + if (1 == Dimensions) { + LinId = Index[0] - Offset[0]; + } else if (2 == Dimensions) { + LinId = (Index[0] - Offset[0]) * Extent[1] + Index[1] - Offset[1]; + } else { + LinId = (Index[0] - Offset[0]) * Extent[1] * Extent[2] + + (Index[1] - Offset[1]) * Extent[2] + Index[2] - Offset[2]; + } + __SYCL_ASSUME_INT(LinId); + return LinId; + } + + id get_local_id() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initLocalInvocationId>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } + + size_t __SYCL_ALWAYS_INLINE get_local_id(int Dimension) const { + size_t Id = get_local_id()[Dimension]; + __SYCL_ASSUME_INT(Id); + return Id; + } + + size_t get_local_linear_id() const { + size_t LinId = 0; + id Index = get_local_id(); + range Extent = get_local_range(); + if (1 == Dimensions) { + LinId = Index[0]; + } else if (2 == Dimensions) { + LinId = Index[0] * Extent[1] + Index[1]; + } else { + LinId = + Index[0] * Extent[1] * Extent[2] + Index[1] * Extent[2] + Index[2]; + } + __SYCL_ASSUME_INT(LinId); + return LinId; + } + + group get_group() const { + // TODO: ideally Group object should be stateless and have a contructor with + // no arguments. + return detail::Builder::createGroup(get_global_range(), get_local_range(), + get_group_range(), get_group_id()); + } + + sub_group get_sub_group() const { return sub_group(); } + + size_t __SYCL_ALWAYS_INLINE get_group(int Dimension) const { + size_t Id = get_group_id()[Dimension]; + __SYCL_ASSUME_INT(Id); + return Id; + } + + size_t __SYCL_ALWAYS_INLINE get_group_linear_id() const { + size_t LinId = 0; + id Index = get_group_id(); + range Extent = get_group_range(); + if (1 == Dimensions) { + LinId = Index[0]; + } else if (2 == Dimensions) { + LinId = Index[0] * Extent[1] + Index[1]; + } else { + LinId = + Index[0] * Extent[1] * Extent[2] + Index[1] * Extent[2] + Index[2]; + } + __SYCL_ASSUME_INT(LinId); + return LinId; + } + + range get_group_range() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initNumWorkgroups>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } + + size_t __SYCL_ALWAYS_INLINE get_group_range(int Dimension) const { + size_t Range = get_group_range()[Dimension]; + __SYCL_ASSUME_INT(Range); + return Range; + } + + range get_global_range() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initGlobalSize>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } + + size_t get_global_range(int Dimension) const { + size_t Val = get_global_range()[Dimension]; + __SYCL_ASSUME_INT(Val); + return Val; + } + + range get_local_range() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initWorkgroupSize>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } + + size_t get_local_range(int Dimension) const { + size_t Id = get_local_range()[Dimension]; + __SYCL_ASSUME_INT(Id); + return Id; + } + + __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020") + id get_offset() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initGlobalOffset>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } + + nd_range get_nd_range() const { + return nd_range(get_global_range(), get_local_range(), + get_offset()); + } + + void barrier(access::fence_space accessSpace = + access::fence_space::global_and_local) const { + uint32_t flags = _V1::detail::getSPIRVMemorySemanticsMask(accessSpace); + __spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup, + flags); + } + + /// Executes a work-group mem-fence with memory ordering on the local address + /// space, global address space or both based on the value of \p accessSpace. + template + __SYCL2020_DEPRECATED("use sycl::atomic_fence() free function instead") + void mem_fence( + typename std::enable_if_t + accessSpace = access::fence_space::global_and_local) const { + uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace); + // TODO: currently, there is no good way in SPIR-V to set the memory + // barrier only for load operations or only for store operations. + // The full read-and-write barrier is used and the template parameter + // 'accessMode' is ignored for now. Either SPIR-V or SYCL spec may be + // changed to address this discrepancy between SPIR-V and SYCL, + // or if we decide that 'accessMode' is the important feature then + // we can fix this later, for example, by using OpenCL 1.2 functions + // read_mem_fence() and write_mem_fence(). + __spirv_MemoryBarrier(__spv::Scope::Workgroup, flags); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest + /// with a source stride specified by \p srcStride, and returns a SYCL + /// device_event which can be used to wait on the completion of the copy. + /// Permitted types for dataT are all scalar and vector types, except boolean. + template + __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") + std::enable_if_t::value, + device_event> async_work_group_copy(local_ptr dest, + global_ptr src, + size_t numElements, + size_t srcStride) const { + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; + + __ocl_event_t E = __SYCL_OpGroupAsyncCopyGlobalToLocal( + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), + numElements, srcStride, 0); + return device_event(E); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest with + /// the destination stride specified by \p destStride, and returns a SYCL + /// device_event which can be used to wait on the completion of the copy. + /// Permitted types for dataT are all scalar and vector types, except boolean. + template + __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") + std::enable_if_t::value, + device_event> async_work_group_copy(global_ptr dest, + local_ptr src, + size_t numElements, + size_t destStride) + const { + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; + + __ocl_event_t E = __SYCL_OpGroupAsyncCopyLocalToGlobal( + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), + numElements, destStride, 0); + return device_event(E); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest + /// with a source stride specified by \p srcStride, and returns a SYCL + /// device_event which can be used to wait on the completion of the copy. + /// Permitted types for DestDataT are all scalar and vector types, except + /// boolean. SrcDataT must be either the same as DestDataT or const DestDataT. + template + std::enable_if_t::value && + std::is_same_v, DestDataT>, + device_event> + async_work_group_copy(decorated_local_ptr dest, + decorated_global_ptr src, size_t numElements, + size_t srcStride) const { + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; + + __ocl_event_t E = __SYCL_OpGroupAsyncCopyGlobalToLocal( + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), + numElements, srcStride, 0); + return device_event(E); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest with + /// the destination stride specified by \p destStride, and returns a SYCL + /// device_event which can be used to wait on the completion of the copy. + /// Permitted types for DestDataT are all scalar and vector types, except + /// boolean. SrcDataT must be either the same as DestDataT or const DestDataT. + template + std::enable_if_t::value && + std::is_same_v, DestDataT>, + device_event> + async_work_group_copy(decorated_global_ptr dest, + decorated_local_ptr src, size_t numElements, + size_t destStride) const { + using DestT = detail::ConvertToOpenCLType_t; + using SrcT = detail::ConvertToOpenCLType_t; + + __ocl_event_t E = __SYCL_OpGroupAsyncCopyLocalToGlobal( + __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()), + numElements, destStride, 0); + return device_event(E); + } + + /// Specialization for scalar bool type. + /// Asynchronously copies a number of elements specified by \p NumElements + /// from the source pointed by \p Src to destination pointed by \p Dest + /// with a stride specified by \p Stride, and returns a SYCL device_event + /// which can be used to wait on the completion of the copy. + template + __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") + std::enable_if_t< + detail::is_scalar_bool::value, + device_event> async_work_group_copy(multi_ptr + Dest, + multi_ptr + Src, + size_t NumElements, + size_t Stride) const { + static_assert(sizeof(bool) == sizeof(uint8_t), + "Async copy to/from bool memory is not supported."); + auto DestP = multi_ptr( + reinterpret_cast(Dest.get())); + auto SrcP = multi_ptr( + reinterpret_cast(Src.get())); + return async_work_group_copy(DestP, SrcP, NumElements, Stride); + } + + /// Specialization for vector bool type. + /// Asynchronously copies a number of elements specified by \p NumElements + /// from the source pointed by \p Src to destination pointed by \p Dest + /// with a stride specified by \p Stride, and returns a SYCL device_event + /// which can be used to wait on the completion of the copy. + template + __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") + std::enable_if_t< + detail::is_vector_bool::value, + device_event> async_work_group_copy(multi_ptr + Dest, + multi_ptr + Src, + size_t NumElements, + size_t Stride) const { + static_assert(sizeof(bool) == sizeof(uint8_t), + "Async copy to/from bool memory is not supported."); + using VecT = detail::change_base_type_t; + auto DestP = address_space_cast( + reinterpret_cast(Dest.get())); + auto SrcP = address_space_cast( + reinterpret_cast(Src.get())); + return async_work_group_copy(DestP, SrcP, NumElements, Stride); + } + + /// Specialization for scalar bool type. + /// Asynchronously copies a number of elements specified by \p NumElements + /// from the source pointed by \p Src to destination pointed by \p Dest + /// with a stride specified by \p Stride, and returns a SYCL device_event + /// which can be used to wait on the completion of the copy. + template + std::enable_if_t::value && + std::is_same_v, DestT>, + device_event> + async_work_group_copy(multi_ptr Dest, + multi_ptr Src, + size_t NumElements, size_t Stride) const { + static_assert(sizeof(bool) == sizeof(uint8_t), + "Async copy to/from bool memory is not supported."); + using QualSrcT = + std::conditional_t, const uint8_t, uint8_t>; + auto DestP = multi_ptr( + detail::cast_AS::pointer>( + Dest.get_decorated())); + auto SrcP = multi_ptr( + detail::cast_AS::pointer>( + Src.get_decorated())); + return async_work_group_copy(DestP, SrcP, NumElements, Stride); + } + + /// Specialization for vector bool type. + /// Asynchronously copies a number of elements specified by \p NumElements + /// from the source pointed by \p Src to destination pointed by \p Dest + /// with a stride specified by \p Stride, and returns a SYCL device_event + /// which can be used to wait on the completion of the copy. + template + std::enable_if_t::value && + std::is_same_v, DestT>, + device_event> + async_work_group_copy(multi_ptr Dest, + multi_ptr Src, + size_t NumElements, size_t Stride) const { + static_assert(sizeof(bool) == sizeof(uint8_t), + "Async copy to/from bool memory is not supported."); + using VecT = detail::change_base_type_t; + using QualSrcVecT = + std::conditional_t, std::add_const_t, VecT>; + auto DestP = multi_ptr( + detail::cast_AS< + typename multi_ptr::pointer>( + Dest.get_decorated())); + auto SrcP = multi_ptr( + detail::cast_AS::pointer>( + Src.get_decorated())); + return async_work_group_copy(DestP, SrcP, NumElements, Stride); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest and + /// returns a SYCL device_event which can be used to wait on the completion + /// of the copy. + /// Permitted types for dataT are all scalar and vector types. + template + __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") + device_event + async_work_group_copy(local_ptr dest, global_ptr src, + size_t numElements) const { + return async_work_group_copy(dest, src, numElements, 1); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest and + /// returns a SYCL device_event which can be used to wait on the completion + /// of the copy. + /// Permitted types for dataT are all scalar and vector types. + template + __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead") + device_event + async_work_group_copy(global_ptr dest, local_ptr src, + size_t numElements) const { + return async_work_group_copy(dest, src, numElements, 1); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest and + /// returns a SYCL device_event which can be used to wait on the completion + /// of the copy. + /// Permitted types for DestDataT are all scalar and vector types. SrcDataT + /// must be either the same as DestDataT or const DestDataT. + template + typename std::enable_if_t< + std::is_same_v>, device_event> + async_work_group_copy(decorated_local_ptr dest, + decorated_global_ptr src, + size_t numElements) const { + return async_work_group_copy(dest, src, numElements, 1); + } + + /// Asynchronously copies a number of elements specified by \p numElements + /// from the source pointed by \p src to destination pointed by \p dest and + /// returns a SYCL device_event which can be used to wait on the completion + /// of the copy. + /// Permitted types for DestDataT are all scalar and vector types. SrcDataT + /// must be either the same as DestDataT or const DestDataT. + template + typename std::enable_if_t< + std::is_same_v>, device_event> + async_work_group_copy(decorated_global_ptr dest, + decorated_local_ptr src, + size_t numElements) const { + return async_work_group_copy(dest, src, numElements, 1); + } + + template void wait_for(eventTN... events) const { + waitForHelper(events...); + } + + sycl::ext::oneapi::experimental::root_group + ext_oneapi_get_root_group() const { + return sycl::ext::oneapi::experimental::root_group{*this}; + } + + nd_item(const nd_item &rhs) = default; + nd_item(nd_item &&rhs) = default; + + nd_item &operator=(const nd_item &rhs) = default; + nd_item &operator=(nd_item &&rhs) = default; + + bool operator==(const nd_item &) const { return true; } + bool operator!=(const nd_item &rhs) const { return !((*this) == rhs); } + +protected: + friend class detail::Builder; + nd_item() {} + nd_item(const item &, const item &, + const group &) {} + + void waitForHelper() const {} + + void waitForHelper(device_event Event) const { Event.wait(); } + + template + void waitForHelper(T E, Ts... Es) const { + waitForHelper(E); + waitForHelper(Es...); + } + + id get_group_id() const { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv::initWorkgroupId>(); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "nd_item methods can't be invoked on the host"); + return {}; +#endif + } +}; +#else /// Identifies an instance of the function object executing at each point in an /// nd_range. /// @@ -81,9 +580,9 @@ template class nd_item { sub_group get_sub_group() const { return sub_group(); } size_t __SYCL_ALWAYS_INLINE get_group(int Dimension) const { - size_t Size = Group[Dimension]; - __SYCL_ASSUME_INT(Size); - return Size; + size_t Id = Group[Dimension]; + __SYCL_ASSUME_INT(Id); + return Id; } size_t __SYCL_ALWAYS_INLINE get_group_linear_id() const { @@ -246,6 +745,7 @@ template class nd_item { item localItem; group Group; }; +#endif template __SYCL_DEPRECATED("use sycl::ext::oneapi::experimental::this_nd_item() instead")