From c2a226bf35350a46d5f83d9ae93afcaa8afc1f89 Mon Sep 17 00:00:00 2001 From: Dmitriy Sobolev Date: Mon, 23 Dec 2024 10:55:55 +0000 Subject: [PATCH] Fall back to SYCL 2020 API for a generic SYCL implementation (#1954) * Fall back to SYCL 2019 API Signed-off-by: Dmitriy Sobolev --- .../dpl/internal/scan_by_segment_impl.h | 8 +- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 8 +- .../dpcpp/parallel_backend_sycl_radix_sort.h | 26 +-- .../dpcpp/parallel_backend_sycl_reduce.h | 4 +- .../parallel_backend_sycl_reduce_by_segment.h | 16 +- .../dpcpp/parallel_backend_sycl_utils.h | 23 +- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 217 ++++++++++++------ .../pstl/hetero/dpcpp/unseq_backend_sycl.h | 15 +- .../dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h | 8 +- include/oneapi/dpl/pstl/utils.h | 2 +- test/general/test_policies.pass.cpp | 2 +- 11 files changed, 208 insertions(+), 121 deletions(-) diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index b895561baeb..a8199e227dd 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -164,11 +164,11 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegScanWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_scan_wg_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { @@ -268,11 +268,11 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh); __dpl_sycl::__local_accessor __loc_seg_ends_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegScanPrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_scan_prefix_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 96d63e33aee..c0568a312ca 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -326,11 +326,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name auto __temp_acc = __result_and_scratch.template __get_scratch_acc( __cgh, __dpl_sycl::__no_init{}); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle()); #endif __cgh.parallel_for<_LocalScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel_1, #endif sycl::nd_range<1>(__n_groups * __wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) { @@ -347,11 +347,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name __cgh.depends_on(__submit_event); auto __temp_acc = __result_and_scratch.template __get_scratch_acc(__cgh); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle()); #endif __cgh.parallel_for<_GroupScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel_2, #endif // TODO: try to balance work between several workgroups instead of one diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index a220b3c29ff..b00023ab02d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -198,11 +198,11 @@ __radix_sort_count_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, : oneapi::dpl::__ranges::__require_access(__hdl, __val_rng, __count_rng); // an accessor per work-group with value counters from each work-item auto __count_lacc = __dpl_sycl::__local_accessor<_CountT>(__wg_size * __radix_states, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif sycl::nd_range<1>(__segments * __wg_size, __wg_size), [=](sycl::nd_item<1> __self_item) { @@ -299,11 +299,11 @@ __radix_sort_scan_submit(_ExecutionPolicy&& __exec, ::std::size_t __scan_wg_size __hdl.depends_on(__dependency_event); // access the counters for all work groups oneapi::dpl::__ranges::__require_access(__hdl, __count_rng); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif sycl::nd_range<1>(__radix_states * __scan_wg_size, __scan_wg_size), [=](sycl::nd_item<1> __self_item) { @@ -346,7 +346,7 @@ enum class __peer_prefix_algo template struct __peer_prefix_helper; -#if (_ONEDPL_LIBSYCL_VERSION >= 50700) +#if _ONEDPL_SYCL2020_SUBGROUP_BARRIER_PRESENT template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic_fetch_or> { @@ -390,7 +390,7 @@ struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic return __offset; } }; -#endif // (_ONEDPL_LIBSYCL_VERSION >= 50700) +#endif // _ONEDPL_SYCL2020_SUBGROUP_BARRIER_PRESENT template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::scan_then_broadcast> @@ -428,7 +428,7 @@ struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::scan_t } }; -#if _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT +#if _ONEDPL_LIBSYCL_SUB_GROUP_MASK_PRESENT template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::subgroup_ballot> { @@ -468,7 +468,7 @@ struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::subgro return __offset; } }; -#endif // _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT +#endif // _ONEDPL_LIBSYCL_SUB_GROUP_MASK_PRESENT template void @@ -544,11 +544,11 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, typename _PeerHelper::_TempStorageT __peer_temp(1, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif //Each SYCL work group processes one data segment. @@ -726,13 +726,13 @@ struct __parallel_radix_sort_iteration sycl::event __reorder_event{}; if (__reorder_sg_size == 8 || __reorder_sg_size == 16 || __reorder_sg_size == 32) { -#if _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT +#if _ONEDPL_LIBSYCL_SUB_GROUP_MASK_PRESENT constexpr auto __peer_algorithm = __peer_prefix_algo::subgroup_ballot; -#elif _ONEDPL_LIBSYCL_VERSION >= 50700 +#elif _ONEDPL_SYCL2020_SUBGROUP_BARRIER_PRESENT constexpr auto __peer_algorithm = __peer_prefix_algo::atomic_fetch_or; #else constexpr auto __peer_algorithm = __peer_prefix_algo::scan_then_broadcast; -#endif // _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT +#endif // _ONEDPL_LIBSYCL_SUB_GROUP_MASK_PRESENT __reorder_event = __radix_sort_reorder_submit<_RadixReorderPeerKernel, __radix_bits, __is_ascending, __peer_algorithm>( diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index edad63d2a79..a29060a9cca 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -370,11 +370,11 @@ struct __parallel_transform_reduce_impl oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size); __dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_ReduceKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_by_segment.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_by_segment.h index 62ae736782d..13d36c20419 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_by_segment.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_by_segment.h @@ -164,12 +164,12 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_ auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) { oneapi::dpl::__ranges::__require_access(__cgh, __keys); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceCountKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=]( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_count_kernel, #endif sycl::nd_item<1> __item) { @@ -202,11 +202,11 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_ __cgh.depends_on(__seg_end_identification); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceOffsetKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_offset_kernel, #endif sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { @@ -225,11 +225,11 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_ auto __partials_acc = __partials.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_wg_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { @@ -348,11 +348,11 @@ __parallel_reduce_by_segment_fallback(oneapi::dpl::__internal::__device_backend_ __dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh); __cgh.depends_on(__wg_reduce); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReducePrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_prefix_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index a81bda902ba..f5275b501a9 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -108,15 +108,12 @@ __supports_sub_group_size(const _ExecutionPolicy& __exec, std::size_t __target_s // Kernel run-time information helpers //----------------------------------------------------------------------------- -// 20201214 value corresponds to Intel(R) oneAPI C++ Compiler Classic 2021.1.2 Patch release -#define _USE_KERNEL_DEVICE_SPECIFIC_API (__SYCL_COMPILER_VERSION > 20201214) || (_ONEDPL_LIBSYCL_VERSION >= 50700) - template ::std::size_t __kernel_work_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __kernel) { const sycl::device& __device = __policy.queue().get_device(); -#if _USE_KERNEL_DEVICE_SPECIFIC_API +#if _ONEDPL_SYCL2020_KERNEL_DEVICE_API_PRESENT return __kernel.template get_info(__device); #else return __kernel.template get_work_group_info(__device); @@ -130,10 +127,10 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __ const sycl::device& __device = __policy.queue().get_device(); [[maybe_unused]] const ::std::size_t __wg_size = __kernel_work_group_size(__policy, __kernel); const ::std::uint32_t __sg_size = -#if _USE_KERNEL_DEVICE_SPECIFIC_API +#if _ONEDPL_SYCL2020_KERNEL_DEVICE_API_PRESENT __kernel.template get_info( __device -# if _ONEDPL_LIBSYCL_VERSION < 60000 +# if _ONEDPL_LIBSYCL_VERSION_LESS_THAN(60000) , sycl::range<3> { __wg_size, 1, 1 } # endif @@ -267,7 +264,7 @@ class __kernel_compiler static_assert(__kernel_count > 0, "At least one kernel name should be provided"); public: -#if _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT template static auto __compile(_Exec&& __exec) @@ -551,13 +548,13 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base inline bool __use_USM_host_allocations(sycl::queue __queue) { -#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT +#if _ONEDPL_SYCL2020_DEFAULT_ACCESSOR_CONSTRUCTOR_PRESENT && _ONEDPL_SYCL_L0_EXT_PRESENT auto __device = __queue.get_device(); if (!__device.is_gpu()) return false; if (!__device.has(sycl::aspect::usm_host_allocations)) return false; - if (__device.get_backend() != sycl::backend::ext_oneapi_level_zero) + if (__device.get_backend() != __dpl_sycl::__level_zero_backend) return false; return true; #else @@ -568,7 +565,7 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base inline bool __use_USM_allocations(sycl::queue __queue) { -#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT +#if _ONEDPL_SYCL2020_DEFAULT_ACCESSOR_CONSTRUCTOR_PRESENT return __queue.get_device().has(sycl::aspect::usm_device_allocations); #else return false; @@ -621,7 +618,7 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base static auto __get_usm_or_buffer_accessor_ptr(const _Acc& __acc, std::size_t __scratch_n = 0) { -#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT +#if _ONEDPL_SYCL2020_DEFAULT_ACCESSOR_CONSTRUCTOR_PRESENT return __acc.__get_pointer(); #else return &__acc[__scratch_n]; @@ -632,7 +629,7 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base auto __get_result_acc(sycl::handler& __cgh, const sycl::property_list& __prop_list = {}) const { -#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT +#if _ONEDPL_SYCL2020_DEFAULT_ACCESSOR_CONSTRUCTOR_PRESENT if (__use_USM_host && __supports_USM_device) return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __result_buf.get(), __prop_list); else if (__supports_USM_device) @@ -648,7 +645,7 @@ struct __result_and_scratch_storage : __result_and_scratch_storage_base auto __get_scratch_acc(sycl::handler& __cgh, const sycl::property_list& __prop_list = {}) const { -#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT +#if _ONEDPL_SYCL2020_DEFAULT_ACCESSOR_CONSTRUCTOR_PRESENT if (__use_USM_host || __supports_USM_device) return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __scratch_buf.get(), __prop_list); return __usm_or_buffer_accessor<__accessor_t<_AccessMode>>(__cgh, __sycl_buf.get(), __prop_list); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index fbe9bd24080..80d6fc3fa44 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -39,9 +39,8 @@ #if defined(__LIBSYCL_MAJOR_VERSION) && defined(__LIBSYCL_MINOR_VERSION) && defined(__LIBSYCL_PATCH_VERSION) # define _ONEDPL_LIBSYCL_VERSION \ (__LIBSYCL_MAJOR_VERSION * 10000 + __LIBSYCL_MINOR_VERSION * 100 + __LIBSYCL_PATCH_VERSION) -#else -# define _ONEDPL_LIBSYCL_VERSION 0 #endif +#define _ONEDPL_LIBSYCL_VERSION_LESS_THAN(_VERSION) (_ONEDPL_LIBSYCL_VERSION && _ONEDPL_LIBSYCL_VERSION < _VERSION) #if _ONEDPL_FPGA_DEVICE # if _ONEDPL_LIBSYCL_VERSION >= 50400 @@ -51,23 +50,48 @@ # endif #endif -// Macros to check the new SYCL features -#define _ONEDPL_NO_INIT_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) -#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) -#define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_LIBSYCL_VERSION >= 60200) -#define _ONEDPL_SYCL_DEVICE_COPYABLE_SPECIALIZATION_BROKEN \ - (_ONEDPL_LIBSYCL_VERSION < 70100) && (_ONEDPL_LIBSYCL_VERSION != 0) +// SYCL 2020 feature macros. They are enabled by default: +// A SYCL implementation is assumed to support the features unless specified otherwise. +// This is controlled by extendable logic: !(A && A < SUPPORTED_VER_A) && !(B && B < SUPPORTED_VER_B) && ..., +// where A, B, etc., are macros representing the version of a specific SYCL implementation. +#define _ONEDPL_SYCL2020_BITCAST_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_NO_INIT_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_BUFFER_SIZE_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_ACCESSOR_SIZE_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_REQD_SUB_GROUP_SIZE_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_SYCL2020_TARGET_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50400)) +#define _ONEDPL_SYCL2020_TARGET_DEVICE_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50400)) +#define _ONEDPL_SYCL2020_ATOMIC_REF_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50500)) +#define _ONEDPL_SYCL2020_SUB_GROUP_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50700)) +#define _ONEDPL_SYCL2020_SUBGROUP_BARRIER_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50700)) +// 20201214 value corresponds to DPC++ 2021.1.2 +#define _ONEDPL_SYCL2020_KERNEL_DEVICE_API_PRESENT \ + (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50700) || __SYCL_COMPILER_VERSION > 20201214) +#define _ONEDPL_SYCL2020_BUFFER_ALLOCATOR_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(60000)) +#define _ONEDPL_SYCL2020_LOCAL_ACCESSOR_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(60000)) +// The unified future supporting USM memory and buffers is only supported after DPC++ 2023.1 but not by 2023.2. +#define _ONEDPL_SYCL2020_DEFAULT_ACCESSOR_CONSTRUCTOR_PRESENT \ + (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(60100) || _ONEDPL_LIBSYCL_VERSION != 60200) +#define _ONEDPL_SYCL2020_HOST_TARGET_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(60200)) +#define _ONEDPL_SYCL2020_HOST_ACCESSOR_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(60200)) +#define _ONEDPL_SYCL2020_GET_HOST_ACCESS_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(60200)) +#define _ONEDPL_SYCL2020_LOCAL_ACC_GET_MULTI_PTR_PRESENT (!_ONEDPL_LIBSYCL_VERSION_LESS_THAN(70000)) + +// Feature macros for DPC++ SYCL runtime library alternatives to non-supported SYCL 2020 features +#define _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300)) +#define _ONEDPL_LIBSYCL_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION == 50200) +#define _ONEDPL_LIBSYCL_SUB_GROUP_MASK_PRESENT \ + (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1 && _ONEDPL_LIBSYCL_VERSION >= 50700) + +#define _ONEDPL_SYCL_DEVICE_COPYABLE_SPECIALIZATION_BROKEN (_ONEDPL_LIBSYCL_VERSION_LESS_THAN(70100)) // TODO: determine which compiler configurations provide subgroup load/store #define _ONEDPL_SYCL_SUB_GROUP_LOAD_STORE_PRESENT false -#define _ONEDPL_SYCL_SUB_GROUP_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50700) - // Macro to check if we are compiling for SPIR-V devices. This macro must only be used within // SYCL kernels for determining SPIR-V compilation. Using this macro on the host may lead to incorrect behavior. #ifndef _ONEDPL_DETECT_SPIRV_COMPILATION // Check if overridden for testing @@ -78,10 +102,12 @@ # endif #endif // _ONEDPL_DETECT_SPIRV_COMPILATION -#if _ONEDPL_LIBSYCL_VERSION >= 50300 +#if _ONEDPL_SYCL2020_REQD_SUB_GROUP_SIZE_PRESENT # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) sycl::reqd_sub_group_size(SIZE) -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300) # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) intel::reqd_sub_group_size(SIZE) +#else +# error "sycl::reqd_sub_group_size is not supported, and no alternative is available" #endif // This macro is intended to be used for specifying a subgroup size as a SYCL kernel attribute for SPIR-V targets @@ -93,22 +119,16 @@ # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED(SIZE) #endif -// The unified future supporting USM memory and buffers is only supported after DPCPP 2023.1 -// but not by 2023.2. -#if (_ONEDPL_LIBSYCL_VERSION >= 60100 && _ONEDPL_LIBSYCL_VERSION != 60200) -# define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 1 -#else -# define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 0 -#endif - namespace __dpl_sycl { using __no_init = -#if _ONEDPL_NO_INIT_PRESENT +#if _ONEDPL_SYCL2020_NO_INIT_PRESENT sycl::property::no_init; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300) sycl::property::noinit; +#else +# error "sycl::property::no_init is not supported, and no alternative is available" #endif #if _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT @@ -118,12 +138,15 @@ using __known_identity = sycl::known_identity<_BinaryOp, _T>; template using __has_known_identity = sycl::has_known_identity<_BinaryOp, _T>; -#elif _ONEDPL_LIBSYCL_VERSION == 50200 +#elif _ONEDPL_LIBSYCL_KNOWN_IDENTITY_PRESENT template using __known_identity = sycl::ONEAPI::known_identity<_BinaryOp, _T>; template using __has_known_identity = sycl::ONEAPI::has_known_identity<_BinaryOp, _T>; + +#else +# error "sycl::__known_identity is not supported, and no alternative is available" #endif // _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT template @@ -138,7 +161,7 @@ using __maximum = sycl::maximum<_T>; template using __minimum = sycl::minimum<_T>; -#else // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300) template using __plus = sycl::ONEAPI::plus<_T>; @@ -147,22 +170,29 @@ using __maximum = sycl::ONEAPI::maximum<_T>; template using __minimum = sycl::ONEAPI::minimum<_T>; + +#else +# error "sycl::plus, sycl::maximum, sycl::minimum are not supported, and no alternative is available" #endif // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT -#if _ONEDPL_SYCL_SUB_GROUP_PRESENT +#if _ONEDPL_SYCL2020_SUB_GROUP_PRESENT using __sub_group = sycl::sub_group; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50700) using __sub_group = sycl::ONEAPI::sub_group; +#else +# error "sycl::group is not supported, and no alternative is available" #endif template constexpr auto __get_buffer_size(const _Buffer& __buffer) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 +#if _ONEDPL_SYCL2020_BUFFER_SIZE_PRESENT return __buffer.size(); -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300) return __buffer.get_count(); +#else +# error "buffer::size is not supported, and no alternative is available" #endif } @@ -170,10 +200,12 @@ template constexpr auto __get_accessor_size(const _Accessor& __accessor) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 +#if _ONEDPL_SYCL2020_ACCESSOR_SIZE_PRESENT return __accessor.size(); -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50300) return __accessor.get_count(); +#else +# error "accessor::size is not supported, and no alternative is available" #endif } @@ -181,7 +213,7 @@ template constexpr void __group_barrier(_Item __item) { -#if 0 //_ONEDPL_LIBSYCL_VERSION >= 50300 +#if 0 // !defined(_ONEDPL_LIBSYCL_VERSION) || _ONEDPL_LIBSYCL_VERSION >= 50300 //TODO: usage of sycl::group_barrier: probably, we have to revise SYCL parallel patterns which use a group_barrier. // 1) sycl::group_barrier() implementation is not ready // 2) sycl::group_barrier and sycl::item::group_barrier are not quite equivalent @@ -197,8 +229,10 @@ __group_broadcast(_Args... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::group_broadcast(__args...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::broadcast(__args...); +#else +# error "sycl::group_broadcast is not supported, and no alternative is available" #endif } @@ -208,8 +242,10 @@ __exclusive_scan_over_group(_Args... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::exclusive_scan_over_group(__args...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::exclusive_scan(__args...); +#else +# error "sycl::exclusive_scan_over_group is not supported, and no alternative is available" #endif } @@ -219,8 +255,10 @@ __inclusive_scan_over_group(_Args... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::inclusive_scan_over_group(__args...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::inclusive_scan(__args...); +#else +# error "sycl::inclusive_scan_over_group is not supported, and no alternative is available" #endif } @@ -230,8 +268,10 @@ __reduce_over_group(_Args... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::reduce_over_group(__args...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::reduce(__args...); +#else +# error "sycl::reduce_over_group is not supported, and no alternative is available" #endif } @@ -241,8 +281,10 @@ __any_of_group(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::any_of_group(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::any_of(::std::forward<_Args>(__args)...); +#else +# error "sycl::any_of_group is not supported, and no alternative is available" #endif } @@ -252,8 +294,10 @@ __all_of_group(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::all_of_group(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::all_of(::std::forward<_Args>(__args)...); +#else +# error "sycl::all_of_group is not supported, and no alternative is available" #endif } @@ -263,8 +307,10 @@ __none_of_group(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::none_of_group(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::none_of(::std::forward<_Args>(__args)...); +#else +# error "sycl::none_of is not supported, and no alternative is available" #endif } @@ -274,8 +320,10 @@ __joint_exclusive_scan(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_exclusive_scan(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::exclusive_scan(::std::forward<_Args>(__args)...); +#else +# error "sycl::joint_exclusive_scan is not supported, and no alternative is available" #endif } @@ -285,8 +333,10 @@ __joint_inclusive_scan(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_inclusive_scan(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::inclusive_scan(::std::forward<_Args>(__args)...); +#else +# error "sycl::joint_inclusive_scan is not supported, and no alternative is available" #endif } @@ -296,8 +346,10 @@ __joint_reduce(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_reduce(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::reduce(::std::forward<_Args>(__args)...); +#else +# error "sycl::joint_reduce is not supported, and no alternative is available" #endif } @@ -307,8 +359,10 @@ __joint_any_of(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_any_of(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::any_of(::std::forward<_Args>(__args)...); +#else +# error "sycl::joint_any_of is not supported, and no alternative is available" #endif } @@ -318,8 +372,10 @@ __joint_all_of(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_all_of(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::all_of(::std::forward<_Args>(__args)...); +#else +# error "sycl::joint_all_of is not supported, and no alternative is available" #endif } @@ -329,8 +385,10 @@ __joint_none_of(_Args&&... __args) { #if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_none_of(::std::forward<_Args>(__args)...); -#else +#elif _ONEDPL_LIBSYCL_COLLECTIVES_PRESENT return sycl::ONEAPI::none_of(::std::forward<_Args>(__args)...); +#else +# error "sycl::joint_none_of is not supported, and no alternative is available" #endif } @@ -367,61 +425,75 @@ inline auto __fpga_selector() #endif // _ONEDPL_FPGA_DEVICE using __target = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 +#if _ONEDPL_SYCL2020_TARGET_PRESENT sycl::target; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50400) sycl::access::target; +#else +# error "sycl::target is not supported, and no alternative is available" #endif constexpr __target __target_device = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 +#if _ONEDPL_SYCL2020_TARGET_DEVICE_PRESENT __target::device; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50400) __target::global_buffer; +#else +# error "sycl::target::device is not supported, and no alternative is available" #endif constexpr __target __host_target = -#if _ONEDPL_LIBSYCL_VERSION >= 60200 +#if _ONEDPL_SYCL2020_HOST_TARGET_PRESENT __target::host_task; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(60200) __target::host_buffer; +#else +# error "sycl::target::host_task is not supported, and no alternative is available" #endif template using __buffer_allocator = -#if _ONEDPL_LIBSYCL_VERSION >= 60000 +#if _ONEDPL_SYCL2020_BUFFER_ALLOCATOR_PRESENT sycl::buffer_allocator<_DataT>; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(60000) sycl::buffer_allocator; +#else +# error "sycl::buffer_allocator is not supported, and no alternative is available" #endif template -#if _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT +#if _ONEDPL_SYCL2020_ATOMIC_REF_PRESENT using __atomic_ref = sycl::atomic_ref<_AtomicType, sycl::memory_order::relaxed, sycl::memory_scope::work_group, _Space>; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(50500) struct __atomic_ref : sycl::atomic<_AtomicType, _Space> { explicit __atomic_ref(_AtomicType& ref) : sycl::atomic<_AtomicType, _Space>(sycl::multi_ptr<_AtomicType, _Space>(&ref)){}; }; -#endif // _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT +#else +# error "sycl::atomic_ref is not supported, and no alternative is available" +#endif // _ONEDPL_SYCL2020_ATOMIC_REF_PRESENT template using __local_accessor = -#if _ONEDPL_LIBSYCL_VERSION >= 60000 +#if _ONEDPL_SYCL2020_LOCAL_ACCESSOR_PRESENT sycl::local_accessor<_DataT, _Dimensions>; -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(60000) sycl::accessor<_DataT, _Dimensions, sycl::access::mode::read_write, __dpl_sycl::__target::local>; +#else +# error "sycl::local_accessor is not supported, and no alternative is available" #endif template auto __get_host_access(_Buf&& __buf) { -#if _ONEDPL_LIBSYCL_VERSION >= 60200 +#if _ONEDPL_SYCL2020_GET_HOST_ACCESS_PRESENT return ::std::forward<_Buf>(__buf).get_host_access(sycl::read_only); -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(60200) return ::std::forward<_Buf>(__buf).template get_access(); +#else +# error "sycl::buffer::get_host_access is not supported, and no alternative is available" #endif } @@ -429,13 +501,26 @@ template auto __get_accessor_ptr(const _Acc& __acc) { -#if _ONEDPL_LIBSYCL_VERSION >= 70000 +#if _ONEDPL_SYCL2020_LOCAL_ACC_GET_MULTI_PTR_PRESENT return __acc.template get_multi_ptr().get(); -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(70000) return __acc.get_pointer(); +#else +# error "sycl::accessor::get_multi_ptr is not supported, and no alternative is available" #endif } +#if defined(SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO) || defined(SYCL_EXT_ACPP_BACKEND_LEVEL_ZERO) +# define _ONEDPL_SYCL_L0_EXT_PRESENT 1 +#endif +#if _ONEDPL_SYCL_L0_EXT_PRESENT +# if defined(SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO) +inline constexpr auto __level_zero_backend = sycl::backend::ext_oneapi_level_zero; +# elif defined(SYCL_EXT_ACPP_BACKEND_LEVEL_ZERO) +inline constexpr auto __level_zero_backend = sycl::backend::level_zero; +# endif +#endif + } // namespace __dpl_sycl #endif // _ONEDPL_SYCL_DEFS_H diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index 2caa6add318..e0b57260ee0 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -23,6 +23,9 @@ #include "../../utils.h" #include "sycl_defs.h" +#define _ONEDPL_SYCL_KNOWN_IDENTITY_PRESENT \ + (_ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT || _ONEDPL_LIBSYCL_KNOWN_IDENTITY_PRESENT) + namespace oneapi { namespace dpl @@ -49,7 +52,7 @@ inline constexpr bool __can_use_known_identity = template using __has_known_identity = ::std::conditional_t< __can_use_known_identity<_Tp>, -# if _ONEDPL_LIBSYCL_VERSION >= 50200 +# if _ONEDPL_SYCL_KNOWN_IDENTITY_PRESENT typename ::std::disjunction< __dpl_sycl::__has_known_identity<_BinaryOp, _Tp>, ::std::conjunction<::std::is_arithmetic<_Tp>, @@ -61,14 +64,14 @@ using __has_known_identity = ::std::conditional_t< ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum>>>>, -# else //_ONEDPL_LIBSYCL_VERSION >= 50200 +# else typename ::std::conjunction< ::std::is_arithmetic<_Tp>, ::std::disjunction<::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus>>>, -# endif //_ONEDPL_LIBSYCL_VERSION >= 50200 +# endif ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false #else //_ONEDPL_USE_GROUP_ALGOS && defined(SYCL_IMPLEMENTATION_INTEL) @@ -90,11 +93,11 @@ struct __known_identity_for_plus template inline constexpr _Tp __known_identity = -#if _ONEDPL_LIBSYCL_VERSION >= 50200 +#if _ONEDPL_SYCL_KNOWN_IDENTITY_PRESENT __dpl_sycl::__known_identity<_BinaryOp, _Tp>::value; -#else //_ONEDPL_LIBSYCL_VERSION >= 50200 +#else __known_identity_for_plus<_BinaryOp, _Tp>::value; //for plus only -#endif //_ONEDPL_LIBSYCL_VERSION >= 50200 +#endif template struct walk_n diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h index 0f757c2ce3e..4d16ae4508b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h @@ -129,7 +129,7 @@ struct all_view_fn } }; -#if _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED +#if _ONEDPL_SYCL2020_HOST_ACCESSOR_PRESENT struct all_host_view_fn { // An overload for sycl::buffer template type @@ -163,11 +163,13 @@ inline constexpr all_view_fn all_write; -#if _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED +#if _ONEDPL_SYCL2020_HOST_ACCESSOR_PRESENT inline constexpr all_host_view_fn -#else +#elif _ONEDPL_LIBSYCL_VERSION_LESS_THAN(60200) inline constexpr all_view_fn +#else +# error "sycl::host_accessor is not supported, and no alternative is available" #endif host_all; } // namespace views diff --git a/include/oneapi/dpl/pstl/utils.h b/include/oneapi/dpl/pstl/utils.h index 8a8dfdae1bc..9c32178a78c 100644 --- a/include/oneapi/dpl/pstl/utils.h +++ b/include/oneapi/dpl/pstl/utils.h @@ -505,7 +505,7 @@ __dpl_bit_cast(const _Src& __src) noexcept { #if __cpp_lib_bit_cast >= 201806L return ::std::bit_cast<_Dst>(__src); -#elif _ONEDPL_BACKEND_SYCL && _ONEDPL_LIBSYCL_VERSION >= 50300 +#elif _ONEDPL_BACKEND_SYCL && _ONEDPL_SYCL2020_BITCAST_PRESENT return sycl::bit_cast<_Dst>(__src); #elif __has_builtin(__builtin_bit_cast) return __builtin_bit_cast(_Dst, __src); diff --git a/test/general/test_policies.pass.cpp b/test/general/test_policies.pass.cpp index 7ac952a51ce..f766669c763 100644 --- a/test/general/test_policies.pass.cpp +++ b/test/general/test_policies.pass.cpp @@ -75,7 +75,7 @@ main() // make_device_policy test_policy_instance(TestUtils::make_device_policy(q)); -#if _ONEDPL_LIBSYCL_VERSION < 60000 +#if TEST_LIBSYCL_VERSION && TEST_LIBSYCL_VERSION < 60000 // make_device_policy requires a sycl::queue as an argument. // Currently, there is no implicit conversion (implicit syc::queue constructor by a device selector) // from a device selector to a queue.