diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 956c33bec68df..e569981589ac4 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 8c9dd7e464a99ebbfb238ac2dabefc3ac77baea5 - # Merge: a99dbcee 3abe18cf - # Author: Piotr Balcer - # Date: Fri Sep 6 17:21:17 2024 +0200 - # Merge pull request #1820 from pbalcer/static-linking - # Add support for static linking of the L0 adapter - set(UNIFIED_RUNTIME_TAG 8c9dd7e464a99ebbfb238ac2dabefc3ac77baea5) + # commit eb63d1a21729f6928bb6cccc5f92856b0690aca6 + # Merge: e26bba51 45a781f4 + # Author: Omar Ahmed + # Date: Tue Sep 10 12:08:57 2024 +0100 + # Merge pull request #1796 from GeorgeWeb/georgi/ur_kernel_max_active_wgs + # [CUDA] Implement urKernelSuggestMaxCooperativeGroupCountExp for Cuda + set(UNIFIED_RUNTIME_TAG eb63d1a21729f6928bb6cccc5f92856b0690aca6) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc index 8221000502642..ee52d75b8fd21 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc @@ -204,9 +204,11 @@ otherwise it is 0. |Returns the maximum number of work-groups, when the kernel is submitted to the specified queue with the specified work-group size and the specified amount of dynamic work-group local memory (in bytes), accounting for any kernel -properties or features. If the kernel can be submitted to the specified queue -without an error, the minimum value returned by this query is 1, otherwise it -is 0. +properties or features. If the specified work-group size is 0, which is +invalid, then the implementation will throw a synchronous exception with the +`errc::invalid` error code. If the kernel can be submitted to the specified +queue without an error, the minimum value returned by this query is 1, +otherwise it is 0. |=== diff --git a/sycl/include/sycl/detail/info_desc_helpers.hpp b/sycl/include/sycl/detail/info_desc_helpers.hpp index e8bc8f76c83db..d3b4bfd977139 100644 --- a/sycl/include/sycl/detail/info_desc_helpers.hpp +++ b/sycl/include/sycl/detail/info_desc_helpers.hpp @@ -31,6 +31,8 @@ template struct is_queue_info_desc : std::false_type {}; template struct is_kernel_info_desc : std::false_type {}; template struct is_kernel_device_specific_info_desc : std::false_type {}; +template +struct is_kernel_queue_specific_info_desc : std::false_type {}; template struct is_event_info_desc : std::false_type {}; template struct is_event_profiling_info_desc : std::false_type {}; // Normally we would just use std::enable_if to limit valid get_info template @@ -134,6 +136,16 @@ struct IsKernelInfo #include #include #undef __SYCL_PARAM_TRAITS_SPEC + +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \ + template <> \ + struct is_##DescType##_info_desc \ + : std::true_type { \ + using return_type = Namespace::info::DescType::Desc::return_type; \ + }; +#include +#undef __SYCL_PARAM_TRAITS_SPEC + #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ template <> \ struct is_backend_info_desc : std::true_type { \ diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp index 558396bb6f9c8..b8c90683bbaaf 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp @@ -24,13 +24,8 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -namespace info::kernel_queue_specific { -// TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension once -// #7598 is merged. -struct max_num_work_group_sync { - using return_type = size_t; -}; -} // namespace info::kernel_queue_specific +// See 'sycl/info/kernel_device_specific_traits.def' for the kernel +// device-specific properties that relate to 'root_group'. template class root_group { public: diff --git a/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def new file mode 100644 index 0000000000000..0ec11af0bb6b1 --- /dev/null +++ b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def @@ -0,0 +1,4 @@ +// TODO: Revisit 'max_num_work_group_sync' and align it with the +// 'sycl_ext_oneapi_forward_progress' extension once #7598 is merged. +__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t,) +__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,) diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index 8f6a86e5f60bb..b84f98f350d0d 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -247,6 +247,8 @@ struct work_item_progress_capabilities; #include #include #include +#include + #undef __SYCL_PARAM_TRAITS_SPEC #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC } // namespace _V1 diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index 40db1d8461dde..dac7f619d745e 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -159,9 +159,29 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { get_info(const device &Device, const range<3> &WGSize) const; // TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension - // once #7598 is merged. + // once #7598 is merged. (regarding the 'max_num_work_group_sync' query) + + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue. + /// + /// \param Queue is a valid SYCL queue. + /// \return depends on information being queried. + template + typename detail::is_kernel_queue_specific_info_desc::return_type + ext_oneapi_get_info(queue Queue) const; + + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue and values. + /// max_num_work_groups is the only valid descriptor for this function. + /// + /// \param Queue is a valid SYCL queue. + /// \param WorkGroupSize is the work-group size the number of work-groups is + /// requested for. + /// \return depends on information being queried. template - typename Param::return_type ext_oneapi_get_info(const queue &q) const; + typename detail::is_kernel_queue_specific_info_desc::return_type + ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize, + size_t DynamicLocalMemorySize) const; private: /// Constructs a SYCL kernel object from a valid kernel_impl instance. diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index faf3695c04e94..50af09831f207 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -106,6 +106,38 @@ void kernel_impl::checkIfValidForNumArgsInfoQuery() const { "interoperability function or to query a device built-in kernel"); } +bool kernel_impl::exceedsOccupancyResourceLimits( + const device &Device, const range<3> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + // Respect occupancy limits for WorkGroupSize and DynamicLocalMemorySize. + // Generally, exceeding hardware resource limits will yield in an error when + // the kernel is launched. + const size_t MaxWorkGroupSize = + get_info(Device); + const size_t MaxLocalMemorySizeInBytes = + Device.get_info(); + + if (WorkGroupSize.size() > MaxWorkGroupSize) + return true; + + if (DynamicLocalMemorySize > MaxLocalMemorySizeInBytes) + return true; + + // It will be impossible to launch a kernel for Cuda when the hardware limit + // for the 32-bit registers page file size is exceeded. + if (Device.get_backend() == backend::ext_oneapi_cuda) { + const uint32_t RegsPerWorkItem = + get_info(Device); + const uint32_t MaxRegsPerWorkGroup = + Device.get_info(); + if ((MaxWorkGroupSize * RegsPerWorkItem) > MaxRegsPerWorkGroup) + return true; + } + + return false; +} + template <> typename info::platform::version::return_type kernel_impl::get_backend_info() const { diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index ab2950e26a856..040b5cbccf965 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -114,8 +114,26 @@ class kernel_impl { typename Param::return_type get_info(const device &Device, const range<3> &WGSize) const; + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue. + /// + /// \param Queue is a valid SYCL queue. + /// \return depends on information being queried. + template + typename Param::return_type ext_oneapi_get_info(queue Queue) const; + + /// Query queue/launch-specific information from a kernel using the + /// info::kernel_queue_specific descriptor for a specific Queue and values. + /// max_num_work_groups is the only valid descriptor for this function. + /// + /// \param Queue is a valid SYCL queue. + /// \param WorkGroupSize is the work-group size the number of work-groups is + /// requested for. + /// \return depends on information being queried. template - typename Param::return_type ext_oneapi_get_info(const queue &q) const; + typename Param::return_type + ext_oneapi_get_info(queue Queue, const range<3> &MaxWorkGroupSize, + size_t DynamicLocalMemorySize) const; /// Get a constant reference to a raw kernel object. /// @@ -171,6 +189,12 @@ class kernel_impl { bool isBuiltInKernel(const device &Device) const; void checkIfValidForNumArgsInfoQuery() const; + + /// Check if the occupancy limits are exceeded for the given kernel launch + /// configuration. + bool exceedsOccupancyResourceLimits(const device &Device, + const range<3> &WorkGroupSize, + size_t DynamicLocalMemorySize) const; }; template @@ -217,20 +241,66 @@ kernel_impl::get_info(const device &Device, getPlugin()); } +namespace syclex = ext::oneapi::experimental; + template <> -inline typename ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync::return_type +inline typename syclex::info::kernel_queue_specific::max_num_work_groups:: + return_type kernel_impl::ext_oneapi_get_info< - ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(const queue &Queue) const { + syclex::info::kernel_queue_specific::max_num_work_groups>( + queue Queue, const range<3> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + if (WorkGroupSize.size() == 0) + throw exception(sycl::make_error_code(errc::invalid), + "The launch work-group size cannot be zero."); + const auto &Plugin = getPlugin(); const auto &Handle = getHandleRef(); + auto Device = Queue.get_device(); + + uint32_t GroupCount{0}; + if (auto Result = Plugin->call_nocheck< + UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>( + Handle, WorkGroupSize.size(), DynamicLocalMemorySize, &GroupCount); + Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + // The feature is supported. Check for other errors and throw if any. + Plugin->checkUrResult(Result); + return GroupCount; + } + + // Fallback. If the backend API is unsupported, this query will return either + // 0 or 1 based on the kernel resource usage and the user-requested resources. + return exceedsOccupancyResourceLimits(Device, WorkGroupSize, + DynamicLocalMemorySize) + ? 0 + : 1; +} + +template <> +inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync:: + return_type + kernel_impl::ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_group_sync>( + queue Queue, const range<3> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + return ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>( + Queue, WorkGroupSize, DynamicLocalMemorySize); +} + +template <> +inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync:: + return_type + kernel_impl::ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_group_sync>( + queue Queue) const { + auto Device = Queue.get_device(); const auto MaxWorkGroupSize = - Queue.get_device().get_info(); - uint32_t GroupCount = 0; - Plugin->call( - Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount); - return GroupCount; + get_info(Device); + const sycl::range<3> WorkGroupSize{MaxWorkGroupSize, 1, 1}; + return ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_group_sync>( + Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0); } } // namespace detail diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index f4ec76bcf9e7d..a4aae60bece08 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -106,16 +106,36 @@ kernel::get_info( const device &, const sycl::range<3> &) const; template -typename Param::return_type -kernel::ext_oneapi_get_info(const queue &Queue) const { +typename detail::is_kernel_queue_specific_info_desc::return_type +kernel::ext_oneapi_get_info(queue Queue) const { return impl->ext_oneapi_get_info(Queue); } +template +typename detail::is_kernel_queue_specific_info_desc::return_type +kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize, + size_t DynamicLocalMemorySize) const { + return impl->ext_oneapi_get_info(Queue, WorkGroupSize, + DynamicLocalMemorySize); +} + template __SYCL_EXPORT typename ext::oneapi::experimental::info:: kernel_queue_specific::max_num_work_group_sync::return_type kernel::ext_oneapi_get_info< ext::oneapi::experimental::info::kernel_queue_specific:: - max_num_work_group_sync>(const queue &Queue) const; + max_num_work_group_sync>(queue Queue) const; + +#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT) \ + template __SYCL_EXPORT ReturnT \ + kernel::ext_oneapi_get_info( \ + queue, const range<3> &, size_t) const; +// Not including "ext_oneapi_kernel_queue_specific_traits.def" because not all +// kernel_queue_specific queries require the above-defined get_info interface. +// clang-format off +__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t) +__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t) +// clang-format on +#undef __SYCL_PARAM_TRAITS_SPEC kernel::kernel(std::shared_ptr Impl) : impl(Impl) {} diff --git a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp new file mode 100644 index 0000000000000..7b76327d015b1 --- /dev/null +++ b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp @@ -0,0 +1,215 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using namespace sycl::info::device; +using namespace sycl::info::kernel_device_specific; + +using value_type = int64_t; + +namespace kernels { + +template +using sycl_global_accessor = + sycl::accessor; + +class TestKernel { +public: + static constexpr bool HasLocalMemory{false}; + + TestKernel(sycl_global_accessor acc) : acc_{acc} {} + + void operator()(sycl::nd_item<1> item) const { + const auto gtid = item.get_global_linear_id(); + acc_[gtid] = gtid + 42; + } + +private: + sycl_global_accessor acc_; +}; + +class TestLocalMemoryKernel { +public: + static constexpr bool HasLocalMemory{true}; + + TestLocalMemoryKernel(sycl_global_accessor acc, + sycl::local_accessor loc_acc) + : acc_{acc}, loc_acc_{loc_acc} {} + + void operator()(sycl::nd_item<1> item) const { + const auto ltid = item.get_local_linear_id(); + const auto gtid = item.get_global_linear_id(); + if (ltid < loc_acc_.size()) { + loc_acc_[ltid] = ltid + 42; + item.barrier(sycl::access::fence_space::local_space); + acc_[gtid] = loc_acc_[ltid]; + } else { + acc_[gtid] = 0; + } + } + +private: + sycl_global_accessor acc_; + sycl::local_accessor loc_acc_; +}; + +} // namespace kernels + +namespace { + +template +int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) { + const auto ctx = q.get_context(); + auto bundle = sycl::get_kernel_bundle(ctx); + auto kernel = bundle.template get_kernel(); + + const size_t maxWorkGroupSize = + kernel.template get_info(dev); + const size_t NumWorkItems = maxWorkGroupSize * maxWorkGroupSize; + + size_t workGroupSize = 32; + size_t localMemorySizeInBytes{0}; + if constexpr (KernelName::HasLocalMemory) { + localMemorySizeInBytes = workGroupSize * sizeof(value_type); + } + + sycl::buffer buf{sycl::range<1>{NumWorkItems}}; + + // Tests + + // ==================== // + // Test 1 - return type // + // ==================== // + sycl::range<3> workGroupRange{workGroupSize, 1, 1}; + auto maxWGs = kernel.template ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>( + q, workGroupRange, localMemorySizeInBytes); + + // Test the return type is as specified in the extension document. + static_assert(std::is_same_v, size_t>, + "max_num_work_groups query must return size_t"); + + // ===================== // + // Test 2 - return value // + // ===================== // + // We must have at least one active group if we are below resource limits. + assert(maxWGs > 0 && "max_num_work_groups query failed"); + if (maxWGs == 0) + return 1; + + // Run the kernel + auto launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems}, + sycl::range<1>{workGroupSize}}; + q.submit([&](sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + if constexpr (KernelName::HasLocalMemory) { + sycl::local_accessor loc_acc{ + sycl::range<1>{workGroupSize}, cgh}; + cgh.parallel_for(launch_range, KernelName{acc, loc_acc}); + } else { + cgh.parallel_for(launch_range, KernelName{acc}); + } + }).wait(); + assert(sycl::host_accessor{buf}[0] == 42); + + // ========================== // + // Test 3 - use max resources // + // ========================== // + // A little over the maximum work-group size for the purpose of exceeding. + workGroupSize = maxWorkGroupSize; + workGroupRange[0] = workGroupSize; + size_t localSize = + (dev.get_info() / sizeof(value_type)); + if constexpr (KernelName::HasLocalMemory) { + localMemorySizeInBytes = localSize * sizeof(value_type); + } + maxWGs = kernel.template ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>( + q, workGroupRange, localMemorySizeInBytes); + + assert(maxWGs > 0 && "max_num_work_groups query failed"); + if (maxWGs == 0) + return 1; + + launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems}, + sycl::range<1>{workGroupSize}}; + + q.submit([&](sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + if constexpr (KernelName::HasLocalMemory) { + sycl::local_accessor loc_acc{sycl::range<1>{localSize}, + cgh}; + cgh.parallel_for(launch_range, KernelName{acc, loc_acc}); + } else { + cgh.parallel_for(launch_range, KernelName{acc}); + } + }).wait(); + assert(sycl::host_accessor{buf}[0] == 42); + + // =============================== // + // Test 4 - exceed resource limits // + // =============================== // + workGroupSize = maxWorkGroupSize + 32; + workGroupRange[0] = workGroupSize; + maxWGs = kernel.template ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>( + q, workGroupRange, localMemorySizeInBytes); + // It cannot be possible to launch a kernel successfully with a configuration + // that exceeds the available resources as in the above defined workGroupSize. + // workGroupSize is larger than maxWorkGroupSize, hence maxWGs must equal 0. + if (dev.get_backend() == sycl::backend::ext_oneapi_cuda) { + assert(maxWGs == 0 && "max_num_work_groups query failed"); + if (maxWGs > 0) + return 1; + } + + // As we ensured that the 'max_num_work_groups' query correctly + // returns 0 possible work-groups, test that the kernel launch will fail. + // A configuration that defines a work-group size larger than the maximum + // possible should result in failure. + try { + launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems}, + sycl::range<1>{workGroupSize}}; + + q.submit([&](sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + if constexpr (KernelName::HasLocalMemory) { + sycl::local_accessor loc_acc{sycl::range<1>{localSize}, + cgh}; + cgh.parallel_for(launch_range, KernelName{acc, loc_acc}); + } else { + cgh.parallel_for(launch_range, KernelName{acc}); + } + }).wait(); + } catch (const sycl::exception &e) { + // 'nd_range' error is the expected outcome from the above launch config. + if (e.code() == sycl::make_error_code(sycl::errc::nd_range)) { + return 0; + } + std::cerr << e.code() << ":\t"; + std::cerr << e.what() << std::endl; + return 1; + } + + return 0; +} + +} // namespace + +int main() { + sycl::queue q{}; + sycl::device dev = q.get_device(); + + using namespace kernels; + + int ret{0}; + ret &= test_max_num_work_groups(q, dev); + ret &= test_max_num_work_groups(q, dev); + return ret; +} diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index d8393f35c6253..92e5d69ffcab4 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -30,12 +30,21 @@ void testQueriesAndProperties() { const auto maxWGs = kernel.ext_oneapi_get_info< sycl::ext::oneapi::experimental::info::kernel_queue_specific:: max_num_work_group_sync>(q); + const auto wgRange = sycl::range{WorkGroupSize, 1, 1}; + const auto maxWGsWithLimits = kernel.ext_oneapi_get_info< + sycl::ext::oneapi::experimental::info::kernel_queue_specific:: + max_num_work_group_sync>(q, wgRange, wgRange.size() * sizeof(int)); const auto props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; q.single_task(props, []() {}); - static_assert(std::is_same_v::type, size_t>, - "max_num_work_group_sync query must return size_t"); - assert(maxWGs >= 1 && "max_num_work_group_sync query failed"); + + static auto check_max_num_work_group_sync = [](auto Result) { + static_assert(std::is_same_v, size_t>, + "max_num_work_group_sync query must return size_t"); + assert(Result >= 1 && "max_num_work_group_sync query failed"); + }; + check_max_num_work_group_sync(maxWGs); + check_max_num_work_group_sync(maxWGsWithLimits); } void testRootGroup() { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4c73f43ed6ba2..ec6ec2096403f 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3915,8 +3915,10 @@ _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device15backend_versionEEENS0_6 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V16kernel17get_kernel_bundleEv -_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENT_11return_typeERKNS0_5queueE _ZNK4sycl3_V16kernel3getEv +_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueE +_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm +_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific15work_group_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16global_work_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16private_mem_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e2c3643c557be..55ce460c64559 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -13,7 +13,9 @@ ??$create_sub_devices@$0BAIH@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@_KV?$allocator@_K@std@@@4@@Z ??$create_sub_devices@$0BAII@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4partition_affinity_domain@info@12@@Z ??$create_sub_devices@$0BAIJ@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ -??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KAEBVqueue@12@@Z +??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@@Z +??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z +??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@event@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp index 36e9a5ad3961e..fd2628dfb6e1c 100644 --- a/sycl/test/include_deps/sycl_accessor.hpp.cpp +++ b/sycl/test/include_deps/sycl_accessor.hpp.cpp @@ -110,6 +110,7 @@ // CHECK-NEXT: info/ext_codeplay_device_traits.def // CHECK-NEXT: info/ext_intel_device_traits.def // CHECK-NEXT: info/ext_oneapi_device_traits.def +// CHECK-NEXT: info/ext_oneapi_kernel_queue_specific_traits.def // CHECK-NEXT: info/sycl_backend_traits.def // CHECK-NEXT: platform.hpp // CHECK-NEXT: detail/string_view.hpp diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 6cb1be75681ee..33dc01b8a4e74 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -111,6 +111,7 @@ // CHECK-NEXT: info/ext_codeplay_device_traits.def // CHECK-NEXT: info/ext_intel_device_traits.def // CHECK-NEXT: info/ext_oneapi_device_traits.def +// CHECK-NEXT: info/ext_oneapi_kernel_queue_specific_traits.def // CHECK-NEXT: info/sycl_backend_traits.def // CHECK-NEXT: platform.hpp // CHECK-NEXT: detail/string_view.hpp