diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
index 956c33bec68df..e569981589ac4 100644
--- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake
+++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
@@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 8c9dd7e464a99ebbfb238ac2dabefc3ac77baea5
-  # Merge: a99dbcee 3abe18cf
-  # Author: Piotr Balcer <piotr.balcer@intel.com>
-  # Date:   Fri Sep 6 17:21:17 2024 +0200
-  #   Merge pull request #1820 from pbalcer/static-linking
-  #   Add support for static linking of the L0 adapter
-  set(UNIFIED_RUNTIME_TAG 8c9dd7e464a99ebbfb238ac2dabefc3ac77baea5)
+  # commit eb63d1a21729f6928bb6cccc5f92856b0690aca6
+  # Merge: e26bba51 45a781f4
+  # Author: Omar Ahmed <omar.ahmed@codeplay.com>
+  # Date:   Tue Sep 10 12:08:57 2024 +0100
+  #   Merge pull request #1796 from GeorgeWeb/georgi/ur_kernel_max_active_wgs
+  #   [CUDA] Implement urKernelSuggestMaxCooperativeGroupCountExp for Cuda
+  set(UNIFIED_RUNTIME_TAG eb63d1a21729f6928bb6cccc5f92856b0690aca6)
 
   set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
   # Due to the use of dependentloadflag and no installer for UMF and hwloc we need
diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc
index 8221000502642..ee52d75b8fd21 100644
--- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc
+++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc
@@ -204,9 +204,11 @@ otherwise it is 0.
 |Returns the maximum number of work-groups, when the kernel is submitted to the
 specified queue with the specified work-group size and the specified amount of
 dynamic work-group local memory (in bytes), accounting for any kernel
-properties or features. If the kernel can be submitted to the specified queue
-without an error, the minimum value returned by this query is 1, otherwise it
-is 0.
+properties or features. If the specified work-group size is 0, which is
+invalid, then the implementation will throw a synchronous exception with the
+`errc::invalid` error code. If the kernel can be submitted to the specified
+queue without an error, the minimum value returned by this query is 1,
+otherwise it is 0.
 
 |===
 
diff --git a/sycl/include/sycl/detail/info_desc_helpers.hpp b/sycl/include/sycl/detail/info_desc_helpers.hpp
index e8bc8f76c83db..d3b4bfd977139 100644
--- a/sycl/include/sycl/detail/info_desc_helpers.hpp
+++ b/sycl/include/sycl/detail/info_desc_helpers.hpp
@@ -31,6 +31,8 @@ template <typename T> struct is_queue_info_desc : std::false_type {};
 template <typename T> struct is_kernel_info_desc : std::false_type {};
 template <typename T>
 struct is_kernel_device_specific_info_desc : std::false_type {};
+template <typename T>
+struct is_kernel_queue_specific_info_desc : std::false_type {};
 template <typename T> struct is_event_info_desc : std::false_type {};
 template <typename T> struct is_event_profiling_info_desc : std::false_type {};
 // Normally we would just use std::enable_if to limit valid get_info template
@@ -134,6 +136,16 @@ struct IsKernelInfo<info::kernel_device_specific::ext_codeplay_num_regs>
 #include <sycl/info/ext_intel_device_traits.def>
 #include <sycl/info/ext_oneapi_device_traits.def>
 #undef __SYCL_PARAM_TRAITS_SPEC
+
+#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode)   \
+  template <>                                                                  \
+  struct is_##DescType##_info_desc<Namespace::info::DescType::Desc>            \
+      : std::true_type {                                                       \
+    using return_type = Namespace::info::DescType::Desc::return_type;          \
+  };
+#include <sycl/info/ext_oneapi_kernel_queue_specific_traits.def>
+#undef __SYCL_PARAM_TRAITS_SPEC
+
 #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode)              \
   template <>                                                                  \
   struct is_backend_info_desc<info::DescType::Desc> : std::true_type {         \
diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp
index 558396bb6f9c8..b8c90683bbaaf 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp
@@ -24,13 +24,8 @@ namespace sycl {
 inline namespace _V1 {
 namespace ext::oneapi::experimental {
 
-namespace info::kernel_queue_specific {
-// TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension once
-// #7598 is merged.
-struct max_num_work_group_sync {
-  using return_type = size_t;
-};
-} // namespace info::kernel_queue_specific
+// See 'sycl/info/kernel_device_specific_traits.def' for the kernel
+// device-specific properties that relate to 'root_group'.
 
 template <int Dimensions> class root_group {
 public:
diff --git a/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def
new file mode 100644
index 0000000000000..0ec11af0bb6b1
--- /dev/null
+++ b/sycl/include/sycl/info/ext_oneapi_kernel_queue_specific_traits.def
@@ -0,0 +1,4 @@
+// TODO: Revisit 'max_num_work_group_sync' and align it with the
+// 'sycl_ext_oneapi_forward_progress' extension once #7598 is merged.
+__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t,)
+__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t,)
diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp
index 8f6a86e5f60bb..b84f98f350d0d 100644
--- a/sycl/include/sycl/info/info_desc.hpp
+++ b/sycl/include/sycl/info/info_desc.hpp
@@ -247,6 +247,8 @@ struct work_item_progress_capabilities;
 #include <sycl/info/ext_codeplay_device_traits.def>
 #include <sycl/info/ext_intel_device_traits.def>
 #include <sycl/info/ext_oneapi_device_traits.def>
+#include <sycl/info/ext_oneapi_kernel_queue_specific_traits.def>
+
 #undef __SYCL_PARAM_TRAITS_SPEC
 #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
 } // namespace _V1
diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp
index 40db1d8461dde..dac7f619d745e 100644
--- a/sycl/include/sycl/kernel.hpp
+++ b/sycl/include/sycl/kernel.hpp
@@ -159,9 +159,29 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase<kernel> {
       get_info(const device &Device, const range<3> &WGSize) const;
 
   // TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension
-  // once #7598 is merged.
+  // once #7598 is merged. (regarding the 'max_num_work_group_sync' query)
+
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \return depends on information being queried.
+  template <typename Param>
+  typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+  ext_oneapi_get_info(queue Queue) const;
+
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue and values.
+  /// max_num_work_groups is the only valid descriptor for this function.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \param WorkGroupSize is the work-group size the number of work-groups is
+  /// requested for.
+  /// \return depends on information being queried.
   template <typename Param>
-  typename Param::return_type ext_oneapi_get_info(const queue &q) const;
+  typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+  ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
+                      size_t DynamicLocalMemorySize) const;
 
 private:
   /// Constructs a SYCL kernel object from a valid kernel_impl instance.
diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp
index faf3695c04e94..50af09831f207 100644
--- a/sycl/source/detail/kernel_impl.cpp
+++ b/sycl/source/detail/kernel_impl.cpp
@@ -106,6 +106,38 @@ void kernel_impl::checkIfValidForNumArgsInfoQuery() const {
       "interoperability function or to query a device built-in kernel");
 }
 
+bool kernel_impl::exceedsOccupancyResourceLimits(
+    const device &Device, const range<3> &WorkGroupSize,
+    size_t DynamicLocalMemorySize) const {
+  // Respect occupancy limits for WorkGroupSize and DynamicLocalMemorySize.
+  // Generally, exceeding hardware resource limits will yield in an error when
+  // the kernel is launched.
+  const size_t MaxWorkGroupSize =
+      get_info<info::kernel_device_specific::work_group_size>(Device);
+  const size_t MaxLocalMemorySizeInBytes =
+      Device.get_info<info::device::local_mem_size>();
+
+  if (WorkGroupSize.size() > MaxWorkGroupSize)
+    return true;
+
+  if (DynamicLocalMemorySize > MaxLocalMemorySizeInBytes)
+    return true;
+
+  // It will be impossible to launch a kernel for Cuda when the hardware limit
+  // for the 32-bit registers page file size is exceeded.
+  if (Device.get_backend() == backend::ext_oneapi_cuda) {
+    const uint32_t RegsPerWorkItem =
+        get_info<info::kernel_device_specific::ext_codeplay_num_regs>(Device);
+    const uint32_t MaxRegsPerWorkGroup =
+        Device.get_info<ext::codeplay::experimental::info::device::
+                            max_registers_per_work_group>();
+    if ((MaxWorkGroupSize * RegsPerWorkItem) > MaxRegsPerWorkGroup)
+      return true;
+  }
+
+  return false;
+}
+
 template <>
 typename info::platform::version::return_type
 kernel_impl::get_backend_info<info::platform::version>() const {
diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp
index ab2950e26a856..040b5cbccf965 100644
--- a/sycl/source/detail/kernel_impl.hpp
+++ b/sycl/source/detail/kernel_impl.hpp
@@ -114,8 +114,26 @@ class kernel_impl {
   typename Param::return_type get_info(const device &Device,
                                        const range<3> &WGSize) const;
 
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \return depends on information being queried.
+  template <typename Param>
+  typename Param::return_type ext_oneapi_get_info(queue Queue) const;
+
+  /// Query queue/launch-specific information from a kernel using the
+  /// info::kernel_queue_specific descriptor for a specific Queue and values.
+  /// max_num_work_groups is the only valid descriptor for this function.
+  ///
+  /// \param Queue is a valid SYCL queue.
+  /// \param WorkGroupSize is the work-group size the number of work-groups is
+  /// requested for.
+  /// \return depends on information being queried.
   template <typename Param>
-  typename Param::return_type ext_oneapi_get_info(const queue &q) const;
+  typename Param::return_type
+  ext_oneapi_get_info(queue Queue, const range<3> &MaxWorkGroupSize,
+                      size_t DynamicLocalMemorySize) const;
 
   /// Get a constant reference to a raw kernel object.
   ///
@@ -171,6 +189,12 @@ class kernel_impl {
 
   bool isBuiltInKernel(const device &Device) const;
   void checkIfValidForNumArgsInfoQuery() const;
+
+  /// Check if the occupancy limits are exceeded for the given kernel launch
+  /// configuration.
+  bool exceedsOccupancyResourceLimits(const device &Device,
+                                      const range<3> &WorkGroupSize,
+                                      size_t DynamicLocalMemorySize) const;
 };
 
 template <typename Param>
@@ -217,20 +241,66 @@ kernel_impl::get_info(const device &Device,
       getPlugin());
 }
 
+namespace syclex = ext::oneapi::experimental;
+
 template <>
-inline typename ext::oneapi::experimental::info::kernel_queue_specific::
-    max_num_work_group_sync::return_type
+inline typename syclex::info::kernel_queue_specific::max_num_work_groups::
+    return_type
     kernel_impl::ext_oneapi_get_info<
-        ext::oneapi::experimental::info::kernel_queue_specific::
-            max_num_work_group_sync>(const queue &Queue) const {
+        syclex::info::kernel_queue_specific::max_num_work_groups>(
+        queue Queue, const range<3> &WorkGroupSize,
+        size_t DynamicLocalMemorySize) const {
+  if (WorkGroupSize.size() == 0)
+    throw exception(sycl::make_error_code(errc::invalid),
+                    "The launch work-group size cannot be zero.");
+
   const auto &Plugin = getPlugin();
   const auto &Handle = getHandleRef();
+  auto Device = Queue.get_device();
+
+  uint32_t GroupCount{0};
+  if (auto Result = Plugin->call_nocheck<
+                    UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
+          Handle, WorkGroupSize.size(), DynamicLocalMemorySize, &GroupCount);
+      Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
+    // The feature is supported. Check for other errors and throw if any.
+    Plugin->checkUrResult(Result);
+    return GroupCount;
+  }
+
+  // Fallback. If the backend API is unsupported, this query will return either
+  // 0 or 1 based on the kernel resource usage and the user-requested resources.
+  return exceedsOccupancyResourceLimits(Device, WorkGroupSize,
+                                        DynamicLocalMemorySize)
+             ? 0
+             : 1;
+}
+
+template <>
+inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
+    return_type
+    kernel_impl::ext_oneapi_get_info<
+        syclex::info::kernel_queue_specific::max_num_work_group_sync>(
+        queue Queue, const range<3> &WorkGroupSize,
+        size_t DynamicLocalMemorySize) const {
+  return ext_oneapi_get_info<
+      syclex::info::kernel_queue_specific::max_num_work_groups>(
+      Queue, WorkGroupSize, DynamicLocalMemorySize);
+}
+
+template <>
+inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
+    return_type
+    kernel_impl::ext_oneapi_get_info<
+        syclex::info::kernel_queue_specific::max_num_work_group_sync>(
+        queue Queue) const {
+  auto Device = Queue.get_device();
   const auto MaxWorkGroupSize =
-      Queue.get_device().get_info<info::device::max_work_group_size>();
-  uint32_t GroupCount = 0;
-  Plugin->call<UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
-      Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0, &GroupCount);
-  return GroupCount;
+      get_info<info::kernel_device_specific::work_group_size>(Device);
+  const sycl::range<3> WorkGroupSize{MaxWorkGroupSize, 1, 1};
+  return ext_oneapi_get_info<
+      syclex::info::kernel_queue_specific::max_num_work_group_sync>(
+      Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0);
 }
 
 } // namespace detail
diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp
index f4ec76bcf9e7d..a4aae60bece08 100644
--- a/sycl/source/kernel.cpp
+++ b/sycl/source/kernel.cpp
@@ -106,16 +106,36 @@ kernel::get_info<info::kernel_device_specific::max_sub_group_size>(
     const device &, const sycl::range<3> &) const;
 
 template <typename Param>
-typename Param::return_type
-kernel::ext_oneapi_get_info(const queue &Queue) const {
+typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+kernel::ext_oneapi_get_info(queue Queue) const {
   return impl->ext_oneapi_get_info<Param>(Queue);
 }
 
+template <typename Param>
+typename detail::is_kernel_queue_specific_info_desc<Param>::return_type
+kernel::ext_oneapi_get_info(queue Queue, const range<3> &WorkGroupSize,
+                            size_t DynamicLocalMemorySize) const {
+  return impl->ext_oneapi_get_info<Param>(Queue, WorkGroupSize,
+                                          DynamicLocalMemorySize);
+}
+
 template __SYCL_EXPORT typename ext::oneapi::experimental::info::
     kernel_queue_specific::max_num_work_group_sync::return_type
     kernel::ext_oneapi_get_info<
         ext::oneapi::experimental::info::kernel_queue_specific::
-            max_num_work_group_sync>(const queue &Queue) const;
+            max_num_work_group_sync>(queue Queue) const;
+
+#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT)           \
+  template __SYCL_EXPORT ReturnT                                               \
+  kernel::ext_oneapi_get_info<Namespace::info::DescType::Desc>(                \
+      queue, const range<3> &, size_t) const;
+// Not including "ext_oneapi_kernel_queue_specific_traits.def" because not all
+// kernel_queue_specific queries require the above-defined get_info interface.
+// clang-format off
+__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_group_sync, size_t)
+__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, kernel_queue_specific, max_num_work_groups, size_t)
+// clang-format on
+#undef __SYCL_PARAM_TRAITS_SPEC
 
 kernel::kernel(std::shared_ptr<detail::kernel_impl> Impl) : impl(Impl) {}
 
diff --git a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
new file mode 100644
index 0000000000000..7b76327d015b1
--- /dev/null
+++ b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp
@@ -0,0 +1,215 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/detail/core.hpp>
+
+#include <cassert>
+#include <cstdint>
+
+namespace syclex = sycl::ext::oneapi::experimental;
+using namespace sycl::info::device;
+using namespace sycl::info::kernel_device_specific;
+
+using value_type = int64_t;
+
+namespace kernels {
+
+template <class T, size_t Dim>
+using sycl_global_accessor =
+    sycl::accessor<T, Dim, sycl::access::mode::read_write,
+                   sycl::access::target::global_buffer>;
+
+class TestKernel {
+public:
+  static constexpr bool HasLocalMemory{false};
+
+  TestKernel(sycl_global_accessor<value_type, 1> acc) : acc_{acc} {}
+
+  void operator()(sycl::nd_item<1> item) const {
+    const auto gtid = item.get_global_linear_id();
+    acc_[gtid] = gtid + 42;
+  }
+
+private:
+  sycl_global_accessor<value_type, 1> acc_;
+};
+
+class TestLocalMemoryKernel {
+public:
+  static constexpr bool HasLocalMemory{true};
+
+  TestLocalMemoryKernel(sycl_global_accessor<value_type, 1> acc,
+                        sycl::local_accessor<value_type, 1> loc_acc)
+      : acc_{acc}, loc_acc_{loc_acc} {}
+
+  void operator()(sycl::nd_item<1> item) const {
+    const auto ltid = item.get_local_linear_id();
+    const auto gtid = item.get_global_linear_id();
+    if (ltid < loc_acc_.size()) {
+      loc_acc_[ltid] = ltid + 42;
+      item.barrier(sycl::access::fence_space::local_space);
+      acc_[gtid] = loc_acc_[ltid];
+    } else {
+      acc_[gtid] = 0;
+    }
+  }
+
+private:
+  sycl_global_accessor<value_type, 1> acc_;
+  sycl::local_accessor<value_type, 1> loc_acc_;
+};
+
+} // namespace kernels
+
+namespace {
+
+template <class KernelName>
+int test_max_num_work_groups(sycl::queue &q, const sycl::device &dev) {
+  const auto ctx = q.get_context();
+  auto bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(ctx);
+  auto kernel = bundle.template get_kernel<KernelName>();
+
+  const size_t maxWorkGroupSize =
+      kernel.template get_info<work_group_size>(dev);
+  const size_t NumWorkItems = maxWorkGroupSize * maxWorkGroupSize;
+
+  size_t workGroupSize = 32;
+  size_t localMemorySizeInBytes{0};
+  if constexpr (KernelName::HasLocalMemory) {
+    localMemorySizeInBytes = workGroupSize * sizeof(value_type);
+  }
+
+  sycl::buffer<value_type, 1> buf{sycl::range<1>{NumWorkItems}};
+
+  // Tests
+
+  // ==================== //
+  // Test 1 - return type //
+  // ==================== //
+  sycl::range<3> workGroupRange{workGroupSize, 1, 1};
+  auto maxWGs = kernel.template ext_oneapi_get_info<
+      syclex::info::kernel_queue_specific::max_num_work_groups>(
+      q, workGroupRange, localMemorySizeInBytes);
+
+  // Test the return type is as specified in the extension document.
+  static_assert(std::is_same_v<std::remove_cv_t<decltype(maxWGs)>, size_t>,
+                "max_num_work_groups query must return size_t");
+
+  // ===================== //
+  // Test 2 - return value //
+  // ===================== //
+  // We must have at least one active group if we are below resource limits.
+  assert(maxWGs > 0 && "max_num_work_groups query failed");
+  if (maxWGs == 0)
+    return 1;
+
+  // Run the kernel
+  auto launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems},
+                                        sycl::range<1>{workGroupSize}};
+  q.submit([&](sycl::handler &cgh) {
+     auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
+     if constexpr (KernelName::HasLocalMemory) {
+       sycl::local_accessor<value_type, 1> loc_acc{
+           sycl::range<1>{workGroupSize}, cgh};
+       cgh.parallel_for(launch_range, KernelName{acc, loc_acc});
+     } else {
+       cgh.parallel_for(launch_range, KernelName{acc});
+     }
+   }).wait();
+  assert(sycl::host_accessor{buf}[0] == 42);
+
+  // ========================== //
+  // Test 3 - use max resources //
+  // ========================== //
+  // A little over the maximum work-group size for the purpose of exceeding.
+  workGroupSize = maxWorkGroupSize;
+  workGroupRange[0] = workGroupSize;
+  size_t localSize =
+      (dev.get_info<sycl::info::device::local_mem_size>() / sizeof(value_type));
+  if constexpr (KernelName::HasLocalMemory) {
+    localMemorySizeInBytes = localSize * sizeof(value_type);
+  }
+  maxWGs = kernel.template ext_oneapi_get_info<
+      syclex::info::kernel_queue_specific::max_num_work_groups>(
+      q, workGroupRange, localMemorySizeInBytes);
+
+  assert(maxWGs > 0 && "max_num_work_groups query failed");
+  if (maxWGs == 0)
+    return 1;
+
+  launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems},
+                                   sycl::range<1>{workGroupSize}};
+
+  q.submit([&](sycl::handler &cgh) {
+     auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
+     if constexpr (KernelName::HasLocalMemory) {
+       sycl::local_accessor<value_type, 1> loc_acc{sycl::range<1>{localSize},
+                                                   cgh};
+       cgh.parallel_for(launch_range, KernelName{acc, loc_acc});
+     } else {
+       cgh.parallel_for(launch_range, KernelName{acc});
+     }
+   }).wait();
+  assert(sycl::host_accessor{buf}[0] == 42);
+
+  // =============================== //
+  // Test 4 - exceed resource limits //
+  // =============================== //
+  workGroupSize = maxWorkGroupSize + 32;
+  workGroupRange[0] = workGroupSize;
+  maxWGs = kernel.template ext_oneapi_get_info<
+      syclex::info::kernel_queue_specific::max_num_work_groups>(
+      q, workGroupRange, localMemorySizeInBytes);
+  // It cannot be possible to launch a kernel successfully with a configuration
+  // that exceeds the available resources as in the above defined workGroupSize.
+  // workGroupSize is larger than maxWorkGroupSize, hence maxWGs must equal 0.
+  if (dev.get_backend() == sycl::backend::ext_oneapi_cuda) {
+    assert(maxWGs == 0 && "max_num_work_groups query failed");
+    if (maxWGs > 0)
+      return 1;
+  }
+
+  // As we ensured that the 'max_num_work_groups' query correctly
+  // returns 0 possible work-groups, test that the kernel launch will fail.
+  // A configuration that defines a work-group size larger than the maximum
+  // possible should result in failure.
+  try {
+    launch_range = sycl::nd_range<1>{sycl::range<1>{NumWorkItems},
+                                     sycl::range<1>{workGroupSize}};
+
+    q.submit([&](sycl::handler &cgh) {
+       auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
+       if constexpr (KernelName::HasLocalMemory) {
+         sycl::local_accessor<value_type, 1> loc_acc{sycl::range<1>{localSize},
+                                                     cgh};
+         cgh.parallel_for(launch_range, KernelName{acc, loc_acc});
+       } else {
+         cgh.parallel_for(launch_range, KernelName{acc});
+       }
+     }).wait();
+  } catch (const sycl::exception &e) {
+    // 'nd_range' error is the expected outcome from the above launch config.
+    if (e.code() == sycl::make_error_code(sycl::errc::nd_range)) {
+      return 0;
+    }
+    std::cerr << e.code() << ":\t";
+    std::cerr << e.what() << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
+
+} // namespace
+
+int main() {
+  sycl::queue q{};
+  sycl::device dev = q.get_device();
+
+  using namespace kernels;
+
+  int ret{0};
+  ret &= test_max_num_work_groups<TestKernel>(q, dev);
+  ret &= test_max_num_work_groups<TestLocalMemoryKernel>(q, dev);
+  return ret;
+}
diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
index d8393f35c6253..92e5d69ffcab4 100644
--- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp
+++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
@@ -30,12 +30,21 @@ void testQueriesAndProperties() {
   const auto maxWGs = kernel.ext_oneapi_get_info<
       sycl::ext::oneapi::experimental::info::kernel_queue_specific::
           max_num_work_group_sync>(q);
+  const auto wgRange = sycl::range{WorkGroupSize, 1, 1};
+  const auto maxWGsWithLimits = kernel.ext_oneapi_get_info<
+      sycl::ext::oneapi::experimental::info::kernel_queue_specific::
+          max_num_work_group_sync>(q, wgRange, wgRange.size() * sizeof(int));
   const auto props = sycl::ext::oneapi::experimental::properties{
       sycl::ext::oneapi::experimental::use_root_sync};
   q.single_task<class QueryKernel>(props, []() {});
-  static_assert(std::is_same_v<std::remove_cv<decltype(maxWGs)>::type, size_t>,
-                "max_num_work_group_sync query must return size_t");
-  assert(maxWGs >= 1 && "max_num_work_group_sync query failed");
+
+  static auto check_max_num_work_group_sync = [](auto Result) {
+    static_assert(std::is_same_v<std::remove_cv_t<decltype(Result)>, size_t>,
+                  "max_num_work_group_sync query must return size_t");
+    assert(Result >= 1 && "max_num_work_group_sync query failed");
+  };
+  check_max_num_work_group_sync(maxWGs);
+  check_max_num_work_group_sync(maxWGsWithLimits);
 }
 
 void testRootGroup() {
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 4c73f43ed6ba2..ec6ec2096403f 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3915,8 +3915,10 @@ _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device15backend_versionEEENS0_6
 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv
 _ZNK4sycl3_V16kernel16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv
 _ZNK4sycl3_V16kernel17get_kernel_bundleEv
-_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENT_11return_typeERKNS0_5queueE
 _ZNK4sycl3_V16kernel3getEv
+_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueE
+_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific23max_num_work_group_syncEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm
+_ZNK4sycl3_V16kernel19ext_oneapi_get_infoINS0_3ext6oneapi12experimental4info21kernel_queue_specific19max_num_work_groupsEEENS0_6detail34is_kernel_queue_specific_info_descIT_E11return_typeENS0_5queueERKNS0_5rangeILi3EEEm
 _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific15work_group_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE
 _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16global_work_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE
 _ZNK4sycl3_V16kernel8get_infoINS0_4info22kernel_device_specific16private_mem_sizeEEENS0_6detail35is_kernel_device_specific_info_descIT_E11return_typeERKNS0_6deviceE
diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index e2c3643c557be..55ce460c64559 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -13,7 +13,9 @@
 ??$create_sub_devices@$0BAIH@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@_KV?$allocator@_K@std@@@4@@Z
 ??$create_sub_devices@$0BAII@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4partition_affinity_domain@info@12@@Z
 ??$create_sub_devices@$0BAIJ@@device@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ
-??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KAEBVqueue@12@@Z
+??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@@Z
+??$ext_oneapi_get_info@Umax_num_work_group_sync@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z
+??$ext_oneapi_get_info@Umax_num_work_groups@kernel_queue_specific@info@experimental@oneapi@ext@_V1@sycl@@@kernel@_V1@sycl@@QEBA_KVqueue@12@AEBV?$range@$02@12@_K@Z
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
 ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@event@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp
index 36e9a5ad3961e..fd2628dfb6e1c 100644
--- a/sycl/test/include_deps/sycl_accessor.hpp.cpp
+++ b/sycl/test/include_deps/sycl_accessor.hpp.cpp
@@ -110,6 +110,7 @@
 // CHECK-NEXT: info/ext_codeplay_device_traits.def
 // CHECK-NEXT: info/ext_intel_device_traits.def
 // CHECK-NEXT: info/ext_oneapi_device_traits.def
+// CHECK-NEXT: info/ext_oneapi_kernel_queue_specific_traits.def
 // CHECK-NEXT: info/sycl_backend_traits.def
 // CHECK-NEXT: platform.hpp
 // CHECK-NEXT: detail/string_view.hpp
diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp
index 6cb1be75681ee..33dc01b8a4e74 100644
--- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp
+++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp
@@ -111,6 +111,7 @@
 // CHECK-NEXT: info/ext_codeplay_device_traits.def
 // CHECK-NEXT: info/ext_intel_device_traits.def
 // CHECK-NEXT: info/ext_oneapi_device_traits.def
+// CHECK-NEXT: info/ext_oneapi_kernel_queue_specific_traits.def
 // CHECK-NEXT: info/sycl_backend_traits.def
 // CHECK-NEXT: platform.hpp
 // CHECK-NEXT: detail/string_view.hpp