diff --git a/include/ur_api.h b/include/ur_api.h index 7459f5f205..96a58e04ea 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -4772,7 +4772,8 @@ typedef enum ur_kernel_group_info_t { UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0, ///< [size_t[3]] Return Work Group maximum global size UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 1, ///< [size_t] Return maximum Work Group size UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 2, ///< [size_t[3]] Return Work Group size required by the source code, such - ///< as __attribute__((required_work_group_size(X,Y,Z)) + ///< as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if + ///< unspecified UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = 3, ///< [size_t] Return local memory required by the Kernel UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4, ///< [size_t] Return preferred multiple of Work Group size for launch UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 5, ///< [size_t] Return minimum amount of private memory in bytes used by each @@ -4788,7 +4789,8 @@ typedef enum ur_kernel_group_info_t { typedef enum ur_kernel_sub_group_info_t { UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE = 0, ///< [uint32_t] Return maximum SubGroup size UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS = 1, ///< [uint32_t] Return maximum number of SubGroup - UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code + UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code or 0 + ///< if unspecified UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL = 3, ///< [uint32_t] Return SubGroup size required by Intel /// @cond UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32 = 0x7fffffff @@ -5989,6 +5991,7 @@ urEventSetCallback( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified." /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL diff --git a/scripts/core/enqueue.yml b/scripts/core/enqueue.yml index e91330ca77..8a264bfc49 100644 --- a/scripts/core/enqueue.yml +++ b/scripts/core/enqueue.yml @@ -65,6 +65,8 @@ returns: - $X_RESULT_ERROR_INVALID_WORK_DIMENSION - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_INVALID_KERNEL_ARGS + - "The kernel argument values have not been specified." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- diff --git a/scripts/core/kernel.yml b/scripts/core/kernel.yml index 4ce4f9c70a..4a0bf0bab1 100644 --- a/scripts/core/kernel.yml +++ b/scripts/core/kernel.yml @@ -135,7 +135,7 @@ etors: - name: WORK_GROUP_SIZE desc: "[size_t] Return maximum Work Group size" - name: COMPILE_WORK_GROUP_SIZE - desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z))" + desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if unspecified" - name: LOCAL_MEM_SIZE desc: "[size_t] Return local memory required by the Kernel" - name: PREFERRED_WORK_GROUP_SIZE_MULTIPLE @@ -154,7 +154,7 @@ etors: - name: MAX_NUM_SUB_GROUPS desc: "[uint32_t] Return maximum number of SubGroup" - name: COMPILE_NUM_SUB_GROUPS - desc: "[uint32_t] Return number of SubGroup required by the source code" + desc: "[uint32_t] Return number of SubGroup required by the source code or 0 if unspecified" - name: SUB_GROUP_SIZE_INTEL desc: "[uint32_t] Return SubGroup size required by Intel" --- #-------------------------------------------------------------------------- diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 63981187e7..750616235d 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -85,6 +85,8 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS; case CL_DEVICE_NOT_AVAILABLE: return UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE; + case CL_INVALID_KERNEL_ARGS: + return UR_RESULT_ERROR_INVALID_KERNEL_ARGS; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 55567f4a7b..62b502095c 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -4768,6 +4768,7 @@ ur_result_t UR_APICALL urEventSetCallback( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified." /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueKernelLaunch( diff --git a/source/ur_api.cpp b/source/ur_api.cpp index c1aca0d221..1ed70e0494 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -4040,6 +4040,7 @@ ur_result_t UR_APICALL urEventSetCallback( /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified." /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueKernelLaunch( diff --git a/test/conformance/device/device_adapter_native_cpu.match b/test/conformance/device/device_adapter_native_cpu.match index cb56081405..147a9a3dfd 100644 --- a/test/conformance/device/device_adapter_native_cpu.match +++ b/test/conformance/device/device_adapter_native_cpu.match @@ -1,6 +1,7 @@ urDeviceCreateWithNativeHandleTest.InvalidNullHandlePlatform urDeviceCreateWithNativeHandleTest.InvalidNullPointerDevice {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime +urDeviceGetInfoSingleTest.MaxWorkGroupSizeIsNonzero {{OPT}}urDeviceSelectBinaryTest.Success urDeviceGetInfoTest.Success/UR_DEVICE_INFO_DEVICE_ID urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MEMORY_CLOCK_RATE diff --git a/test/conformance/device/urDeviceGetInfo.cpp b/test/conformance/device/urDeviceGetInfo.cpp index a3fd9afe10..d1a04d8a6a 100644 --- a/test/conformance/device/urDeviceGetInfo.cpp +++ b/test/conformance/device/urDeviceGetInfo.cpp @@ -3,6 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include @@ -242,6 +243,12 @@ INSTANTIATE_TEST_SUITE_P( return ss.str(); }); +struct urDeviceGetInfoSingleTest : uur::urAllDevicesTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urAllDevicesTest::SetUp()); + } +}; + bool doesReturnArray(ur_device_info_t info_type) { if (info_type == UR_DEVICE_INFO_SUPPORTED_PARTITIONS || info_type == UR_DEVICE_INFO_PARTITION_TYPE) { @@ -284,7 +291,7 @@ TEST_P(urDeviceGetInfoTest, Success) { } } -TEST_P(urDeviceGetInfoTest, InvalidNullHandleDevice) { +TEST_F(urDeviceGetInfoSingleTest, InvalidNullHandleDevice) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urDeviceGetInfo(nullptr, UR_DEVICE_INFO_TYPE, @@ -292,7 +299,7 @@ TEST_P(urDeviceGetInfoTest, InvalidNullHandleDevice) { nullptr)); } -TEST_P(urDeviceGetInfoTest, InvalidEnumerationInfoType) { +TEST_F(urDeviceGetInfoSingleTest, InvalidEnumerationInfoType) { for (auto device : devices) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, @@ -302,7 +309,7 @@ TEST_P(urDeviceGetInfoTest, InvalidEnumerationInfoType) { } } -TEST_P(urDeviceGetInfoTest, InvalidSizePropSize) { +TEST_F(urDeviceGetInfoSingleTest, InvalidSizePropSize) { for (auto device : devices) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE, @@ -311,7 +318,7 @@ TEST_P(urDeviceGetInfoTest, InvalidSizePropSize) { } } -TEST_P(urDeviceGetInfoTest, InvalidSizePropSizeSmall) { +TEST_F(urDeviceGetInfoSingleTest, InvalidSizePropSizeSmall) { for (auto device : devices) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE, @@ -321,7 +328,7 @@ TEST_P(urDeviceGetInfoTest, InvalidSizePropSizeSmall) { } } -TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropValue) { +TEST_F(urDeviceGetInfoSingleTest, InvalidNullPointerPropValue) { for (auto device : devices) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, @@ -331,10 +338,30 @@ TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropValue) { } } -TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropSizeRet) { +TEST_F(urDeviceGetInfoSingleTest, InvalidNullPointerPropSizeRet) { for (auto device : devices) { ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_NULL_POINTER, urDeviceGetInfo(device, UR_DEVICE_INFO_TYPE, 0, nullptr, nullptr)); } } + +TEST_F(urDeviceGetInfoSingleTest, MaxWorkGroupSizeIsNonzero) { + for (auto device : devices) { + size_t max_global_size; + + ASSERT_SUCCESS( + urDeviceGetInfo(device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, + sizeof(size_t), &max_global_size, nullptr)); + ASSERT_NE(max_global_size, 0); + + std::array max_work_group_sizes; + ASSERT_SUCCESS(urDeviceGetInfo(device, + UR_DEVICE_INFO_MAX_WORK_GROUPS_3D, + sizeof(max_work_group_sizes), + max_work_group_sizes.data(), nullptr)); + for (size_t i = 0; i < 3; i++) { + ASSERT_NE(max_work_group_sizes[i], 0); + } + } +} diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 91ee4d42af..1419604b9d 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -93,6 +93,13 @@ macro(add_device_binary SOURCE_FILE) continue() endif() + # HIP doesn't seem to provide the symbol + # `_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11FixedSgSize` which + # causes a build failure here + if(${TRIPLE} MATCHES "amd" AND ${KERNEL_NAME} MATCHES "subgroup") + continue() + endif() + add_custom_command(OUTPUT "${BIN_PATH}" COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off ${AMD_TARGET_BACKEND} ${AMD_OFFLOAD_ARCH} ${AMD_NOGPULIB} @@ -139,6 +146,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/fixed_wg_size.cpp b/test/conformance/device_code/fixed_wg_size.cpp new file mode 100644 index 0000000000..db2e8a9250 --- /dev/null +++ b/test/conformance/device_code/fixed_wg_size.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +struct KernelFunctor { + void operator()(sycl::nd_item<3>) const {} + void operator()(sycl::item<3>) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size<4, 4, 4>}; + } +}; + +int main() { + sycl::queue myQueue; + myQueue.submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<3>(8, 8, 8), + KernelFunctor{}); + }); + + myQueue.wait(); + return 0; +} diff --git a/test/conformance/device_code/standard_types.cpp b/test/conformance/device_code/standard_types.cpp new file mode 100644 index 0000000000..d3cf89aa66 --- /dev/null +++ b/test/conformance/device_code/standard_types.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +struct Struct { + uint32_t a; + uint32_t b; +}; + +int main() { + sycl::queue deviceQueue; + sycl::range<1> numOfItems{1}; + + uint32_t output = 0; + + volatile bool test_bool = true; + volatile uint8_t test_u8 = 2; + volatile uint32_t test_u32 = 3; + volatile uint64_t test_u64 = 5; + Struct test_struct{7, 5}; + volatile float test_float = 11; + + { + sycl::buffer output_buff(&output, sycl::range(1)); + deviceQueue.submit([&](sycl::handler &cgh) { + sycl::accessor acc{output_buff, cgh, sycl::read_write}; + auto kern = [=](sycl::id<1> id) { + acc[id] = 100 + (test_bool ? 1 : 0) * test_u8 * test_u32 * + test_u64 * test_struct.a * + static_cast(test_float); + }; + cgh.parallel_for(numOfItems, kern); + }); + deviceQueue.wait(); + } + + return output == 2410; +} diff --git a/test/conformance/device_code/subgroup.cpp b/test/conformance/device_code/subgroup.cpp new file mode 100644 index 0000000000..fa4228f846 --- /dev/null +++ b/test/conformance/device_code/subgroup.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +struct KernelFunctor { + sycl::accessor Acc; + + KernelFunctor(sycl::accessor Acc) + : Acc(Acc) {} + + void operator()(sycl::nd_item<1> NdItem) const { + auto SG = NdItem.get_sub_group(); + if (NdItem.get_global_linear_id() == 0) { + Acc[0] = SG.get_local_linear_range(); + } + } +}; + +int main() { + sycl::queue myQueue; + size_t output = 0; + sycl::buffer output_buff(&output, sycl::range(1)); + + myQueue.submit([&](sycl::handler &cgh) { + sycl::accessor acc{output_buff, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(sycl::nd_range<1>(8, 2), + KernelFunctor{acc}); + }); + + myQueue.wait(); + return 0; +} diff --git a/test/conformance/enqueue/enqueue_adapter_cuda.match b/test/conformance/enqueue/enqueue_adapter_cuda.match index 9b57269f3d..8d015c3dc7 100644 --- a/test/conformance/enqueue/enqueue_adapter_cuda.match +++ b/test/conformance/enqueue/enqueue_adapter_cuda.match @@ -1,3 +1,6 @@ +urEnqueueKernelLaunchTest.InvalidKernelArgs/NVIDIA_CUDA_BACKEND___{{.*}}_ +urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/NVIDIA_CUDA_BACKEND___{{.*}}_ +urEnqueueKernelLaunchKernelSubGroupTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urEnqueueMemBufferFillTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___size__256__patternSize__256 diff --git a/test/conformance/enqueue/enqueue_adapter_native_cpu.match b/test/conformance/enqueue/enqueue_adapter_native_cpu.match index 43a114bf21..155a400e89 100644 --- a/test/conformance/enqueue/enqueue_adapter_native_cpu.match +++ b/test/conformance/enqueue/enqueue_adapter_native_cpu.match @@ -22,6 +22,13 @@ {{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU_ {{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU_ {{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ {{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_1 {{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_31 {{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_1027 diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match index 4aa265c897..0e751b8a25 100644 --- a/test/conformance/enqueue/enqueue_adapter_opencl.match +++ b/test/conformance/enqueue/enqueue_adapter_opencl.match @@ -32,4 +32,6 @@ {{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}} {{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}} {{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}} +urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__OpenCL___{{.*}}_ +urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__OpenCL___{{.*}}_ {{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__OpenCL___{{.*}}_UsePoolEnabled diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 9217457270..823524844b 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -18,6 +18,45 @@ struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest { }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchTest); +struct urEnqueueKernelLaunchKernelWgSizeTest : uur::urKernelExecutionTest { + void SetUp() override { + program_name = "fixed_wg_size"; + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + } + + std::array global_size{32, 32, 32}; + std::array global_offset{0, 0, 0}; + // This must match the size in fixed_wg_size.cpp + std::array wg_size{4, 4, 4}; + size_t n_dimensions = 3; +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelWgSizeTest); + +// Note: Due to an issue with HIP, the subgroup test is not generated +struct urEnqueueKernelLaunchKernelSubGroupTest : uur::urKernelExecutionTest { + void SetUp() override { + program_name = "subgroup"; + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + } + + std::array global_size{32, 32, 32}; + std::array global_offset{0, 0, 0}; + size_t n_dimensions = 3; +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelSubGroupTest); + +struct urEnqueueKernelLaunchKernelStandardTest : uur::urKernelExecutionTest { + void SetUp() override { + program_name = "standard_types"; + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + } + + size_t n_dimensions = 1; + size_t global_size = 1; + size_t offset = 0; +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelStandardTest); + TEST_P(urEnqueueKernelLaunchTest, Success) { ur_mem_handle_t buffer = nullptr; AddBuffer1DArg(sizeof(val) * global_size, &buffer); @@ -77,6 +116,84 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidWorkDimension) { UR_RESULT_ERROR_INVALID_WORK_DIMENSION); } +TEST_P(urEnqueueKernelLaunchTest, InvalidWorkGroupSize) { + // As far as I can tell, there's no way to check if a kernel or device + // requires uniform work group sizes or not, so this may succeed or report + // an error + size_t local_size = 31; + ur_mem_handle_t buffer = nullptr; + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + AddPodArg(val); + auto result = + urEnqueueKernelLaunch(queue, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr); + ASSERT_TRUE(result == UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE || + result == UR_RESULT_SUCCESS); +} + +TEST_P(urEnqueueKernelLaunchTest, InvalidKernelArgs) { + // Enqueue kernel without setting any args + ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions, + &global_offset, &global_size, + nullptr, 0, nullptr, nullptr), + UR_RESULT_ERROR_INVALID_KERNEL_ARGS); +} + +TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, Success) { + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queue, kernel, n_dimensions, global_offset.data(), global_size.data(), + nullptr, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); +} + +TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, SuccessWithExplicitLocalSize) { + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queue, kernel, n_dimensions, global_offset.data(), global_size.data(), + wg_size.data(), 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); +} + +TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, NonMatchingLocalSize) { + std::array wrong_wg_size{8, 8, 8}; + ASSERT_EQ_RESULT( + urEnqueueKernelLaunch(queue, kernel, n_dimensions, global_offset.data(), + global_size.data(), wrong_wg_size.data(), 0, + nullptr, nullptr), + UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE); +} + +TEST_P(urEnqueueKernelLaunchKernelSubGroupTest, Success) { + ur_mem_handle_t buffer = nullptr; + AddBuffer1DArg(sizeof(size_t), &buffer); + ASSERT_SUCCESS(urEnqueueKernelLaunch( + queue, kernel, n_dimensions, global_offset.data(), global_size.data(), + nullptr, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(size_t), 8); +} + +struct Pair { + uint32_t a; + uint32_t b; +}; +TEST_P(urEnqueueKernelLaunchKernelStandardTest, Success) { + uint32_t expected_result = 2410; + ur_mem_handle_t output = nullptr; + AddBuffer1DArg(sizeof(uint32_t), &output); + AddPodArg(true); + AddPodArg(2); + AddPodArg(3); + AddPodArg(5); + AddPodArg({7, 5}); + AddPodArg(11.0); + + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, &offset, + &global_size, nullptr, 0, nullptr, + nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(output, sizeof(uint32_t), expected_result); +} + struct testParametersEnqueueKernel { size_t X, Y, Z; size_t Dims; diff --git a/test/conformance/kernel/kernel_adapter_cuda.match b/test/conformance/kernel/kernel_adapter_cuda.match index 8b5ac60de7..fe44a34352 100644 --- a/test/conformance/kernel/kernel_adapter_cuda.match +++ b/test/conformance/kernel/kernel_adapter_cuda.match @@ -1,3 +1,4 @@ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ diff --git a/test/conformance/kernel/kernel_adapter_hip.match b/test/conformance/kernel/kernel_adapter_hip.match index b0fa73b381..894bd698bb 100644 --- a/test/conformance/kernel/kernel_adapter_hip.match +++ b/test/conformance/kernel/kernel_adapter_hip.match @@ -2,6 +2,7 @@ {{OPT}}urKernelCreateWithNativeHandleTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urKernelCreateWithNativeHandleTest.InvalidNullHandleProgram/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urKernelCreateWithNativeHandleTest.InvalidNullPointerNativeKernel/AMD_HIP_BACKEND___{{.*}}_ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urKernelGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS {{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME {{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_ARGS diff --git a/test/conformance/kernel/kernel_adapter_native_cpu.match b/test/conformance/kernel/kernel_adapter_native_cpu.match index fae4d2f51b..93e3ddd67d 100644 --- a/test/conformance/kernel/kernel_adapter_native_cpu.match +++ b/test/conformance/kernel/kernel_adapter_native_cpu.match @@ -31,6 +31,8 @@ urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___ urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE +urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/SYCL_NATIVE_CPU___SYCL_Native_CPU_ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_INFO_FUNCTION_NAME urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_INFO_NUM_ARGS urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_INFO_REFERENCE_COUNT @@ -101,6 +103,7 @@ urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL +urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urKernelReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urKernelReleaseTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urKernelRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ diff --git a/test/conformance/kernel/urKernelGetGroupInfo.cpp b/test/conformance/kernel/urKernelGetGroupInfo.cpp index 5ad6225676..b91001a07f 100644 --- a/test/conformance/kernel/urKernelGetGroupInfo.cpp +++ b/test/conformance/kernel/urKernelGetGroupInfo.cpp @@ -18,6 +18,24 @@ UUR_TEST_SUITE_P( UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE), uur::deviceTestWithParamPrinter); +struct urKernelGetGroupInfoSingleTest : uur::urKernelTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + } +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urKernelGetGroupInfoSingleTest); + +struct urKernelGetGroupInfoWgSizeTest : uur::urKernelTest { + void SetUp() override { + program_name = "fixed_wg_size"; + UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + } + + // This must match the size in fixed_wg_size.cpp + std::array wg_size{4, 4, 4}; +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urKernelGetGroupInfoWgSizeTest); + TEST_P(urKernelGetGroupInfoTest, Success) { auto property_name = getParam(); size_t property_size = 0; @@ -57,3 +75,21 @@ TEST_P(urKernelGetGroupInfoTest, InvalidEnumeration) { UR_KERNEL_GROUP_INFO_FORCE_UINT32, 0, nullptr, &bad_enum_length)); } + +TEST_P(urKernelGetGroupInfoWgSizeTest, CompileWorkGroupSize) { + std::array read_dims{1, 1, 1}; + ASSERT_SUCCESS(urKernelGetGroupInfo( + kernel, device, UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(read_dims), read_dims.data(), nullptr)); + ASSERT_EQ(read_dims, wg_size); +} + +TEST_P(urKernelGetGroupInfoSingleTest, CompileWorkGroupSizeEmpty) { + // Returns 0 by default when there is no sepecific information + std::array read_dims{1, 1, 1}; + std::array zero{0, 0, 0}; + ASSERT_SUCCESS(urKernelGetGroupInfo( + kernel, device, UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(read_dims), read_dims.data(), nullptr)); + ASSERT_EQ(read_dims, zero); +} diff --git a/test/conformance/kernel/urKernelGetSubGroupInfo.cpp b/test/conformance/kernel/urKernelGetSubGroupInfo.cpp index 9a8e599510..fa4e045483 100644 --- a/test/conformance/kernel/urKernelGetSubGroupInfo.cpp +++ b/test/conformance/kernel/urKernelGetSubGroupInfo.cpp @@ -16,6 +16,13 @@ UUR_TEST_SUITE_P( UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL), uur::deviceTestWithParamPrinter); +struct urKernelGetSubGroupInfoSingleTest : uur::urKernelTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + } +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urKernelGetSubGroupInfoSingleTest); + TEST_P(urKernelGetSubGroupInfoTest, Success) { auto property_name = getParam(); size_t property_size = 0; @@ -53,3 +60,12 @@ TEST_P(urKernelGetSubGroupInfoTest, InvalidEnumeration) { kernel, device, UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32, 0, nullptr, &bad_enum_length)); } + +TEST_P(urKernelGetSubGroupInfoSingleTest, CompileNumSubgroupsIsZero) { + // Returns 0 by default when there is no specific information + size_t subgroups = 1; + ASSERT_SUCCESS(urKernelGetSubGroupInfo( + kernel, device, UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS, + sizeof(subgroups), &subgroups, nullptr)); + ASSERT_EQ(subgroups, 0); +}