Skip to content

Commit

Permalink
Merge pull request #1501 from RossBrunton/ross/kerneltests
Browse files Browse the repository at this point in the history
[Testing] Spec clarifications and testing updates for kernel
  • Loading branch information
kbenzie authored May 23, 2024
2 parents 719bb9c + e2ffea6 commit 396fb20
Show file tree
Hide file tree
Showing 21 changed files with 347 additions and 10 deletions.
7 changes: 5 additions & 2 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4772,7 +4772,8 @@ typedef enum ur_kernel_group_info_t {
UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0, ///< [size_t[3]] Return Work Group maximum global size
UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 1, ///< [size_t] Return maximum Work Group size
UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 2, ///< [size_t[3]] Return Work Group size required by the source code, such
///< as __attribute__((required_work_group_size(X,Y,Z))
///< as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if
///< unspecified
UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = 3, ///< [size_t] Return local memory required by the Kernel
UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4, ///< [size_t] Return preferred multiple of Work Group size for launch
UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 5, ///< [size_t] Return minimum amount of private memory in bytes used by each
Expand All @@ -4788,7 +4789,8 @@ typedef enum ur_kernel_group_info_t {
typedef enum ur_kernel_sub_group_info_t {
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE = 0, ///< [uint32_t] Return maximum SubGroup size
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS = 1, ///< [uint32_t] Return maximum number of SubGroup
UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code
UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code or 0
///< if unspecified
UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL = 3, ///< [uint32_t] Return SubGroup size required by Intel
/// @cond
UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32 = 0x7fffffff
Expand Down Expand Up @@ -5989,6 +5991,7 @@ urEventSetCallback(
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
UR_APIEXPORT ur_result_t UR_APICALL
Expand Down
2 changes: 2 additions & 0 deletions scripts/core/enqueue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ returns:
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION
- $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
- $X_RESULT_ERROR_INVALID_VALUE
- $X_RESULT_ERROR_INVALID_KERNEL_ARGS
- "The kernel argument values have not been specified."
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
- $X_RESULT_ERROR_OUT_OF_RESOURCES
--- #--------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions scripts/core/kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ etors:
- name: WORK_GROUP_SIZE
desc: "[size_t] Return maximum Work Group size"
- name: COMPILE_WORK_GROUP_SIZE
desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z))"
desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if unspecified"
- name: LOCAL_MEM_SIZE
desc: "[size_t] Return local memory required by the Kernel"
- name: PREFERRED_WORK_GROUP_SIZE_MULTIPLE
Expand All @@ -154,7 +154,7 @@ etors:
- name: MAX_NUM_SUB_GROUPS
desc: "[uint32_t] Return maximum number of SubGroup"
- name: COMPILE_NUM_SUB_GROUPS
desc: "[uint32_t] Return number of SubGroup required by the source code"
desc: "[uint32_t] Return number of SubGroup required by the source code or 0 if unspecified"
- name: SUB_GROUP_SIZE_INTEL
desc: "[uint32_t] Return SubGroup size required by Intel"
--- #--------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/opencl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ ur_result_t mapCLErrorToUR(cl_int Result) {
return UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS;
case CL_DEVICE_NOT_AVAILABLE:
return UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE;
case CL_INVALID_KERNEL_ARGS:
return UR_RESULT_ERROR_INVALID_KERNEL_ARGS;
default:
return UR_RESULT_ERROR_UNKNOWN;
}
Expand Down
1 change: 1 addition & 0 deletions source/loader/ur_libapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4768,6 +4768,7 @@ ur_result_t UR_APICALL urEventSetCallback(
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
ur_result_t UR_APICALL urEnqueueKernelLaunch(
Expand Down
1 change: 1 addition & 0 deletions source/ur_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4040,6 +4040,7 @@ ur_result_t UR_APICALL urEventSetCallback(
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
ur_result_t UR_APICALL urEnqueueKernelLaunch(
Expand Down
1 change: 1 addition & 0 deletions test/conformance/device/device_adapter_native_cpu.match
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
urDeviceCreateWithNativeHandleTest.InvalidNullHandlePlatform
urDeviceCreateWithNativeHandleTest.InvalidNullPointerDevice
{{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime
urDeviceGetInfoSingleTest.MaxWorkGroupSizeIsNonzero
{{OPT}}urDeviceSelectBinaryTest.Success
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_DEVICE_ID
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MEMORY_CLOCK_RATE
Expand Down
39 changes: 33 additions & 6 deletions test/conformance/device/urDeviceGetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <array>
#include <map>
#include <uur/fixtures.h>

Expand Down Expand Up @@ -242,6 +243,12 @@ INSTANTIATE_TEST_SUITE_P(
return ss.str();
});

struct urDeviceGetInfoSingleTest : uur::urAllDevicesTest {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(uur::urAllDevicesTest::SetUp());
}
};

bool doesReturnArray(ur_device_info_t info_type) {
if (info_type == UR_DEVICE_INFO_SUPPORTED_PARTITIONS ||
info_type == UR_DEVICE_INFO_PARTITION_TYPE) {
Expand Down Expand Up @@ -284,15 +291,15 @@ TEST_P(urDeviceGetInfoTest, Success) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidNullHandleDevice) {
TEST_F(urDeviceGetInfoSingleTest, InvalidNullHandleDevice) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE,
urDeviceGetInfo(nullptr, UR_DEVICE_INFO_TYPE,
sizeof(ur_device_type_t), &device_type,
nullptr));
}

TEST_P(urDeviceGetInfoTest, InvalidEnumerationInfoType) {
TEST_F(urDeviceGetInfoSingleTest, InvalidEnumerationInfoType) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION,
Expand All @@ -302,7 +309,7 @@ TEST_P(urDeviceGetInfoTest, InvalidEnumerationInfoType) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidSizePropSize) {
TEST_F(urDeviceGetInfoSingleTest, InvalidSizePropSize) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE,
Expand All @@ -311,7 +318,7 @@ TEST_P(urDeviceGetInfoTest, InvalidSizePropSize) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidSizePropSizeSmall) {
TEST_F(urDeviceGetInfoSingleTest, InvalidSizePropSizeSmall) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE,
Expand All @@ -321,7 +328,7 @@ TEST_P(urDeviceGetInfoTest, InvalidSizePropSizeSmall) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropValue) {
TEST_F(urDeviceGetInfoSingleTest, InvalidNullPointerPropValue) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER,
Expand All @@ -331,10 +338,30 @@ TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropValue) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropSizeRet) {
TEST_F(urDeviceGetInfoSingleTest, InvalidNullPointerPropSizeRet) {
for (auto device : devices) {
ASSERT_EQ_RESULT(
UR_RESULT_ERROR_INVALID_NULL_POINTER,
urDeviceGetInfo(device, UR_DEVICE_INFO_TYPE, 0, nullptr, nullptr));
}
}

TEST_F(urDeviceGetInfoSingleTest, MaxWorkGroupSizeIsNonzero) {
for (auto device : devices) {
size_t max_global_size;

ASSERT_SUCCESS(
urDeviceGetInfo(device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
sizeof(size_t), &max_global_size, nullptr));
ASSERT_NE(max_global_size, 0);

std::array<size_t, 3> max_work_group_sizes;
ASSERT_SUCCESS(urDeviceGetInfo(device,
UR_DEVICE_INFO_MAX_WORK_GROUPS_3D,
sizeof(max_work_group_sizes),
max_work_group_sizes.data(), nullptr));
for (size_t i = 0; i < 3; i++) {
ASSERT_NE(max_work_group_sizes[i], 0);
}
}
}
10 changes: 10 additions & 0 deletions test/conformance/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ macro(add_device_binary SOURCE_FILE)
continue()
endif()

# HIP doesn't seem to provide the symbol
# `_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11FixedSgSize` which
# causes a build failure here
if(${TRIPLE} MATCHES "amd" AND ${KERNEL_NAME} MATCHES "subgroup")
continue()
endif()

add_custom_command(OUTPUT "${BIN_PATH}"
COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
${AMD_TARGET_BACKEND} ${AMD_OFFLOAD_ARCH} ${AMD_NOGPULIB}
Expand Down Expand Up @@ -139,6 +146,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp)

set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
add_custom_command(OUTPUT ${KERNEL_HEADER}
Expand Down
27 changes: 27 additions & 0 deletions test/conformance/device_code/fixed_wg_size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <sycl/sycl.hpp>

struct KernelFunctor {
void operator()(sycl::nd_item<3>) const {}
void operator()(sycl::item<3>) const {}

auto get(sycl::ext::oneapi::experimental::properties_tag) {
return sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::work_group_size<4, 4, 4>};
}
};

int main() {
sycl::queue myQueue;
myQueue.submit([&](sycl::handler &cgh) {
cgh.parallel_for<class FixedWgSize>(sycl::range<3>(8, 8, 8),
KernelFunctor{});
});

myQueue.wait();
return 0;
}
42 changes: 42 additions & 0 deletions test/conformance/device_code/standard_types.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <stdint.h>
#include <sycl/sycl.hpp>

struct Struct {
uint32_t a;
uint32_t b;
};

int main() {
sycl::queue deviceQueue;
sycl::range<1> numOfItems{1};

uint32_t output = 0;

volatile bool test_bool = true;
volatile uint8_t test_u8 = 2;
volatile uint32_t test_u32 = 3;
volatile uint64_t test_u64 = 5;
Struct test_struct{7, 5};
volatile float test_float = 11;

{
sycl::buffer output_buff(&output, sycl::range(1));
deviceQueue.submit([&](sycl::handler &cgh) {
sycl::accessor acc{output_buff, cgh, sycl::read_write};
auto kern = [=](sycl::id<1> id) {
acc[id] = 100 + (test_bool ? 1 : 0) * test_u8 * test_u32 *
test_u64 * test_struct.a *
static_cast<uint32_t>(test_float);
};
cgh.parallel_for<class Foo>(numOfItems, kern);
});
deviceQueue.wait();
}

return output == 2410;
}
35 changes: 35 additions & 0 deletions test/conformance/device_code/subgroup.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <sycl/sycl.hpp>

struct KernelFunctor {
sycl::accessor<size_t, 1, sycl::access_mode::write> Acc;

KernelFunctor(sycl::accessor<size_t, 1, sycl::access_mode::write> Acc)
: Acc(Acc) {}

void operator()(sycl::nd_item<1> NdItem) const {
auto SG = NdItem.get_sub_group();
if (NdItem.get_global_linear_id() == 0) {
Acc[0] = SG.get_local_linear_range();
}
}
};

int main() {
sycl::queue myQueue;
size_t output = 0;
sycl::buffer output_buff(&output, sycl::range(1));

myQueue.submit([&](sycl::handler &cgh) {
sycl::accessor acc{output_buff, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<class FixedSgSize>(sycl::nd_range<1>(8, 2),
KernelFunctor{acc});
});

myQueue.wait();
return 0;
}
3 changes: 3 additions & 0 deletions test/conformance/enqueue/enqueue_adapter_cuda.match
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
urEnqueueKernelLaunchTest.InvalidKernelArgs/NVIDIA_CUDA_BACKEND___{{.*}}_
urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/NVIDIA_CUDA_BACKEND___{{.*}}_
urEnqueueKernelLaunchKernelSubGroupTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_
{{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/NVIDIA_CUDA_BACKEND___{{.*}}_
{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_
{{OPT}}urEnqueueMemBufferFillTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___size__256__patternSize__256
Expand Down
7 changes: 7 additions & 0 deletions test/conformance/enqueue/enqueue_adapter_native_cpu.match
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_1
{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_31
{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_1027
Expand Down
2 changes: 2 additions & 0 deletions test/conformance/enqueue/enqueue_adapter_opencl.match
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@
{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}}
{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}}
{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}
urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__OpenCL___{{.*}}_
urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__OpenCL___{{.*}}_
{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__OpenCL___{{.*}}_UsePoolEnabled
Loading

0 comments on commit 396fb20

Please sign in to comment.