Skip to content

Commit

Permalink
Make test without using macro funcs
Browse files Browse the repository at this point in the history
- Use gtesting infrastructure for tests instead of macro funcs
- Change the logic in the local WG size calculation so the X dim
  is calculated first. This prioritises big block sizes in X dims
  which can improve perf.
  • Loading branch information
hdelan committed Mar 12, 2024
1 parent c2b46e0 commit 81dd2d0
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 106 deletions.
20 changes: 10 additions & 10 deletions source/ur/ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,20 +350,20 @@ template <typename T> inline bool isPowerOf2(const T &Value) {
static inline void roundToHighestFactorOfGlobalSizeIn3d(
size_t *ThreadsPerBlock, const size_t *GlobalSize,
const size_t *MaxBlockDim, const size_t MaxBlockSize) {
ThreadsPerBlock[2] = std::min(GlobalSize[2], MaxBlockDim[2]);
ThreadsPerBlock[0] = std::min(GlobalSize[0], MaxBlockDim[0]);
// Make the X dim a factor of 2
do {
roundToHighestFactorOfGlobalSize(ThreadsPerBlock[0], GlobalSize[0]);
} while (!isPowerOf2(ThreadsPerBlock[0]) && ThreadsPerBlock[0] > 32 &&
--ThreadsPerBlock[0]);

roundToHighestFactorOfGlobalSize(ThreadsPerBlock[2], GlobalSize[2]);

ThreadsPerBlock[1] =
std::min(GlobalSize[1],
std::min(MaxBlockSize / ThreadsPerBlock[2], MaxBlockDim[1]));
std::min(MaxBlockSize / ThreadsPerBlock[0], MaxBlockDim[1]));
roundToHighestFactorOfGlobalSize(ThreadsPerBlock[1], GlobalSize[1]);

ThreadsPerBlock[0] = std::min(
GlobalSize[0], MaxBlockSize / (ThreadsPerBlock[1] * ThreadsPerBlock[2]));

// Make the X dim a factor of 2
do {
roundToHighestFactorOfGlobalSize(ThreadsPerBlock[0], GlobalSize[0]);
} while (!isPowerOf2(ThreadsPerBlock[0]) && ThreadsPerBlock[0] > 32 &&
--ThreadsPerBlock[0]);
ThreadsPerBlock[2] = std::min(
GlobalSize[2], MaxBlockSize / (ThreadsPerBlock[1] * ThreadsPerBlock[0]));
}
184 changes: 88 additions & 96 deletions test/conformance/enqueue/urEnqueueKernelLaunch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,105 +77,97 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidWorkDimension) {
UR_RESULT_ERROR_INVALID_WORK_DIMENSION);
}

#define ENQUEUE_KERNEL_LAUNCH_TEST_1D_SIZES(SIZE) \
struct urEnqueueKernelLaunchTestSizes##SIZE : uur::urKernelExecutionTest { \
void SetUp() override { \
program_name = "fill"; \
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); \
} \
\
uint32_t val = 42; \
size_t global_size = SIZE; \
size_t global_offset = 0; \
size_t n_dimensions = 1; \
}; \
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchTestSizes##SIZE); \
\
TEST_P(urEnqueueKernelLaunchTestSizes##SIZE, Success) { \
ur_mem_handle_t buffer = nullptr; \
AddBuffer1DArg(sizeof(val) * global_size, &buffer); \
AddPodArg(val); \
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, \
&global_offset, &global_size, \
nullptr, 0, nullptr, nullptr)); \
ASSERT_SUCCESS(urQueueFinish(queue)); \
ValidateBuffer(buffer, sizeof(val) * global_size, val); \
struct testParametersEnqueueKernel {
size_t X, Y, Z;
size_t Dims;
};

template <typename T>
inline std::string printKernelLaunchTestString(
const testing::TestParamInfo<typename T::ParamType> &info) {
const auto device_handle = std::get<0>(info.param);
const auto platform_device_name =
uur::GetPlatformAndDeviceName(device_handle);
std::stringstream test_name;
test_name << platform_device_name << "__"
<< std::get<1>(info.param).Dims << "D_"
<< std::get<1>(info.param).X;
if (std::get<1>(info.param).Dims > 1) {
test_name << "_" << std::get<1>(info.param).Y;
}
if (std::get<1>(info.param).Dims > 2) {
test_name << "_" << std::get<1>(info.param).Z;
}
ENQUEUE_KERNEL_LAUNCH_TEST_1D_SIZES(1)
ENQUEUE_KERNEL_LAUNCH_TEST_1D_SIZES(53)
ENQUEUE_KERNEL_LAUNCH_TEST_1D_SIZES(100)
ENQUEUE_KERNEL_LAUNCH_TEST_1D_SIZES(1342)

#define ENQUEUE_KERNEL_LAUNCH_TEST_2D_SIZES(SIZE1, SIZE2) \
struct urEnqueueKernelLaunch2DTestSizes##SIZE1##_##SIZE2 \
: uur::urKernelExecutionTest { \
void SetUp() override { \
program_name = "fill_2d"; \
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); \
} \
\
uint32_t val = 42; \
size_t global_size[2] = {SIZE1, SIZE2}; \
size_t global_offset[2] = {0, 0}; \
size_t buffer_size = sizeof(val) * global_size[0] * global_size[1]; \
size_t n_dimensions = 2; \
}; \
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P( \
urEnqueueKernelLaunch2DTestSizes##SIZE1##_##SIZE2); \
\
TEST_P(urEnqueueKernelLaunch2DTestSizes##SIZE1##_##SIZE2, Success) { \
ur_mem_handle_t buffer = nullptr; \
AddBuffer1DArg(buffer_size, &buffer); \
AddPodArg(val); \
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, \
global_offset, global_size, \
nullptr, 0, nullptr, nullptr)); \
ASSERT_SUCCESS(urQueueFinish(queue)); \
ValidateBuffer(buffer, buffer_size, val); \
test_name << "";
return test_name.str();
}

struct urEnqueueKernelLaunchTestWithParam
: uur::urBaseKernelExecutionTestWithParam<testParametersEnqueueKernel> {
void SetUp() override {
global_range[0] = std::get<1>(GetParam()).X;
global_range[1] = std::get<1>(GetParam()).Y;
global_range[2] = std::get<1>(GetParam()).Z;
buffer_size = sizeof(val) * global_range[0];
n_dimensions = std::get<1>(GetParam()).Dims;
if (n_dimensions == 1) {
program_name = "fill";
} else if (n_dimensions == 2) {
program_name = "fill_2d";
buffer_size *= global_range[1];
} else {
assert(n_dimensions == 3);
program_name = "fill_3d";
buffer_size *= global_range[1] * global_range[2];
}
UUR_RETURN_ON_FATAL_FAILURE(
urBaseKernelExecutionTestWithParam::SetUp());
}
ENQUEUE_KERNEL_LAUNCH_TEST_2D_SIZES(8, 8)
ENQUEUE_KERNEL_LAUNCH_TEST_2D_SIZES(1, 1)
ENQUEUE_KERNEL_LAUNCH_TEST_2D_SIZES(53, 100)
ENQUEUE_KERNEL_LAUNCH_TEST_2D_SIZES(1, 79)
ENQUEUE_KERNEL_LAUNCH_TEST_2D_SIZES(1342, 1)

#define ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(SIZE1, SIZE2, SIZE3) \
struct urEnqueueKernelLaunch3DTestSizes##SIZE1##_##SIZE2##_##SIZE3 \
: uur::urKernelExecutionTest { \
void SetUp() override { \
program_name = "fill_3d"; \
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); \
} \
\
uint32_t val = 42; \
size_t global_size[3] = {SIZE1, SIZE2, SIZE3}; \
size_t global_offset[3] = {0, 0, 0}; \
size_t buffer_size = \
sizeof(val) * global_size[0] * global_size[1] * global_size[2]; \
size_t n_dimensions = 3; \
}; \
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P( \
urEnqueueKernelLaunch3DTestSizes##SIZE1##_##SIZE2##_##SIZE3); \
\
TEST_P(urEnqueueKernelLaunch3DTestSizes##SIZE1##_##SIZE2##_##SIZE3, \
Success) { \
ur_mem_handle_t buffer = nullptr; \
AddBuffer1DArg(buffer_size, &buffer); \
AddPodArg(val); \
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions, \
global_offset, global_size, \
nullptr, 0, nullptr, nullptr)); \
ASSERT_SUCCESS(urQueueFinish(queue)); \
ValidateBuffer(buffer, buffer_size, val); \

void TearDown() override {
UUR_RETURN_ON_FATAL_FAILURE(uur::urBaseKernelExecutionTestWithParam<
testParametersEnqueueKernel>::TearDown());
}
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(1, 1, 1)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(37, 1, 1)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(1, 78, 1)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(1, 1, 1025)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(37, 19, 1)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(1, 78, 91)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(18, 1, 1025)
ENQUEUE_KERNEL_LAUNCH_TEST_3D_SIZES(18, 79, 1025)

uint32_t val = 42;
size_t global_range[3];
size_t global_offset[3] = {0, 0, 0};
size_t n_dimensions;
size_t buffer_size;
};

static std::vector<testParametersEnqueueKernel> test_cases{// 1D
{1, 1, 1, 1},
{31, 1, 1, 1},
{1027, 1, 1, 1},
{32, 1, 1, 1},
{256, 1, 1, 1},
// 2D
{1, 1, 1, 2},
{31, 7, 1, 2},
{1027, 1, 1, 2},
{1, 32, 1, 2},
{256, 79, 1, 2},
// 3D
{1, 1, 1, 3},
{31, 7, 1, 3},
{1027, 1, 19, 3},
{1, 53, 19, 3},
{256, 79, 8, 3}};
UUR_TEST_SUITE_P(
urEnqueueKernelLaunchTestWithParam, testing::ValuesIn(test_cases),
printKernelLaunchTestString<urEnqueueKernelLaunchTestWithParam>);

TEST_P(urEnqueueKernelLaunchTestWithParam, Success) {
ur_mem_handle_t buffer = nullptr;
AddBuffer1DArg(buffer_size, &buffer);
AddPodArg(val);
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
global_offset, global_range, nullptr,
0, nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));
ValidateBuffer(buffer, buffer_size, val);
}

struct urEnqueueKernelLaunchWithVirtualMemory : uur::urKernelExecutionTest {

Expand Down
104 changes: 104 additions & 0 deletions test/conformance/testing/include/uur/fixtures.h
Original file line number Diff line number Diff line change
Expand Up @@ -1254,6 +1254,110 @@ struct urBaseKernelExecutionTest : urBaseKernelTest {
uint32_t current_arg_index = 0;
};

template <typename T>
struct urBaseKernelExecutionTestWithParam : urBaseKernelTestWithParam<T> {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTestWithParam<T>::SetUp());
UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTestWithParam<T>::Build());
context = urBaseKernelTestWithParam<T>::context;
kernel = urBaseKernelTestWithParam<T>::kernel;
ASSERT_SUCCESS(urQueueCreate(
context, urBaseKernelTestWithParam<T>::device, 0, &queue));
}

void TearDown() override {
for (auto &buffer : buffer_args) {
ASSERT_SUCCESS(urMemRelease(buffer));
}
UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTestWithParam<T>::TearDown());
if (queue) {
EXPECT_SUCCESS(urQueueRelease(queue));
}
}

// Adds a kernel arg representing a sycl buffer constructed with a 1D range.
void AddBuffer1DArg(size_t size, ur_mem_handle_t *out_buffer) {
ur_mem_handle_t mem_handle = nullptr;
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, size,
nullptr, &mem_handle));
char zero = 0;
ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, mem_handle, &zero,
sizeof(zero), 0, size, 0, nullptr,
nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index, nullptr,
mem_handle));

// SYCL device kernels have different interfaces depending on the
// backend being used. Typically a kernel which takes a buffer argument
// will take a pointer to the start of the buffer and a sycl::id param
// which is a struct that encodes the accessor to the buffer. However
// the AMD backend handles this differently and uses three separate
// arguments for each of the three dimensions of the accessor.

ur_platform_backend_t backend;
ASSERT_SUCCESS(urPlatformGetInfo(urBaseKernelTestWithParam<T>::platform,
UR_PLATFORM_INFO_BACKEND,
sizeof(backend), &backend, nullptr));
if (backend == UR_PLATFORM_BACKEND_HIP) {
// this emulates the three offset params for buffer accessor on AMD.
size_t val = 0;
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
sizeof(size_t), nullptr, &val));
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 2,
sizeof(size_t), nullptr, &val));
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 3,
sizeof(size_t), nullptr, &val));
current_arg_index += 4;
} else {
// This emulates the offset struct sycl adds for a 1D buffer accessor.
struct {
size_t offsets[1] = {0};
} accessor;
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index + 1,
sizeof(accessor), nullptr,
&accessor));
current_arg_index += 2;
}

buffer_args.push_back(mem_handle);
*out_buffer = mem_handle;
}

template <class U> void AddPodArg(U data) {
ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index,
sizeof(data), nullptr, &data));
current_arg_index++;
}

// Validate the contents of `buffer` according to the given validator.
template <class U>
void ValidateBuffer(ur_mem_handle_t buffer, size_t size,
std::function<bool(U &)> validator) {
std::vector<U> read_buffer(size / sizeof(U));
ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, buffer, true, 0, size,
read_buffer.data(), 0, nullptr,
nullptr));
ASSERT_TRUE(
std::all_of(read_buffer.begin(), read_buffer.end(), validator));
}

// Helper that uses the generic validate function to check for a given value.
template <class U>
void ValidateBuffer(ur_mem_handle_t buffer, size_t size, U value) {
auto validator = [&value](U result) -> bool { return result == value; };

ValidateBuffer<U>(buffer, size, validator);
}

std::vector<ur_mem_handle_t>
buffer_args;
uint32_t current_arg_index = 0;
ur_context_handle_t context;
ur_kernel_handle_t kernel;
ur_queue_handle_t queue;
};

struct urKernelExecutionTest : urBaseKernelExecutionTest {
void SetUp() {
UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelExecutionTest::SetUp());
Expand Down

0 comments on commit 81dd2d0

Please sign in to comment.