Skip to content

Commit

Permalink
Test updating multiple kernels in a command-buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
EwanC committed Jan 4, 2024
1 parent 0c71fc5 commit 2f02ade
Show file tree
Hide file tree
Showing 7 changed files with 298 additions and 62 deletions.
26 changes: 12 additions & 14 deletions test/conformance/device_code/indexers_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@
#include <CL/sycl.hpp>

int main() {
const cl::sycl::range<1> globalRange(6);
const cl::sycl::range<1> localRange(3);
const cl::sycl::id<1> globalOffset(4);
const cl::sycl::nd_range<1> ndRange(globalRange, localRange, globalOffset);
const cl::sycl::range<3> globalRange(8, 8, 8);
const cl::sycl::range<3> localRange(2, 2, 2);
const cl::sycl::id<3> globalOffset(4, 4, 4);
const cl::sycl::nd_range<3> ndRange(globalRange, localRange, globalOffset);

cl::sycl::queue sycl_queue;
const size_t elements_per_work_item = 9;
int *ptr = cl::sycl::malloc_shared<int>(
globalRange[0] * elements_per_work_item, sycl_queue);
const size_t elements_per_work_item = 6;
int *ptr = cl::sycl::malloc_shared<int>(globalRange[0] * globalRange[1] *
globalRange[2] *
elements_per_work_item,
sycl_queue);

sycl_queue.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<class indexers>(
ndRange, [ptr](cl::sycl::nd_item<1> index) {
int *wi_ptr = ptr + index.get_global_linear_id() * 9;
ndRange, [ptr](cl::sycl::nd_item<3> index) {
int *wi_ptr =
ptr + index.get_global_linear_id() * elements_per_work_item;

wi_ptr[0] = index.get_global_id(0);
wi_ptr[1] = index.get_global_id(1);
Expand All @@ -28,11 +31,6 @@ int main() {
wi_ptr[3] = index.get_local_id(0);
wi_ptr[4] = index.get_local_id(1);
wi_ptr[5] = index.get_local_id(2);

auto offset = index.get_offset();
wi_ptr[6] = offset[0];
wi_ptr[7] = offset[1];
wi_ptr[8] = offset[2];
});
});
return 0;
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{OPT}}BufferFillCommandTest.UpdateParmeters/AMD_HIP_BACKEND{{.*}}
{{OPT}}BufferFillCommandTest.UpdateGlobalSize/AMD_HIP_BACKEND{{.*}}
{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/AMD_HIP_BACKEND{{.*}}
{{OPT}}BufferFillCommandTest.OverrideUpdate/AMD_HIP_BACKEND{{.*}}
{{OPT}}BufferFillCommandTest.OverrideArgList/AMD_HIP_BACKEND{{.*}}
{{OPT}}USMFillCommandTest.UpdateParmeters/AMD_HIP_BACKEND{{.*}}
{{OPT}}USMFillCommandTest.UpdateExecInfo/AMD_HIP_BACKEND{{.*}}
{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/AMD_HIP_BACKEND{{.*}}
{{OPT}}BufferSaxpyKernelTests.UpdateParmeters/AMD_HIP_BACKEND{{.*}}
{{OPT}}USMSaxpyKernelTests.UpdateParmeters/AMD_HIP_BACKEND{{.*}}
{{OPT}}NDRangeUpdateTests.Update3D/AMD_HIP_BACKEND{{.*}}
{{OPT}}NDRangeUpdateTests.Update2D/AMD_HIP_BACKEND{{.*}}
{{OPT}}NDRangeUpdateTests.Update1D/AMD_HIP_BACKEND{{.*}}
Empty file.
Empty file.
178 changes: 130 additions & 48 deletions test/conformance/exp_command_buffer/ndrange_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ struct NDRangeUpdateTests : uur::urExpUpdatableCommandBufferTests {
GTEST_SKIP() << "Shared USM is not supported.";
}

const size_t allocation_size = sizeof(int) * 3 * 3 * global_size;
const size_t allocation_size = sizeof(int) * elements_per_id *
global_size[0] * global_size[1] *
global_size[2];
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
allocation_size, &shared_ptr));
ASSERT_NE(shared_ptr, nullptr);
Expand All @@ -27,13 +29,46 @@ struct NDRangeUpdateTests : uur::urExpUpdatableCommandBufferTests {
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr));

ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
&global_size, &local_size, 0, nullptr, nullptr, &command_handle));
updatable_cmd_buf_handle, kernel, n_dimensions,
global_offset.data(), global_size.data(), local_size.data(), 0,
nullptr, nullptr, &command_handle));
ASSERT_NE(command_handle, nullptr);

ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
}

void Validate(std::array<size_t, 3> &globalSize,
std::array<size_t, 3> &localSize,
std::array<size_t, 3> &globalOffset) {
for (size_t i = 0; i < globalSize[0]; i++) {
for (size_t j = 0; j < globalSize[1]; j++) {
for (size_t k = 0; k < globalSize[2]; k++) {
const size_t global_linear_id =
(i * (globalSize[1] * globalSize[0]) +
j * globalSize[0] + k);
int *wi_ptr = (int *)shared_ptr +
(elements_per_id * global_linear_id);

int global_id_1d = wi_ptr[0];
int global_id_2d = wi_ptr[1];
int global_id_3d = wi_ptr[2];

EXPECT_EQ(global_id_1d, i + globalOffset[0]);
EXPECT_EQ(global_id_2d, j + globalOffset[1]);
EXPECT_EQ(global_id_3d, k + globalOffset[2]);

int local_id_1d = wi_ptr[3];
int local_id_2d = wi_ptr[4];
int local_id_3d = wi_ptr[5];

EXPECT_EQ(local_id_1d, i % localSize[0]);
EXPECT_EQ(local_id_2d, j % localSize[1]);
EXPECT_EQ(local_id_3d, k % localSize[2]);
}
}
}
}

void TearDown() override {
if (shared_ptr) {
ASSERT_SUCCESS(urUSMFree(context, shared_ptr));
Expand All @@ -43,73 +78,120 @@ struct NDRangeUpdateTests : uur::urExpUpdatableCommandBufferTests {
urExpUpdatableCommandBufferTests::TearDown());
}

size_t global_size = 6;
size_t local_size = 3;
size_t global_offset = 4;
size_t n_dimensions = 1;
const size_t elements_per_id = 6;
std::array<size_t, 3> global_size = {8, 8, 8};
std::array<size_t, 3> local_size = {2, 2, 2};
std::array<size_t, 3> global_offset = {4, 4, 4};
size_t n_dimensions = 3;
void *shared_ptr = nullptr;
ur_exp_command_buffer_command_handle_t command_handle = nullptr;
};

UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTests);

TEST_P(NDRangeUpdateTests, Update1D) {
TEST_P(NDRangeUpdateTests, Update3D) {
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));

const size_t elements_per_id = 9;
for (size_t i = 0; i < global_size; i++) {
int *wi_ptr = (int *)shared_ptr + (i * elements_per_id);

int global_id_1d = wi_ptr[0];
//int global_id_2d = wi_ptr[1];
//int global_id_3d = wi_ptr[2];

ASSERT_EQ(global_id_1d, i + global_offset) << i;
// ASSERT_EQ(global_id_2d, global_offset);
// ASSERT_EQ(global_id_3d, 0);

int local_id_1d = wi_ptr[3];
// int local_id_2d = wi_ptr[4];
// int local_id_3d = wi_ptr[5];
Validate(global_size, local_size, global_offset);
std::array<size_t, 3> new_local_size = {4, 4, 4};
std::array<size_t, 3> new_global_offset = {3, 3, 3};
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
0, // numMemobjArgs
0, // numPointerArgs
0, // numValueArgs
0, // numExecInfos
3, // workDim;
nullptr, // pArgMemobjList
nullptr, // pArgPointerList
nullptr, // pArgValueList
nullptr, // pArgExecInfoList
new_global_offset.data(), // pGlobalWorkOffset
nullptr, // pGlobalWorkSize
new_local_size.data(), // pLocalWorkSize
};

ASSERT_SUCCESS(
urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));

ASSERT_EQ(local_id_1d, i % local_size);
// ASSERT_EQ(local_id_2d, 0);
// ASSERT_EQ(local_id_3d, 0);
Validate(global_size, new_local_size, new_global_offset);
}

int global_offset_1d = wi_ptr[6];
//int global_offset_2d = wi_ptr[7];
//int global_offset_3d = wi_ptr[8];
TEST_P(NDRangeUpdateTests, DISABLED_Update2D) {
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));

ASSERT_EQ(global_offset_1d, 4);
// ASSERT_EQ(global_offset_2d, 0);
// ASSERT_EQ(global_offset_3d, 0);
}
Validate(global_size, local_size, global_offset);
std::array<size_t, 3> new_global_size = {6, 6, 1};
std::array<size_t, 3> new_local_size = {3, 3, 1};
std::array<size_t, 3> new_global_offset = {3, 3, 0};
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
0, // numMemobjArgs
0, // numPointerArgs
0, // numValueArgs
0, // numExecInfos
2, // workDim;
nullptr, // pArgMemobjList
nullptr, // pArgPointerList
nullptr, // pArgValueList
nullptr, // pArgExecInfoList
new_global_offset.data(), // pGlobalWorkOffset
new_global_size.data(), // pGlobalWorkSize
new_local_size.data(), // pLocalWorkSize
};

ASSERT_SUCCESS(
urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));

// TODO
// 1. update
// 3. Validate.
Validate(new_global_size, new_local_size, new_global_offset);
}

TEST_P(NDRangeUpdateTests, Update2D) {
TEST_P(NDRangeUpdateTests, DISABLED_Update1D) {
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));

// TODO
// 1. Validate initial run
// 2. Update to offset/local/global work sizes in 2 dimensions
// 3. Validate.
}

TEST_P(NDRangeUpdateTests, Update3D) {
Validate(global_size, local_size, global_offset);
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
nullptr, // pNext
0, // numMemobjArgs
0, // numPointerArgs
0, // numValueArgs
0, // numExecInfos
1, // workDim
nullptr, // pArgMemobjList
nullptr, // pArgPointerList
nullptr, // pArgValueList
nullptr, // pArgExecInfoList
nullptr, // pGlobalWorkOffset
nullptr, // pGlobalWorkSize
nullptr, // pLocalWorkSize
};

ASSERT_SUCCESS(
urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urQueueFinish(queue));

// TODO
// 1. Validate initial run
// 2. Update to offset/local/global work sizes in 3 dimensions
// 3. Validate.
std::array<size_t, 3> new_global_size = {global_size[0], 1, 1};
std::array<size_t, 3> new_local_size = {local_size[0], 1, 1};
std::array<size_t, 3> new_global_offset = {global_offset[0], 0, 0};
Validate(new_global_size, new_local_size, new_global_offset);
}
Loading

0 comments on commit 2f02ade

Please sign in to comment.