From 4f5dc2e8392b126c8e631631edf5ec24a930cdde Mon Sep 17 00:00:00 2001
From: Omar Ahmed
Date: Wed, 11 Oct 2023 11:46:31 +0100
Subject: [PATCH] Add testing for memory management across multi-device contexts

---
 .../enqueue/urEnqueueKernelLaunch.cpp         |  56 ++++++++
 .../enqueue/urEnqueueMemBufferReadRect.cpp    |   3 +-
 .../enqueue/urEnqueueMemImageCopy.cpp         |  71 ++++++++++
 .../enqueue/urEnqueueMemImageRead.cpp         |  40 ++++++
 .../testing/include/uur/fixtures.h            | 129 +++++++++++++++++++
 5 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
index a75c8a3706..d9cb79e372 100644
--- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
+++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
@@ -216,3 +216,59 @@ TEST_P(urEnqueueKernelLaunchWithVirtualMemory, Success) {
         ASSERT_EQ(fill_val, data.at(i));
     }
 }
+
+// Fixture that creates one queue per device in uur::DevicesEnvironment, all
+// on the context inherited from urEnqueueKernelLaunchTest, so a test can
+// launch on one queue and read the result back through the others.
+// NOTE(review): assumes the inherited context contains every one of those
+// devices - confirm against the base fixture.
+struct urEnqueueKernelLaunchMultiDeviceTest : public urEnqueueKernelLaunchTest {
+    void SetUp() override {
+        UUR_RETURN_ON_FATAL_FAILURE(urEnqueueKernelLaunchTest::SetUp());
+        queues.reserve(uur::DevicesEnvironment::instance->devices.size());
+        for (const auto &device : uur::DevicesEnvironment::instance->devices) {
+            ur_queue_handle_t queue = nullptr;
+            ASSERT_SUCCESS(urQueueCreate(this->context, device, 0, &queue));
+            queues.push_back(queue);
+        }
+    }
+
+    void TearDown() override {
+        for (const auto &queue : queues) {
+            EXPECT_SUCCESS(urQueueRelease(queue));
+        }
+        UUR_RETURN_ON_FATAL_FAILURE(urEnqueueKernelLaunchTest::TearDown());
+    }
+
+    // One queue per device, in the order of DevicesEnvironment's device list.
+    std::vector<ur_queue_handle_t> queues;
+};
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchMultiDeviceTest);
+
+// Launches the kernel on the first queue, then checks that a blocking read on
+// every other queue returns `val` from the start of the buffer.
+TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
+    ur_mem_handle_t buffer = nullptr;
+    AddBuffer1DArg(sizeof(val) * global_size, &buffer);
+    AddPodArg(val);
+    ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernel, n_dimensions,
+                                         &global_offset, &global_size, nullptr,
+                                         0, nullptr, nullptr));
+
+    // Block the host until the launch has completed. A wait command enqueued
+    // on queues[0] would not do this: it neither blocks the host nor orders
+    // the reads submitted to the other queues below.
+    ASSERT_SUCCESS(urQueueFinish(queues[0]));
+
+    // Then the remaining queues do blocking reads from the buffer. Since the
+    // queues target different devices this checks that every device's memory
+    // has been synchronized.
+    for (size_t i = 1; i < queues.size(); ++i) {
+        const auto queue = queues[i];
+        uint32_t output = 0;
+        ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, buffer, true, 0,
+                                              sizeof(output), &output, 0,
+                                              nullptr, nullptr));
+        ASSERT_EQ(val, output) << "Result on queue " << i << " did not match!";
+    }
+}
diff --git a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp
index 28e373104c..7068985dfb 100644
--- a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp
+++ b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp
@@ -188,7 +188,8 @@ TEST_P(urEnqueueMemBufferReadRectTest, InvalidNullPtrEventWaitList) {
 using urEnqueueMemBufferReadRectMultiDeviceTest =
     uur::urMultiDeviceMemBufferQueueTest;
 
-TEST_F(urEnqueueMemBufferReadRectMultiDeviceTest, WriteReadDifferentQueues) {
+TEST_F(urEnqueueMemBufferReadRectMultiDeviceTest,
+       WriteRectReadDifferentQueues) {
     // First queue does a blocking write of 42 into the buffer.
     // Then a rectangular write the buffer as 1024x1x1 1D.
     std::vector<uint32_t> input(count, 42);
diff --git a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp
index 2e29dab482..a22b4baa37 100644
--- a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp
+++ b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp
@@ -251,3 +251,74 @@ TEST_P(urEnqueueMemImageCopyTest, InvalidSize) {
                                            {1, 0, 0}, size, 0, nullptr,
                                            nullptr));
 }
+
+using urEnqueueMemImageCopyMultiDeviceTest =
+    uur::urMultiDeviceMemImageWriteTest;
+
+// Copies the fixture's pre-written images on the first queue, then reads the
+// copy destinations back through every other queue.
+TEST_F(urEnqueueMemImageCopyMultiDeviceTest, CopyReadDifferentQueues) {
+    ur_mem_handle_t dstImage1D = nullptr;
+    ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format,
+                                    &desc1D, nullptr, &dstImage1D));
+    ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image1D, dstImage1D, origin,
+                                         origin, region1D, 0, nullptr,
+                                         nullptr));
+
+    ur_mem_handle_t dstImage2D = nullptr;
+    ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format,
+                                    &desc2D, nullptr, &dstImage2D));
+    ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image2D, dstImage2D, origin,
+                                         origin, region2D, 0, nullptr,
+                                         nullptr));
+
+    ur_mem_handle_t dstImage3D = nullptr;
+    ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format,
+                                    &desc3D, nullptr, &dstImage3D));
+    ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image3D, dstImage3D, origin,
+                                         origin, region3D, 0, nullptr,
+                                         nullptr));
+
+    // Block the host until the copies have completed. A wait command enqueued
+    // on queues[0] would not do this: it neither blocks the host nor orders
+    // the reads submitted to the other queues below.
+    ASSERT_SUCCESS(urQueueFinish(queues[0]));
+
+    // The remaining queues do blocking reads from the copy destinations. Since
+    // the queues target different devices this checks that every device's
+    // memory has been synchronized.
+    for (size_t i = 1; i < queues.size(); ++i) {
+        const auto queue = queues[i];
+
+        // Outputs start zeroed so that a read which transfers nothing cannot
+        // be mistaken for a match against the all-42 inputs.
+        std::vector<uint32_t> output1D(width * 4, 0);
+        ASSERT_SUCCESS(urEnqueueMemImageRead(queue, dstImage1D, true, origin,
+                                             region1D, 0, 0, output1D.data(), 0,
+                                             nullptr, nullptr));
+
+        std::vector<uint32_t> output2D(width * height * 4, 0);
+        ASSERT_SUCCESS(urEnqueueMemImageRead(queue, dstImage2D, true, origin,
+                                             region2D, 0, 0, output2D.data(), 0,
+                                             nullptr, nullptr));
+
+        std::vector<uint32_t> output3D(width * height * depth * 4, 0);
+        ASSERT_SUCCESS(urEnqueueMemImageRead(queue, dstImage3D, true, origin,
+                                             region3D, 0, 0, output3D.data(), 0,
+                                             nullptr, nullptr));
+
+        ASSERT_EQ(input1D, output1D)
+            << "Result on queue " << i << " for 1D image did not match!";
+
+        ASSERT_EQ(input2D, output2D)
+            << "Result on queue " << i << " for 2D image did not match!";
+
+        ASSERT_EQ(input3D, output3D)
+            << "Result on queue " << i << " for 3D image did not match!";
+    }
+
+    // The destination images are created by this test, not by the fixture.
+    EXPECT_SUCCESS(urMemRelease(dstImage1D));
+    EXPECT_SUCCESS(urMemRelease(dstImage2D));
+    EXPECT_SUCCESS(urMemRelease(dstImage3D));
+}
diff --git a/test/conformance/enqueue/urEnqueueMemImageRead.cpp b/test/conformance/enqueue/urEnqueueMemImageRead.cpp
index daebe0865d..d4cf322958 100644
--- a/test/conformance/enqueue/urEnqueueMemImageRead.cpp
+++ b/test/conformance/enqueue/urEnqueueMemImageRead.cpp
@@ -130,3 +130,43 @@ TEST_P(urEnqueueMemImageReadTest, InvalidRegion3D) {
                                            bad_region, 0, 0, output.data(), 0,
                                            nullptr, nullptr));
 }
+
+using urEnqueueMemImageReadMultiDeviceTest =
+    uur::urMultiDeviceMemImageWriteTest;
+
+// The fixture has already filled the images through the first queue with
+// blocking writes; this test reads them back through every other queue.
+TEST_F(urEnqueueMemImageReadMultiDeviceTest, WriteReadDifferentQueues) {
+    // The remaining queues do blocking reads from the image1D/2D/3D. Since the
+    // queues target different devices this checks that every device's memory
+    // has been synchronized.
+    for (size_t i = 1; i < queues.size(); ++i) {
+        const auto queue = queues[i];
+
+        // Outputs start zeroed so that a read which transfers nothing cannot
+        // be mistaken for a match against the all-42 inputs.
+        std::vector<uint32_t> output1D(width * 4, 0);
+        ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image1D, true, origin,
+                                             region1D, 0, 0, output1D.data(), 0,
+                                             nullptr, nullptr));
+
+        std::vector<uint32_t> output2D(width * height * 4, 0);
+        ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image2D, true, origin,
+                                             region2D, 0, 0, output2D.data(), 0,
+                                             nullptr, nullptr));
+
+        std::vector<uint32_t> output3D(width * height * depth * 4, 0);
+        ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image3D, true, origin,
+                                             region3D, 0, 0, output3D.data(), 0,
+                                             nullptr, nullptr));
+
+        ASSERT_EQ(input1D, output1D)
+            << "Result on queue " << i << " for 1D image did not match!";
+
+        ASSERT_EQ(input2D, output2D)
+            << "Result on queue " << i << " for 2D image did not match!";
+
+        ASSERT_EQ(input3D, output3D)
+            << "Result on queue " << i << " for 3D image did not match!";
+    }
+}
diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h
index 3ae09924b0..46b6c24ad6 100644
--- a/test/conformance/testing/include/uur/fixtures.h
+++ b/test/conformance/testing/include/uur/fixtures.h
@@ -601,6 +601,135 @@ struct urMemImageQueueTest : urQueueTest {
                               0};                           // num samples
 };
 
+// Creates read-write 1D, 2D and 3D images in the context provided by
+// urMultiDeviceContextTest and releases them again on teardown.
+struct urMultiDeviceMemImageTest : urMultiDeviceContextTest {
+    void SetUp() override {
+        UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::SetUp());
+        ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE,
+                                        &format, &desc1D, nullptr, &image1D));
+
+        ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE,
+                                        &format, &desc2D, nullptr, &image2D));
+
+        ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE,
+                                        &format, &desc3D, nullptr, &image3D));
+    }
+
+    void TearDown() override {
+        if (image1D) {
+            EXPECT_SUCCESS(urMemRelease(image1D));
+        }
+        if (image2D) {
+            EXPECT_SUCCESS(urMemRelease(image2D));
+        }
+        if (image3D) {
+            EXPECT_SUCCESS(urMemRelease(image3D));
+        }
+        UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::TearDown());
+    }
+
+    const size_t width = 1024;
+    const size_t height = 8;
+    const size_t depth = 2;
+    ur_mem_handle_t image1D = nullptr;
+    ur_mem_handle_t image2D = nullptr;
+    ur_mem_handle_t image3D = nullptr;
+    ur_rect_region_t region1D{width, 1, 1};
+    ur_rect_region_t region2D{width, height, 1};
+    ur_rect_region_t region3D{width, height, depth};
+    ur_rect_offset_t origin{0, 0, 0};
+    ur_image_format_t format = {UR_IMAGE_CHANNEL_ORDER_RGBA,
+                                UR_IMAGE_CHANNEL_TYPE_FLOAT};
+    ur_image_desc_t desc1D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype
+                              nullptr,                      // pNext
+                              UR_MEM_TYPE_IMAGE1D,          // mem object type
+                              width,                        // image width
+                              1,                            // image height
+                              1,                            // image depth
+                              1,                            // array size
+                              0,                            // row pitch
+                              0,                            // slice pitch
+                              0,                            // mip levels
+                              0};                           // num samples
+
+    ur_image_desc_t desc2D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype
+                              nullptr,                      // pNext
+                              UR_MEM_TYPE_IMAGE2D,          // mem object type
+                              width,                        // image width
+                              height,                       // image height
+                              1,                            // image depth
+                              1,                            // array size
+                              0,                            // row pitch
+                              0,                            // slice pitch
+                              0,                            // mip levels
+                              0};                           // num samples
+
+    ur_image_desc_t desc3D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype
+                              nullptr,                      // pNext
+                              UR_MEM_TYPE_IMAGE3D,          // mem object type
+                              width,                        // image width
+                              height,                       // image height
+                              depth,                        // image depth
+                              1,                            // array size
+                              0,                            // row pitch
+                              0,                            // slice pitch
+                              0,                            // mip levels
+                              0};                           // num samples
+};
+
+// Extends urMultiDeviceMemImageTest with one queue per device listed in
+// DevicesEnvironment, each created on the shared context.
+struct urMultiDeviceMemImageQueueTest : urMultiDeviceMemImageTest {
+    void SetUp() override {
+        UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageTest::SetUp());
+        queues.reserve(DevicesEnvironment::instance->devices.size());
+        for (const auto &device : DevicesEnvironment::instance->devices) {
+            ur_queue_handle_t queue = nullptr;
+            ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue));
+            queues.push_back(queue);
+        }
+    }
+
+    void TearDown() override {
+        for (const auto &queue : queues) {
+            EXPECT_SUCCESS(urQueueRelease(queue));
+        }
+        UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageTest::TearDown());
+    }
+
+    std::vector<ur_queue_handle_t> queues;
+};
+
+// Extends urMultiDeviceMemImageQueueTest by filling all three images with the
+// value 42 through blocking writes on the first queue, so that tests can read
+// the data back through the other queues.
+struct urMultiDeviceMemImageWriteTest : urMultiDeviceMemImageQueueTest {
+    void SetUp() override {
+        UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageQueueTest::SetUp());
+
+        ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image1D, true, origin,
+                                              region1D, 0, 0, input1D.data(), 0,
+                                              nullptr, nullptr));
+        ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image2D, true, origin,
+                                              region2D, 0, 0, input2D.data(), 0,
+                                              nullptr, nullptr));
+        ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image3D, true, origin,
+                                              region3D, 0, 0, input3D.data(), 0,
+                                              nullptr, nullptr));
+    }
+
+    void TearDown() override {
+        UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageQueueTest::TearDown());
+    }
+
+    std::vector<uint32_t> input1D = std::vector<uint32_t>(width * 4, 42);
+    std::vector<uint32_t> input2D =
+        std::vector<uint32_t>(width * height * 4, 42);
+    std::vector<uint32_t> input3D =
+        std::vector<uint32_t>(width * height * depth * 4, 42);
+};
+
 struct urUSMDeviceAllocTest : urQueueTest {
     void SetUp() override {
         UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp());