diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt
index 9d19a34b93..f12cdc9ae4 100644
--- a/test/conformance/device_code/CMakeLists.txt
+++ b/test/conformance/device_code/CMakeLists.txt
@@ -53,6 +53,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp)
 add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp)
 add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp)
 add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp)
+add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/usm_ll.cpp)
 
 set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
 add_custom_command(OUTPUT ${KERNEL_HEADER}
diff --git a/test/conformance/device_code/usm_ll.cpp b/test/conformance/device_code/usm_ll.cpp
new file mode 100644
index 0000000000..abacdcf56e
--- /dev/null
+++ b/test/conformance/device_code/usm_ll.cpp
@@ -0,0 +1,68 @@
+// Copyright (C) 2023 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+int numNodes = 4;
+
+struct Node {
+    Node() : pNext(nullptr), Num(0xDEADBEEF) {}
+
+    Node *pNext;
+    uint32_t Num;
+};
+
+int main() {
+    queue q;
+    auto dev = q.get_device();
+    auto ctxt = q.get_context();
+
+    if (!dev.get_info<info::device::usm_shared_allocations>()) {
+        return 0;
+    }
+
+    Node *s_head =
+        (Node *)aligned_alloc_shared(alignof(Node), sizeof(Node), dev, ctxt);
+    if (s_head == nullptr) {
+        return -1;
+    }
+    Node *s_cur = s_head;
+
+    for (int i = 0; i < numNodes; i++) {
+        s_cur->Num = i * 2;
+
+        if (i != (numNodes - 1)) {
+            s_cur->pNext = (Node *)aligned_alloc_shared(
+                alignof(Node), sizeof(Node), dev, ctxt);
+        } else {
+            s_cur->pNext = nullptr;
+        }
+
+        s_cur = s_cur->pNext;
+    }
+
+    auto e1 = q.submit([=](handler &cgh) {
+        cgh.single_task([=]() {
+            Node *pHead = s_head;
+            while (pHead) {
+                pHead->Num = pHead->Num * 2 + 1;
+                pHead = pHead->pNext;
+            }
+        });
+    });
+
+    e1.wait();
+
+    s_cur = s_head;
+    for (int i = 0; i < numNodes; i++) {
+        Node *old = s_cur;
+        s_cur = s_cur->pNext;
+        free(old, ctxt);
+    }
+
+    return 0;
+}
diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match
index a034083c87..ebba00431b 100644
--- a/test/conformance/enqueue/enqueue_adapter_opencl.match
+++ b/test/conformance/enqueue/enqueue_adapter_opencl.match
@@ -33,3 +33,4 @@
 {{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}}
 {{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}}
 {{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}
+{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__OpenCL___{{.*}}_UsePoolEnabled
diff --git a/test/conformance/enqueue/helpers.h b/test/conformance/enqueue/helpers.h
index 7c01c4905a..6fc8dfacba 100644
--- a/test/conformance/enqueue/helpers.h
+++ b/test/conformance/enqueue/helpers.h
@@ -66,6 +66,18 @@ struct TestParameters2D {
     size_t height;
 };
 
+inline std::string USMKindToString(USMKind kind) {
+    switch (kind) {
+    case USMKind::Device:
+        return "Device";
+    case USMKind::Host:
+        return "Host";
+    case USMKind::Shared:
+    default:
+        return "Shared";
+    }
+}
+
 template <typename T>
 inline std::string
 print2DTestString(const testing::TestParamInfo<typename T::ParamType> &info) {
@@ -73,10 +85,14 @@ print2DTestString(const testing::TestParamInfo<typename T::ParamType> &info) {
     const auto platform_device_name =
         uur::GetPlatformAndDeviceName(device_handle);
     std::stringstream test_name;
+    auto src_kind = std::get<1>(std::get<1>(info.param));
+    auto dst_kind = std::get<2>(std::get<1>(info.param));
     test_name << platform_device_name << "__pitch__"
-              << std::get<1>(info.param).pitch << "__width__"
-              << std::get<1>(info.param).width << "__height__"
-              << std::get<1>(info.param).height;
+              << std::get<0>(std::get<1>(info.param)).pitch << "__width__"
+              << std::get<0>(std::get<1>(info.param)).width << "__height__"
+              << std::get<0>(std::get<1>(info.param)).height << "__src__"
+              << USMKindToString(src_kind) << "__dst__"
+              << USMKindToString(dst_kind);
     return test_name.str();
 }
 
diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
index d9cb79e372..0ea7cda255 100644
--- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
+++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
@@ -262,3 +262,102 @@ TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
         ASSERT_EQ(val, output) << "Result on queue " << i << " did not match!";
     }
 }
+
+struct urEnqueueKernelLaunchUSMLinkedList
+    : uur::urKernelTestWithParam<uur::BoolTestParam> {
+    struct Node {
+        Node() : next(nullptr), num(0xDEADBEEF) {}
+
+        Node *next;
+        uint32_t num;
+    };
+
+    void SetUp() override {
+        program_name = "usm_ll";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            uur::urKernelTestWithParam<uur::BoolTestParam>::SetUp());
+
+        use_pool = getParam().value;
+        ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue));
+        ur_usm_pool_desc_t pool_desc{UR_STRUCTURE_TYPE_USM_POOL_DESC, nullptr,
+                                     0};
+        if (use_pool) {
+            ASSERT_SUCCESS(urUSMPoolCreate(this->context, &pool_desc, &pool));
+        }
+    }
+
+    void TearDown() override {
+        auto *list_cur = list_head;
+        while (list_cur) {
+            auto *list_next = list_cur->next;
+            ASSERT_SUCCESS(urUSMFree(context, list_cur));
+            list_cur = list_next;
+        }
+
+        if (queue) {
+            ASSERT_SUCCESS(urQueueRelease(queue));
+        }
+
+        if (pool) {
+            ASSERT_SUCCESS(urUSMPoolRelease(pool));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            uur::urKernelTestWithParam<uur::BoolTestParam>::TearDown());
+    }
+
+    size_t global_size = 1;
+    size_t global_offset = 0;
+    Node *list_head = nullptr;
+    const int num_nodes = 4;
+    bool use_pool = false;
+    ur_usm_pool_handle_t pool = nullptr;
+    ur_queue_handle_t queue = nullptr; // TearDown() tests this handle
+};
+
+UUR_TEST_SUITE_P(
+    urEnqueueKernelLaunchUSMLinkedList,
+    testing::ValuesIn(uur::BoolTestParam::makeBoolParam("UsePool")),
+    uur::deviceTestWithParamPrinter<uur::BoolTestParam>);
+
+TEST_P(urEnqueueKernelLaunchUSMLinkedList, Success) {
+    ur_device_usm_access_capability_flags_t shared_usm_flags = 0;
+    ASSERT_SUCCESS(
+        uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+    if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+        GTEST_SKIP() << "Shared USM is not supported.";
+    }
+
+    // Build linked list with USM allocations
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, pool,
+                                    sizeof(Node),
+                                    reinterpret_cast<void **>(&list_head)));
+    ASSERT_NE(list_head, nullptr);
+    Node *list_cur = list_head;
+    for (int i = 0; i < num_nodes; i++) {
+        list_cur->num = i * 2;
+        if (i < num_nodes - 1) {
+            ASSERT_SUCCESS(
+                urUSMSharedAlloc(context, device, nullptr, pool, sizeof(Node),
+                                 reinterpret_cast<void **>(&list_cur->next)));
+            ASSERT_NE(list_cur->next, nullptr);
+        } else {
+            list_cur->next = nullptr;
+        }
+        list_cur = list_cur->next;
+    }
+
+    // Run kernel which will iterate the list and modify the values
+    ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &list_head));
+    ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &global_offset,
+                                         &global_size, nullptr, 0, nullptr,
+                                         nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify values
+    list_cur = list_head;
+    for (int i = 0; i < num_nodes; i++) {
+        ASSERT_EQ(list_cur->num, i * 4 + 1);
+        list_cur = list_cur->next;
+    }
+}
diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp
index d0e3dd9f72..258b971f41 100644
--- a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp
+++ b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp
@@ -6,14 +6,24 @@
 #include "helpers.h"
 #include <uur/fixtures.h>
 
+using TestParametersMemcpy2D =
+    std::tuple<uur::TestParameters2D, uur::USMKind, uur::USMKind>;
+
 struct urEnqueueUSMMemcpy2DTestWithParam
-    : uur::urQueueTestWithParam<uur::TestParameters2D> {
+    : uur::urQueueTestWithParam<TestParametersMemcpy2D> {
     void SetUp() override {
         UUR_RETURN_ON_FATAL_FAILURE(
-            uur::urQueueTestWithParam<uur::TestParameters2D>::SetUp());
+            uur::urQueueTestWithParam<TestParametersMemcpy2D>::SetUp());
+
+        const auto [in2DParams, inSrcKind, inDstKind] = getParam();
+        std::tie(pitch, width, height, src_kind, dst_kind) =
+            std::make_tuple(in2DParams.pitch, in2DParams.width,
+                            in2DParams.height, inSrcKind, inDstKind);
+
         ur_device_usm_access_capability_flags_t device_usm = 0;
         ASSERT_SUCCESS(uur::GetDeviceUSMDeviceSupport(device, device_usm));
-        if (!device_usm) {
+        if (!device_usm && (src_kind == uur::USMKind::Device ||
+                            dst_kind == uur::USMKind::Device)) {
             GTEST_SKIP() << "Device USM is not supported";
         }
 
@@ -25,15 +35,13 @@ struct urEnqueueUSMMemcpy2DTestWithParam
             GTEST_SKIP() << "2D USM memcpy is not supported";
         }
 
-        const auto [inPitch, inWidth, inHeight] = getParam();
-        std::tie(pitch, width, height) =
-            std::make_tuple(inPitch, inWidth, inHeight);
-
         const size_t num_elements = pitch * height;
-        ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
-                                        num_elements, &pSrc));
-        ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
-                                        num_elements, &pDst));
+        ASSERT_SUCCESS(uur::MakeUSMAllocationByType(
+            src_kind, context, device, nullptr, nullptr, num_elements, &pSrc));
+
+        ASSERT_SUCCESS(uur::MakeUSMAllocationByType(
+            dst_kind, context, device, nullptr, nullptr, num_elements, &pDst));
+
 
         ur_event_handle_t memset_event = nullptr;
         ASSERT_SUCCESS(urEnqueueUSMFill(queue, pSrc, sizeof(memset_value),
@@ -52,17 +60,22 @@ struct urEnqueueUSMMemcpy2DTestWithParam
         if (pDst) {
             ASSERT_SUCCESS(urUSMFree(context, pDst));
         }
-        uur::urQueueTestWithParam<uur::TestParameters2D>::TearDown();
+        uur::urQueueTestWithParam<TestParametersMemcpy2D>::TearDown();
     }
 
     void verifyMemcpySucceeded() {
         std::vector<uint8_t> host_mem(pitch * height);
-        ASSERT_SUCCESS(urEnqueueUSMMemcpy2D(queue, true, host_mem.data(), pitch,
-                                            pDst, pitch, width, height, 0,
-                                            nullptr, nullptr));
+        const uint8_t *host_ptr = nullptr;
+        if (dst_kind == uur::USMKind::Device) {
+            ASSERT_SUCCESS(urEnqueueUSMMemcpy2D(queue, true, host_mem.data(),
+                                                pitch, pDst, pitch, width,
+                                                height, 0, nullptr, nullptr));
+            host_ptr = host_mem.data();
+        } else {
+            host_ptr = static_cast<const uint8_t *>(pDst);
+        }
         for (size_t w = 0; w < width; ++w) {
             for (size_t h = 0; h < height; ++h) {
-                const auto *host_ptr = host_mem.data();
                 const size_t index = (pitch * h) + w;
                 ASSERT_TRUE(*(host_ptr + index) == memset_value);
             }
@@ -75,9 +88,11 @@ struct urEnqueueUSMMemcpy2DTestWithParam
     size_t pitch = 0;
     size_t width = 0;
     size_t height = 0;
+    uur::USMKind src_kind = uur::USMKind::Device;
+    uur::USMKind dst_kind = uur::USMKind::Device;
 };
 
-static std::vector<uur::TestParameters2D> test_cases{
+static std::vector<uur::TestParameters2D> test_sizes{
     /* Everything set to 1 */
     {1, 1, 1},
     /* Height == 1 && Pitch > width */
@@ -92,7 +107,13 @@ static std::vector<uur::TestParameters2D> test_cases{
     {234, 233, 1}};
 
 UUR_TEST_SUITE_P(urEnqueueUSMMemcpy2DTestWithParam,
-                 ::testing::ValuesIn(test_cases),
+                 ::testing::Combine(::testing::ValuesIn(test_sizes),
+                                    ::testing::Values(uur::USMKind::Device,
+                                                      uur::USMKind::Host,
+                                                      uur::USMKind::Shared),
+                                    ::testing::Values(uur::USMKind::Device,
+                                                      uur::USMKind::Host,
+                                                      uur::USMKind::Shared)),
                  uur::print2DTestString<urEnqueueUSMMemcpy2DTestWithParam>);
 
 TEST_P(urEnqueueUSMMemcpy2DTestWithParam, SuccessBlocking) {
@@ -119,7 +140,8 @@ TEST_P(urEnqueueUSMMemcpy2DTestWithParam, SuccessNonBlocking) {
 
 using urEnqueueUSMMemcpy2DNegativeTest = urEnqueueUSMMemcpy2DTestWithParam;
 UUR_TEST_SUITE_P(urEnqueueUSMMemcpy2DNegativeTest,
-                 ::testing::Values(uur::TestParameters2D{1, 1, 1}),
+                 ::testing::Values(TestParametersMemcpy2D{
+                     {1, 1, 1}, uur::USMKind::Device, uur::USMKind::Device}),
                  uur::print2DTestString<urEnqueueUSMMemcpy2DNegativeTest>);
 
 TEST_P(urEnqueueUSMMemcpy2DNegativeTest, InvalidNullHandleQueue) {
diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h
index 2ede84d135..29d9b0673a 100644
--- a/test/conformance/testing/include/uur/fixtures.h
+++ b/test/conformance/testing/include/uur/fixtures.h
@@ -1071,7 +1071,8 @@ struct urProgramTest : urQueueTest {
 template <class T> struct urProgramTestWithParam : urContextTestWithParam<T> {
     void SetUp() override {
         UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam<T>::SetUp());
-        uur::KernelsEnvironment::instance->LoadSource("foo", 0, il_binary);
+        uur::KernelsEnvironment::instance->LoadSource(program_name, 0,
+                                                      il_binary);
         ASSERT_SUCCESS(urProgramCreateWithIL(this->context, il_binary->data(),
                                              il_binary->size(), nullptr,
                                              &program));
diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h
index 027e270f25..4b7649559f 100644
--- a/test/conformance/testing/include/uur/utils.h
+++ b/test/conformance/testing/include/uur/utils.h
@@ -378,6 +378,18 @@ ur_device_partition_property_t makePartitionEquallyDesc(uint32_t cu_per_device);
 ur_device_partition_property_t
 makePartitionByAffinityDomain(ur_device_affinity_domain_flags_t aff_domain);
 
+enum class USMKind {
+    Device,
+    Host,
+    Shared,
+};
+
+ur_result_t MakeUSMAllocationByType(USMKind kind, ur_context_handle_t hContext,
+                                    ur_device_handle_t hDevice,
+                                    const ur_usm_desc_t *pUSMDesc,
+                                    ur_usm_pool_handle_t hPool, size_t size,
+                                    void **ppMem);
+
 } // namespace uur
 
 #endif // UR_CONFORMANCE_INCLUDE_UTILS_H_INCLUDED
diff --git a/test/conformance/testing/source/utils.cpp b/test/conformance/testing/source/utils.cpp
index 1597ba0df6..de1c33be08 100644
--- a/test/conformance/testing/source/utils.cpp
+++ b/test/conformance/testing/source/utils.cpp
@@ -658,4 +658,22 @@ makePartitionByAffinityDomain(ur_device_affinity_domain_flags_t aff_domain) {
     return desc;
 }
 
+ur_result_t MakeUSMAllocationByType(USMKind kind, ur_context_handle_t hContext,
+                                    ur_device_handle_t hDevice,
+                                    const ur_usm_desc_t *pUSMDesc,
+                                    ur_usm_pool_handle_t hPool, size_t size,
+                                    void **ppMem) {
+    switch (kind) {
+    case USMKind::Device:
+        return urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, hPool, size,
+                                ppMem);
+    case USMKind::Host:
+        return urUSMHostAlloc(hContext, pUSMDesc, hPool, size, ppMem);
+    default:
+    case USMKind::Shared:
+        return urUSMSharedAlloc(hContext, hDevice, pUSMDesc, hPool, size,
+                                ppMem);
+    }
+}
+
 } // namespace uur