From 4957bd02a18b3a6a973cf5fce60a286fcfe58d78 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 22 Feb 2024 12:52:46 +0000 Subject: [PATCH 01/23] [urinfo] Use urDeviceGetSelected Now that #740 is merged, actually use `urDeviceGetSelected` in the `urinfo` tool to mirror the behaviour of the `sycl-ls` tool. --- source/loader/ur_lib.cpp | 2 +- tools/urinfo/urinfo.cpp | 66 +++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 0e350ef3fa..22263bb807 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -552,7 +552,7 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, if (acceptDeviceList.size() == 0 && discardDeviceList.size() == 0) { // nothing in env var was understood as a valid term - return UR_RESULT_ERROR_INVALID_VALUE; + return UR_RESULT_SUCCESS; } else if (acceptDeviceList.size() == 0) { // no accept terms were understood, but at least one discard term was // we are magnanimous to the user when there were bad/ignored accept terms diff --git a/tools/urinfo/urinfo.cpp b/tools/urinfo/urinfo.cpp index d1463ea4fa..bf7173234b 100644 --- a/tools/urinfo/urinfo.cpp +++ b/tools/urinfo/urinfo.cpp @@ -14,6 +14,8 @@ namespace urinfo { struct app { bool verbose = false; + bool linear_ids = true; + bool ignore_device_selector = false; ur_loader_config_handle_t loaderConfig = nullptr; std::vector adapters; std::unordered_map> @@ -23,6 +25,16 @@ struct app { app(int argc, const char **argv) { parseArgs(argc, argv); + if (!ignore_device_selector) { + if (auto device_selector = std::getenv("ONEAPI_DEVICE_SELECTOR")) { + std::fprintf(stderr, + "info: Output filtered by ONEAPI_DEVICE_SELECTOR " + "environment variable, which is set to \"%s\".\n" + "To see all devices, use the " + "--ignore-device-selector CLI option.\n\n", + device_selector); + } + } UR_CHECK(urLoaderConfigCreate(&loaderConfig)); UR_CHECK(urLoaderConfigEnableLayer(loaderConfig, "UR_LAYER_FULL_VALIDATION")); @@ -40,6 +52,10 @@ devices which are currently visible in the local execution environment. -h, --help show this help message and exit --version show version number and exit -v, --verbose print additional information + --no-linear-ids do not show linear device ids + --ignore-device-selector + do not use ONEAPI_DEVICE_SELECTOR to filter list of + devices )"; for (int argi = 1; argi < argc; argi++) { std::string_view arg{argv[argi]}; @@ -51,6 +67,10 @@ devices which are currently visible in the local execution environment. std::exit(0); } else if (arg == "-v" || arg == "--verbose") { verbose = true; + } else if (arg == "--no-linear-ids") { + linear_ids = false; + } else if (arg == "--ignore-device-selector") { + ignore_device_selector = true; } else { std::fprintf(stderr, "error: invalid argument: %s\n", argv[argi]); @@ -65,12 +85,14 @@ devices which are currently visible in the local execution environment. uint32_t numAdapters = 0; UR_CHECK(urAdapterGet(0, nullptr, &numAdapters)); if (numAdapters == 0) { - std::cout << "No adapters found.\n"; std::exit(0); } adapters.resize(numAdapters); UR_CHECK(urAdapterGet(numAdapters, adapters.data(), nullptr)); + auto urDeviceGetFn = + ignore_device_selector ? urDeviceGet : urDeviceGetSelected; + for (size_t adapterIndex = 0; adapterIndex < adapters.size(); adapterIndex++) { auto adapter = adapters[adapterIndex]; @@ -78,8 +100,6 @@ devices which are currently visible in the local execution environment. uint32_t numPlatforms = 0; UR_CHECK(urPlatformGet(&adapter, 1, 0, nullptr, &numPlatforms)); if (numPlatforms == 0) { - std::cout << "No platforms found in adapter " << adapterIndex - << ".\n"; continue; } adapterPlatformsMap[adapter].resize(numPlatforms); @@ -92,17 +112,15 @@ devices which are currently visible in the local execution environment. auto platform = adapterPlatformsMap[adapter][platformIndex]; // Enumerate devices uint32_t numDevices = 0; - UR_CHECK(urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, - &numDevices)); + UR_CHECK(urDeviceGetFn(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, + &numDevices)); if (numDevices == 0) { - std::cout << "No devices found platform " << platformIndex - << ".\n"; continue; } platformDevicesMap[platform].resize(numDevices); - UR_CHECK(urDeviceGet(platform, UR_DEVICE_TYPE_ALL, numDevices, - platformDevicesMap[platform].data(), - nullptr)); + UR_CHECK(urDeviceGetFn(platform, UR_DEVICE_TYPE_ALL, numDevices, + platformDevicesMap[platform].data(), + nullptr)); } } } @@ -112,6 +130,8 @@ devices which are currently visible in the local execution environment. adapterIndex++) { auto adapter = adapters[adapterIndex]; auto &platforms = adapterPlatformsMap[adapter]; + size_t adapter_device_id = 0; + std::string adapter_backend = urinfo::getAdapterBackend(adapter); for (size_t platformIndex = 0; platformIndex < platforms.size(); platformIndex++) { auto platform = platforms[platformIndex]; @@ -119,16 +139,28 @@ devices which are currently visible in the local execution environment. for (size_t deviceIndex = 0; deviceIndex < devices.size(); deviceIndex++) { auto device = devices[deviceIndex]; - std::cout << "[adapter(" << adapterIndex << "," - << urinfo::getAdapterBackend(adapter) << "):" - << "platform(" << platformIndex << "):" - << "device(" << deviceIndex << "," - << urinfo::getDeviceType(device) << ")] " - << urinfo::getPlatformName(platform) << ", " - << urinfo::getDeviceName(device) << " " + auto device_type = urinfo::getDeviceType(device); + + if (linear_ids) { + std::cout << "[" << adapter_backend << ":" + << device_type << "]"; + std::cout << "[" << adapter_backend << ":" + << adapter_device_id << "]"; + } else { + std::cout << "[adapter(" << adapterIndex << "," + << adapter_backend << "):" + << "platform(" << platformIndex << "):" + << "device(" << deviceIndex << "," + << device_type << ")]"; + } + + std::cout << " " << urinfo::getPlatformName(platform) + << ", " << urinfo::getDeviceName(device) << " " << urinfo::getDeviceVersion(device) << " " << "[" << urinfo::getDeviceDriverVersion(device) << "]\n"; + + adapter_device_id++; } } } From 5fe75aaac87a174b4ab40573b5e49032dc8d1b81 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 28 Feb 2024 12:32:38 +0000 Subject: [PATCH 02/23] [ODS] Fix not returning invalid values After allowing urDeviceGetSelected to return zero devices rather than returning an error, a number of unit tests started failing. This patch fixes these tests by returning the expected error code during parsing, rather than after processing all ODS terms. This has the caveat of not processing all ODS terms before reporting an error and should be revisited once more robust testing is in place. --- source/loader/ur_lib.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 22263bb807..8245644899 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -426,8 +426,10 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, for (auto &termPair : mapODS) { std::string backend = termPair.first; - if (backend - .empty()) { // FIXME: never true because getenv_to_map rejects this case + // TODO: Figure out how to process all ODS errors rather than returning + // on the first error. + if (backend.empty()) { + // FIXME: never true because getenv_to_map rejects this case // malformed term: missing backend -- output ERROR, then continue logger::error("ERROR: missing backend, format of filter = " "'[!]backend:filterStrings'"); @@ -459,20 +461,19 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, std::tolower(static_cast(b)); })) { // irrelevant term for current request: different backend -- silently ignore - logger::warning( - "WARNING: ignoring term with irrelevant backend '{}'", backend); - continue; + logger::error("unrecognised backend '{}'", backend); + return UR_RESULT_ERROR_INVALID_VALUE; } if (termPair.second.size() == 0) { - // malformed term: missing filterStrings -- output ERROR, then continue - logger::error("ERROR missing filterStrings, format of filter = " + // malformed term: missing filterStrings -- output ERROR + logger::error("missing filterStrings, format of filter = " "'[!]backend:filterStrings'"); - continue; + return UR_RESULT_ERROR_INVALID_VALUE; } if (std::find_if(termPair.second.cbegin(), termPair.second.cend(), [](const auto &s) { return s.empty(); }) != - termPair.second - .cend()) { // FIXME: never true because getenv_to_map rejects this case + termPair.second.cend()) { + // FIXME: never true because getenv_to_map rejects this case // malformed term: missing filterString -- output warning, then continue logger::warning( "WARNING: empty filterString, format of filterStrings " @@ -483,10 +484,10 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, [](const auto &s) { return std::count(s.cbegin(), s.cend(), '.') > 2; }) != termPair.second.cend()) { - // malformed term: too many dots in filterString -- output warning, then continue - logger::warning("WARNING: too many dots in filterString, format of " - "filterString = 'root[.sub[.subsub]]'"); - continue; + // malformed term: too many dots in filterString + logger::error("too many dots in filterString, format of " + "filterString = 'root[.sub[.subsub]]'"); + return UR_RESULT_ERROR_INVALID_VALUE; } if (std::find_if( termPair.second.cbegin(), termPair.second.cend(), @@ -504,10 +505,9 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, } return false; // no BAD things, so must be okay }) != termPair.second.cend()) { - // malformed term: star dot no-star in filterString -- output warning, then continue - logger::warning( - "WARNING: invalid wildcard in filterString, '*.' => '*.*'"); - continue; + // malformed term: star dot no-star in filterString + logger::error("invalid wildcard in filterString, '*.' => '*.*'"); + return UR_RESULT_ERROR_INVALID_VALUE; } // TODO -- use regex validation_pattern to catch all other syntax errors in the ODS string From 198b7c22f44d580d86cc95439fc4a9ed0c2577ce Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 28 Feb 2024 14:01:51 +0000 Subject: [PATCH 03/23] [Loader] Remove comparison with no effect --- source/loader/ur_lib.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 8245644899..d2ed4853a8 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -224,10 +224,6 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, if (!hPlatform) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - // NumEntries is max number of devices wanted by the caller (max usable length of phDevices) - if (NumEntries < 0) { - return UR_RESULT_ERROR_INVALID_SIZE; - } if (NumEntries > 0 && !phDevices) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } From b1a32ba95f4c4caaf238cbe77638bf747f7e13c3 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 14 Feb 2024 14:25:22 +0000 Subject: [PATCH 04/23] [EXP][Command-Buffer] Fix CUDA Coverity issues Address issues in the CUDA adapter code added by https://github.com/oneapi-src/unified-runtime/pull/1089 flagged by Coverity. * Uninitalized struct member of `CUDA_KERNEL_NODE_PARAMS` struct * copying instead of moving a shared pointer to a node when constructing a command-handle. --- source/adapters/cuda/command_buffer.cpp | 12 +++++++----- source/adapters/cuda/command_buffer.hpp | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3f7970df53..7beef3f3f8 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -72,11 +72,12 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node{std::move(Node)}, + Params(Params), WorkDim(WorkDim), RefCountInternal(1), + RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); const size_t CopySize = sizeof(size_t) * WorkDim; @@ -375,6 +376,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( NodeParams.blockDimZ = ThreadsPerBlock[2]; NodeParams.sharedMemBytes = LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); + NodeParams.kern = nullptr; NodeParams.extra = nullptr; // Create and add an new kernel node to the Cuda graph @@ -392,8 +394,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, NodeSP, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + hCommandBuffer, hKernel, std::move(NodeSP), NodeParams, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; NewCommand->incrementInternalReferenceCount(); hCommandBuffer->CommandHandles.push_back(NewCommand); diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index e2b09059bf..60e03dc655 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -183,7 +183,7 @@ static inline const char *getUrResultString(ur_result_t Result) { struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr); From 273ac6be0065e0ca553052447f1ec0b2ba154c0e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Fri, 16 Feb 2024 16:14:51 +0000 Subject: [PATCH 05/23] Only destroy executable graph if command-buffer finalized --- source/adapters/cuda/command_buffer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 7beef3f3f8..836854c627 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -66,7 +66,9 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cuGraphDestroy(CudaGraph); // Release the memory allocated to the CudaGraphExec - cuGraphExecDestroy(CudaGraphExec); + if (CudaGraphExec) { + cuGraphExecDestroy(CudaGraphExec); + } } ur_exp_command_buffer_command_handle_t_:: From be622e7cf127f652599f46b23f7e9e8e939e3a82 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 28 Feb 2024 13:28:53 +0000 Subject: [PATCH 06/23] [HIP][CMD-BUF] HIP coverity fixes * Don't throw from constructor * Move rather than copy nodes during sync-point registration * Initalize `NextSyncPoint` --- source/adapters/hip/command_buffer.cpp | 6 +++--- source/adapters/hip/command_buffer.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 9fd3a06927..180c90d064 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -50,7 +50,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, - RefCountInternal{1}, RefCountExternal{1} { + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -65,11 +65,11 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { UR_TRACE(urDeviceRelease(Device)); // Release the memory allocated to the HIPGraph - UR_CHECK_ERROR(hipGraphDestroy(HIPGraph)); + (void)hipGraphDestroy(HIPGraph); // Release the memory allocated to the HIPGraphExec if (HIPGraphExec) { - UR_CHECK_ERROR(hipGraphExecDestroy(HIPGraphExec)); + (void)hipGraphExecDestroy(HIPGraphExec); } } diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index f1b3e32bfb..53b648b5cd 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -254,8 +254,8 @@ struct ur_exp_command_buffer_handle_t_ { ~ur_exp_command_buffer_handle_t_(); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, - std::shared_ptr HIPNode) { - SyncPoints[SyncPoint] = HIPNode; + std::shared_ptr &&HIPNode) { + SyncPoints[SyncPoint] = std::move(HIPNode); NextSyncPoint++; } @@ -269,7 +269,7 @@ struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_sync_point_t addSyncPoint(std::shared_ptr HIPNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - registerSyncPoint(SyncPoint, HIPNode); + registerSyncPoint(SyncPoint, std::move(HIPNode)); return SyncPoint; } uint32_t incrementInternalReferenceCount() noexcept { From b4e3503c627624cf5312943b00de83fe88d35162 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 5 Mar 2024 12:07:31 +0000 Subject: [PATCH 07/23] Add some missing validations in CTS --- test/conformance/device/urDeviceGet.cpp | 36 +++++++++++++++++++ .../enqueue/urEnqueueMemBufferMap.cpp | 13 +++++-- test/conformance/memory/urMemBufferCreate.cpp | 12 +++++++ .../platform/urPlatformGetInfo.cpp | 12 +++++-- test/conformance/program/urProgramBuild.cpp | 5 +++ .../program/urProgramCreateWithIL.cpp | 9 +++++ .../queue/queue_adapter_native_cpu.match | 9 +++++ test/conformance/queue/urQueueCreate.cpp | 11 +++++- .../testing/include/uur/fixtures.h | 20 +++++++++++ 9 files changed, 120 insertions(+), 7 deletions(-) diff --git a/test/conformance/device/urDeviceGet.cpp b/test/conformance/device/urDeviceGet.cpp index e8aa356a58..5ce4c45906 100644 --- a/test/conformance/device/urDeviceGet.cpp +++ b/test/conformance/device/urDeviceGet.cpp @@ -35,6 +35,42 @@ TEST_F(urDeviceGetTest, SuccessSubsetOfDevices) { } } +struct urDeviceGetTestWithDeviceTypeParam + : uur::urAllDevicesTest, + ::testing::WithParamInterface { + + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urAllDevicesTest::SetUp()); + } +}; + +INSTANTIATE_TEST_SUITE_P( + , urDeviceGetTestWithDeviceTypeParam, + ::testing::Values(UR_DEVICE_TYPE_DEFAULT, UR_DEVICE_TYPE_GPU, + UR_DEVICE_TYPE_CPU, UR_DEVICE_TYPE_FPGA, + UR_DEVICE_TYPE_MCA, UR_DEVICE_TYPE_VPU), + [](const ::testing::TestParamInfo &info) { + std::stringstream ss; + ss << info.param; + return ss.str(); + }); + +TEST_P(urDeviceGetTestWithDeviceTypeParam, Success) { + ur_device_type_t device_type = GetParam(); + uint32_t count = 0; + ASSERT_SUCCESS(urDeviceGet(platform, device_type, 0, nullptr, &count)); + ASSERT_GE(devices.size(), count); + + if (count > 0) { + std::vector devices(count); + ASSERT_SUCCESS( + urDeviceGet(platform, device_type, count, devices.data(), nullptr)); + for (auto device : devices) { + ASSERT_NE(nullptr, device); + } + } +} + TEST_F(urDeviceGetTest, InvalidNullHandlePlatform) { uint32_t count; ASSERT_EQ_RESULT( diff --git a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp index 247c9bc3df..4e9af10592 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp @@ -21,14 +21,21 @@ TEST_P(urEnqueueMemBufferMapTest, SuccessRead) { } } -TEST_P(urEnqueueMemBufferMapTest, SuccessWrite) { +using urEnqueueMemBufferMapTestWithWriteFlagParam = + uur::urMemBufferQueueTestWithParam; +UUR_TEST_SUITE_P(urEnqueueMemBufferMapTestWithWriteFlagParam, + ::testing::Values(UR_MAP_FLAG_WRITE, + UR_MAP_FLAG_WRITE_INVALIDATE_REGION), + uur::deviceTestWithParamPrinter); + +TEST_P(urEnqueueMemBufferMapTestWithWriteFlagParam, SuccessWrite) { const std::vector input(count, 0); ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, true, 0, size, input.data(), 0, nullptr, nullptr)); uint32_t *map = nullptr; - ASSERT_SUCCESS(urEnqueueMemBufferMap(queue, buffer, true, UR_MAP_FLAG_WRITE, - 0, size, 0, nullptr, nullptr, + ASSERT_SUCCESS(urEnqueueMemBufferMap(queue, buffer, true, getParam(), 0, + size, 0, nullptr, nullptr, (void **)&map)); for (unsigned i = 0; i < count; ++i) { map[i] = 42; diff --git a/test/conformance/memory/urMemBufferCreate.cpp b/test/conformance/memory/urMemBufferCreate.cpp index a86ebd326e..2e9b46114d 100644 --- a/test/conformance/memory/urMemBufferCreate.cpp +++ b/test/conformance/memory/urMemBufferCreate.cpp @@ -48,6 +48,18 @@ UUR_TEST_SUITE_P(urMemBufferCreateWithHostPtrFlagsTest, UR_MEM_FLAG_USE_HOST_POINTER), uur::deviceTestWithParamPrinter); +TEST_P(urMemBufferCreateWithHostPtrFlagsTest, SUCCESS) { + uur::raii::Mem host_ptr_buffer = nullptr; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_ALLOC_HOST_POINTER, + 4096, nullptr, host_ptr_buffer.ptr())); + + ur_buffer_properties_t properties{UR_STRUCTURE_TYPE_BUFFER_PROPERTIES, + nullptr, host_ptr_buffer.ptr()}; + uur::raii::Mem buffer = nullptr; + ASSERT_SUCCESS(urMemBufferCreate(context, getParam(), 4096, &properties, + buffer.ptr())); +} + TEST_P(urMemBufferCreateWithHostPtrFlagsTest, InvalidHostPtr) { uur::raii::Mem buffer = nullptr; ASSERT_EQ_RESULT( diff --git a/test/conformance/platform/urPlatformGetInfo.cpp b/test/conformance/platform/urPlatformGetInfo.cpp index fa95662ec6..1dc92b26d7 100644 --- a/test/conformance/platform/urPlatformGetInfo.cpp +++ b/test/conformance/platform/urPlatformGetInfo.cpp @@ -19,7 +19,7 @@ INSTANTIATE_TEST_SUITE_P( urPlatformGetInfo, urPlatformGetInfoTest, ::testing::Values(UR_PLATFORM_INFO_NAME, UR_PLATFORM_INFO_VENDOR_NAME, UR_PLATFORM_INFO_VERSION, UR_PLATFORM_INFO_EXTENSIONS, - UR_PLATFORM_INFO_PROFILE), + UR_PLATFORM_INFO_PROFILE, UR_PLATFORM_INFO_BACKEND), [](const ::testing::TestParamInfo &info) { std::stringstream ss; ss << info.param; @@ -30,11 +30,17 @@ TEST_P(urPlatformGetInfoTest, Success) { size_t size = 0; ur_platform_info_t info_type = GetParam(); ASSERT_SUCCESS(urPlatformGetInfo(platform, info_type, 0, nullptr, &size)); - ASSERT_NE(size, 0); + if (info_type == UR_PLATFORM_INFO_BACKEND) { + ASSERT_EQ(size, sizeof(ur_platform_backend_t)); + } else { + ASSERT_NE(size, 0); + } std::vector name(size); ASSERT_SUCCESS( urPlatformGetInfo(platform, info_type, size, name.data(), nullptr)); - ASSERT_EQ(size, std::strlen(name.data()) + 1); + if (info_type != UR_PLATFORM_INFO_BACKEND) { + ASSERT_EQ(size, std::strlen(name.data()) + 1); + } } TEST_P(urPlatformGetInfoTest, InvalidNullHandlePlatform) { diff --git a/test/conformance/program/urProgramBuild.cpp b/test/conformance/program/urProgramBuild.cpp index ec65074c78..97e4db77e3 100644 --- a/test/conformance/program/urProgramBuild.cpp +++ b/test/conformance/program/urProgramBuild.cpp @@ -12,6 +12,11 @@ TEST_P(urProgramBuildTest, Success) { ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); } +TEST_P(urProgramBuildTest, SuccessWithOptions) { + const char *pOptions = ""; + ASSERT_SUCCESS(urProgramBuild(context, program, pOptions)); +} + TEST_P(urProgramBuildTest, InvalidNullHandleContext) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urProgramBuild(nullptr, program, nullptr)); diff --git a/test/conformance/program/urProgramCreateWithIL.cpp b/test/conformance/program/urProgramCreateWithIL.cpp index b439850bac..00f41da6ef 100644 --- a/test/conformance/program/urProgramCreateWithIL.cpp +++ b/test/conformance/program/urProgramCreateWithIL.cpp @@ -36,6 +36,15 @@ TEST_P(urProgramCreateWithILTest, Success) { ASSERT_SUCCESS(urProgramRelease(program)); } +TEST_P(urProgramCreateWithILTest, SuccessWithProperties) { + ur_program_properties_t properties{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES}; + ur_program_handle_t program = nullptr; + ASSERT_SUCCESS(urProgramCreateWithIL( + context, il_binary->data(), il_binary->size(), &properties, &program)); + ASSERT_NE(nullptr, program); + ASSERT_SUCCESS(urProgramRelease(program)); +} + TEST_P(urProgramCreateWithILTest, InvalidNullHandle) { ur_program_handle_t program = nullptr; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, diff --git a/test/conformance/queue/queue_adapter_native_cpu.match b/test/conformance/queue/queue_adapter_native_cpu.match index 98c622a472..c2887b1063 100644 --- a/test/conformance/queue/queue_adapter_native_cpu.match +++ b/test/conformance/queue/queue_adapter_native_cpu.match @@ -1,5 +1,14 @@ urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_PROFILING_ENABLE +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_ON_DEVICE +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_ON_DEVICE_DEFAULT +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_DISCARD_EVENTS +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_PRIORITY_LOW +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_PRIORITY_HIGH +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_SUBMISSION_BATCHED +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_USE_DEFAULT_STREAM +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM urQueueFinishTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urQueueFlushTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_INFO_CONTEXT diff --git a/test/conformance/queue/urQueueCreate.cpp b/test/conformance/queue/urQueueCreate.cpp index 1c5ca6e614..03cda76d50 100644 --- a/test/conformance/queue/urQueueCreate.cpp +++ b/test/conformance/queue/urQueueCreate.cpp @@ -17,7 +17,16 @@ TEST_P(urQueueCreateTest, Success) { using urQueueCreateWithParamTest = uur::urContextTestWithParam; UUR_TEST_SUITE_P(urQueueCreateWithParamTest, testing::Values(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, - UR_QUEUE_FLAG_PROFILING_ENABLE), + UR_QUEUE_FLAG_PROFILING_ENABLE, + UR_QUEUE_FLAG_ON_DEVICE, + UR_QUEUE_FLAG_ON_DEVICE_DEFAULT, + UR_QUEUE_FLAG_DISCARD_EVENTS, + UR_QUEUE_FLAG_PRIORITY_LOW, + UR_QUEUE_FLAG_PRIORITY_HIGH, + UR_QUEUE_FLAG_SUBMISSION_BATCHED, + UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE, + UR_QUEUE_FLAG_USE_DEFAULT_STREAM, + UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM), uur::deviceTestWithParamPrinter); TEST_P(urQueueCreateWithParamTest, SuccessWithProperties) { diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index cf01015eb4..8e0c86f9b3 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -526,6 +526,26 @@ struct urMemBufferQueueTest : urQueueTest { ur_mem_handle_t buffer = nullptr; }; +template +struct urMemBufferQueueTestWithParam : urQueueTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); + ASSERT_SUCCESS(urMemBufferCreate(this->context, UR_MEM_FLAG_READ_WRITE, + size, nullptr, &buffer)); + } + + void TearDown() override { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::TearDown()); + } + + const size_t count = 8; + const size_t size = sizeof(uint32_t) * count; + ur_mem_handle_t buffer = nullptr; +}; + struct urMemImageQueueTest : urQueueTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urQueueTest::SetUp()); From 9874f3697158ff31989b413803925d4472856772 Mon Sep 17 00:00:00 2001 From: Luke Drummond Date: Tue, 5 Mar 2024 14:55:53 +0000 Subject: [PATCH 08/23] [cmake] Always generate compile_commands.json I can't think of a valid reason not to generate compile_commands.json and several reasons why we should 1. I don't like typing configure options that much. 2. I like editor completion to work without me having to regenerate my build. 3. It's a good way to inspect final build flags without having to query the build system. --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c09b5f0a3c..9e0e430c66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,9 @@ set(UR_CONFORMANCE_TARGET_TRIPLES "" CACHE STRING "List of sycl targets to build CTS device binaries for") set(UR_CONFORMANCE_AMD_ARCH "" CACHE STRING "AMD device target ID to build CTS binaries for") +# There's little reason not to generate the compile_commands.json +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + include(Assertions) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) From 1f6918cd644e57c4c860ae4aa8f8fc12645fe514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 6 Mar 2024 14:40:06 +0100 Subject: [PATCH 09/23] [CI] Fix E2E workflows on PR trigger By default 'issue_comment' trigger gets the context of a default branch. If we want to test the actual PR, on which we created/edited a comment we have to fetch and checkout a special ref for PR's merge commit. Note, that 'pull//merge' ref is available as long as the PR is not merged. This commit do not take this under consideration, since this case should be fairly uncommon and would obfuscate the workflow. --- .github/workflows/e2e_core.yml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml index e4c59956dc..f953f25600 100644 --- a/.github/workflows/e2e_core.yml +++ b/.github/workflows/e2e_core.yml @@ -66,10 +66,6 @@ jobs: ls -la ./ rm -rf ./* || true - - uses: xt0rted/pull-request-comment-branch@d97294d304604fa98a2600a6e2f916a84b596dc7 # v2.0.0 - id: comment-branch - if: ${{ always() && inputs.trigger != 'schedule' }} - - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.trigger != 'schedule' }} @@ -90,7 +86,18 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: path: ur-repo - ref: ${{ steps.comment-branch.outputs.head_ref }} + + # On issue_comment trigger (for PRs) we need to fetch special ref for + # proper PR's merge commit. Note, this ref may be absent if the PR is already merged. + - name: Fetch PR's merge commit + if: ${{ inputs.trigger != 'schedule' }} + working-directory: ${{github.workspace}}/ur-repo + env: + PR_NO: ${{github.event.issue.number}} + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* + git checkout origin/pr/${PR_NO}/merge + git rev-parse origin/pr/${PR_NO}/merge - name: Checkout SYCL uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 From 51bbfcf36cd0bdc475f7b2dcf958af4f83ba9078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 6 Mar 2024 15:24:34 +0100 Subject: [PATCH 10/23] [CI] E2E on PR - add more information in the comment --- .github/workflows/e2e_core.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml index f953f25600..e2f374ee1a 100644 --- a/.github/workflows/e2e_core.yml +++ b/.github/workflows/e2e_core.yml @@ -198,8 +198,9 @@ jobs: script: | const adapter = '${{ matrix.adapter.name }}'; const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - const status = '${{ steps.tests.outcome }}'; - const body = `E2E ${adapter} build: \n${url}\n Status: ${status}`; + const test_status = '${{ steps.tests.outcome }}'; + const job_status = '${{ job.status }}'; + const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`; github.rest.issues.createComment({ issue_number: context.issue.number, From 7ed7c91e5a873bc0e0dd8f2c7110b0f743a89fbf Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 7 Mar 2024 16:18:34 +0000 Subject: [PATCH 11/23] [CTS] Treat skipped tests as passes The conformance tests skip tests when a devices returns `UR_RESULT_ERROR_UNSUPPORTED_FEATURE` and the feature under test is optional. As such, skipped tests count as passes because the device has exhibiting valid behaviour. This patch changes how CTS pass rates are calculated to match. --- scripts/ctest_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) mode change 100644 => 100755 scripts/ctest_parser.py diff --git a/scripts/ctest_parser.py b/scripts/ctest_parser.py old mode 100644 new mode 100755 index 1f9c4f6cfe..bd1a84f3ee --- a/scripts/ctest_parser.py +++ b/scripts/ctest_parser.py @@ -33,8 +33,8 @@ def summarize_results(results): total_failed = len(results['Failed']) total_crashed = total - (total_passed + total_skipped + total_failed) - pass_rate_incl_skipped = percent(total_passed, total) - pass_rate_excl_skipped = percent(total_passed, total - total_skipped) + pass_rate_incl_skipped = percent(total_passed + total_skipped, total) + pass_rate_excl_skipped = percent(total_passed, total) skipped_rate = percent(total_skipped, total) failed_rate = percent(total_failed, total) From 87fe8e687981924c6e83c1dd542342e958e5306e Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 7 Mar 2024 08:51:52 -0800 Subject: [PATCH 12/23] [L0] Create/Destory Adapter Handle during lib init Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/CMakeLists.txt | 12 +++++ source/adapters/level_zero/adapter.cpp | 50 +++++++++++-------- source/adapters/level_zero/adapter.hpp | 2 +- .../level_zero/adapter_lib_init_linux.cpp | 26 ++++++++++ .../level_zero/adapter_lib_init_windows.cpp | 29 +++++++++++ source/adapters/level_zero/device.cpp | 4 +- source/adapters/level_zero/platform.cpp | 6 +-- source/adapters/level_zero/queue.cpp | 2 +- 8 files changed, 104 insertions(+), 27 deletions(-) create mode 100644 source/adapters/level_zero/adapter_lib_init_linux.cpp create mode 100644 source/adapters/level_zero/adapter_lib_init_windows.cpp diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 62ba260dea..4cebeefc4f 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -122,6 +122,18 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) +if(WIN32) + target_sources(ur_adapter_level_zero + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_windows.cpp + ) +else() + target_sources(ur_adapter_level_zero + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp + ) +endif() + # TODO: fix level_zero adapter conversion warnings target_compile_options(${TARGET_NAME} PRIVATE $<$:/wd4805 /wd4244> diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 77c0edfb45..504df5b20d 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -11,6 +11,8 @@ #include "adapter.hpp" #include "ur_level_zero.hpp" +ur_adapter_handle_t_ *Adapter; + ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { uint32_t ZeDriverCount = 0; ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr)); @@ -37,8 +39,7 @@ ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { ur_result_t adapterStateInit() { return UR_RESULT_SUCCESS; } ur_adapter_handle_t_::ur_adapter_handle_t_() { - - Adapter.PlatformCache.Compute = [](Result &result) { + PlatformCache.Compute = [](Result &result) { static std::once_flag ZeCallCountInitialized; try { std::call_once(ZeCallCountInitialized, []() { @@ -52,7 +53,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { } // initialize level zero only once. - if (Adapter.ZeResult == std::nullopt) { + if (Adapter->ZeResult == std::nullopt) { // Setting these environment variables before running zeInit will enable // the validation layer in the Level Zero loader. if (UrL0Debug & UR_L0_DEBUG_VALIDATION) { @@ -71,20 +72,20 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { // We must only initialize the driver once, even if urPlatformGet() is // called multiple times. Declaring the return value as "static" ensures // it's only called once. - Adapter.ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY)); + Adapter->ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY)); } - assert(Adapter.ZeResult != + assert(Adapter->ZeResult != std::nullopt); // verify that level-zero is initialized PlatformVec platforms; // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. - if (*Adapter.ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { + if (*Adapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { result = std::move(platforms); return; } - if (*Adapter.ZeResult != ZE_RESULT_SUCCESS) { + if (*Adapter->ZeResult != ZE_RESULT_SUCCESS) { urPrint("zeInit: Level Zero initialization failure\n"); - result = ze2urResult(*Adapter.ZeResult); + result = ze2urResult(*Adapter->ZeResult); return; } @@ -97,8 +98,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { }; } -ur_adapter_handle_t_ Adapter{}; - ur_result_t adapterStateTeardown() { bool LeakFound = false; @@ -203,11 +202,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( ///< adapters available. ) { if (NumEntries > 0 && Adapters) { - std::lock_guard Lock{Adapter.Mutex}; - if (Adapter.RefCount++ == 0) { - adapterStateInit(); + if (Adapter) { + std::lock_guard Lock{Adapter->Mutex}; + if (Adapter->RefCount++ == 0) { + adapterStateInit(); + } + *Adapters = Adapter; + } else { + return UR_RESULT_ERROR_UNINITIALIZED; } - *Adapters = &Adapter; } if (NumAdapters) { @@ -218,17 +221,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - std::lock_guard Lock{Adapter.Mutex}; - if (--Adapter.RefCount == 0) { - return adapterStateTeardown(); + // Check first if the Adapter pointer is valid + if (Adapter) { + std::lock_guard Lock{Adapter->Mutex}; + if (--Adapter->RefCount == 0) { + return adapterStateTeardown(); + } } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - std::lock_guard Lock{Adapter.Mutex}; - Adapter.RefCount++; + if (Adapter) { + std::lock_guard Lock{Adapter->Mutex}; + Adapter->RefCount++; + } else { + return UR_RESULT_ERROR_UNINITIALIZED; + } return UR_RESULT_SUCCESS; } @@ -257,7 +267,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, case UR_ADAPTER_INFO_BACKEND: return ReturnValue(UR_ADAPTER_BACKEND_LEVEL_ZERO); case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(Adapter.RefCount.load()); + return ReturnValue(Adapter->RefCount.load()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/level_zero/adapter.hpp b/source/adapters/level_zero/adapter.hpp index 0942db852a..e3e05b118b 100644 --- a/source/adapters/level_zero/adapter.hpp +++ b/source/adapters/level_zero/adapter.hpp @@ -25,4 +25,4 @@ struct ur_adapter_handle_t_ { ZeCache> PlatformCache; }; -extern ur_adapter_handle_t_ Adapter; +extern ur_adapter_handle_t_ *Adapter; diff --git a/source/adapters/level_zero/adapter_lib_init_linux.cpp b/source/adapters/level_zero/adapter_lib_init_linux.cpp new file mode 100644 index 0000000000..d6ee1bcd1c --- /dev/null +++ b/source/adapters/level_zero/adapter_lib_init_linux.cpp @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) 2024 Intel Corporation +* +* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.TXT +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +* +* @file adapter_lib_init_linux.cpp +* +*/ + +#include "adapter.hpp" +#include "ur_level_zero.hpp" + +void __attribute__((constructor)) createAdapterHandle() { + if (!Adapter) { + Adapter = new ur_adapter_handle_t_(); + } +} + +void __attribute__((destructor)) deleteAdapterHandle() { + if (Adapter) { + delete Adapter; + } +} diff --git a/source/adapters/level_zero/adapter_lib_init_windows.cpp b/source/adapters/level_zero/adapter_lib_init_windows.cpp new file mode 100644 index 0000000000..f8384f890c --- /dev/null +++ b/source/adapters/level_zero/adapter_lib_init_windows.cpp @@ -0,0 +1,29 @@ +/* +* +* Copyright (C) 2024 Intel Corporation +* +* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.TXT +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +* +* @file adapter_lib_init_windows.cpp +* +*/ + +#include "adapter.hpp" +#include "ur_level_zero.hpp" +#include + +extern "C" BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD fdwReason, + LPVOID lpvReserved) { + if (fdwReason == DLL_PROCESS_DETACH) { + if (Adapter) { + delete Adapter; + } + } else if (fdwReason == DLL_PROCESS_ATTACH) { + if (!Adapter) { + Adapter = new ur_adapter_handle_t_(); + } + } + return TRUE; +} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 2f6b3a91ff..624c5a7e7f 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1442,7 +1442,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // a valid Level Zero device. ur_device_handle_t Dev = nullptr; - if (const auto *platforms = Adapter.PlatformCache->get_value()) { + if (const auto *platforms = Adapter->PlatformCache->get_value()) { for (const auto &p : *platforms) { Dev = p->getDeviceFromNativeHandle(ZeDevice); if (Dev) { @@ -1453,7 +1453,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( } } } else { - return Adapter.PlatformCache->get_error(); + return Adapter->PlatformCache->get_error(); } if (Dev == nullptr) diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index cca3b2173d..d8e0583e73 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -29,7 +29,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( ) { // Platform handles are cached for reuse. This is to ensure consistent // handle pointers across invocations and to improve retrieval performance. - if (const auto *cached_platforms = Adapter.PlatformCache->get_value(); + if (const auto *cached_platforms = Adapter->PlatformCache->get_value(); cached_platforms) { uint32_t nplatforms = (uint32_t)cached_platforms->size(); if (NumPlatforms) { @@ -41,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( } } } else { - return Adapter.PlatformCache->get_error(); + return Adapter->PlatformCache->get_error(); } return UR_RESULT_SUCCESS; @@ -133,7 +133,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( auto ZeDriver = ur_cast(NativePlatform); uint32_t NumPlatforms = 0; - ur_adapter_handle_t AdapterHandle = &Adapter; + ur_adapter_handle_t AdapterHandle = Adapter; UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); if (NumPlatforms) { diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 2009c3c6f5..a50921f207 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -569,7 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( // Maybe this is not completely correct. uint32_t NumEntries = 1; ur_platform_handle_t Platform{}; - ur_adapter_handle_t AdapterHandle = &Adapter; + ur_adapter_handle_t AdapterHandle = Adapter; UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr)); ur_device_handle_t UrDevice = Device; From aa690790dbdbef57b545f6dd68cd5c46d37107ed Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 7 Mar 2024 11:23:50 -0800 Subject: [PATCH 13/23] Fix license format Signed-off-by: Neil R. Spruit --- .../level_zero/adapter_lib_init_linux.cpp | 20 +++++++++---------- .../level_zero/adapter_lib_init_windows.cpp | 20 +++++++++---------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/source/adapters/level_zero/adapter_lib_init_linux.cpp b/source/adapters/level_zero/adapter_lib_init_linux.cpp index d6ee1bcd1c..13044d251f 100644 --- a/source/adapters/level_zero/adapter_lib_init_linux.cpp +++ b/source/adapters/level_zero/adapter_lib_init_linux.cpp @@ -1,14 +1,12 @@ -/* -* -* Copyright (C) 2024 Intel Corporation -* -* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -* See LICENSE.TXT -* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -* -* @file adapter_lib_init_linux.cpp -* -*/ +//===--------- adapter_lib_init_linux.cpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include "adapter.hpp" #include "ur_level_zero.hpp" diff --git a/source/adapters/level_zero/adapter_lib_init_windows.cpp b/source/adapters/level_zero/adapter_lib_init_windows.cpp index f8384f890c..7f5146461f 100644 --- a/source/adapters/level_zero/adapter_lib_init_windows.cpp +++ b/source/adapters/level_zero/adapter_lib_init_windows.cpp @@ -1,14 +1,12 @@ -/* -* -* Copyright (C) 2024 Intel Corporation -* -* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -* See LICENSE.TXT -* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -* -* @file adapter_lib_init_windows.cpp -* -*/ +//===--------- adapter_lib_init_windows.cpp - Level Zero Adapter ----------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include "adapter.hpp" #include "ur_level_zero.hpp" From 91a37cfb9b45edc8b6cb13f0c12c5c93b0a1bf6a Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 7 Mar 2024 12:49:56 -0800 Subject: [PATCH 14/23] Delete Adapter in atexit after refcnt is 0 due to multi DLLMain - In Windows, SYCL and UMF both define DLLMain such that a DLLMain for only the adapter's is not possible. To fix this, the L0 adapter inits the global adapter at variable init and registers an atexit teardown after refcnt == 0. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/CMakeLists.txt | 9 +-- source/adapters/level_zero/adapter.cpp | 56 +++++++++++++------ source/adapters/level_zero/adapter.hpp | 2 +- .../level_zero/adapter_lib_init_linux.cpp | 9 +-- .../level_zero/adapter_lib_init_windows.cpp | 27 --------- source/adapters/level_zero/device.cpp | 4 +- source/adapters/level_zero/platform.cpp | 6 +- source/adapters/level_zero/queue.cpp | 2 +- 8 files changed, 52 insertions(+), 63 deletions(-) delete mode 100644 source/adapters/level_zero/adapter_lib_init_windows.cpp diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 4cebeefc4f..d26d0aeb26 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -122,12 +122,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) -if(WIN32) - target_sources(ur_adapter_level_zero - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_windows.cpp - ) -else() +if(NOT WIN32) target_sources(ur_adapter_level_zero PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 504df5b20d..9d5b1038a2 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -11,7 +11,13 @@ #include "adapter.hpp" #include "ur_level_zero.hpp" -ur_adapter_handle_t_ *Adapter; +// Due to multiple DLLMain definitions with SYCL, Global Adapter is init at +// variable creation. +#if defined(_WIN32) +ur_adapter_handle_t_ *GlobalAdapter = new ur_adapter_handle_t_(); +#else +ur_adapter_handle_t_ *GlobalAdapter; +#endif ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { uint32_t ZeDriverCount = 0; @@ -53,7 +59,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { } // initialize level zero only once. - if (Adapter->ZeResult == std::nullopt) { + if (GlobalAdapter->ZeResult == std::nullopt) { // Setting these environment variables before running zeInit will enable // the validation layer in the Level Zero loader. if (UrL0Debug & UR_L0_DEBUG_VALIDATION) { @@ -72,20 +78,21 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { // We must only initialize the driver once, even if urPlatformGet() is // called multiple times. Declaring the return value as "static" ensures // it's only called once. - Adapter->ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY)); + GlobalAdapter->ZeResult = + ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY)); } - assert(Adapter->ZeResult != + assert(GlobalAdapter->ZeResult != std::nullopt); // verify that level-zero is initialized PlatformVec platforms; // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. - if (*Adapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { + if (*GlobalAdapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { result = std::move(platforms); return; } - if (*Adapter->ZeResult != ZE_RESULT_SUCCESS) { + if (*GlobalAdapter->ZeResult != ZE_RESULT_SUCCESS) { urPrint("zeInit: Level Zero initialization failure\n"); - result = ze2urResult(*Adapter->ZeResult); + result = ze2urResult(*GlobalAdapter->ZeResult); return; } @@ -98,6 +105,14 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { }; } +#if defined(_WIN32) +void globalAdapterWindowsCleanup() { + if (GlobalAdapter) { + delete GlobalAdapter; + } +} +#endif + ur_result_t adapterStateTeardown() { bool LeakFound = false; @@ -183,6 +198,11 @@ ur_result_t adapterStateTeardown() { } if (LeakFound) return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + // Due to multiple DLLMain definitions with SYCL, register to cleanup the + // Global Adapter after refcnt is 0 +#if defined(_WIN32) + std::atexit(globalAdapterWindowsCleanup); +#endif return UR_RESULT_SUCCESS; } @@ -202,12 +222,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( ///< adapters available. ) { if (NumEntries > 0 && Adapters) { - if (Adapter) { - std::lock_guard Lock{Adapter->Mutex}; - if (Adapter->RefCount++ == 0) { + if (GlobalAdapter) { + std::lock_guard Lock{GlobalAdapter->Mutex}; + if (GlobalAdapter->RefCount++ == 0) { adapterStateInit(); } - *Adapters = Adapter; + *Adapters = GlobalAdapter; } else { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -222,9 +242,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { // Check first if the Adapter pointer is valid - if (Adapter) { - std::lock_guard Lock{Adapter->Mutex}; - if (--Adapter->RefCount == 0) { + if (GlobalAdapter) { + std::lock_guard Lock{GlobalAdapter->Mutex}; + if (--GlobalAdapter->RefCount == 0) { return adapterStateTeardown(); } } @@ -233,9 +253,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - if (Adapter) { - std::lock_guard Lock{Adapter->Mutex}; - Adapter->RefCount++; + if (GlobalAdapter) { + std::lock_guard Lock{GlobalAdapter->Mutex}; + GlobalAdapter->RefCount++; } else { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -267,7 +287,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, case UR_ADAPTER_INFO_BACKEND: return ReturnValue(UR_ADAPTER_BACKEND_LEVEL_ZERO); case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(Adapter->RefCount.load()); + return ReturnValue(GlobalAdapter->RefCount.load()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/level_zero/adapter.hpp b/source/adapters/level_zero/adapter.hpp index e3e05b118b..1fdf3a9294 100644 --- a/source/adapters/level_zero/adapter.hpp +++ b/source/adapters/level_zero/adapter.hpp @@ -25,4 +25,4 @@ struct ur_adapter_handle_t_ { ZeCache> PlatformCache; }; -extern ur_adapter_handle_t_ *Adapter; +extern ur_adapter_handle_t_ *GlobalAdapter; diff --git a/source/adapters/level_zero/adapter_lib_init_linux.cpp b/source/adapters/level_zero/adapter_lib_init_linux.cpp index 13044d251f..bb6f1d4e6d 100644 --- a/source/adapters/level_zero/adapter_lib_init_linux.cpp +++ b/source/adapters/level_zero/adapter_lib_init_linux.cpp @@ -12,13 +12,14 @@ #include "ur_level_zero.hpp" void __attribute__((constructor)) createAdapterHandle() { - if (!Adapter) { - Adapter = new ur_adapter_handle_t_(); + if (!GlobalAdapter) { + GlobalAdapter = new ur_adapter_handle_t_(); } } void __attribute__((destructor)) deleteAdapterHandle() { - if (Adapter) { - delete Adapter; + if (GlobalAdapter) { + delete GlobalAdapter; + GlobalAdapter = nullptr; } } diff --git a/source/adapters/level_zero/adapter_lib_init_windows.cpp b/source/adapters/level_zero/adapter_lib_init_windows.cpp deleted file mode 100644 index 7f5146461f..0000000000 --- a/source/adapters/level_zero/adapter_lib_init_windows.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===--------- adapter_lib_init_windows.cpp - Level Zero Adapter ----------===// -// -// Copyright (C) 2023 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "adapter.hpp" -#include "ur_level_zero.hpp" -#include - -extern "C" BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD fdwReason, - LPVOID lpvReserved) { - if (fdwReason == DLL_PROCESS_DETACH) { - if (Adapter) { - delete Adapter; - } - } else if (fdwReason == DLL_PROCESS_ATTACH) { - if (!Adapter) { - Adapter = new ur_adapter_handle_t_(); - } - } - return TRUE; -} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 624c5a7e7f..437b4d6603 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1442,7 +1442,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // a valid Level Zero device. ur_device_handle_t Dev = nullptr; - if (const auto *platforms = Adapter->PlatformCache->get_value()) { + if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { for (const auto &p : *platforms) { Dev = p->getDeviceFromNativeHandle(ZeDevice); if (Dev) { @@ -1453,7 +1453,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( } } } else { - return Adapter->PlatformCache->get_error(); + return GlobalAdapter->PlatformCache->get_error(); } if (Dev == nullptr) diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index d8e0583e73..ab577247bd 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -29,7 +29,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( ) { // Platform handles are cached for reuse. This is to ensure consistent // handle pointers across invocations and to improve retrieval performance. - if (const auto *cached_platforms = Adapter->PlatformCache->get_value(); + if (const auto *cached_platforms = GlobalAdapter->PlatformCache->get_value(); cached_platforms) { uint32_t nplatforms = (uint32_t)cached_platforms->size(); if (NumPlatforms) { @@ -41,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( } } } else { - return Adapter->PlatformCache->get_error(); + return GlobalAdapter->PlatformCache->get_error(); } return UR_RESULT_SUCCESS; @@ -133,7 +133,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( auto ZeDriver = ur_cast(NativePlatform); uint32_t NumPlatforms = 0; - ur_adapter_handle_t AdapterHandle = Adapter; + ur_adapter_handle_t AdapterHandle = GlobalAdapter; UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); if (NumPlatforms) { diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index a50921f207..17ead460d0 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -569,7 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( // Maybe this is not completely correct. uint32_t NumEntries = 1; ur_platform_handle_t Platform{}; - ur_adapter_handle_t AdapterHandle = Adapter; + ur_adapter_handle_t AdapterHandle = GlobalAdapter; UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr)); ur_device_handle_t UrDevice = Device; From 5518b489d823bac40cacc7f28aeb70d2c3113eb7 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 7 Mar 2024 16:05:36 -0800 Subject: [PATCH 15/23] Remove error return for adapter and change get to create temp adapter - To handle the customer usecases, don't return uinit when the adapter is used after the adapter library has already closed. - Given the global adapter is gone, but get adapter is called, then a temp adapter struct is allocated and an ondemand atexit cleanup of this memory is registered. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/adapter.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 9d5b1038a2..39a0934b4f 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -105,13 +105,11 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { }; } -#if defined(_WIN32) -void globalAdapterWindowsCleanup() { +void globalAdapterOnDemandCleanup() { if (GlobalAdapter) { delete GlobalAdapter; } } -#endif ur_result_t adapterStateTeardown() { bool LeakFound = false; @@ -201,7 +199,7 @@ ur_result_t adapterStateTeardown() { // Due to multiple DLLMain definitions with SYCL, register to cleanup the // Global Adapter after refcnt is 0 #if defined(_WIN32) - std::atexit(globalAdapterWindowsCleanup); + std::atexit(globalAdapterOnDemandCleanup); #endif return UR_RESULT_SUCCESS; @@ -227,10 +225,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( if (GlobalAdapter->RefCount++ == 0) { adapterStateInit(); } - *Adapters = GlobalAdapter; } else { - return UR_RESULT_ERROR_UNINITIALIZED; + // If the GetAdapter is called after the Library began or was torndown, + // then temporarily create a new Adapter handle and register a new + // cleanup. + GlobalAdapter = new ur_adapter_handle_t_(); + std::lock_guard Lock{GlobalAdapter->Mutex}; + if (GlobalAdapter->RefCount++ == 0) { + adapterStateInit(); + } + std::atexit(globalAdapterOnDemandCleanup); } + *Adapters = GlobalAdapter; } if (NumAdapters) { @@ -256,8 +262,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; GlobalAdapter->RefCount++; - } else { - return UR_RESULT_ERROR_UNINITIALIZED; } return UR_RESULT_SUCCESS; From 2b3d016a8a9779225725d3010452f79211c10132 Mon Sep 17 00:00:00 2001 From: "He, Wenju" Date: Fri, 1 Mar 2024 17:01:57 -0800 Subject: [PATCH 16/23] [Coverity][L0] Remove overlapping mem copy in urBindlessImagesSampledImageCreateExp --- source/adapters/level_zero/image.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index 0d986de0ad..582121d91a 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -738,13 +738,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( hContext, hDevice, hImageMem, pImageFormat, pImageDesc, phMem, phImage)); struct combined_sampled_image_handle { - uint64_t raw_image_handle; - uint64_t raw_sampler_handle; + uint64_t RawImageHandle; + uint64_t RawSamplerHandle; }; - combined_sampled_image_handle *sampledImageHandle = + auto *SampledImageHandle = reinterpret_cast(phImage); - sampledImageHandle->raw_image_handle = reinterpret_cast(*phImage); - sampledImageHandle->raw_sampler_handle = + SampledImageHandle->RawSamplerHandle = reinterpret_cast(hSampler->ZeSampler); return UR_RESULT_SUCCESS; From b1c9d0b3337b5863c57805c572fb08447acae3f2 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Mon, 11 Mar 2024 13:40:04 +0000 Subject: [PATCH 17/23] [CUDA] Fix build issue with version < 12.0 - Default initialize CUDA_KERNEL_NODE_PARAMS --- source/adapters/cuda/command_buffer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 836854c627..d3f270c701 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -368,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( try { // Set node param structure with the kernel related data auto &ArgIndices = hKernel->getArgIndices(); - CUDA_KERNEL_NODE_PARAMS NodeParams; + CUDA_KERNEL_NODE_PARAMS NodeParams = {}; NodeParams.func = CuFunc; NodeParams.gridDimX = BlocksPerGrid[0]; NodeParams.gridDimY = BlocksPerGrid[1]; @@ -378,8 +378,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( NodeParams.blockDimZ = ThreadsPerBlock[2]; NodeParams.sharedMemBytes = LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); - NodeParams.kern = nullptr; - NodeParams.extra = nullptr; // Create and add an new kernel node to the Cuda graph UR_CHECK_ERROR(cuGraphAddKernelNode(&GraphNode, hCommandBuffer->CudaGraph, From 0446c65e869d5098fa006d0035a5b00240400bee Mon Sep 17 00:00:00 2001 From: Konrad Kusiak Date: Wed, 21 Feb 2024 14:58:54 +0000 Subject: [PATCH 18/23] Fixed issue with function pointer typedefs for windows build --- source/adapters/cuda/tracing.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/source/adapters/cuda/tracing.cpp b/source/adapters/cuda/tracing.cpp index e3acf03165..1552e5e236 100644 --- a/source/adapters/cuda/tracing.cpp +++ b/source/adapters/cuda/tracing.cpp @@ -27,20 +27,20 @@ using tracing_event_t = xpti_td *; using subscriber_handle_t = CUpti_SubscriberHandle; -using cuptiSubscribe_fn = CUPTIAPI -CUptiResult (*)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, - void *userdata); +using cuptiSubscribe_fn = + CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *subscriber, + CUpti_CallbackFunc callback, void *userdata); -using cuptiUnsubscribe_fn = CUPTIAPI -CUptiResult (*)(CUpti_SubscriberHandle subscriber); +using cuptiUnsubscribe_fn = + CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle subscriber); -using cuptiEnableDomain_fn = CUPTIAPI -CUptiResult (*)(uint32_t enable, CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain); +using cuptiEnableDomain_fn = + CUptiResult(CUPTIAPI *)(uint32_t enable, CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain); -using cuptiEnableCallback_fn = CUPTIAPI -CUptiResult (*)(uint32_t enable, CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, CUpti_CallbackId cbid); +using cuptiEnableCallback_fn = + CUptiResult(CUPTIAPI *)(uint32_t enable, CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain, CUpti_CallbackId cbid); #define LOAD_CUPTI_SYM(p, lib, x) \ p.x = (cupti##x##_fn)ur_loader::LibLoader::getFunctionPtr(lib.get(), \ From cb7b1262f3c0df42d9b16e67dc27b4fb75a301b9 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Tue, 12 Mar 2024 15:48:36 +0800 Subject: [PATCH 19/23] [OCL] Gracefully tear down adapter in case that some globals have been released Sometimes the adapter may exit before sycl lib, if sycl lib call urAdapterRelease later it may cause segment fault since some globals already have been released. --- source/adapters/opencl/adapter.cpp | 43 +++++++++++++++++++++++------- source/adapters/opencl/common.hpp | 2 +- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 8ae1e77755..c9e0e8af20 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -10,23 +10,40 @@ #include "common.hpp" +#ifdef _WIN32 +#include +#endif + struct ur_adapter_handle_t_ { std::atomic RefCount = 0; std::mutex Mutex; }; -ur_adapter_handle_t_ adapter{}; +static ur_adapter_handle_t_ *adapter = new ur_adapter_handle_t_(); + +static void globalAdapterShutdown() { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + if (adapter) { + delete adapter; + adapter = nullptr; + } +} UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { - std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = std::make_unique(); + std::lock_guard Lock{adapter->Mutex}; + if (adapter->RefCount++ == 0) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); } - *phAdapters = &adapter; + *phAdapters = adapter; + + atexit(globalAdapterShutdown); } if (pNumAdapters) { @@ -37,14 +54,20 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - ++adapter.RefCount; + ++adapter->RefCount; return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - cl_ext::ExtFuncPtrCache.reset(); + // Check first if the adapter is valid pointer + if (adapter) { + std::lock_guard Lock{adapter->Mutex}; + if (--adapter->RefCount == 0) { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + } } return UR_RESULT_SUCCESS; } @@ -68,7 +91,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, case UR_ADAPTER_INFO_BACKEND: return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(adapter.RefCount.load()); + return ReturnValue(adapter->RefCount.load()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0667cd3d17..43d1c12b1e 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -349,7 +349,7 @@ struct ExtFuncPtrCacheT { // piTeardown to avoid issues with static destruction order (a user application // might have static objects that indirectly access this cache in their // destructor). -inline std::unique_ptr ExtFuncPtrCache; +inline ExtFuncPtrCacheT *ExtFuncPtrCache; // USM helper function to get an extension function pointer template From aaa0458b782ea6183b21ead1ff14a947afe40a30 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Tue, 12 Mar 2024 20:02:49 +0800 Subject: [PATCH 20/23] Remove unused header file --- source/adapters/opencl/adapter.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index c9e0e8af20..c25319ce67 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -10,10 +10,6 @@ #include "common.hpp" -#ifdef _WIN32 -#include -#endif - struct ur_adapter_handle_t_ { std::atomic RefCount = 0; std::mutex Mutex; From 11ecfd3ef8afa483c4859b626751a18f9cd679e6 Mon Sep 17 00:00:00 2001 From: Raiyan Latif Date: Thu, 29 Feb 2024 05:32:45 -0800 Subject: [PATCH 21/23] [L0] Add support for in-order lists using L0 driver Signed-off-by: Raiyan Latif --- source/adapters/level_zero/device.cpp | 13 ++++++ source/adapters/level_zero/device.hpp | 3 ++ source/adapters/level_zero/event.cpp | 57 +++++++++++++++++++++------ source/adapters/level_zero/kernel.cpp | 4 +- source/adapters/level_zero/queue.cpp | 10 ++++- 5 files changed, 71 insertions(+), 16 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 437b4d6603..9fae1ed4af 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1054,6 +1054,19 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() { return EnableRelaxedAllocationLimits; } +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + static const bool UseDriverInOrderLists = [] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + if (!UrRet) + return false; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 94480336c5..a57a97d38d 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object { // Read env settings to select immediate commandlist mode. ImmCmdlistMode useImmediateCommandLists(); + // Whether Adapter uses driver's implementation of in-order lists or not + bool useDriverInOrderLists(); + // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 57b839a714..c9d1c7d6b4 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -43,6 +43,20 @@ static const bool UseMultipleCmdlistBarriers = [] { return std::atoi(UseMultipleCmdlistBarriersFlag) > 0; }(); +bool WaitListEmptyOrAllEventsFromSameQueue( + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList) { + if (!NumEventsInWaitList) + return true; + + for (uint32_t i = 0; i < NumEventsInWaitList; ++i) { + if (Queue != EventWaitList[i]->UrQueue) + return false; + } + + return true; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -206,21 +220,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( bool IsInternal = OutEvent == nullptr; ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; - auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList, - EventWaitList]() { - if (!NumEventsInWaitList) - return true; - - for (uint32_t I = 0; I < NumEventsInWaitList; ++I) - if (Queue != EventWaitList[I]->UrQueue) - return false; - - return true; - }; - // For in-order queue and wait-list which is empty or has events from // the same queue just use the last command event as the barrier event. - if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue() && + if (Queue->isInOrderQueue() && + WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, + EventWaitList) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { UR_CALL(urEventRetain(Queue->LastCommandEvent)); *Event = Queue->LastCommandEvent; @@ -1189,6 +1193,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded) IncludeLastCommandEvent = false; + // If we are using L0 native implementation for handling in-order queues, + // then we don't need to add the last enqueued event into the waitlist, as + // the native driver implementation will already ensure in-order semantics. + // The only exception is when a different immediate command was last used on + // the same UR Queue. + if (CurQueue->Device->useDriverInOrderLists() && CurQueue->isInOrderQueue() && + CurQueue->UsingImmCmdLists) { + auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine); + uint32_t QueueGroupOrdinal, QueueIndex; + auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex, + /*QueryOnly */ true); + auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex]; + IncludeLastCommandEvent &= + CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() && + NextImmCmdList != CurQueue->LastUsedCommandList; + } + try { uint32_t TmpListLength = 0; @@ -1205,6 +1226,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( this->UrEventList = new ur_event_handle_t[EventListLength]; } + // For in-order queue and wait-list which is empty or has events only from + // the same queue then we don't need to wait on any other additional events + if (CurQueue->Device->useDriverInOrderLists() && + CurQueue->isInOrderQueue() && + WaitListEmptyOrAllEventsFromSameQueue(CurQueue, EventListLength, + EventList)) { + this->Length = TmpListLength; + return UR_RESULT_SUCCESS; + } + if (EventListLength > 0) { for (uint32_t I = 0; I < EventListLength; I++) { { diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 0e5ce3215a..c40e4ef0e3 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // the code can do a urKernelRelease on this kernel. (*Event)->CommandData = (void *)Kernel; - // Increment the reference count of the Kernel and indicate that the Kernel is - // in use. Once the event has been signalled, the code in + // Increment the reference count of the Kernel and indicate that the Kernel + // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. UR_CALL(urKernelRetain(Kernel)); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 17ead460d0..187f4f75f9 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1870,6 +1870,10 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + if (Device->useDriverInOrderLists() && isInOrderQueue()) { + ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER; + } + ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice, &ZeCommandListDesc, &ZeCommandList)); @@ -1985,7 +1989,11 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { // Evaluate performance of explicit usage for "0" index. if (QueueIndex != 0) { - ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; + ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; + } + + if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER; } // Check if context's command list cache has an immediate command list with From aa98f7acd8b8eb99c4b2927f10a6e29df9f6bec4 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Wed, 13 Mar 2024 13:32:32 +0800 Subject: [PATCH 22/23] Fix pre-ci failures --- source/adapters/opencl/adapter.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index c25319ce67..4c1bb6bca1 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -15,7 +15,7 @@ struct ur_adapter_handle_t_ { std::mutex Mutex; }; -static ur_adapter_handle_t_ *adapter = new ur_adapter_handle_t_(); +static ur_adapter_handle_t_ *adapter = nullptr; static void globalAdapterShutdown() { if (cl_ext::ExtFuncPtrCache) { @@ -32,14 +32,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { + // Sometimes urAdaterGet may be called after the library already been torn + // down, we also need to create a temporary handle for it. + if (!adapter) { + adapter = new ur_adapter_handle_t_(); + atexit(globalAdapterShutdown); + } + std::lock_guard Lock{adapter->Mutex}; if (adapter->RefCount++ == 0) { cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); } *phAdapters = adapter; - - atexit(globalAdapterShutdown); } if (pNumAdapters) { From 3ca422a21e9f230fac614b53d237877a1c845498 Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Fri, 19 Jan 2024 16:30:26 +0000 Subject: [PATCH 23/23] [Bindless][CUDA] Mipmap interop Extends the CUDA adapter to allow for mipmap interop with bindless images --- source/adapters/cuda/image.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index dc08af248a..8d2610626e 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -1006,17 +1006,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( ArrayDesc.Format = format; CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapDesc = {}; - mipmapDesc.numLevels = 1; + mipmapDesc.numLevels = pImageDesc->numMipLevel; mipmapDesc.arrayDesc = ArrayDesc; + // External memory is mapped to a CUmipmappedArray + // If desired, a CUarray is retrieved from the mipmaps 0th level CUmipmappedArray memMipMap; UR_CHECK_ERROR(cuExternalMemoryGetMappedMipmappedArray( &memMipMap, (CUexternalMemory)hInteropMem, &mipmapDesc)); - CUarray memArray; - UR_CHECK_ERROR(cuMipmappedArrayGetLevel(&memArray, memMipMap, 0)); + if (pImageDesc->numMipLevel > 1) { + *phImageMem = (ur_exp_image_mem_handle_t)memMipMap; + } else { + CUarray memArray; + UR_CHECK_ERROR(cuMipmappedArrayGetLevel(&memArray, memMipMap, 0)); - *phImageMem = (ur_exp_image_mem_handle_t)memArray; + *phImageMem = (ur_exp_image_mem_handle_t)memArray; + } } catch (ur_result_t Err) { return Err;