diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 1850083caa..d43ae07cdb 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -156,17 +156,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - [[maybe_unused]] ur_adapter_handle_t - AdapterHandle, ///< [in] handle of the platform instance + ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. - [[maybe_unused]] int32_t - *Error ///< [out] pointer to an integer where the adapter specific - ///< error code will be stored. + int32_t *Error ///< [out] pointer to an integer where the adapter specific + ///< error code will be stored. ) { - AdapterHandle = &Adapter; *Message = ErrorMessage; - Error = &ErrorAdapterNativeCode; + *Error = ErrorAdapterNativeCode; return ErrorMessageCode; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index acc7c755f4..05b66e12f4 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -12,6 +12,7 @@ #include "ur_level_zero.hpp" #include #include +#include UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance @@ -353,8 +354,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE)); case UR_DEVICE_INFO_PARTITION_TYPE: { // For root-device there is no partitioning to report. - if (pSize && !Device->isSubDevice()) { - *pSize = 0; + if (Device->SubDeviceCreationProperty == std::nullopt || + !Device->isSubDevice()) { + if (pSize) + *pSize = 0; return UR_RESULT_SUCCESS; } @@ -365,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(cslice); } - return ReturnValue(Device->SubDeviceCreationProperty); + return ReturnValue(*Device->SubDeviceCreationProperty); } // Everything under here is not supported yet case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: @@ -1218,16 +1221,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); for (uint32_t I = 0; I < NumDevices; I++) { - Device->SubDevices[I]->SubDeviceCreationProperty = - Properties->pProperties[0]; - if (Properties->pProperties[0].type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { // In case the value is NEXT_PARTITIONABLE, we need to change it to the // chosen domain. This will always be NUMA since that's the only domain // supported by level zero. - Device->SubDevices[I]->SubDeviceCreationProperty.value.affinity_domain = - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; OutDevices[I] = Device->SubDevices[I]; // reusing the same pi_device needs to increment the reference count diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 5f34efab44..3b91b70058 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ struct ur_device_handle_t_ : _ur_object { // If this device is a subdevice, this variable contains the properties that // were used during its creation. - ur_device_partition_property_t SubDeviceCreationProperty; + std::optional SubDeviceCreationProperty; // PI platform to which this device belongs. // This field is only set at _ur_device_handle_t creation time, and cannot diff --git a/source/adapters/level_zero/kernel.hpp b/source/adapters/level_zero/kernel.hpp index 64f6e4f939..4ef21ce18b 100644 --- a/source/adapters/level_zero/kernel.hpp +++ b/source/adapters/level_zero/kernel.hpp @@ -16,13 +16,15 @@ struct ur_kernel_handle_t_ : _ur_object { ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_program_handle_t Program) - : Program{Program}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{nullptr}, Program{Program}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_context_handle_t Context) - : Context{Context}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{Context}, Program{nullptr}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index aefa661dac..fa3ef18e47 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -2078,9 +2078,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, auto &Allocation = Allocations[Device]; // Sub-buffers don't maintain own allocations but rely on parent buffer. - if (isSubBuffer()) { - UR_CALL(SubBuffer.Parent->getZeHandle(ZeHandle, AccessMode, Device)); - ZeHandle += SubBuffer.Origin; + if (SubBuffer) { + UR_CALL(SubBuffer->Parent->getZeHandle(ZeHandle, AccessMode, Device)); + ZeHandle += SubBuffer->Origin; // Still store the allocation info in the PI sub-buffer for // getZeHandlePtr to work. At least zeKernelSetArgumentValue needs to // be given a pointer to the allocation handle rather than its value. @@ -2312,7 +2312,7 @@ ur_result_t _ur_buffer::free() { // Buffer constructor _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, char *HostPtr, bool ImportedHostPtr = false) - : ur_mem_handle_t_(Context), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context), Size(Size) { // We treat integrated devices (physical memory shared with the CPU) // differently from discrete devices (those with distinct memories). @@ -2347,7 +2347,7 @@ _ur_buffer::_ur_buffer(ur_context_handle_t Context, ur_device_handle_t Device, _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, ur_device_handle_t Device, char *ZeMemHandle, bool OwnZeMemHandle) - : ur_mem_handle_t_(Context, Device), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context, Device), Size(Size) { // Device == nullptr means host allocation Allocations[Device].ZeHandle = ZeMemHandle; diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 54f9a84e6b..8efd5b136e 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,8 @@ struct ur_mem_handle_t_ : _ur_object { virtual ~ur_mem_handle_t_() = default; protected: - ur_mem_handle_t_(ur_context_handle_t Context) : UrContext{Context} {} + ur_mem_handle_t_(ur_context_handle_t Context) + : UrContext{Context}, UrDevice{nullptr} {} ur_mem_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device) : UrContext{Context}, UrDevice(Device) {} @@ -101,7 +103,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { // Sub-buffer constructor _ur_buffer(_ur_buffer *Parent, size_t Origin, size_t Size) : ur_mem_handle_t_(Parent->UrContext), - Size(Size), SubBuffer{Parent, Origin} {} + Size(Size), SubBuffer{{Parent, Origin}} {} // Interop-buffer constructor _ur_buffer(ur_context_handle_t Context, size_t Size, @@ -121,8 +123,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { ur_device_handle_t Device = nullptr) override; bool isImage() const override { return false; } - - bool isSubBuffer() const { return SubBuffer.Parent != nullptr; } + bool isSubBuffer() const { return SubBuffer != std::nullopt; } // Frees all allocations made for the buffer. ur_result_t free(); @@ -174,10 +175,11 @@ struct _ur_buffer final : ur_mem_handle_t_ { size_t Size; size_t getAlignment() const; - struct { + struct SubBuffer_t { _ur_buffer *Parent; - size_t Origin; // only valid if Parent != nullptr - } SubBuffer; + size_t Origin; + }; + std::optional SubBuffer; }; struct _ur_image final : ur_mem_handle_t_ { diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index f7b9576189..86aa4ec745 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -10,11 +10,13 @@ #pragma once #include "common.hpp" +#include "ze_api.h" struct ur_device_handle_t_; struct ur_platform_handle_t_ : public _ur_platform { - ur_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver} {} + ur_platform_handle_t_(ze_driver_handle_t Driver) + : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. ur_result_t initialize(); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp old mode 100755 new mode 100644 index 994f595a5d..f07e0df675 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -219,7 +219,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( if (ImmCmdList == Queue->CommandListMap.end()) continue; - auto EventList = ImmCmdList->second.EventList; + const auto &EventList = ImmCmdList->second.EventList; for (auto It = EventList.crbegin(); It != EventList.crend(); It++) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, ((*It)->ZeEvent)); @@ -391,11 +391,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // At this point only the thread creating the queue will have associated // command-lists. Other threads have not accessed the queue yet. So we can // only warmup the initial thread's command-lists. - auto QueueGroup = Q->ComputeQueueGroupsByTID.get(); + const auto &QueueGroup = Q->ComputeQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(false, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); if (Q->useCopyEngine()) { - auto QueueGroup = Q->CopyQueueGroupsByTID.get(); + const auto &QueueGroup = Q->CopyQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(true, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 306cec5416..88281925ce 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -424,7 +424,8 @@ struct ur_queue_handle_t_ : _ur_object { // checked. Otherwise, the OpenCommandList containing compute commands is // checked. bool hasOpenCommandList(bool IsCopy) const { - auto CommandBatch = (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; + const auto &CommandBatch = + (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; return CommandBatch.OpenCommandList != CommandListMap.end(); }