From 5eddd177e5559b63d9c03a684e974444abdac381 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 9 Oct 2023 09:15:31 -0700 Subject: [PATCH 001/138] [UR][CUDA][L0][HIP] Add virtual memory adapter implementations This commit adds the adapter implementations of the virtual memory extension functionality to be used in https://github.com/intel/llvm/pull/8954. Signed-off-by: Larsen, Steffen --- source/adapters/cuda/CMakeLists.txt | 3 + source/adapters/cuda/device.cpp | 2 + source/adapters/cuda/physical_mem.cpp | 58 ++++++++ source/adapters/cuda/physical_mem.hpp | 68 +++++++++ source/adapters/cuda/virtual_mem.cpp | 138 +++++++++++++++++++ source/adapters/hip/CMakeLists.txt | 3 + source/adapters/hip/device.cpp | 2 + source/adapters/hip/physical_mem.cpp | 36 +++++ source/adapters/hip/physical_mem.hpp | 30 ++++ source/adapters/hip/virtual_mem.cpp | 69 ++++++++++ source/adapters/level_zero/CMakeLists.txt | 3 + source/adapters/level_zero/common.cpp | 3 + source/adapters/level_zero/device.cpp | 3 + source/adapters/level_zero/physical_mem.cpp | 54 ++++++++ source/adapters/level_zero/physical_mem.hpp | 24 ++++ source/adapters/level_zero/ur_level_zero.hpp | 1 + source/adapters/level_zero/virtual_mem.cpp | 120 ++++++++++++++++ 17 files changed, 617 insertions(+) create mode 100644 source/adapters/cuda/physical_mem.cpp create mode 100644 source/adapters/cuda/physical_mem.hpp create mode 100644 source/adapters/cuda/virtual_mem.cpp create mode 100644 source/adapters/hip/physical_mem.cpp create mode 100644 source/adapters/hip/physical_mem.hpp create mode 100644 source/adapters/hip/virtual_mem.cpp create mode 100644 source/adapters/level_zero/physical_mem.cpp create mode 100644 source/adapters/level_zero/physical_mem.hpp create mode 100644 source/adapters/level_zero/virtual_mem.cpp diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index 0b44ae0777..c0d4399608 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ 
b/source/adapters/cuda/CMakeLists.txt @@ -29,6 +29,8 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp @@ -40,6 +42,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 0c00210eb2..dc870821f2 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1019,6 +1019,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // TODO: Investigate if this information is available on CUDA. case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: return ReturnValue(false); + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: + return ReturnValue(true); case UR_DEVICE_INFO_ESIMD_SUPPORT: return ReturnValue(false); case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: diff --git a/source/adapters/cuda/physical_mem.cpp b/source/adapters/cuda/physical_mem.cpp new file mode 100644 index 0000000000..177bc2234d --- /dev/null +++ b/source/adapters/cuda/physical_mem.cpp @@ -0,0 +1,58 @@ +//===--------- physical_mem.cpp - CUDA Adapter ----------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "physical_mem.hpp" +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" + +#include +#include + +UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + [[maybe_unused]] const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem) { + CUmemAllocationProp AllocProps = {}; + AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; + UR_ASSERT(GetDeviceOrdinal(hDevice, AllocProps.location.id), + UR_RESULT_ERROR_INVALID_DEVICE); + + CUmemGenericAllocationHandle ResHandle; + UR_CHECK_ERROR(cuMemCreate(&ResHandle, size, &AllocProps, 0)); + *phPhysicalMem = new ur_physical_mem_handle_t_(ResHandle, hContext); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { + hPhysicalMem->incrementReferenceCount(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { + if (hPhysicalMem->decrementReferenceCount() > 0) + return UR_RESULT_SUCCESS; + + try { + std::unique_ptr PhysicalMemGuard(hPhysicalMem); + + ScopedContext Active(hPhysicalMem->getContext()); + UR_CHECK_ERROR(cuMemRelease(hPhysicalMem->get())); + return UR_RESULT_SUCCESS; + } catch (ur_result_t err) { + return err; + } catch (...) 
{ + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } +} diff --git a/source/adapters/cuda/physical_mem.hpp b/source/adapters/cuda/physical_mem.hpp new file mode 100644 index 0000000000..2b0dc029d5 --- /dev/null +++ b/source/adapters/cuda/physical_mem.hpp @@ -0,0 +1,68 @@ +//===---------- physical_mem.hpp - CUDA Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include + +#include + +#include "adapter.hpp" +#include "device.hpp" +#include "platform.hpp" + +/// UR queue mapping on physical memory allocations used in virtual memory +/// management. +/// +struct ur_physical_mem_handle_t_ { + using native_type = CUmemGenericAllocationHandle; + + std::atomic_uint32_t RefCount; + native_type PhysicalMem; + ur_context_handle_t_ *Context; + + ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx) + : RefCount(1), PhysicalMem(PhysMem), Context(Ctx) { + urContextRetain(Context); + } + + ~ur_physical_mem_handle_t_() { urContextRelease(Context); } + + native_type get() const noexcept { return PhysicalMem; } + + ur_context_handle_t_ *getContext() const noexcept { return Context; } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } +}; + +// Find a device ordinal of a device. 
+inline ur_result_t GetDeviceOrdinal(ur_device_handle_t Device, int &Ordinal) { + ur_adapter_handle_t AdapterHandle = &adapter; + // Get list of platforms + uint32_t NumPlatforms; + UR_ASSERT(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms), + UR_RESULT_ERROR_INVALID_ARGUMENT); + UR_ASSERT(NumPlatforms, UR_RESULT_ERROR_UNKNOWN); + + std::vector Platforms{NumPlatforms}; + UR_ASSERT( + urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), nullptr), + UR_RESULT_ERROR_INVALID_ARGUMENT); + + // Ordinal corresponds to the platform ID as each device has its own platform. + CUdevice NativeDevice = Device->get(); + for (Ordinal = 0; size_t(Ordinal) < Platforms.size(); ++Ordinal) + if (Platforms[Ordinal]->Devices[0]->get() == NativeDevice) + return UR_RESULT_SUCCESS; + return UR_RESULT_ERROR_INVALID_DEVICE; +} diff --git a/source/adapters/cuda/virtual_mem.cpp b/source/adapters/cuda/virtual_mem.cpp new file mode 100644 index 0000000000..583594fea1 --- /dev/null +++ b/source/adapters/cuda/virtual_mem.cpp @@ -0,0 +1,138 @@ +//===--------- virtual_mem.cpp - CUDA Adapter -----------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "physical_mem.hpp" + +#include +#include + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_virtual_mem_granularity_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + ScopedContext Active(hContext); + switch (propName) { + case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: + case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: { + CUmemAllocationGranularity_flags Flags = + propName == UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM + ? CU_MEM_ALLOC_GRANULARITY_MINIMUM + : CU_MEM_ALLOC_GRANULARITY_RECOMMENDED; + CUmemAllocationProp AllocProps = {}; + AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; + UR_ASSERT(GetDeviceOrdinal(hDevice, AllocProps.location.id), + UR_RESULT_ERROR_INVALID_DEVICE); + + size_t Granularity; + UR_CHECK_ERROR( + cuMemGetAllocationGranularity(&Granularity, &AllocProps, Flags)); + return ReturnValue(Granularity); + } + default: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, + size_t size, void **ppStart) { + ScopedContext Active(hContext); + return UR_CHECK_ERROR(cuMemAddressReserve((CUdeviceptr *)ppStart, size, 0, + (CUdeviceptr)pStart, 0)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( + ur_context_handle_t hContext, const void *pStart, size_t size) { + ScopedContext Active(hContext); + UR_CHECK_ERROR(cuMemAddressFree((CUdeviceptr)pStart, size)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT 
ur_result_t UR_APICALL +urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_virtual_mem_access_flags_t flags) { + CUmemAccessDesc AccessDesc; + if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; + else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READ; + else + AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_NONE; + AccessDesc.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + // TODO: When contexts support multiple devices, we should create a descriptor + // for each. We may also introduce a variant of this function with a + // specific device. + UR_ASSERT(GetDeviceOrdinal(hContext->getDevice(), AccessDesc.location.id), + UR_RESULT_ERROR_INVALID_DEVICE); + + ScopedContext Active(hContext); + UR_CHECK_ERROR(cuMemSetAccess((CUdeviceptr)pStart, size, &AccessDesc, 1)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, + ur_physical_mem_handle_t hPhysicalMem, size_t offset, + ur_virtual_mem_access_flags_t flags) { + ScopedContext Active(hContext); + UR_CHECK_ERROR( + cuMemMap((CUdeviceptr)pStart, size, offset, hPhysicalMem->get(), 0)); + if (flags) + UR_ASSERT(urVirtualMemSetAccess(hContext, pStart, size, flags), + UR_RESULT_ERROR_INVALID_ARGUMENT); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap( + ur_context_handle_t hContext, const void *pStart, size_t size) { + ScopedContext Active(hContext); + UR_CHECK_ERROR(cuMemUnmap((CUdeviceptr)pStart, size)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( + ur_context_handle_t hContext, const void *pStart, + [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + 
ScopedContext Active(hContext); + switch (propName) { + case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { + CUmemLocation MemLocation = {}; + MemLocation.type = CU_MEM_LOCATION_TYPE_DEVICE; + UR_ASSERT(GetDeviceOrdinal(hContext->getDevice(), MemLocation.id), + UR_RESULT_ERROR_INVALID_DEVICE); + + unsigned long long CuAccessFlags; + UR_CHECK_ERROR( + cuMemGetAccess(&CuAccessFlags, &MemLocation, (CUdeviceptr)pStart)); + + ur_virtual_mem_access_flags_t UrAccessFlags = 0; + if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READWRITE) + UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + else if (CuAccessFlags == CU_MEM_ACCESS_FLAGS_PROT_READ) + UrAccessFlags = UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + return ReturnValue(UrAccessFlags); + } + default: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index b29b1becf7..b5646de713 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -63,6 +63,8 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp @@ -73,6 +75,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 7cec6def8b..d199e90757 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -809,6 +809,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t 
hDevice, } case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: return ReturnValue(false); + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: + return ReturnValue(false); case UR_DEVICE_INFO_ESIMD_SUPPORT: return ReturnValue(false); diff --git a/source/adapters/hip/physical_mem.cpp b/source/adapters/hip/physical_mem.cpp new file mode 100644 index 0000000000..8939d89d33 --- /dev/null +++ b/source/adapters/hip/physical_mem.cpp @@ -0,0 +1,36 @@ +//===--------- physical_mem.cpp - HIP Adapter -----------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "physical_mem.hpp" +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( + ur_context_handle_t, ur_device_handle_t, size_t, + const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRetain(ur_physical_mem_handle_t) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRelease(ur_physical_mem_handle_t) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/physical_mem.hpp b/source/adapters/hip/physical_mem.hpp new file mode 100644 index 0000000000..fc50836f62 --- /dev/null +++ b/source/adapters/hip/physical_mem.hpp @@ -0,0 +1,30 @@ +//===---------- physical_mem.hpp - HIP Adapter 
----------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "device.hpp" +#include "platform.hpp" + +/// UR queue mapping on physical memory allocations used in virtual memory +/// management. +/// TODO: Implement. +/// +struct ur_physical_mem_handle_t_ { + std::atomic_uint32_t RefCount; + + ur_physical_mem_handle_t_() : RefCount(1) {} + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } +}; diff --git a/source/adapters/hip/virtual_mem.cpp b/source/adapters/hip/virtual_mem.cpp new file mode 100644 index 0000000000..e2c4f9faf0 --- /dev/null +++ b/source/adapters/hip/virtual_mem.cpp @@ -0,0 +1,69 @@ +//===--------- virtual_mem.cpp - HIP Adapter ------------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "physical_mem.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( + ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, + size_t, void *, size_t *) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, + const void *, size_t, + void **) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, + const void *, size_t) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( + ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( + ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, + ur_virtual_mem_access_flags_t) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, + const void *, size_t) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL 
urVirtualMemGetInfo(ur_context_handle_t, + const void *, size_t, + ur_virtual_mem_info_t, + size_t, void *, + size_t *) { + detail::ur::die( + "Virtual memory extension is not currently implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} \ No newline at end of file diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 223692e109..b80c5aef5d 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -84,6 +84,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/usm.hpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp @@ -95,8 +96,10 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index 3d83b91139..ed1e3c630e 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -208,6 +208,9 @@ template <> ze_structure_type_t getZeStructureType() { template <> ze_structure_type_t getZeStructureType() { return ZE_STRUCTURE_TYPE_SAMPLER_DESC; } +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_PHYSICAL_MEM_DESC; +} template <> ze_structure_type_t getZeStructureType() { return ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 
dbc18ead4a..2e7bd10b72 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -789,6 +789,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(static_cast( 0)); //__read_write attribute currently undefinde in opencl } + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: { + return ReturnValue(static_cast(true)); + } case UR_DEVICE_INFO_ESIMD_SUPPORT: { // ESIMD is only supported by Intel GPUs. diff --git a/source/adapters/level_zero/physical_mem.cpp b/source/adapters/level_zero/physical_mem.cpp new file mode 100644 index 0000000000..d4d9792f24 --- /dev/null +++ b/source/adapters/level_zero/physical_mem.cpp @@ -0,0 +1,54 @@ +//===---------------- physical_mem.cpp - Level Zero Adapter ---------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "physical_mem.hpp" +#include "common.hpp" +#include "context.hpp" +#include "device.hpp" +#include "ur_level_zero.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + [[maybe_unused]] const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem) { + ZeStruct PhysicalMemDesc; + PhysicalMemDesc.flags = 0; + PhysicalMemDesc.size = size; + + ze_physical_mem_handle_t ZePhysicalMem; + ZE2UR_CALL(zePhysicalMemCreate, (hContext->ZeContext, hDevice->ZeDevice, + &PhysicalMemDesc, &ZePhysicalMem)); + try { + *phPhysicalMem = new ur_physical_mem_handle_t_(ZePhysicalMem, hContext); + } catch (const std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { + hPhysicalMem->RefCount.increment(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { + if (!hPhysicalMem->RefCount.decrementAndTest()) + return UR_RESULT_SUCCESS; + + ZE2UR_CALL(zePhysicalMemDestroy, + (hPhysicalMem->Context->ZeContext, hPhysicalMem->ZePhysicalMem)); + delete hPhysicalMem; + + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/level_zero/physical_mem.hpp b/source/adapters/level_zero/physical_mem.hpp new file mode 100644 index 0000000000..9b83d93222 --- /dev/null +++ b/source/adapters/level_zero/physical_mem.hpp @@ -0,0 +1,24 @@ +//===---------------- physical_mem.hpp - Level Zero Adapter ---------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +struct ur_physical_mem_handle_t_ : _ur_object { + ur_physical_mem_handle_t_(ze_physical_mem_handle_t ZePhysicalMem, + ur_context_handle_t Context) + : ZePhysicalMem{ZePhysicalMem}, Context{Context} {} + + // Level Zero physical memory handle. + ze_physical_mem_handle_t ZePhysicalMem; + + // Keeps the PI context of this memory handle. 
+ ur_context_handle_t Context; +}; diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index 38bc5aed2d..dd7bbf67b3 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -30,6 +30,7 @@ #include "image.hpp" #include "kernel.hpp" #include "memory.hpp" +#include "physical_mem.hpp" #include "platform.hpp" #include "program.hpp" #include "queue.hpp" diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp new file mode 100644 index 0000000000..e90aec45de --- /dev/null +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -0,0 +1,120 @@ +//===---------------- virtual_mem.cpp - Level Zero Adapter ----------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "device.hpp" +#include "physical_mem.hpp" +#include "ur_level_zero.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_virtual_mem_granularity_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: + case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: { + // For L0 the minimum and recommended granularity is the same. We use an + // memory size of 1 byte to get the actual granularity instead of the + // aligned size. 
+ size_t PageSize; + ZE2UR_CALL(zeVirtualMemQueryPageSize, + (hContext->ZeContext, hDevice->ZeDevice, 1, &PageSize)); + return ReturnValue(PageSize); + } + default: + urPrint("Unsupported propName in urQueueGetInfo: propName=%d(0x%x)\n", + propName, propName); + return UR_RESULT_ERROR_INVALID_VALUE; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, + size_t size, void **ppStart) { + ZE2UR_CALL(zeVirtualMemReserve, (hContext->ZeContext, pStart, size, ppStart)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( + ur_context_handle_t hContext, const void *pStart, size_t size) { + ZE2UR_CALL(zeVirtualMemFree, (hContext->ZeContext, pStart, size)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_virtual_mem_access_flags_t flags) { + ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; + if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; + else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY; + + ZE2UR_CALL(zeVirtualMemSetAccessAttribute, + (hContext->ZeContext, pStart, size, AccessAttr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, + ur_physical_mem_handle_t hPhysicalMem, size_t offset, + ur_virtual_mem_access_flags_t flags) { + ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; + if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; + else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY; + + ZE2UR_CALL(zeVirtualMemMap, + (hContext->ZeContext, pStart, size, 
hPhysicalMem->ZePhysicalMem, + offset, AccessAttr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap( + ur_context_handle_t hContext, const void *pStart, size_t size) { + ZE2UR_CALL(zeVirtualMemUnmap, (hContext->ZeContext, pStart, size)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( + ur_context_handle_t hContext, const void *pStart, + [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { + size_t QuerySize; + ze_memory_access_attribute_t Access; + ZE2UR_CALL(zeVirtualMemGetAccessAttribute, + (hContext->ZeContext, pStart, size, &Access, &QuerySize)); + ur_virtual_mem_access_flags_t RetFlags = 0; + if (Access & ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE) + RetFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Access & ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY) + RetFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + return ReturnValue(Access); + } + default: + urPrint("Unsupported propName in urQueueGetInfo: propName=%d(0x%x)\n", + propName, propName); + return UR_RESULT_ERROR_INVALID_VALUE; + } + + return UR_RESULT_SUCCESS; +} From eed79a74d37735f67095cb7b707e8dd2e1d17c8b Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 9 Oct 2023 10:09:34 -0700 Subject: [PATCH 002/138] Remove duplicate CUDA UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT case Signed-off-by: Larsen, Steffen --- source/adapters/cuda/device.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index dc870821f2..b7e76b667a 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1030,7 +1030,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: case 
UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: - case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: From c5ea815b297c7e912b25bb893cc0e1de0c4137b1 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 9 Oct 2023 10:14:47 -0700 Subject: [PATCH 003/138] Fix return of check error Signed-off-by: Larsen, Steffen --- source/adapters/cuda/virtual_mem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/adapters/cuda/virtual_mem.cpp b/source/adapters/cuda/virtual_mem.cpp index 583594fea1..203d052619 100644 --- a/source/adapters/cuda/virtual_mem.cpp +++ b/source/adapters/cuda/virtual_mem.cpp @@ -52,8 +52,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, size_t size, void **ppStart) { ScopedContext Active(hContext); - return UR_CHECK_ERROR(cuMemAddressReserve((CUdeviceptr *)ppStart, size, 0, - (CUdeviceptr)pStart, 0)); + UR_CHECK_ERROR(cuMemAddressReserve((CUdeviceptr *)ppStart, size, 0, + (CUdeviceptr)pStart, 0)); return UR_RESULT_SUCCESS; } From 986330c516d6d7142ea4cb9d47e13c047ee8201d Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 10 Oct 2023 03:38:20 -0700 Subject: [PATCH 004/138] Fix common include in cuda device.hpp Signed-off-by: Larsen, Steffen --- source/adapters/cuda/device.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp index 919f813e4e..49c40a3da6 100644 --- a/source/adapters/cuda/device.hpp +++ b/source/adapters/cuda/device.hpp @@ -11,6 +11,8 @@ #include +#include "common.hpp" + struct ur_device_handle_t_ { private: using native_type = CUdevice; From 44ca1398fca2aed7d2b231f10290efb41282b6af Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 11 Oct 2023 08:25:24 -0700 Subject: [PATCH 005/138] Zero-initialize CUmemAccessDesc Signed-off-by: Larsen, Steffen --- 
source/adapters/cuda/virtual_mem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/cuda/virtual_mem.cpp b/source/adapters/cuda/virtual_mem.cpp index 203d052619..f49da3132e 100644 --- a/source/adapters/cuda/virtual_mem.cpp +++ b/source/adapters/cuda/virtual_mem.cpp @@ -67,7 +67,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, size_t size, ur_virtual_mem_access_flags_t flags) { - CUmemAccessDesc AccessDesc; + CUmemAccessDesc AccessDesc = {}; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; else if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) From 0694ea820235d72e2b07480fc8ebf76c9a4f3816 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 11 Oct 2023 08:26:12 -0700 Subject: [PATCH 006/138] Add missing newline Signed-off-by: Larsen, Steffen --- source/adapters/hip/virtual_mem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/virtual_mem.cpp b/source/adapters/hip/virtual_mem.cpp index e2c4f9faf0..6330451797 100644 --- a/source/adapters/hip/virtual_mem.cpp +++ b/source/adapters/hip/virtual_mem.cpp @@ -66,4 +66,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, detail::ur::die( "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} \ No newline at end of file +} From 9d7ac5276587aec3a594bc1454b87127335f2597 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Oct 2023 02:44:01 -0700 Subject: [PATCH 007/138] Fix wrongful use of UR_ASSERT in cuda implementation Signed-off-by: Larsen, Steffen --- source/adapters/cuda/physical_mem.cpp | 3 +-- source/adapters/cuda/physical_mem.hpp | 8 +++----- source/adapters/cuda/virtual_mem.cpp | 13 +++++-------- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git 
a/source/adapters/cuda/physical_mem.cpp b/source/adapters/cuda/physical_mem.cpp index 177bc2234d..444d492aa3 100644 --- a/source/adapters/cuda/physical_mem.cpp +++ b/source/adapters/cuda/physical_mem.cpp @@ -23,8 +23,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( CUmemAllocationProp AllocProps = {}; AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; - UR_ASSERT(GetDeviceOrdinal(hDevice, AllocProps.location.id), - UR_RESULT_ERROR_INVALID_DEVICE); + UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id)); CUmemGenericAllocationHandle ResHandle; UR_CHECK_ERROR(cuMemCreate(&ResHandle, size, &AllocProps, 0)); diff --git a/source/adapters/cuda/physical_mem.hpp b/source/adapters/cuda/physical_mem.hpp index 2b0dc029d5..0ce332e112 100644 --- a/source/adapters/cuda/physical_mem.hpp +++ b/source/adapters/cuda/physical_mem.hpp @@ -50,14 +50,12 @@ inline ur_result_t GetDeviceOrdinal(ur_device_handle_t Device, int &Ordinal) { ur_adapter_handle_t AdapterHandle = &adapter; // Get list of platforms uint32_t NumPlatforms; - UR_ASSERT(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms), - UR_RESULT_ERROR_INVALID_ARGUMENT); + UR_CHECK_ERROR(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); UR_ASSERT(NumPlatforms, UR_RESULT_ERROR_UNKNOWN); std::vector Platforms{NumPlatforms}; - UR_ASSERT( - urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), nullptr), - UR_RESULT_ERROR_INVALID_ARGUMENT); + UR_CHECK_ERROR(urPlatformGet(&AdapterHandle, 1, NumPlatforms, + Platforms.data(), nullptr)); // Ordinal corresponds to the platform ID as each device has its own platform. 
CUdevice NativeDevice = Device->get(); diff --git a/source/adapters/cuda/virtual_mem.cpp b/source/adapters/cuda/virtual_mem.cpp index f49da3132e..9c37dda4fb 100644 --- a/source/adapters/cuda/virtual_mem.cpp +++ b/source/adapters/cuda/virtual_mem.cpp @@ -33,8 +33,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( CUmemAllocationProp AllocProps = {}; AllocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; AllocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; - UR_ASSERT(GetDeviceOrdinal(hDevice, AllocProps.location.id), - UR_RESULT_ERROR_INVALID_DEVICE); + UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id)); size_t Granularity; UR_CHECK_ERROR( @@ -78,8 +77,8 @@ urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, // TODO: When contexts support multiple devices, we should create a descriptor // for each. We may also introduce a variant of this function with a // specific device. - UR_ASSERT(GetDeviceOrdinal(hContext->getDevice(), AccessDesc.location.id), - UR_RESULT_ERROR_INVALID_DEVICE); + UR_CHECK_ERROR( + GetDeviceOrdinal(hContext->getDevice(), AccessDesc.location.id)); ScopedContext Active(hContext); UR_CHECK_ERROR(cuMemSetAccess((CUdeviceptr)pStart, size, &AccessDesc, 1)); @@ -94,8 +93,7 @@ urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, UR_CHECK_ERROR( cuMemMap((CUdeviceptr)pStart, size, offset, hPhysicalMem->get(), 0)); if (flags) - UR_ASSERT(urVirtualMemSetAccess(hContext, pStart, size, flags), - UR_RESULT_ERROR_INVALID_ARGUMENT); + UR_CHECK_ERROR(urVirtualMemSetAccess(hContext, pStart, size, flags)); return UR_RESULT_SUCCESS; } @@ -117,8 +115,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { CUmemLocation MemLocation = {}; MemLocation.type = CU_MEM_LOCATION_TYPE_DEVICE; - UR_ASSERT(GetDeviceOrdinal(hContext->getDevice(), MemLocation.id), - UR_RESULT_ERROR_INVALID_DEVICE); + UR_CHECK_ERROR(GetDeviceOrdinal(hContext->getDevice(), 
MemLocation.id)); unsigned long long CuAccessFlags; UR_CHECK_ERROR( From bae28367ceebc10b3eac1af1d56400dd69186037 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 13 Oct 2023 02:45:00 -0700 Subject: [PATCH 008/138] an -> a Signed-off-by: Larsen, Steffen --- source/adapters/level_zero/virtual_mem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index e90aec45de..545f9fde54 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -22,7 +22,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( switch (propName) { case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: case UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: { - // For L0 the minimum and recommended granularity is the same. We use an + // For L0 the minimum and recommended granularity is the same. We use a // memory size of 1 byte to get the actual granularity instead of the // aligned size. 
size_t PageSize; From e0534f2c4288d79d222c0eb31e6c3ad3307f8c05 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Fri, 20 Oct 2023 17:18:29 +0200 Subject: [PATCH 009/138] [HIP] Implement ext_oneapi_queue_priority --- source/adapters/hip/queue.cpp | 23 ++++++++++++++++++----- source/adapters/hip/queue.hpp | 5 +++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index 910d7cf512..d79c4e50b5 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -38,8 +38,8 @@ hipStream_t ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) { // The second check is done after mutex is locked so other threads can not // change NumComputeStreams after that if (NumComputeStreams < ComputeStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &ComputeStreams[NumComputeStreams++], Flags)); + UR_CHECK_ERROR(hipStreamCreateWithPriority( + &ComputeStreams[NumComputeStreams++], Flags, Priority)); } } Token = ComputeStreamIdx++; @@ -97,8 +97,8 @@ hipStream_t ur_queue_handle_t_::getNextTransferStream() { // The second check is done after mutex is locked so other threads can not // change NumTransferStreams after that if (NumTransferStreams < TransferStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &TransferStreams[NumTransferStreams++], Flags)); + UR_CHECK_ERROR(hipStreamCreateWithPriority( + &TransferStreams[NumTransferStreams++], Flags, Priority)); } } uint32_t Stream_i = TransferStreamIdx++ % TransferStreams.size(); @@ -119,6 +119,19 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, } unsigned int Flags = 0; + ur_queue_flags_t URFlags = 0; + int Priority = 0; // Not guaranteed, but, in ROCm 5.7, 0 is the default + + if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) { + URFlags = pProps->flags; + if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { + ScopedContext Active(hContext->getDevice()); + 
UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(nullptr, &Priority)); + } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { + ScopedContext Active(hContext->getDevice()); + UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(&Priority, nullptr)); + } + } const bool IsOutOfOrder = pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE @@ -131,7 +144,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, QueueImpl = std::unique_ptr(new ur_queue_handle_t_{ std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext, - hDevice, Flags, pProps ? pProps->flags : 0}); + hDevice, Flags, pProps ? pProps->flags : 0, Priority}); *phQueue = QueueImpl.release(); diff --git a/source/adapters/hip/queue.hpp b/source/adapters/hip/queue.hpp index c79bd293a3..ad2f0f016e 100644 --- a/source/adapters/hip/queue.hpp +++ b/source/adapters/hip/queue.hpp @@ -44,6 +44,7 @@ struct ur_queue_handle_t_ { unsigned int LastSyncTransferStreams; unsigned int Flags; ur_queue_flags_t URFlags; + int Priority; // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be // locked at the same time, ComputeStreamSyncMutex should be locked first // to avoid deadlocks @@ -56,7 +57,7 @@ struct ur_queue_handle_t_ { ur_queue_handle_t_(std::vector &&ComputeStreams, std::vector &&TransferStreams, ur_context_handle_t Context, ur_device_handle_t Device, - unsigned int Flags, ur_queue_flags_t URFlags, + unsigned int Flags, ur_queue_flags_t URFlags, int Priority, bool BackendOwns = true) : ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move( TransferStreams)}, @@ -66,7 +67,7 @@ struct ur_queue_handle_t_ { Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIdx{0}, TransferStreamIdx{0}, NumComputeStreams{0}, NumTransferStreams{0}, LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags), - URFlags(URFlags), HasOwnership{BackendOwns} { + URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} { urContextRetain(Context); 
urDeviceRetain(Device); } From f7d789d156b858e813881b710ad182e3a12a03d1 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Tue, 24 Oct 2023 19:27:16 +0200 Subject: [PATCH 010/138] Fix queue creation with native handle --- source/adapters/hip/queue.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index d79c4e50b5..dc1be4de9d 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -307,6 +307,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( hContext->getDevice(), HIPFlags, Flags, + /*priority*/ 0, /*backend_owns*/ pProperties->isNativeHandleOwned}; (*phQueue)->NumComputeStreams = 1; From d14b117b33e641f4dc13c6fda91ede1b1ea21bb4 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 8 Nov 2023 14:44:12 +0000 Subject: [PATCH 011/138] Query support for 2D USM operations in CTS tests. --- .../enqueue/urEnqueueUSMFill2D.cpp | 8 ++++++++ .../enqueue/urEnqueueUSMMemcpy2D.cpp | 20 +++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp index 37470d40e2..fcb244e94a 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp @@ -33,6 +33,14 @@ struct urEnqueueUSMFill2DTestWithParam void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urQueueTestWithParam::SetUp()); + bool memfill2d_support = false; + ASSERT_SUCCESS(urContextGetInfo( + context, UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, + sizeof(memfill2d_support), &memfill2d_support, nullptr)); + if (!memfill2d_support) { + GTEST_SKIP() << "2D USM mem fill is not supported"; + } + pitch = std::get<1>(GetParam()).pitch; width = std::get<1>(GetParam()).width; height = std::get<1>(GetParam()).height; diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp index 8eaed4b743..d0e3dd9f72 
100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp @@ -17,6 +17,14 @@ struct urEnqueueUSMMemcpy2DTestWithParam GTEST_SKIP() << "Device USM is not supported"; } + bool memcpy2d_support = false; + ASSERT_SUCCESS(urContextGetInfo( + context, UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, + sizeof(memcpy2d_support), &memcpy2d_support, nullptr)); + if (!memcpy2d_support) { + GTEST_SKIP() << "2D USM memcpy is not supported"; + } + const auto [inPitch, inWidth, inHeight] = getParam(); std::tie(pitch, width, height) = std::make_tuple(inPitch, inWidth, inHeight); @@ -28,9 +36,9 @@ struct urEnqueueUSMMemcpy2DTestWithParam num_elements, &pDst)); ur_event_handle_t memset_event = nullptr; - ASSERT_SUCCESS(urEnqueueUSMFill2D( - queue, pSrc, pitch, sizeof(memset_value), &memset_value, width, - height, 0, nullptr, &memset_event)); + ASSERT_SUCCESS(urEnqueueUSMFill(queue, pSrc, sizeof(memset_value), + &memset_value, pitch * height, 0, + nullptr, &memset_event)); ASSERT_SUCCESS(urQueueFlush(queue)); ASSERT_SUCCESS(urEventWait(1, &memset_event)); @@ -171,9 +179,9 @@ TEST_P(urEnqueueUSMMemcpy2DNegativeTest, InvalidEventWaitList) { // enqueue something to get an event ur_event_handle_t event = nullptr; uint8_t fill_pattern = 14; - ASSERT_SUCCESS(urEnqueueUSMFill2D(queue, pDst, pitch, sizeof(fill_pattern), - &fill_pattern, width, height, 0, nullptr, - &event)); + ASSERT_SUCCESS(urEnqueueUSMFill(queue, pDst, sizeof(fill_pattern), + &fill_pattern, pitch * height, 0, nullptr, + &event)); ASSERT_NE(event, nullptr); ASSERT_SUCCESS(urQueueFinish(queue)); From 341114d00a0fe79ae137a16ad3a76f2cb2685a7a Mon Sep 17 00:00:00 2001 From: Alexander Batashev Date: Sat, 11 Nov 2023 08:29:18 +0000 Subject: [PATCH 012/138] [UR][Loader] Fix handling of native handles Native handles are created by adapters and thus are inherently backend-specific.
Loader can not assume anything about these handles, as even nullptr may be a valid value for such a handle. This patch changes two things about native handles: 1) Native handles are no longer wrapped in UR objects 2) Dispatch table is extracted from any other argument of the API function The above is true for all interop APIs except for urPlatformCreateWithNativeHandle, which needs a spec change. --- scripts/templates/ldrddi.cpp.mako | 9 ++- source/loader/ur_ldrddi.cpp | 129 +++--------------------------- 2 files changed, 16 insertions(+), 122 deletions(-) diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 0c9a3ed8b0..4cd50e36ac 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -127,14 +127,17 @@ namespace ur_loader %else: <%param_replacements={}%> %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): - %if 0 == i: + %if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': // extract platform's function pointer table auto dditable = reinterpret_cast<${item['obj']}*>( ${item['pointer']}${item['name']} )->dditable; auto ${th.make_pfn_name(n, tags, obj)} = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) return ${X}_RESULT_ERROR_UNINITIALIZED; + <%break%> %endif + %endfor + %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): %if 'range' in item: <% add_local = True @@ -143,6 +146,7 @@ namespace ur_loader for( size_t i = ${item['range'][0]}; i < ${item['range'][1]}; ++i ) ${item['name']}Local[ i ] = reinterpret_cast<${item['obj']}*>( ${item['name']}[ i ] )->handle; %else: + %if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': // convert loader handle to platform handle %if item['optional']: ${item['name']} = ( ${item['name']} ) ? 
reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle : nullptr; @@ -150,6 +154,7 @@ namespace ur_loader ${item['name']} = reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle; %endif %endif + %endif %endfor // forward to device-platform @@ -170,7 +175,7 @@ namespace ur_loader %if item['release']: // release loader handle ${item['factory']}.release( ${item['name']} ); - %else: + %elif not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': try { %if 'range' in item: diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 9327f349c5..c780d51335 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -349,14 +349,6 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativePlatform = reinterpret_cast( - ur_native_factory.getInstance(*phNativePlatform, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -670,14 +662,6 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeDevice = reinterpret_cast( - ur_native_factory.getInstance(*phNativeDevice, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -696,17 +680,13 @@ __urdlllocal ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // extract platform's function pointer table auto dditable = - reinterpret_cast(hNativeDevice)->dditable; + reinterpret_cast(hPlatform)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Device.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeDevice = - reinterpret_cast(hNativeDevice)->handle; - // convert loader handle to platform handle hPlatform = 
reinterpret_cast(hPlatform)->handle; @@ -913,14 +893,6 @@ __urdlllocal ur_result_t UR_APICALL urContextGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeContext = reinterpret_cast( - ur_native_factory.getInstance(*phNativeContext, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -941,17 +913,13 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle( // extract platform's function pointer table auto dditable = - reinterpret_cast(hNativeContext)->dditable; + reinterpret_cast(*phDevices)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Context.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeContext = - reinterpret_cast(hNativeContext)->handle; - // convert loader handles to platform handles auto phDevicesLocal = std::vector(numDevices); for (size_t i = 0; i < numDevices; ++i) { @@ -1204,14 +1172,6 @@ __urdlllocal ur_result_t UR_APICALL urMemGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeMem = reinterpret_cast( - ur_native_factory.getInstance(*phNativeMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1229,17 +1189,13 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeMem)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnBufferCreateWithNativeHandle = dditable->ur.Mem.pfnBufferCreateWithNativeHandle; if (nullptr == pfnBufferCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeMem = reinterpret_cast(hNativeMem)->handle; - // convert loader handle to 
platform handle hContext = reinterpret_cast(hContext)->handle; @@ -1279,17 +1235,13 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeMem)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnImageCreateWithNativeHandle = dditable->ur.Mem.pfnImageCreateWithNativeHandle; if (nullptr == pfnImageCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeMem = reinterpret_cast(hNativeMem)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -1525,14 +1477,6 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeSampler = reinterpret_cast( - ur_native_factory.getInstance(*phNativeSampler, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1550,18 +1494,13 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeSampler)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Sampler.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeSampler = - reinterpret_cast(hNativeSampler)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -2601,14 +2540,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeProgram = reinterpret_cast( - ur_native_factory.getInstance(*phNativeProgram, 
dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2626,18 +2557,13 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeProgram)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Program.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeProgram = - reinterpret_cast(hNativeProgram)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3085,14 +3011,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeKernel = reinterpret_cast( - ur_native_factory.getInstance(*phNativeKernel, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3112,18 +3030,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeKernel)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Kernel.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeKernel = - reinterpret_cast(hNativeKernel)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3297,14 +3210,6 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeQueue = reinterpret_cast( - 
ur_native_factory.getInstance(*phNativeQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3323,17 +3228,13 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeQueue)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Queue.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeQueue = reinterpret_cast(hNativeQueue)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3570,14 +3471,6 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeEvent = reinterpret_cast( - ur_native_factory.getInstance(*phNativeEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3595,17 +3488,13 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeEvent)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Event.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeEvent = reinterpret_cast(hNativeEvent)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; From 07bac5387289e29f0ae6658a72323cf826fef7c7 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Thu, 12 Oct 2023 13:24:56 -0700 Subject: [PATCH 013/138] [UR][L0] Correctly wait on 
barrier on urEnqueueEventsWaitWithBarrier When event list is null, a barrier is still needed for all previous commands if profiling is enabled, so fix it. Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/event.cpp | 7 ++++--- source/adapters/level_zero/queue.hpp | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index b979c8ab15..ba6ec7dfe6 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -168,7 +168,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( // TODO: this and other special handling of in-order queues to be // updated when/if Level Zero adds native support for in-order queues. // - if (Queue->isInOrderQueue() && InOrderBarrierBySignal) { + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { if (EventWaitList.Length) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, @@ -181,6 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( (CmdList->first, Event->ZeEvent, EventWaitList.Length, EventWaitList.ZeEventList)); } + return UR_RESULT_SUCCESS; }; @@ -964,8 +966,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool HostVisible, ur_event_handle_t *RetEvent) { - bool ProfilingEnabled = - !Queue || (Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; + bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled(); if (auto CachedEvent = Context->getEventFromContextCache(HostVisible, ProfilingEnabled)) { diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 9c90a999b3..306cec5416 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -515,6 +515,11 @@ struct ur_queue_handle_t_ : _ur_object { // lists 
in the queue. ur_result_t insertStartBarrierIfDiscardEventsMode(ur_command_list_ptr_t &CmdList); + + // returns true if queue has profiling enabled + bool isProfilingEnabled() { + return ((this->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0); + } }; // This helper function creates a ur_event_handle_t and associate a From 7fd9dafd0fb921a3e3a1a497a8458fcae414d384 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Wed, 8 Nov 2023 12:52:43 -0800 Subject: [PATCH 014/138] Address comments Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/event.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index ba6ec7dfe6..d8af1e674d 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -165,6 +165,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( // event signal because it is already guaranteed that previous commands // in this queue are completed when the signal is started. // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Length == 0. In those cases, we need + // to fall back directly to barrier to have correct timestamps. See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // // TODO: this and other special handling of in-order queues to be // updated when/if Level Zero adds native support for in-order queues.
// From cf943a1acd1a924c377e5e5617ff72e5c3569989 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 00:37:43 -0800 Subject: [PATCH 015/138] Add interfaces to loaders Signed-off-by: Larsen, Steffen --- source/adapters/cuda/ur_interface_loader.cpp | 20 +++++++++---------- source/adapters/hip/ur_interface_loader.cpp | 20 +++++++++---------- .../level_zero/ur_interface_loader.cpp | 20 +++++++++---------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index b87934182c..8e28f7762c 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -362,13 +362,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( return retVal; } - pDdiTable->pfnFree = nullptr; - pDdiTable->pfnGetInfo = nullptr; - pDdiTable->pfnGranularityGetInfo = nullptr; - pDdiTable->pfnMap = nullptr; - pDdiTable->pfnReserve = nullptr; - pDdiTable->pfnSetAccess = nullptr; - pDdiTable->pfnUnmap = nullptr; + pDdiTable->pfnFree = urVirtualMemFree; + pDdiTable->pfnGetInfo = urVirtualMemGetInfo; + pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; + pDdiTable->pfnMap = urVirtualMemMap; + pDdiTable->pfnReserve = urVirtualMemReserve; + pDdiTable->pfnSetAccess = urVirtualMemSetAccess; + pDdiTable->pfnUnmap = urVirtualMemUnmap; return retVal; } @@ -383,9 +383,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } - pDdiTable->pfnCreate = nullptr; - pDdiTable->pfnRelease = nullptr; - pDdiTable->pfnRetain = nullptr; + pDdiTable->pfnCreate = urPhysicalMemCreate; + pDdiTable->pfnRelease = urPhysicalMemRelease; + pDdiTable->pfnRetain = urPhysicalMemRetain; return retVal; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 26292b9528..5fcc48846d 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ 
b/source/adapters/hip/ur_interface_loader.cpp @@ -317,13 +317,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( return retVal; } - pDdiTable->pfnFree = nullptr; - pDdiTable->pfnGetInfo = nullptr; - pDdiTable->pfnGranularityGetInfo = nullptr; - pDdiTable->pfnMap = nullptr; - pDdiTable->pfnReserve = nullptr; - pDdiTable->pfnSetAccess = nullptr; - pDdiTable->pfnUnmap = nullptr; + pDdiTable->pfnFree = urVirtualMemFree; + pDdiTable->pfnGetInfo = urVirtualMemGetInfo; + pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; + pDdiTable->pfnMap = urVirtualMemMap; + pDdiTable->pfnReserve = urVirtualMemReserve; + pDdiTable->pfnSetAccess = urVirtualMemSetAccess; + pDdiTable->pfnUnmap = urVirtualMemUnmap; return retVal; } @@ -338,9 +338,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } - pDdiTable->pfnCreate = nullptr; - pDdiTable->pfnRelease = nullptr; - pDdiTable->pfnRetain = nullptr; + pDdiTable->pfnCreate = urPhysicalMemCreate; + pDdiTable->pfnRelease = urPhysicalMemRelease; + pDdiTable->pfnRetain = urPhysicalMemRetain; return retVal; } diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 5f6da8fd86..6f32e24e10 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -403,13 +403,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( return retVal; } - pDdiTable->pfnFree = nullptr; - pDdiTable->pfnGetInfo = nullptr; - pDdiTable->pfnGranularityGetInfo = nullptr; - pDdiTable->pfnMap = nullptr; - pDdiTable->pfnReserve = nullptr; - pDdiTable->pfnSetAccess = nullptr; - pDdiTable->pfnUnmap = nullptr; + pDdiTable->pfnFree = urVirtualMemFree; + pDdiTable->pfnGetInfo = urVirtualMemGetInfo; + pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; + pDdiTable->pfnMap = urVirtualMemMap; + pDdiTable->pfnReserve = urVirtualMemReserve; + 
pDdiTable->pfnSetAccess = urVirtualMemSetAccess; + pDdiTable->pfnUnmap = urVirtualMemUnmap; return retVal; } @@ -424,9 +424,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } - pDdiTable->pfnCreate = nullptr; - pDdiTable->pfnRelease = nullptr; - pDdiTable->pfnRetain = nullptr; + pDdiTable->pfnCreate = urPhysicalMemCreate; + pDdiTable->pfnRelease = urPhysicalMemRelease; + pDdiTable->pfnRetain = urPhysicalMemRetain; return retVal; } From 9a50ceb628da8310147c0b17d80523636983680c Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 00:43:43 -0800 Subject: [PATCH 016/138] Add OpenCL interfaces Signed-off-by: Larsen, Steffen --- source/adapters/opencl/CMakeLists.txt | 3 +++ source/adapters/opencl/device.cpp | 3 +++ .../adapters/opencl/ur_interface_loader.cpp | 20 +++++++++---------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index dc43a68ffa..07b6b41ce4 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -25,6 +25,8 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp @@ -32,6 +34,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 710ebcfb88..c11019070b 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -787,6 
+787,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: { + return ReturnValue(false); + } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 7333385182..0ec00809ed 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -356,13 +356,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( return retVal; } - pDdiTable->pfnFree = nullptr; - pDdiTable->pfnGetInfo = nullptr; - pDdiTable->pfnGranularityGetInfo = nullptr; - pDdiTable->pfnMap = nullptr; - pDdiTable->pfnReserve = nullptr; - pDdiTable->pfnSetAccess = nullptr; - pDdiTable->pfnUnmap = nullptr; + pDdiTable->pfnFree = urVirtualMemFree; + pDdiTable->pfnGetInfo = urVirtualMemGetInfo; + pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; + pDdiTable->pfnMap = urVirtualMemMap; + pDdiTable->pfnReserve = urVirtualMemReserve; + pDdiTable->pfnSetAccess = urVirtualMemSetAccess; + pDdiTable->pfnUnmap = urVirtualMemUnmap; return retVal; } @@ -374,9 +374,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } - pDdiTable->pfnCreate = nullptr; - pDdiTable->pfnRelease = nullptr; - pDdiTable->pfnRetain = nullptr; + pDdiTable->pfnCreate = urPhysicalMemCreate; + pDdiTable->pfnRelease = urPhysicalMemRelease; + pDdiTable->pfnRetain = urPhysicalMemRetain; return retVal; } From 5f680fb9688e9d9a4aab172c8ade461c70401460 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 00:47:58 -0800 Subject: [PATCH 017/138] Add NATIVE CPU interfaces Signed-off-by: Larsen, Steffen --- source/adapters/native_cpu/CMakeLists.txt | 3 +++ 
source/adapters/native_cpu/device.cpp | 3 ++- .../native_cpu/ur_interface_loader.cpp | 20 +++++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/source/adapters/native_cpu/CMakeLists.txt b/source/adapters/native_cpu/CMakeLists.txt index a15e7aac84..8549a4d3c0 100644 --- a/source/adapters/native_cpu/CMakeLists.txt +++ b/source/adapters/native_cpu/CMakeLists.txt @@ -24,6 +24,8 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/kernel.hpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nativecpu_state.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp @@ -34,6 +36,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 78540a1b90..f93c648ab7 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -302,7 +302,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(false); CASE_UR_UNSUPPORTED(UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH); - + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: + return ReturnValue(false); default: DIE_NO_IMPLEMENTATION; } diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index a9cfe2330e..c0796006d7 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -342,9 +342,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( 
return retVal; } - pDdiTable->pfnCreate = nullptr; - pDdiTable->pfnRelease = nullptr; - pDdiTable->pfnRetain = nullptr; + pDdiTable->pfnCreate = urPhysicalMemCreate; + pDdiTable->pfnRelease = urPhysicalMemRelease; + pDdiTable->pfnRetain = urPhysicalMemRetain; return retVal; } @@ -369,13 +369,13 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( return retVal; } - pDdiTable->pfnFree = nullptr; - pDdiTable->pfnGetInfo = nullptr; - pDdiTable->pfnGranularityGetInfo = nullptr; - pDdiTable->pfnMap = nullptr; - pDdiTable->pfnReserve = nullptr; - pDdiTable->pfnSetAccess = nullptr; - pDdiTable->pfnUnmap = nullptr; + pDdiTable->pfnFree = urVirtualMemFree; + pDdiTable->pfnGetInfo = urVirtualMemGetInfo; + pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; + pDdiTable->pfnMap = urVirtualMemMap; + pDdiTable->pfnReserve = urVirtualMemReserve; + pDdiTable->pfnSetAccess = urVirtualMemSetAccess; + pDdiTable->pfnUnmap = urVirtualMemUnmap; return retVal; } From dda9c04a3f96cc21a3bd50bc03fdb157f68b87c0 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 02:10:24 -0800 Subject: [PATCH 018/138] Add missed files Signed-off-by: Larsen, Steffen --- source/adapters/native_cpu/physical_mem.cpp | 36 +++++++++++ source/adapters/native_cpu/physical_mem.hpp | 16 +++++ source/adapters/native_cpu/virtual_mem.cpp | 69 +++++++++++++++++++++ source/adapters/opencl/physical_mem.cpp | 36 +++++++++++ source/adapters/opencl/physical_mem.hpp | 16 +++++ source/adapters/opencl/virtual_mem.cpp | 69 +++++++++++++++++++++ 6 files changed, 242 insertions(+) create mode 100644 source/adapters/native_cpu/physical_mem.cpp create mode 100644 source/adapters/native_cpu/physical_mem.hpp create mode 100644 source/adapters/native_cpu/virtual_mem.cpp create mode 100644 source/adapters/opencl/physical_mem.cpp create mode 100644 source/adapters/opencl/physical_mem.hpp create mode 100644 source/adapters/opencl/virtual_mem.cpp diff --git 
a/source/adapters/native_cpu/physical_mem.cpp b/source/adapters/native_cpu/physical_mem.cpp new file mode 100644 index 0000000000..c3b5acfc58 --- /dev/null +++ b/source/adapters/native_cpu/physical_mem.cpp @@ -0,0 +1,36 @@ +//===--------- physical_mem.cpp - NATIVE CPU Adapter ----------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "physical_mem.hpp" +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( + ur_context_handle_t, ur_device_handle_t, size_t, + const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRetain(ur_physical_mem_handle_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRelease(ur_physical_mem_handle_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/physical_mem.hpp b/source/adapters/native_cpu/physical_mem.hpp new file mode 100644 index 0000000000..00681fd66f --- /dev/null +++ b/source/adapters/native_cpu/physical_mem.hpp @@ -0,0 +1,16 @@ +//===---------- physical_mem.hpp - NATIVE CPU Adapter ---------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +/// UR queue mapping on physical memory allocations used in virtual memory +/// management. +/// TODO: Implement. +/// +struct ur_physical_mem_handle_t_ {}; diff --git a/source/adapters/native_cpu/virtual_mem.cpp b/source/adapters/native_cpu/virtual_mem.cpp new file mode 100644 index 0000000000..7a95b8dcb7 --- /dev/null +++ b/source/adapters/native_cpu/virtual_mem.cpp @@ -0,0 +1,69 @@ +//===--------- virtual_mem.cpp - NATIVE CPU Adapter -----------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "physical_mem.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( + ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, + size_t, void *, size_t *) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, + const void *, size_t, + void **) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, + const void *, size_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( + 
ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( + ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, + ur_virtual_mem_access_flags_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, + const void *, size_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, + const void *, size_t, + ur_virtual_mem_info_t, + size_t, void *, + size_t *) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "NATIVE CPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/physical_mem.cpp b/source/adapters/opencl/physical_mem.cpp new file mode 100644 index 0000000000..27c5913eab --- /dev/null +++ b/source/adapters/opencl/physical_mem.cpp @@ -0,0 +1,36 @@ +//===--------- physical_mem.cpp - OpenCL Adapter --------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "physical_mem.hpp" +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( + ur_context_handle_t, ur_device_handle_t, size_t, + const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRetain(ur_physical_mem_handle_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPhysicalMemRelease(ur_physical_mem_handle_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/physical_mem.hpp b/source/adapters/opencl/physical_mem.hpp new file mode 100644 index 0000000000..fc7020e59d --- /dev/null +++ b/source/adapters/opencl/physical_mem.hpp @@ -0,0 +1,16 @@ +//===---------- physical_mem.hpp - OpenCL Adapter -------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +/// UR queue mapping on physical memory allocations used in virtual memory +/// management. +/// TODO: Implement. 
+/// +struct ur_physical_mem_handle_t_ {}; diff --git a/source/adapters/opencl/virtual_mem.cpp b/source/adapters/opencl/virtual_mem.cpp new file mode 100644 index 0000000000..a908a31818 --- /dev/null +++ b/source/adapters/opencl/virtual_mem.cpp @@ -0,0 +1,69 @@ +//===--------- virtual_mem.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "physical_mem.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( + ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, + size_t, void *, size_t *) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, + const void *, size_t, + void **) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, + const void *, size_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( + ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( + ur_context_handle_t, const void *, size_t, 
ur_physical_mem_handle_t, size_t, + ur_virtual_mem_access_flags_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, + const void *, size_t) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, + const void *, size_t, + ur_virtual_mem_info_t, + size_t, void *, + size_t *) { + detail::ur::die("Virtual memory extension is not currently implemented for " + "OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} From cd7a552fe3cb84ec409faf31a6ffc7f0e5f816d2 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 02:42:42 -0800 Subject: [PATCH 019/138] Remove repeat cases and invalid include Signed-off-by: Larsen, Steffen --- source/adapters/hip/device.cpp | 1 - source/adapters/opencl/device.cpp | 3 --- source/adapters/opencl/physical_mem.cpp | 1 - 3 files changed, 5 deletions(-) diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 1803f312f7..278894c436 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -833,7 +833,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BFLOAT16: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_ASYNC_BARRIER: - case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index c11019070b..710ebcfb88 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -787,9 +787,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, {"cl_intel_program_scope_host_pipe"}, 
Supported)); return ReturnValue(Supported); } - case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: { - return ReturnValue(false); - } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: diff --git a/source/adapters/opencl/physical_mem.cpp b/source/adapters/opencl/physical_mem.cpp index 27c5913eab..2cd3f4a625 100644 --- a/source/adapters/opencl/physical_mem.cpp +++ b/source/adapters/opencl/physical_mem.cpp @@ -11,7 +11,6 @@ #include "physical_mem.hpp" #include "common.hpp" #include "context.hpp" -#include "event.hpp" UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( ur_context_handle_t, ur_device_handle_t, size_t, From d4649a12e2ddabc54014f5e2667aeeed9e95b363 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 03:30:43 -0800 Subject: [PATCH 020/138] Remove another invalid case and fix namespace Signed-off-by: Larsen, Steffen --- source/adapters/opencl/physical_mem.cpp | 6 +++--- source/adapters/opencl/virtual_mem.cpp | 15 +++++++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/source/adapters/opencl/physical_mem.cpp b/source/adapters/opencl/physical_mem.cpp index 2cd3f4a625..80b35068a1 100644 --- a/source/adapters/opencl/physical_mem.cpp +++ b/source/adapters/opencl/physical_mem.cpp @@ -15,21 +15,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( ur_context_handle_t, ur_device_handle_t, size_t, const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRetain(ur_physical_mem_handle_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL 
adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRelease(ur_physical_mem_handle_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/virtual_mem.cpp b/source/adapters/opencl/virtual_mem.cpp index a908a31818..2dde2ef5e1 100644 --- a/source/adapters/opencl/virtual_mem.cpp +++ b/source/adapters/opencl/virtual_mem.cpp @@ -10,13 +10,12 @@ #include "common.hpp" #include "context.hpp" -#include "event.hpp" #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -24,21 +23,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, const void *, size_t, void **) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, const void *, size_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { - detail::ur::die("Virtual 
memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -46,14 +45,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, ur_virtual_mem_access_flags_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, const void *, size_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -63,7 +62,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, ur_virtual_mem_info_t, size_t, void *, size_t *) { - detail::ur::die("Virtual memory extension is not currently implemented for " + cl_adapter:die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 75c4dd4206cfca51c2f0c97d6164fd1d5bd6c6f7 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 21 Nov 2023 03:54:57 -0800 Subject: [PATCH 021/138] Fix wrong die prefix Signed-off-by: Larsen, Steffen --- source/adapters/opencl/virtual_mem.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/adapters/opencl/virtual_mem.cpp b/source/adapters/opencl/virtual_mem.cpp index 2dde2ef5e1..479297593c 100644 --- a/source/adapters/opencl/virtual_mem.cpp +++ b/source/adapters/opencl/virtual_mem.cpp @@ -15,7 +15,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, 
ur_device_handle_t, ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { - cl_adapter:die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -23,21 +23,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, const void *, size_t, void **) { - cl_adapter:die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, const void *, size_t) { - cl_adapter:die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { - cl_adapter:die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -45,14 +45,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, ur_virtual_mem_access_flags_t) { - cl_adapter:die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, const void *, size_t) { - cl_adapter:die("Virtual 
memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -62,7 +62,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, ur_virtual_mem_info_t, size_t, void *, size_t *) { - cl_adapter:die("Virtual memory extension is not currently implemented for " + cl_adapter::die("Virtual memory extension is not currently implemented for " "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From a0b045d5ec232890ba6aa3ba7c9c9dcf785336a4 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 22 Nov 2023 00:01:24 -0800 Subject: [PATCH 022/138] Avoid death Signed-off-by: Larsen, Steffen --- source/adapters/hip/physical_mem.cpp | 6 ------ source/adapters/hip/virtual_mem.cpp | 14 -------------- source/adapters/native_cpu/physical_mem.cpp | 6 ------ source/adapters/native_cpu/virtual_mem.cpp | 14 -------------- source/adapters/opencl/physical_mem.cpp | 6 ------ source/adapters/opencl/virtual_mem.cpp | 14 -------------- 6 files changed, 60 deletions(-) diff --git a/source/adapters/hip/physical_mem.cpp b/source/adapters/hip/physical_mem.cpp index 8939d89d33..f0003b6c00 100644 --- a/source/adapters/hip/physical_mem.cpp +++ b/source/adapters/hip/physical_mem.cpp @@ -16,21 +16,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( ur_context_handle_t, ur_device_handle_t, size_t, const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRetain(ur_physical_mem_handle_t) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRelease(ur_physical_mem_handle_t) { - 
detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/hip/virtual_mem.cpp b/source/adapters/hip/virtual_mem.cpp index 6330451797..12cf9f838e 100644 --- a/source/adapters/hip/virtual_mem.cpp +++ b/source/adapters/hip/virtual_mem.cpp @@ -16,45 +16,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, const void *, size_t, void **) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, const void *, size_t) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, ur_virtual_mem_access_flags_t) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, const void *, size_t) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ 
-63,7 +51,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, ur_virtual_mem_info_t, size_t, void *, size_t *) { - detail::ur::die( - "Virtual memory extension is not currently implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/native_cpu/physical_mem.cpp b/source/adapters/native_cpu/physical_mem.cpp index c3b5acfc58..0593ff9403 100644 --- a/source/adapters/native_cpu/physical_mem.cpp +++ b/source/adapters/native_cpu/physical_mem.cpp @@ -16,21 +16,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( ur_context_handle_t, ur_device_handle_t, size_t, const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRetain(ur_physical_mem_handle_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRelease(ur_physical_mem_handle_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/native_cpu/virtual_mem.cpp b/source/adapters/native_cpu/virtual_mem.cpp index 7a95b8dcb7..ffdbb15810 100644 --- a/source/adapters/native_cpu/virtual_mem.cpp +++ b/source/adapters/native_cpu/virtual_mem.cpp @@ -16,45 +16,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, 
const void *, size_t, void **) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, const void *, size_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemMap( ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, ur_virtual_mem_access_flags_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, const void *, size_t) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -63,7 +51,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, ur_virtual_mem_info_t, size_t, void *, size_t *) { - detail::ur::die("Virtual memory extension is not currently implemented for " - "NATIVE CPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/physical_mem.cpp b/source/adapters/opencl/physical_mem.cpp index 80b35068a1..9fffd0f979 100644 --- a/source/adapters/opencl/physical_mem.cpp +++ b/source/adapters/opencl/physical_mem.cpp @@ -15,21 +15,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( ur_context_handle_t, ur_device_handle_t, size_t, const ur_physical_mem_properties_t *, ur_physical_mem_handle_t *) { - 
cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRetain(ur_physical_mem_handle_t) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemRelease(ur_physical_mem_handle_t) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/virtual_mem.cpp b/source/adapters/opencl/virtual_mem.cpp index 479297593c..7c411d9b7b 100644 --- a/source/adapters/opencl/virtual_mem.cpp +++ b/source/adapters/opencl/virtual_mem.cpp @@ -15,45 +15,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t, const void *, size_t, void **) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t, const void *, size_t) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemSetAccess( ur_context_handle_t, const void *, size_t, ur_virtual_mem_access_flags_t) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL 
urVirtualMemMap( ur_context_handle_t, const void *, size_t, ur_physical_mem_handle_t, size_t, ur_virtual_mem_access_flags_t) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t, const void *, size_t) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -62,7 +50,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t, ur_virtual_mem_info_t, size_t, void *, size_t *) { - cl_adapter::die("Virtual memory extension is not currently implemented for " - "OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 31aba086cf63c80f43ba39de02ee037e15faba17 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 23 Nov 2023 05:09:41 -0800 Subject: [PATCH 023/138] Fix return value of virtual mem info call Signed-off-by: Larsen, Steffen --- source/adapters/level_zero/virtual_mem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index 545f9fde54..de72502614 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -108,7 +108,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( RetFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; if (Access & ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY) RetFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; - return ReturnValue(Access); + return ReturnValue(RetFlags); } default: urPrint("Unsupported propName in urQueueGetInfo: propName=%d(0x%x)\n", From 40c8da9b9ca637afbd887b1e5a3deb7f7581febc Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Mon, 2 Oct 2023 08:32:07 -0700 Subject: [PATCH 024/138] [UR][L0] Check Global Mem Size as Limit for Free Memory Signed-off-by: Spruit, Neil R --- 
source/adapters/level_zero/device.cpp | 35 ++++++++++++++++++--------- source/adapters/level_zero/device.hpp | 5 ++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index f5b00d80cc..ec6a294c21 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -88,6 +88,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( return UR_RESULT_SUCCESS; } +uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { + // Cache GlobalMemSize + Device->ZeGlobalMemSize.Compute = + [Device](struct ze_global_memsize &GlobalMemSize) { + for (const auto &ZeDeviceMemoryExtProperty : + Device->ZeDeviceMemoryProperties->second) { + GlobalMemSize.value += ZeDeviceMemoryExtProperty.physicalSize; + } + if (GlobalMemSize.value == 0) { + for (const auto &ZeDeviceMemoryProperty : + Device->ZeDeviceMemoryProperties->first) { + GlobalMemSize.value += ZeDeviceMemoryProperty.totalSize; + } + } + }; + return Device->ZeGlobalMemSize.operator->()->value; +} + UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve @@ -251,20 +269,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { - uint64_t GlobalMemSize = 0; // Support to read physicalSize depends on kernel, // so fallback into reading totalSize if physicalSize // is not available. 
- for (const auto &ZeDeviceMemoryExtProperty : - Device->ZeDeviceMemoryProperties->second) { - GlobalMemSize += ZeDeviceMemoryExtProperty.physicalSize; - } - if (GlobalMemSize == 0) { - for (const auto &ZeDeviceMemoryProperty : - Device->ZeDeviceMemoryProperties->first) { - GlobalMemSize += ZeDeviceMemoryProperty.totalSize; - } - } + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); return ReturnValue(uint64_t{GlobalMemSize}); } case UR_DEVICE_INFO_LOCAL_MEM_SIZE: @@ -637,6 +645,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( static_cast(ZE_RESULT_ERROR_UNINITIALIZED)); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } + // Calculate the global memory size as the max limit that can be reported as + // "free" memory for the user to allocate. + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); // Only report device memory which zeMemAllocDevice can allocate from. // Currently this is only the one enumerated with ordinal 0. uint64_t FreeMemory = 0; @@ -661,7 +672,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } } } - return ReturnValue(FreeMemory); + return ReturnValue(std::min(GlobalMemSize, FreeMemory)); } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { // If there are not any memory modules then return 0. 
diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 35404c6525..bdae64beba 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -39,6 +39,10 @@ enum EventsScope { LastCommandInBatchHostVisible }; +struct ze_global_memsize { + uint64_t value; +}; + struct ur_device_handle_t_ : _ur_object { ur_device_handle_t_(ze_device_handle_t Device, ur_platform_handle_t Plt, ur_device_handle_t ParentDevice = nullptr) @@ -170,4 +174,5 @@ struct ur_device_handle_t_ : _ur_object { ZeDeviceMemoryAccessProperties; ZeCache> ZeDeviceCacheProperties; ZeCache> ZeDeviceIpVersionExt; + ZeCache ZeGlobalMemSize; }; From 28590a82e9a4b63612f7319760dae4f0d02c9d3b Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Mon, 20 Nov 2023 21:37:17 -0800 Subject: [PATCH 025/138] [UR][L0] Unify use of large allocation in L0 adapter Intel(R) GPUs have two modes of operation in terms of allocations: Stateful and stateless mode. Stateful optimizes memory accesses through pointer arithmetic. This can be done as long as allocations used by the kernel are smaller than 4GB. Stateless disables such pointer-arithmetic optimization to allow the kernel to use allocations larger than 4GB. Currently, L0 adapter dynamically and automatically requests large allocations from the L0 driver if it detects that an allocation size is larger than 4GB. This creates a problem if a kernel has been previously compiled for stateful access. This ultimately means the adapter mixes stateful and stateless behavior, which is not a user-friendly experience. This patch aims at correcting this behavior by defining a default one. On Intel(R) GPUs prior to Intel(R) Data Center GPU Max, default behavior is now stateless, meaning allocations larger than 4GB are allowed by default. Users can opt in to stateful mode by setting a new environment variable UR_L0_USE_OPTIMIZED_32BIT_ACCESS=1.
Addresses: https://stackoverflow.com/questions/75621264/sycl-dot-product-code-gives-wrong-results Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/device.cpp | 24 +++++++++++++++++++++- source/adapters/level_zero/device.hpp | 16 +++++++++++++++ source/adapters/level_zero/program.cpp | 28 ++++++++++++++++++++++++-- source/adapters/level_zero/usm.cpp | 8 +++++--- 4 files changed, 70 insertions(+), 6 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index ec6a294c21..acc7c755f4 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -267,7 +267,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(uint32_t{64}); } case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + // if not optimized for 32-bit access, return total memory size. + // otherwise, return only maximum allocatable size. + if (Device->useOptimized32bitAccess() == 0) { + return ReturnValue(uint64_t{calculateGlobalMemSize(Device)}); + } else { + return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + } case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { // Support to read physicalSize depends on kernel, // so fallback into reading totalSize if physicalSize @@ -911,6 +917,22 @@ ur_device_handle_t_::useImmediateCommandLists() { } } +int32_t ur_device_handle_t_::useOptimized32bitAccess() { + static const int32_t Optimize32bitAccessMode = [this] { + // If device is Intel(R) Data Center GPU Max, + // use default provided by L0 driver. 
+ // TODO: Use IP versioning to select based on range of devices + if (this->isPVC()) + return -1; + const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS"); + if (!UrRet) + return 0; + return std::atoi(UrRet); + }(); + + return Optimize32bitAccessMode; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index bdae64beba..5f34efab44 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -145,6 +145,22 @@ struct ur_device_handle_t_ : _ur_object { // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; + // Returns whether large allocations are being used + // or not to have a consistent behavior throughout + // the adapter between the creation of large allocations + // and the compilation of kernels into stateful and + // stateless modes. + // With stateful mode, kernels are compiled with + // pointer-arithmetic optimizations for optimized + // access of allocations smaller than 4GB. + // In stateless mode, such optimizations are not + // applied. + // Even if a GPU supports both modes, L0 driver may + // provide support for only one, like for Intel(R) + // Data Center GPU Max, for which L0 driver only + // supports stateless. + int32_t useOptimized32bitAccess(); + bool isSubDevice() { return RootDevice != nullptr; } // Is this a Data Center GPU Max series (aka PVC)? diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 92a3c87aea..f118a5b9dd 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -148,9 +148,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) ? 
ZE_MODULE_FORMAT_IL_SPIRV : ZE_MODULE_FORMAT_NATIVE; + ZeModuleDesc.inputSize = hProgram->CodeLength; ZeModuleDesc.pInputModule = hProgram->Code.get(); - ZeModuleDesc.pBuildFlags = pOptions; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. + std::string ZeBuildOptions{}; + if (pOptions) { + ZeBuildOptions += pOptions; + } + + if (phDevices[0]->useOptimized32bitAccess() == 0) { + ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required"; + } + + ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; @@ -234,8 +249,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( // This produces better code because the driver can do cross-module // optimizations. Therefore, we just remember the compilation flags, so we // can use them later. - if (Options) + if (Options) { Program->BuildFlags = Options; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. 
+ if (Context->Devices[0]->useOptimized32bitAccess() == 0) { + Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required"; + } + } Program->State = ur_program_handle_t_::Object; return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index daec0408fb..c6d98855e7 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -178,9 +178,11 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.flags = 0; ZeDesc.ordinal = 0; - ZeStruct RelaxedDesc; - if (Size > Device->ZeDeviceProperties->maxMemAllocSize) { - // Tell Level-Zero to accept Size > maxMemAllocSize + if (Device->useOptimized32bitAccess() == 0 && + (Size > Device->ZeDeviceProperties->maxMemAllocSize)) { + // Tell Level-Zero to accept Size > maxMemAllocSize if + // large allocations are used. + ZeStruct RelaxedDesc; RelaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; ZeDesc.pNext = &RelaxedDesc; } From ca9b67a94e83eb0c36143bfd89829d409e2aeec4 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 29 Nov 2023 05:47:00 -0800 Subject: [PATCH 026/138] Remove passing match lines Signed-off-by: Larsen, Steffen --- .../virtual_memory_adapter_cuda.match | 91 ------------------- .../virtual_memory_adapter_opencl.match | 4 - 2 files changed, 95 deletions(-) diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match b/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match index 4c5b8e4a62..7af199a3f2 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match @@ -1,39 +1,4 @@ -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___7 
-urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___4000 urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___1 
-urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}___12345 urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___1 urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___2 urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___3 @@ -43,61 +8,5 @@ urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___44 urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___1024 urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___4000 urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___12345 -urPhysicalMemReleaseTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}_ -urPhysicalMemRetainTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urPhysicalMemRetainTest.InvalidNullHandlePhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemFreeTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemFreeTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ urVirtualMemFreeTest.InvalidNullPointerStart/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGetInfoTestWithParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_VIRTUAL_MEM_INFO_ACCESS_MODE -urVirtualMemGetInfoTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGetInfoTest.InvalidNullPointerStart/NVIDIA_CUDA_BACKEND___{{.*}}_ 
-urVirtualMemGetInfoTest.InvalidEnumerationInfo/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM -urVirtualMemGranularityGetInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ urVirtualMemGranularityGetInfoNegativeTest.InvalidEnumeration/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullPointerPropValue/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidPropSizeZero/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemMapTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandlePhysicalMem/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidNullPointerStart/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidEnumerationFlags/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___64 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___256 
-urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___512 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___100000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___64 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___512 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/NVIDIA_CUDA_BACKEND___{{.*}}___100000 -urVirtualMemReserveTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemReserveTest.InvalidNullPointer/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemSetAccessTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ 
-urVirtualMemSetAccessTest.InvalidNullPointerStart/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemUnmapTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullPointerStart/NVIDIA_CUDA_BACKEND___{{.*}}_ diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_opencl.match b/test/conformance/virtual_memory/virtual_memory_adapter_opencl.match index bf82e6fb92..5491914e8a 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_opencl.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_opencl.match @@ -4,11 +4,7 @@ urVirtualMemGetInfoTest.InvalidNullPointerStart/Intel_R__OpenCL___{{.*}}_ urVirtualMemGetInfoTest.InvalidEnumerationInfo/Intel_R__OpenCL___{{.*}}_ urVirtualMemGranularityGetInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM urVirtualMemGranularityGetInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ urVirtualMemGranularityGetInfoNegativeTest.InvalidEnumeration/Intel_R__OpenCL___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidPropSizeZero/Intel_R__OpenCL___{{.*}}_ urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__OpenCL___{{.*}}_ urVirtualMemSetAccessTest.Success/Intel_R__OpenCL___{{.*}}_ urVirtualMemSetAccessTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ From 548019503f873364d541e2beac350a8b05581668 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 30 Nov 2023 06:30:04 -0800 Subject: [PATCH 027/138] Address feedback Signed-off-by: Larsen, Steffen --- source/adapters/cuda/physical_mem.cpp | 7 ++++++- 
test/conformance/virtual_memory/urPhysicalMemCreate.cpp | 3 +-- test/conformance/virtual_memory/urVirtualMemFree.cpp | 2 +- .../virtual_memory/urVirtualMemGranularityGetInfo.cpp | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/source/adapters/cuda/physical_mem.cpp b/source/adapters/cuda/physical_mem.cpp index 444d492aa3..e2c46696a0 100644 --- a/source/adapters/cuda/physical_mem.cpp +++ b/source/adapters/cuda/physical_mem.cpp @@ -26,7 +26,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( UR_CHECK_ERROR(GetDeviceOrdinal(hDevice, AllocProps.location.id)); CUmemGenericAllocationHandle ResHandle; - UR_CHECK_ERROR(cuMemCreate(&ResHandle, size, &AllocProps, 0)); + switch (auto Result = cuMemCreate(&ResHandle, size, &AllocProps, 0)) { + case CUDA_ERROR_INVALID_VALUE: + return UR_RESULT_ERROR_INVALID_SIZE; + default: + UR_CHECK_ERROR(Result); + } *phPhysicalMem = new ur_physical_mem_handle_t_(ResHandle, hContext); return UR_RESULT_SUCCESS; diff --git a/test/conformance/virtual_memory/urPhysicalMemCreate.cpp b/test/conformance/virtual_memory/urPhysicalMemCreate.cpp index 078b0e68db..e5124da1cc 100644 --- a/test/conformance/virtual_memory/urPhysicalMemCreate.cpp +++ b/test/conformance/virtual_memory/urPhysicalMemCreate.cpp @@ -16,8 +16,7 @@ struct urPhysicalMemCreateTest size_t size; }; -UUR_TEST_SUITE_P(urPhysicalMemCreateTest, - ::testing::Values(1, 2, 3, 7, 12, 44, 1024, 4000, 12345), +UUR_TEST_SUITE_P(urPhysicalMemCreateTest, ::testing::Values(1, 2, 3, 7, 12, 44), uur::deviceTestWithParamPrinter); TEST_P(urPhysicalMemCreateTest, Success) { diff --git a/test/conformance/virtual_memory/urVirtualMemFree.cpp b/test/conformance/virtual_memory/urVirtualMemFree.cpp index 6cb8795547..3b4c4babc5 100644 --- a/test/conformance/virtual_memory/urVirtualMemFree.cpp +++ b/test/conformance/virtual_memory/urVirtualMemFree.cpp @@ -19,5 +19,5 @@ TEST_P(urVirtualMemFreeTest, InvalidNullHandleContext) { TEST_P(urVirtualMemFreeTest, InvalidNullPointerStart) { 
ASSERT_EQ_RESULT(urVirtualMemFree(context, nullptr, size), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_RESULT_ERROR_INVALID_NULL_POINTER); } diff --git a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp index d4feccd6dc..df8c28792f 100644 --- a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp +++ b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp @@ -60,7 +60,7 @@ TEST_P(urVirtualMemGranularityGetInfoNegativeTest, InvalidEnumeration) { context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_FORCE_UINT32, 0, nullptr, &size), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_RESULT_ERROR_INVALID_ENUMERATION); } TEST_P(urVirtualMemGranularityGetInfoNegativeTest, From 2940ec8f135f43d4c3836e38565f96f124b7c1e9 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 30 Nov 2023 07:01:20 -0800 Subject: [PATCH 028/138] Remove virtual mem CUDA matches Signed-off-by: Larsen, Steffen --- .../virtual_memory/virtual_memory_adapter_cuda.match | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match b/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match index 7af199a3f2..e69de29bb2 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_cuda.match @@ -1,12 +0,0 @@ -urPhysicalMemCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___44 
-urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}___12345 -urVirtualMemFreeTest.InvalidNullPointerStart/NVIDIA_CUDA_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidEnumeration/NVIDIA_CUDA_BACKEND___{{.*}}_ From b78f541d27246f542287e77496fea7c2f04aadf5 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 24 Nov 2023 10:48:11 +0000 Subject: [PATCH 029/138] [OpenCL] Add usm_alloc_location_desc struct and handle it in the adapter. This is the equivalent of buffer_alloc_location_properties_t, it turns out the CL extension flag is applicable to both buffer creation and USM allocations. --- include/ur.py | 20 +++ include/ur_api.h | 23 +++ include/ur_print.hpp | 34 ++++ scripts/core/registry.yml | 3 + scripts/core/usm.yml | 20 +++ source/adapters/opencl/usm.cpp | 158 ++++++++++-------- source/loader/ur_libapi.cpp | 3 + source/ur_api.cpp | 3 + test/conformance/usm/usm_adapter_opencl.match | 2 - 9 files changed, 194 insertions(+), 72 deletions(-) diff --git a/include/ur.py b/include/ur.py index 09b7955e07..90cc06d895 100644 --- a/include/ur.py +++ b/include/ur.py @@ -249,6 +249,7 @@ class ur_structure_type_v(IntEnum): KERNEL_EXEC_INFO_PROPERTIES = 31 ## ::ur_kernel_exec_info_properties_t KERNEL_ARG_VALUE_PROPERTIES = 32 ## ::ur_kernel_arg_value_properties_t KERNEL_ARG_LOCAL_PROPERTIES = 33 ## ::ur_kernel_arg_local_properties_t + USM_ALLOC_LOCATION_DESC = 35 ## ::ur_usm_alloc_location_desc_t EXP_COMMAND_BUFFER_DESC = 0x1000 ## ::ur_exp_command_buffer_desc_t EXP_SAMPLER_MIP_PROPERTIES = 0x2000 ## ::ur_exp_sampler_mip_properties_t EXP_INTEROP_MEM_DESC = 0x2001 ## ::ur_exp_interop_mem_desc_t @@ -1558,6 +1559,25 @@ class ur_usm_device_desc_t(Structure): ("flags", ur_usm_device_mem_flags_t) ## [in] device memory allocation flags. 
] +############################################################################### +## @brief USM allocation location desc +## +## @details +## - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and +## ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain. +## +## @remarks +## _Analogues_ +## - cl_intel_mem_alloc_buffer_location +class ur_usm_alloc_location_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("location", c_ulong) ## [in] Identifies the ID of global memory partition to which the memory + ## should be allocated. + ] + ############################################################################### ## @brief USM pool descriptor type class ur_usm_pool_desc_t(Structure): diff --git a/include/ur_api.h b/include/ur_api.h index 09f6d77a6b..745d2ad3a5 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -258,6 +258,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t @@ -3287,6 +3288,25 @@ typedef struct ur_usm_device_desc_t { } ur_usm_device_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief USM allocation location desc +/// +/// @details +/// - Specify these properties in ::urUSMHostAlloc, 
::urUSMDeviceAlloc and +/// ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain. +/// +/// @remarks +/// _Analogues_ +/// - cl_intel_mem_alloc_buffer_location +typedef struct ur_usm_alloc_location_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t location; ///< [in] Identifies the ID of global memory partition to which the memory + ///< should be allocated. + +} ur_usm_alloc_location_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief USM pool descriptor type typedef struct ur_usm_pool_desc_t { @@ -3324,6 +3344,7 @@ typedef struct ur_usm_pool_limits_desc_t { /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -3369,6 +3390,7 @@ urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -3417,6 +3439,7 @@ urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. 
/// /// @returns /// - ::UR_RESULT_SUCCESS diff --git a/include/ur_print.hpp b/include/ur_print.hpp index dc7442068c..9cfa4d9815 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -267,6 +267,7 @@ inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_host_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_device_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_alloc_location_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_limits_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_usm_pool_info_t value); @@ -993,6 +994,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: + os << "UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC"; + break; case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; @@ -1204,6 +1208,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + const ur_usm_alloc_location_desc_t *pstruct = (const ur_usm_alloc_location_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: { const ur_exp_command_buffer_desc_t *pstruct = (const ur_exp_command_buffer_desc_t *)ptr; printPtr(os, pstruct); @@ -6537,6 +6546,31 @@ inline std::ostream &operator<<(std::ostream &os, 
const struct ur_usm_device_des return os; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_alloc_location_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_alloc_location_desc_t params) { + os << "(struct ur_usm_alloc_location_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".location = "; + + os << (params.location); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_pool_desc_t type /// @returns /// std::ostream & diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index deb5ee9604..6195cd4980 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -666,3 +666,6 @@ etors: - name: KERNEL_ARG_LOCAL_PROPERTIES desc: $x_kernel_arg_local_properties_t value: '33' +- name: USM_ALLOC_LOCATION_DESC + desc: $x_usm_alloc_location_desc_t + value: '35' diff --git a/scripts/core/usm.yml b/scripts/core/usm.yml index 0b793d7226..1476eec34a 100644 --- a/scripts/core/usm.yml +++ b/scripts/core/usm.yml @@ -175,6 +175,23 @@ members: desc: "[in] device memory allocation flags." --- #-------------------------------------------------------------------------- type: struct +desc: "USM allocation location desc" +details: + - Specify these properties in $xUSMHostAlloc, $xUSMDeviceAlloc and + $xUSMSharedAlloc via $x_usm_desc_t as part of a `pNext` chain. +analogue: + - "cl_intel_mem_alloc_buffer_location" +class: $xUSM +name: $x_usm_alloc_location_desc_t +base: $x_base_desc_t +members: + - type: uint32_t + name: location + desc: > + [in] Identifies the ID of global memory partition to which the memory + should be allocated. 
+--- #-------------------------------------------------------------------------- +type: struct desc: "USM pool descriptor type" class: $xUSM name: $x_usm_pool_desc_t @@ -212,6 +229,7 @@ details: - "Allocations served from different memory pools must be isolated and must not reside on the same page." - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_host_desc_t." + - "See also $x_usm_alloc_location_desc_t." params: - type: $x_context_handle_t name: hContext @@ -253,6 +271,7 @@ details: - "Allocations served from different memory pools must be isolated and must not reside on the same page." - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_device_desc_t." + - "See also $x_usm_alloc_location_desc_t." params: - type: $x_context_handle_t name: hContext @@ -298,6 +317,7 @@ details: - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_host_desc_t." - "See also $x_usm_device_desc_t." + - "See also $x_usm_alloc_location_desc_t." 
params: - type: $x_context_handle_t name: hContext diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 5d46aec2ef..0d64f23d13 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -10,6 +10,75 @@ #include "common.hpp" +inline cl_mem_alloc_flags_intel +hostDescToClFlags(const ur_usm_host_desc_t &desc) { + cl_mem_alloc_flags_intel allocFlags = 0; + if (desc.flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + allocFlags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + return allocFlags; +} + +inline cl_mem_alloc_flags_intel +deviceDescToClFlags(const ur_usm_device_desc_t &desc) { + cl_mem_alloc_flags_intel allocFlags = 0; + if (desc.flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + allocFlags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (desc.flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + allocFlags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + return allocFlags; +} + +ur_result_t +usmDescToCLMemProperties(const ur_base_desc_t *Desc, + std::vector &Properties) { + cl_mem_alloc_flags_intel AllocFlags = 0; + const auto *Next = Desc; + do { + switch (Next->stype) { + case UR_STRUCTURE_TYPE_USM_HOST_DESC: { + auto HostDesc = reinterpret_cast(Next); + if (UR_USM_HOST_MEM_FLAGS_MASK & HostDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + AllocFlags |= hostDescToClFlags(*HostDesc); + break; + } + case UR_STRUCTURE_TYPE_USM_DEVICE_DESC: { + auto DeviceDesc = reinterpret_cast(Next); + if (UR_USM_HOST_MEM_FLAGS_MASK & DeviceDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + AllocFlags |= deviceDescToClFlags(*DeviceDesc); + break; + } + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + auto LocationDesc = + reinterpret_cast(Next); + Properties.push_back(CL_MEM_ALLOC_BUFFER_LOCATION_INTEL); + // CL bitfields are cl_ulong + Properties.push_back(static_cast(LocationDesc->location)); + break; + } + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } + + Next = 
Next->pNext ? static_cast(Next->pNext) + : nullptr; + } while (Next); + + if (AllocFlags) { + Properties.push_back(CL_MEM_ALLOC_FLAGS_INTEL); + Properties.push_back(AllocFlags); + } + Properties.push_back(0); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -17,23 +86,10 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - cl_mem_properties_intel Properties[3]; - - if (pUSMDesc && pUSMDesc->pNext && - static_cast(pUSMDesc->pNext)->stype == - UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const auto *HostDesc = - static_cast(pUSMDesc->pNext); - - if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; - } - Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; - Properties[1] = Flags; - Properties[2] = 0; - } else { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -47,7 +103,9 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, Properties, size, Alignment, &ClResult); + Ptr = FuncPtr(CLContext, + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -71,25 +129,10 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? 
pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - cl_mem_properties_intel Properties[3]; - if (pUSMDesc && pUSMDesc->pNext && - static_cast(pUSMDesc->pNext)->stype == - UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const auto *HostDesc = - static_cast(pUSMDesc->pNext); - - if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; - } - if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { - Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; - } - Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; - Properties[1] = Flags; - Properties[2] = 0; - } else { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -104,8 +147,8 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - cl_adapter::cast(Properties), size, - Alignment, &ClResult); + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -129,35 +172,10 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - const auto *NextStruct = - (pUSMDesc ? 
static_cast(pUSMDesc->pNext) - : nullptr); - while (NextStruct) { - if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const auto *HostDesc = - reinterpret_cast(NextStruct); - if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; - } - } else if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const auto *DevDesc = - reinterpret_cast(NextStruct); - if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; - } - if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { - Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; - } - } - NextStruct = static_cast(NextStruct->pNext); - } - - cl_mem_properties_intel Properties[3] = {CL_MEM_ALLOC_FLAGS_INTEL, Flags, 0}; - - // Passing a flags value of 0 doesn't work, so truncate the properties - if (Flags == 0) { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -172,8 +190,8 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&ClResult)); + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, cl_adapter::cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index de9e029536..94ff5e4edf 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2121,6 +2121,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. 
+/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -2177,6 +2178,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -2236,6 +2238,7 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS diff --git a/source/ur_api.cpp b/source/ur_api.cpp index ca1f82019c..6e6121febd 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -1811,6 +1811,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -1861,6 +1862,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -1914,6 +1916,7 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. 
/// /// @returns /// - ::UR_RESULT_SUCCESS diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match index b9aa3f3bdf..16211ba8e7 100644 --- a/test/conformance/usm/usm_adapter_opencl.match +++ b/test/conformance/usm/usm_adapter_opencl.match @@ -1,6 +1,5 @@ urUSMDeviceAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolDisabled urUSMDeviceAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.InvalidNullPtrResult/Intel_R__OpenCL___{{.*}}___UsePoolEnabled @@ -9,7 +8,6 @@ urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UseP urUSMAllocInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_USM_ALLOC_INFO_POOL urUSMHostAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolDisabled urUSMHostAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled From c3a98eac9e37fbdcdbaabdc5a3180a8b8afa1949 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 1 Dec 2023 06:55:13 -0800 Subject: [PATCH 030/138] Adjust L0 match file Signed-off-by: Larsen, Steffen --- .../virtual_memory_adapter_level_zero.match | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match index 2d02cac1f9..dbb7cdebd5 100644 --- 
a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match @@ -4,45 +4,30 @@ urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero_ urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4000 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12345 urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12345 urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 
urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12345 urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12345 
urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12345 urPhysicalMemReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urPhysicalMemRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ From 31da61b419f316b5bb3d4b024ba3bf3bab334fea Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 1 Dec 2023 07:30:38 -0800 Subject: [PATCH 031/138] Remove L0 Virtual Mem device query from expected failures Signed-off-by: Larsen, Steffen --- test/conformance/device/device_adapter_level_zero.match | 1 - 1 file changed, 1 deletion(-) diff --git a/test/conformance/device/device_adapter_level_zero.match b/test/conformance/device/device_adapter_level_zero.match index 9711e9152b..e7f102b919 100644 --- a/test/conformance/device/device_adapter_level_zero.match +++ b/test/conformance/device/device_adapter_level_zero.match @@ -17,4 +17,3 @@ urDeviceGetInfoTest.Success/UR_DEVICE_INFO_ASYNC_BARRIER urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT 
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP -urDeviceGetInfoTest.Success/UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT From 63d395789ded61a77f1a31efbb22fb149464bd63 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 1 Dec 2023 08:05:54 -0800 Subject: [PATCH 032/138] Remove HIP expected failures Signed-off-by: Larsen, Steffen --- .../virtual_memory_adapter_hip.match | 103 ------------------ 1 file changed, 103 deletions(-) diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_hip.match b/test/conformance/virtual_memory/virtual_memory_adapter_hip.match index 2e26995f76..e69de29bb2 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_hip.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_hip.match @@ -1,103 +0,0 @@ -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.Success/AMD_HIP_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___1024 
-urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/AMD_HIP_BACKEND___{{.*}}___12345 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___1 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___2 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___3 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___7 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___12 
-urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___44 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___1024 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___4000 -urPhysicalMemCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___12345 -urPhysicalMemReleaseTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/AMD_HIP_BACKEND___{{.*}}_ -urPhysicalMemRetainTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urPhysicalMemRetainTest.InvalidNullHandlePhysicalMem/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemFreeTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemFreeTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemFreeTest.InvalidNullPointerStart/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGetInfoTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_VIRTUAL_MEM_INFO_ACCESS_MODE -urVirtualMemGetInfoTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGetInfoTest.InvalidNullPointerStart/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGetInfoTest.InvalidEnumerationInfo/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM -urVirtualMemGranularityGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidEnumeration/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidNullPointerPropValue/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidPropSizeZero/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemMapTest.Success/AMD_HIP_BACKEND___{{.*}}_ 
-urVirtualMemMapTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandlePhysicalMem/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidNullPointerStart/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemMapTest.InvalidEnumerationFlags/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___64 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___512 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/AMD_HIP_BACKEND___{{.*}}___100000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___64 
-urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___512 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/AMD_HIP_BACKEND___{{.*}}___100000 -urVirtualMemReserveTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemReserveTest.InvalidNullPointer/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemSetAccessTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullPointerStart/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemUnmapTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullPointerStart/AMD_HIP_BACKEND___{{.*}}_ From 53f3383118af9fc60ab91f44a9f384813b9a81ab Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Mon, 4 Dec 2023 11:27:54 +0000 Subject: [PATCH 033/138] [OpenCL] Fix UR_ADAPTER_INFO_BACKEND value --- source/adapters/opencl/adapter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index f1d710ebb4..8ae1e77755 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -66,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, switch (propName) { case UR_ADAPTER_INFO_BACKEND: - return ReturnValue(UR_ADAPTER_BACKEND_CUDA); + return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); case UR_ADAPTER_INFO_REFERENCE_COUNT: return 
ReturnValue(adapter.RefCount.load()); default: From b58375a2e49ea90a3f92fb70aa262935f054ee51 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 9 Oct 2023 14:11:40 +0100 Subject: [PATCH 034/138] [EXP][CMDBUF] Add adapters code for Prefetch and Advise commands Adds adapters code support for prefetch and advise memory hints for level_zero backend. Adds entry points for prefetch and advise memory hints for CUDA backend. --- source/adapters/cuda/command_buffer.cpp | 32 ++++++++ source/adapters/cuda/ur_interface_loader.cpp | 2 + source/adapters/level_zero/command_buffer.cpp | 74 +++++++++++++++++++ .../level_zero/ur_interface_loader.cpp | 2 + 4 files changed, 110 insertions(+) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 24a5d9497c..49ab0b813e 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -525,6 +525,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return Result; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void * /* Mem */, + size_t /*Size*/, ur_usm_migration_flags_t /*Flags*/, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void * /* Mem */, + size_t /*Size*/, ur_usm_advice_flags_t /*Advice*/, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + 
(void)hCommandBuffer; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index e3258f379d..049e532dfe 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -289,6 +289,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e8f3b061f9..db4fc78938 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -683,6 +683,80 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( SyncPointWaitList, SyncPoint); } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, + ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + std::ignore = Flags; + + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + 
SyncPointWaitList, ZeEventList)); + + if (NumSyncPointsInWaitList) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, + ZeEventList.data())); + } + + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_USM_PREFETCH; + + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + + // TODO: figure out how to translate "flags" + ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, + (CommandBuffer->ZeCommandList, Mem, Size)); + + // TODO: Level Zero does not have a completion "event" with the prefetch API, + // so manually add command to signal our event. + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, + ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + auto ZeAdvice = ur_cast(Advice); + + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + if (NumSyncPointsInWaitList) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, + ZeEventList.data())); + } + + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_USM_ADVISE; + + // Get sync point and register the event with it. 
+ *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + + ZE2UR_CALL(zeCommandListAppendMemAdvise, + (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, + Mem, Size, ZeAdvice)); + + // TODO: Level Zero does not have a completion "event" with the advise API, + // so manually add command to signal our event. + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 09f4405744..5db5bfa6c9 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -336,6 +336,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From da9b81fc4ca9903c02f961524d5f67469cd58454 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 28 Sep 2023 14:26:15 +0100 Subject: [PATCH 035/138] Merge pull request #885 from Bensuo/mfrancepillois/cmd-buffer-prefetch-memadvice [EXP][CMDBUF] Add Prefetch and Advise commands to cmd buffer experimental feature --- include/ur.py | 14 ++++++++++ include/ur_api.h | 28 +++++++++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 3 ++ source/adapters/null/ur_nullddi.cpp | 6 ++++ source/loader/layers/tracing/ur_trcddi.cpp | 8 ++++++ 
source/loader/layers/validation/ur_valddi.cpp | 8 ++++++ 6 files changed, 67 insertions(+) diff --git a/include/ur.py b/include/ur.py index 09b7955e07..45c2cdb350 100644 --- a/include/ur.py +++ b/include/ur.py @@ -3691,6 +3691,20 @@ class ur_usm_exp_dditable_t(Structure): else: _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp +if __use_win_types: + _urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp +if __use_win_types: + _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + ############################################################################### ## @brief Function-pointer for urCommandBufferEnqueueExp if 
__use_win_types: diff --git a/include/ur_api.h b/include/ur_api.h index 09f6d77a6b..1d8c6ca8f9 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -10667,6 +10667,34 @@ typedef struct ur_command_buffer_append_usm_advise_exp_params_t { ur_exp_command_buffer_sync_point_t **ppSyncPoint; } ur_command_buffer_append_usm_advise_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMPrefetchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_prefetch_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_migration_flags_t *pflags; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_prefetch_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMAdviseExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_advise_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_advice_flags_t *padvice; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_advise_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git 
a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..386bf48f37 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -227,6 +227,9 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.3 | Add function definitions for Prefetch and Advise | +| | commands | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index a4e91e3dc0..e8f7d48ae6 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -5457,6 +5457,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = driver::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMPrefetchExp = + driver::urCommandBufferAppendUSMPrefetchExp; + + pDdiTable->pfnAppendUSMAdviseExp = + driver::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; return result; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index d33a3aaf51..792f34ec1f 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6362,6 +6362,14 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_tracing_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; + 
dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ec0df692cf..13ec6eb47f 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8123,6 +8123,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_validation_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_validation_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_validation_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; From 7a833cce436cdf940e5b5e65cd465a32f382d593 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 9 Oct 2023 17:21:57 +0100 Subject: [PATCH 036/138] Adds HIP adapters entry points for prefetch and mem advise --- source/adapters/hip/command_buffer.cpp | 18 ++++++++++++++++++ source/adapters/hip/ur_interface_loader.cpp | 2 ++ 2 files changed, 20 insertions(+) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index d2cd156719..58ebc86928 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -122,6 +122,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t, const void *, size_t, + ur_usm_migration_flags_t, uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *) 
{ + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, + ur_exp_command_buffer_sync_point_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t, ur_queue_handle_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index a02f80957e..94764caaec 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -286,6 +286,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 402494d7d0d9836877782cae07e5a8ddfc3a095c Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 10 Oct 2023 10:13:11 +0100 Subject: [PATCH 037/138] Adds explicit parsing for memory advise + updates comments --- source/adapters/level_zero/command_buffer.cpp | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index db4fc78938..1a5894a72e 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -712,7 +712,7 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendUSMPrefetchExp( ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (CommandBuffer->ZeCommandList, Mem, Size)); - // TODO: Level Zero does not have a completion "event" with the prefetch API, + // Level Zero does not have a completion "event" with the prefetch API, // so manually add command to signal our event. ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); @@ -725,7 +725,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - auto ZeAdvice = ur_cast(Advice); + std::unordered_map + URToCUMemAdviseDeviceFlagsMap = { + {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, + ZE_MEMORY_ADVICE_SET_READ_MOSTLY}, + {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, + ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, + {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, + ZR_MEM_ADVISE_SET_PREFERRED_LOCATION}, + {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, + ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, + {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, + ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY}, + {UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY, + ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY}, + {UR_USM_ADVICE_FLAG_BIAS_CACHED, ZE_MEMORY_ADVICE_BIAS_CACHED}, + {UR_USM_ADVICE_FLAG_BIAS_UNCACHED, ZE_MEMORY_ADVICE_BIAS_UNCACHED}, + {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, + ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION}, + {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, + ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION}}; + + ze_memory_advice_t ZeAdvice = 0; + for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { + if (Advice & FlagPair.first) { + ZeAdvice |= FlagPair.second; + } + } std::vector ZeEventList; UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, @@ -749,7 +775,7 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendUSMAdviseExp( (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, Mem, Size, ZeAdvice)); - // TODO: Level Zero does not have a completion "event" with the advise API, + // Level Zero does not have a completion "event" with the advise API, // so manually add command to signal our event. ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); From 379fc46cc7344b44cf85f2e1475e496f6f1f1355 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 10 Oct 2023 12:22:57 +0100 Subject: [PATCH 038/138] Bugfix --- source/adapters/level_zero/command_buffer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 1a5894a72e..b26a916644 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -732,7 +732,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - ZR_MEM_ADVISE_SET_PREFERRED_LOCATION}, + ZE_MEMORY_ADVISE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, @@ -742,16 +742,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_BIAS_CACHED, ZE_MEMORY_ADVICE_BIAS_CACHED}, {UR_USM_ADVICE_FLAG_BIAS_UNCACHED, ZE_MEMORY_ADVICE_BIAS_UNCACHED}, {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION}, + ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION}}; + ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}}; - ze_memory_advice_t ZeAdvice = 0; + uint32_t Value = 0; for (auto &FlagPair : 
URToCUMemAdviseDeviceFlagsMap) { if (Advice & FlagPair.first) { - ZeAdvice |= FlagPair.second; + Value |= static_cast(FlagPair.second); } } + ze_memory_advice_t ZeAdvice = static_cast(Value); std::vector ZeEventList; UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, From 0c9f62d455fb0d346819f816003eeb33d9f1f03b Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 10 Oct 2023 14:32:06 +0100 Subject: [PATCH 039/138] typos --- source/adapters/level_zero/command_buffer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index b26a916644..7f861a50d6 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -732,7 +732,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - ZE_MEMORY_ADVISE_SET_PREFERRED_LOCATION}, + ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, @@ -744,7 +744,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}}; + ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, + }; uint32_t Value = 0; for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { From f2df3fe2f5462887451d9e5ec8602e70eb17ee8d Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 11 Oct 2023 09:56:25 +0100 Subject: [PATCH 040/138] Changes map usage to if statements to handle advice flags --- source/adapters/level_zero/command_buffer.cpp | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff 
--git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 7f861a50d6..491fa99b61 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -725,34 +725,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::unordered_map - URToCUMemAdviseDeviceFlagsMap = { - {UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, - ZE_MEMORY_ADVICE_SET_READ_MOSTLY}, - {UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, - ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY}, - {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, - ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, - ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY, - ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY}, - {UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY, - ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY}, - {UR_USM_ADVICE_FLAG_BIAS_CACHED, ZE_MEMORY_ADVICE_BIAS_CACHED}, - {UR_USM_ADVICE_FLAG_BIAS_UNCACHED, ZE_MEMORY_ADVICE_BIAS_UNCACHED}, - {UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION}, - {UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, - ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION}, - }; - + // A memory chunk can be advised with muliple memory advices + // We therefore prefer if statements to switch cases to combine all potential + // flags uint32_t Value = 0; - for (auto &FlagPair : URToCUMemAdviseDeviceFlagsMap) { - if (Advice & FlagPair.first) { - Value |= static_cast(FlagPair.second); - } - } + if (Advice & UR_USM_ADVICE_FLAG_SET_READ_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_READ_MOSTLY); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY); + if (Advice & 
UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION); + if (Advice & UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY); + if (Advice & UR_USM_ADVICE_FLAG_BIAS_CACHED) + Value |= static_cast(ZE_MEMORY_ADVICE_BIAS_CACHED); + if (Advice & UR_USM_ADVICE_FLAG_BIAS_UNCACHED) + Value |= static_cast(ZE_MEMORY_ADVICE_BIAS_UNCACHED); + if (Advice & UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST) + Value |= static_cast(ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION); + if (Advice & UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST) + Value |= static_cast(ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION); + ze_memory_advice_t ZeAdvice = static_cast(Value); std::vector ZeEventList; From f135e6c232cb41aadf4b3859fe3212616a5f657d Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 11 Oct 2023 10:34:02 +0100 Subject: [PATCH 041/138] Bugfix hip stub function signature --- source/adapters/hip/command_buffer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 58ebc86928..c85b3e9216 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -134,6 +134,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, + uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); From 5cc3f880d296f433018e9f421d6c40384e28a8a8 
Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 12 Oct 2023 09:59:02 +0100 Subject: [PATCH 042/138] Improves comments --- source/adapters/level_zero/command_buffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 491fa99b61..bb081f9b2d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -708,7 +708,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( *SyncPoint = CommandBuffer->GetNextSyncPoint(); CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - // TODO: figure out how to translate "flags" + // Add the prefetch command to the command buffer. + // Note that L0 does not handle migration flags. ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (CommandBuffer->ZeCommandList, Mem, Size)); From 511637252d5b6e2c7a4fe40772ace944b70aaba8 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 4 Dec 2023 16:52:35 +0000 Subject: [PATCH 043/138] Updates generated files --- include/ur.py | 14 ---------- include/ur_api.h | 28 ------------------- source/adapters/null/ur_nullddi.cpp | 6 ---- source/loader/layers/tracing/ur_trcddi.cpp | 8 ------ source/loader/layers/validation/ur_valddi.cpp | 8 ------ 5 files changed, 64 deletions(-) diff --git a/include/ur.py b/include/ur.py index 45c2cdb350..09b7955e07 100644 --- a/include/ur.py +++ b/include/ur.py @@ -3691,20 +3691,6 @@ class ur_usm_exp_dditable_t(Structure): else: _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp -if __use_win_types: - 
_urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp -if __use_win_types: - _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - ############################################################################### ## @brief Function-pointer for urCommandBufferEnqueueExp if __use_win_types: diff --git a/include/ur_api.h b/include/ur_api.h index 1d8c6ca8f9..09f6d77a6b 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -10667,34 +10667,6 @@ typedef struct ur_command_buffer_append_usm_advise_exp_params_t { ur_exp_command_buffer_sync_point_t **ppSyncPoint; } ur_command_buffer_append_usm_advise_exp_params_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendUSMPrefetchExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct 
ur_command_buffer_append_usm_prefetch_exp_params_t { - ur_exp_command_buffer_handle_t *phCommandBuffer; - const void **ppMemory; - size_t *psize; - ur_usm_migration_flags_t *pflags; - uint32_t *pnumSyncPointsInWaitList; - const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; - ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_usm_prefetch_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendUSMAdviseExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_usm_advise_exp_params_t { - ur_exp_command_buffer_handle_t *phCommandBuffer; - const void **ppMemory; - size_t *psize; - ur_usm_advice_flags_t *padvice; - uint32_t *pnumSyncPointsInWaitList; - const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; - ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_usm_advise_exp_params_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index e8f7d48ae6..a4e91e3dc0 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -5457,12 +5457,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = driver::urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnAppendUSMPrefetchExp = - driver::urCommandBufferAppendUSMPrefetchExp; - - pDdiTable->pfnAppendUSMAdviseExp = - driver::urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; return result; diff --git 
a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 792f34ec1f..d33a3aaf51 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6362,14 +6362,6 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMPrefetchExp = - ur_tracing_layer::urCommandBufferAppendUSMPrefetchExp; - - dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; - pDdiTable->pfnAppendUSMAdviseExp = - ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 13ec6eb47f..ec0df692cf 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8123,14 +8123,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_validation_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMPrefetchExp = - ur_validation_layer::urCommandBufferAppendUSMPrefetchExp; - - dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; - pDdiTable->pfnAppendUSMAdviseExp = - ur_validation_layer::urCommandBufferAppendUSMAdviseExp; - dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; From 3276f70d5e6ced45bcc3b8967bb2b6248757ac11 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Mon, 13 Nov 2023 16:31:50 +0000 Subject: [PATCH 044/138] [UR] Enable UMF tracking by default It is needed by adapters's urUSMFree 
implementations where we rely on umfPoolByPtr (which always returns null if tracking is disabled). --- CMakeLists.txt | 2 +- .../kernel/kernel_adapter_level_zero.match | 1 - test/conformance/usm/usm_adapter_cuda.match | 40 +------------------ .../usm/usm_adapter_level_zero.match | 25 ------------ 4 files changed, 2 insertions(+), 66 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bad7a6ca5..80a9f64ea7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UR_USE_TSAN "enable ThreadSanitizer" OFF) option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) -option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" OFF) +option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON) option(UR_BUILD_ADAPTER_L0 "Build the Level-Zero adapter" OFF) option(UR_BUILD_ADAPTER_OPENCL "Build the OpenCL adapter" OFF) option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF) diff --git a/test/conformance/kernel/kernel_adapter_level_zero.match b/test/conformance/kernel/kernel_adapter_level_zero.match index 8194c7ddad..2668b6821a 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero.match +++ b/test/conformance/kernel/kernel_adapter_level_zero.match @@ -11,7 +11,6 @@ urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runt urKernelSetArgPointerTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgValueTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/usm/usm_adapter_cuda.match b/test/conformance/usm/usm_adapter_cuda.match index e2ba6b6f63..15b68f5c6c 100644 --- a/test/conformance/usm/usm_adapter_cuda.match +++ b/test/conformance/usm/usm_adapter_cuda.match @@ -1,45 +1,7 @@ -urUSMDeviceAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullPtrResult/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_SIZE -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_POOL -{{OPT}}urUSMHostAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
{{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMPoolCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urUSMPoolCreateTest.SuccessWithFlag/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPoolDesc/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidEnumerationFlags/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_CONTEXT -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -{{OPT}}urUSMPoolGetInfoTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidEnumerationProperty/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeZero/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeTooSmall/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropValue/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMSharedAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
-{{OPT}}urUSMSharedAllocTest.SuccessWithMultipleAdvices/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index 9e275d805e..bf45b83ec2 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,36 +1,11 @@ -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMFreeTest.SuccessDeviceAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessHostAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessSharedAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_TYPE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_SIZE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_DEVICE urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidValuePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_CONTEXT urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urUSMPoolRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled From b0aed5954774597a32b5b4a38297ed32051fada3 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 5 Dec 2023 10:23:27 +0000 Subject: [PATCH 045/138] Adds CUDA support --- source/adapters/cuda/command_buffer.cpp | 59 ++++++++++++++++++------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 49ab0b813e..f2b896eada 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -531,14 +531,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + // Prefetch cmd is not supported by Cuda Graph. + // We implement it as an empty node to enforce dependencies. + ur_result_t Result = UR_RESULT_SUCCESS; + CUgraphNode GraphNode; + + std::vector DepsList; + UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, DepsList)); + + try { + // Add an empty node to preserve dependencies. + UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size())); + + // Get sync point and register the cuNode with it. + *pSyncPoint = + hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + } catch (ur_result_t Err) { + Result = Err; + } + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( @@ -547,14 +560,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + // Mem-Advise cmd is not supported by Cuda Graph. + // We implement it as an empty node to enforce dependencies. + ur_result_t Result = UR_RESULT_SUCCESS; + CUgraphNode GraphNode; + + std::vector DepsList; + UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, DepsList)); + + try { + // Add an empty node to preserve dependencies. + UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size())); + + // Get sync point and register the cuNode with it. 
+ *pSyncPoint = + hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + } catch (ur_result_t Err) { + Result = Err; + } + + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( From 303e26c7bed95bc50560e228050661cf57ac9f93 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 5 Dec 2023 11:57:14 +0000 Subject: [PATCH 046/138] Cuda support bugfix --- source/adapters/cuda/command_buffer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index f2b896eada..695ff03ce2 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -538,7 +538,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( std::vector DepsList; UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, - pSyncPointWaitList, DepsList)); + pSyncPointWaitList, DepsList), + Result); try { // Add an empty node to preserve dependencies. @@ -567,7 +568,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( std::vector DepsList; UR_CALL(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, - pSyncPointWaitList, DepsList)); + pSyncPointWaitList, DepsList), + Result); try { // Add an empty node to preserve dependencies. From 584314d313ca36b42ee8279f9826f6b3b662471a Mon Sep 17 00:00:00 2001 From: Adam Date: Thu, 26 Oct 2023 10:05:50 +0200 Subject: [PATCH 047/138] [UR][Tests] Add a CMake option to limit the test devices count and test platforms count in CTS. 
--- test/conformance/CMakeLists.txt | 4 +++- test/conformance/README.md | 9 ++++++++- test/conformance/cts_exe.py | 6 +++++- test/conformance/source/environment.cpp | 17 +++++++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index a4c2e8cf94..df80c02681 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -4,6 +4,8 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception set(UR_CONFORMANCE_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +option(UR_TEST_DEVICES_COUNT "Count of devices on which CTS will be run" 1) +option(UR_TEST_PLATFORMS_COUNT "Count of platforms on which CTS will be run" 1) function(add_test_adapter name adapter) set(TEST_TARGET_NAME test-${name}) @@ -12,7 +14,7 @@ function(add_test_adapter name adapter) add_test(NAME ${TEST_NAME} COMMAND ${CMAKE_COMMAND} -D TEST_FILE=${Python3_EXECUTABLE} - -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${CMAKE_BINARY_DIR}/bin/${TEST_TARGET_NAME}" + -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${CMAKE_BINARY_DIR}/bin/${TEST_TARGET_NAME} --test_devices_count=${UR_TEST_DEVICES_COUNT} --test_platforms_count=${UR_TEST_PLATFORMS_COUNT}" -D MODE=stdout -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}_${adapter}.match -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake diff --git a/test/conformance/README.md b/test/conformance/README.md index db90fc759b..9e78337151 100644 --- a/test/conformance/README.md +++ b/test/conformance/README.md @@ -8,4 +8,11 @@ In the future, when all bugs are fixed, and the tests pass, this solution will no longer be necessary. When you fix any test, the match file must be updated Empty match files indicate that there are no failing tests -in a particular group for the corresponding adapter. \ No newline at end of file +in a particular group for the corresponding adapter. 
+ +## How to limit the test devices count + +To limit how many devices you want to run the CTS on, +use CMake option UR_TEST_DEVICES_COUNT. If you want to run +the tests on all available devices, set 0. +The default value is 1. \ No newline at end of file diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py index ce3ca00a20..55ab134b07 100644 --- a/test/conformance/cts_exe.py +++ b/test/conformance/cts_exe.py @@ -20,9 +20,13 @@ parser = ArgumentParser() parser.add_argument("--test_command", help="Ctest test case") + parser.add_argument("--test_devices_count", type=str, help="Number of devices on which tests will be run") + parser.add_argument("--test_platforms_count", type=str, help="Number of platforms on which tests will be run") args = parser.parse_args() - result = subprocess.Popen([args.test_command, '--gtest_brief=1'], stdout = subprocess.PIPE, text = True) # nosec B603 + result = subprocess.Popen([args.test_command, '--gtest_brief=1', f'--devices_count={args.test_devices_count}', + f'--platforms_count={args.test_platforms_count}'], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) # nosec B603 pat = re.compile(r'\[( )*FAILED( )*\]') output_list = [] diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index e76b84692c..6918cfb829 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -3,6 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include @@ -209,6 +210,22 @@ DevicesEnvironment::DevicesEnvironment(int argc, char **argv) error = "Could not find any devices associated with the platform"; return; } + // Get the argument (test_devices_count) to limit test devices count. 
+ u_long count_set = 0; + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--test_devices_count") == 0 && i + 1 < argc) { + count_set = std::strtoul(argv[i + 1], nullptr, 10); + break; + } + } + // In case, the count_set is "0", the variable count will not be changed. + // The CTS will run on all devices. + if (count_set > (std::numeric_limits::max)()) { + error = "Invalid test_devices_count argument"; + return; + } else if (count_set > 0) { + count = (std::min)(count, static_cast(count_set)); + } devices.resize(count); if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), nullptr)) { From 371bd3cb1d1d9f919557cf3dafc8e5765a88735b Mon Sep 17 00:00:00 2001 From: szadam Date: Wed, 22 Nov 2023 17:30:50 +0100 Subject: [PATCH 048/138] [UR][Tests] Add options in CTS to set test devices count or test device name and test platforms count or test platform name --- test/conformance/README.md | 14 +- test/conformance/source/environment.cpp | 130 ++++++++++++++---- .../testing/include/uur/environment.h | 10 ++ 3 files changed, 125 insertions(+), 29 deletions(-) diff --git a/test/conformance/README.md b/test/conformance/README.md index 9e78337151..e895a5299d 100644 --- a/test/conformance/README.md +++ b/test/conformance/README.md @@ -10,9 +10,13 @@ When you fix any test, the match file must be updated Empty match files indicate that there are no failing tests in a particular group for the corresponding adapter. -## How to limit the test devices count +## How to set test device/platform name or limit the test devices/platforms count -To limit how many devices you want to run the CTS on, -use CMake option UR_TEST_DEVICES_COUNT. If you want to run -the tests on all available devices, set 0. -The default value is 1. \ No newline at end of file +To limit how many devices/platforms you want to run the CTS on, +use CMake option UR_TEST_DEVICES_COUNT or +UR_TEST_PLATFORMS_COUNT. If you want to run the tests on +all available devices/platforms, set 0. 
The default value is 1. +If you run binaries for the tests, you can use the parameter +`--platforms_count=COUNT` or `--devices_count=COUNT`. +To set test device/platform name you want to run the CTS on, use +parameter `--platform=NAME` or `--device=NAME`. \ No newline at end of file diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index 6918cfb829..6c917914ed 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -42,6 +42,23 @@ std::ostream &operator<<(std::ostream &out, return out; } +std::ostream &operator<<(std::ostream &out, const ur_device_handle_t &device) { + size_t size; + urDeviceGetInfo(device, UR_DEVICE_INFO_NAME, 0, nullptr, &size); + std::vector name(size); + urDeviceGetInfo(device, UR_DEVICE_INFO_NAME, size, name.data(), nullptr); + out << name.data(); + return out; +} + +std::ostream &operator<<(std::ostream &out, + const std::vector &devices) { + for (auto device : devices) { + out << "\n * \"" << device << "\""; + } + return out; +} + uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) : platform_options{parsePlatformOptions(argc, argv)} { instance = this; @@ -101,14 +118,16 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) } if (platform_options.platform_name.empty()) { - if (platforms.size() == 1) { + + if (platforms.size() == 1 || platform_options.platforms_count == 1) { platform = platforms[0]; } else { std::stringstream ss_error; ss_error << "Select a single platform from below using the " "--platform=NAME " "command-line option:" - << platforms; + << platforms << std::endl + << "or set --platforms_count=1."; error = ss_error.str(); return; } @@ -137,7 +156,8 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) << "\" not found. 
Select a single platform from below " "using the " "--platform=NAME command-line options:" - << platforms; + << platforms << std::endl + << "or set --platforms_count=1."; error = ss_error.str(); return; } @@ -178,6 +198,10 @@ PlatformEnvironment::parsePlatformOptions(int argc, char **argv) { arg, "--platform=", sizeof("--platform=") - 1) == 0) { options.platform_name = std::string(&arg[std::strlen("--platform=")]); + } else if (std::strncmp(arg, "--platforms_count=", + sizeof("--platforms_count=") - 1) == 0) { + options.platforms_count = std::strtoul( + &arg[std::strlen("--platforms_count=")], nullptr, 10); } } @@ -193,10 +217,31 @@ PlatformEnvironment::parsePlatformOptions(int argc, char **argv) { return options; } +DevicesEnvironment::DeviceOptions +DevicesEnvironment::parseDeviceOptions(int argc, char **argv) { + DeviceOptions options; + for (int argi = 1; argi < argc; ++argi) { + const char *arg = argv[argi]; + if (!(std::strcmp(arg, "-h") && std::strcmp(arg, "--help"))) { + // TODO - print help + break; + } else if (std::strncmp(arg, "--device=", sizeof("--device=") - 1) == + 0) { + options.device_name = std::string(&arg[std::strlen("--device=")]); + } else if (std::strncmp(arg, "--devices_count=", + sizeof("--devices_count=") - 1) == 0) { + options.devices_count = std::strtoul( + &arg[std::strlen("--devices_count=")], nullptr, 10); + } + } + return options; +} + DevicesEnvironment *DevicesEnvironment::instance = nullptr; DevicesEnvironment::DevicesEnvironment(int argc, char **argv) - : PlatformEnvironment(argc, argv) { + : PlatformEnvironment(argc, argv), + device_options(parseDeviceOptions(argc, argv)) { instance = this; if (!error.empty()) { return; @@ -210,27 +255,64 @@ DevicesEnvironment::DevicesEnvironment(int argc, char **argv) error = "Could not find any devices associated with the platform"; return; } - // Get the argument (test_devices_count) to limit test devices count. 
- u_long count_set = 0; - for (int i = 1; i < argc; ++i) { - if (std::strcmp(argv[i], "--test_devices_count") == 0 && i + 1 < argc) { - count_set = std::strtoul(argv[i + 1], nullptr, 10); - break; - } - } - // In case, the count_set is "0", the variable count will not be changed. + + // Get the argument (devices_count) to limit test devices count. + // In case, the devices_count is "0", the variable count will not be changed. // The CTS will run on all devices. - if (count_set > (std::numeric_limits::max)()) { - error = "Invalid test_devices_count argument"; - return; - } else if (count_set > 0) { - count = (std::min)(count, static_cast(count_set)); - } - devices.resize(count); - if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), - nullptr)) { - error = "urDeviceGet() failed to get devices."; - return; + if (device_options.device_name.empty()) { + if (device_options.devices_count > + (std::numeric_limits::max)()) { + error = "Invalid devices_count argument"; + return; + } else if (device_options.devices_count > 0) { + count = (std::min)( + count, static_cast(device_options.devices_count)); + } + devices.resize(count); + if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), + nullptr)) { + error = "urDeviceGet() failed to get devices."; + return; + } + } else { + devices.resize(count); + if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), + nullptr)) { + error = "urDeviceGet() failed to get devices."; + return; + } + for (u_long i = 0; i < count; i++) { + size_t size; + if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, 0, nullptr, + &size)) { + error = "urDeviceGetInfo() failed"; + return; + } + std::vector device_name(size); + if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, size, + device_name.data(), nullptr)) { + error = "urDeviceGetInfo() failed"; + return; + } + if (device_options.device_name == device_name.data()) { + device = devices[i]; + devices.clear(); + devices.resize(1); + devices[0] = device; + 
break; + } + } + if (!device) { + std::stringstream ss_error; + ss_error << "Device \"" << device_options.device_name + << "\" not found. Select a single device from below " + "using the " + "--device=NAME command-line options:" + << devices << std::endl + << "or set --devices_count=COUNT."; + error = ss_error.str(); + return; + } } } diff --git a/test/conformance/testing/include/uur/environment.h b/test/conformance/testing/include/uur/environment.h index 5cc6756364..551be76e17 100644 --- a/test/conformance/testing/include/uur/environment.h +++ b/test/conformance/testing/include/uur/environment.h @@ -17,6 +17,7 @@ struct PlatformEnvironment : ::testing::Environment { struct PlatformOptions { std::string platform_name; + unsigned long platforms_count; }; PlatformEnvironment(int argc, char **argv); @@ -36,17 +37,26 @@ struct PlatformEnvironment : ::testing::Environment { struct DevicesEnvironment : PlatformEnvironment { + struct DeviceOptions { + std::string device_name; + unsigned long devices_count; + }; + DevicesEnvironment(int argc, char **argv); virtual ~DevicesEnvironment() override = default; virtual void SetUp() override; virtual void TearDown() override; + DeviceOptions parseDeviceOptions(int argc, char **argv); + inline const std::vector &GetDevices() const { return devices; } + DeviceOptions device_options; std::vector devices; + ur_device_handle_t device = nullptr; static DevicesEnvironment *instance; }; From 1f256efc291ad9019535d812aa27fbb92e4f3803 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 15:28:46 +0000 Subject: [PATCH 049/138] removes duplicate entry --- scripts/core/EXP-COMMAND-BUFFER.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 386bf48f37..a6a32a66a1 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -227,9 +227,6 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | 
| commands | +-----------+-------------------------------------------------------+ -| 1.3 | Add function definitions for Prefetch and Advise | -| | commands | -+-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- From ff8f41011f706efcea3541fe585ad02f1b9e5340 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 17:16:04 +0000 Subject: [PATCH 050/138] Adds OpenCL stubs --- source/adapters/opencl/command_buffer.cpp | 34 +++++++++++++++++++ .../adapters/opencl/ur_interface_loader.cpp | 2 ++ 2 files changed, 36 insertions(+) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 56b4d16b88..3bbe531fde 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -297,6 +297,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferFillExp( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, + ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)mem; + (void)size; + (void)flags; + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + // Not implemented + return PI_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, + ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)mem; + (void)size; + (void)flags; + (void)numSyncPointsInWaitList; 
+ (void)pSyncPointWaitList; + (void)pSyncPoint; + + // Not implemented + return PI_ERROR_INVALID_OPERATION; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index b42df19350..d8f34bc398 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -296,6 +296,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteExp; pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 1ed21073c8723a5d8f8333a1a3f0cf6c0db2654f Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 17:35:48 +0000 Subject: [PATCH 051/138] Fixes opencl stubs --- source/adapters/opencl/command_buffer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 3bbe531fde..25d3311b79 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -311,24 +311,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( (void)pSyncPoint; // Not implemented - return PI_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, - ur_usm_migration_flags_t 
flags, uint32_t numSyncPointsInWaitList, + ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { (void)hCommandBuffer; (void)mem; (void)size; - (void)flags; + (void)advice; (void)numSyncPointsInWaitList; (void)pSyncPointWaitList; (void)pSyncPoint; // Not implemented - return PI_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( From 601df77b4ea0aa655c5d37c15f12c80d29285b47 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Thu, 7 Dec 2023 13:11:35 +0100 Subject: [PATCH 052/138] Set cron for coverity run --- .github/workflows/coverity.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index de327f92c7..a72b7caf77 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -9,8 +9,9 @@ name: coverity-unified-runtime # It runs static analysis build - Coverity. It requires special token (set in CI's secret). 
on: - push: - branches: ["main"] + schedule: + # Run every day at 23:00 UTC + - cron: '0 23 * * *' workflow_dispatch: env: From 1ec152ef8ae1d31589042b778166e69be81a1009 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Thu, 7 Dec 2023 13:18:00 +0000 Subject: [PATCH 053/138] Use static_cast in switch for ur_device_info_t --- source/adapters/native_cpu/device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 78540a1b90..a72d3032fb 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -60,7 +60,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - switch (propName) { + switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: return ReturnValue(UR_DEVICE_TYPE_CPU); case UR_DEVICE_INFO_PARENT_DEVICE: From 2f5aa14c23085ad7936a1d4726ac86ec567e8272 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Thu, 7 Dec 2023 17:16:17 +0000 Subject: [PATCH 054/138] [NATIVECPU] Fix coverity warnings 1574319, 1574318, 1574316, 1574317, 1574244. Uninitialised fields, copy instead of move and unreachable code.
--- source/adapters/native_cpu/enqueue.cpp | 3 ++- source/adapters/native_cpu/kernel.cpp | 1 - source/adapters/native_cpu/kernel.hpp | 3 ++- source/adapters/native_cpu/memory.hpp | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index d9e73c5453..f13701e2f3 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -25,7 +25,8 @@ struct NDRDescT { RangeT GlobalSize; RangeT LocalSize; NDRDescT(uint32_t WorkDim, const size_t *GlobalWorkOffset, - const size_t *GlobalWorkSize, const size_t *LocalWorkSize) { + const size_t *GlobalWorkSize, const size_t *LocalWorkSize) + : WorkDim(WorkDim) { for (uint32_t I = 0; I < WorkDim; I++) { GlobalOffset[I] = GlobalWorkOffset[I]; GlobalSize[I] = GlobalWorkSize[I]; diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index 7bfd3c328c..2a7e765e41 100644 --- a/source/adapters/native_cpu/kernel.cpp +++ b/source/adapters/native_cpu/kernel.cpp @@ -97,7 +97,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, default: return UR_RESULT_ERROR_INVALID_VALUE; } - DIE_NO_IMPLEMENTATION } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/source/adapters/native_cpu/kernel.hpp b/source/adapters/native_cpu/kernel.hpp index 8d15d9f6e1..d608bdcbd3 100644 --- a/source/adapters/native_cpu/kernel.hpp +++ b/source/adapters/native_cpu/kernel.hpp @@ -13,6 +13,7 @@ #include "common.hpp" #include "nativecpu_state.hpp" #include +#include namespace native_cpu { @@ -39,7 +40,7 @@ struct local_arg_info_t { struct ur_kernel_handle_t_ : RefCounted { ur_kernel_handle_t_(const char *name, nativecpu_task_t subhandler) - : _name{name}, _subhandler{subhandler} {} + : _name{name}, _subhandler{std::move(subhandler)} {} const char *_name; nativecpu_task_t _subhandler; diff --git a/source/adapters/native_cpu/memory.hpp b/source/adapters/native_cpu/memory.hpp index 
45a28ccc67..cce2f8d0d1 100644 --- a/source/adapters/native_cpu/memory.hpp +++ b/source/adapters/native_cpu/memory.hpp @@ -68,7 +68,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { bool isSubBuffer() const { return SubBuffer.Parent != nullptr; } struct BB { - BB(_ur_buffer *b) : Parent(b) {} + BB(_ur_buffer *b) : Parent(b), Origin(0) {} BB() : BB(nullptr) {} _ur_buffer *const Parent; size_t Origin; // only valid if Parent != nullptr From d39b4d0c2d256d5da98ec2d33d354bc47edda110 Mon Sep 17 00:00:00 2001 From: Isaac Ault Date: Tue, 12 Dec 2023 13:04:11 +0000 Subject: [PATCH 055/138] Layered images rework. Remove unused structs. --- include/ur.py | 18 +---- include/ur_api.h | 100 +++++++++++---------------- include/ur_print.hpp | 34 --------- scripts/core/EXP-BINDLESS-IMAGES.rst | 4 +- scripts/core/exp-bindless-images.yml | 19 +---- 5 files changed, 46 insertions(+), 129 deletions(-) diff --git a/include/ur.py b/include/ur.py index 90cc06d895..b35a259fcd 100644 --- a/include/ur.py +++ b/include/ur.py @@ -256,8 +256,7 @@ class ur_structure_type_v(IntEnum): EXP_INTEROP_SEMAPHORE_DESC = 0x2002 ## ::ur_exp_interop_semaphore_desc_t EXP_FILE_DESCRIPTOR = 0x2003 ## ::ur_exp_file_descriptor_t EXP_WIN32_HANDLE = 0x2004 ## ::ur_exp_win32_handle_t - EXP_LAYERED_IMAGE_PROPERTIES = 0x2005 ## ::ur_exp_layered_image_properties_t - EXP_SAMPLER_ADDR_MODES = 0x2006 ## ::ur_exp_sampler_addr_modes_t + EXP_SAMPLER_ADDR_MODES = 0x2005 ## ::ur_exp_sampler_addr_modes_t class ur_structure_type_t(c_int): def __str__(self): @@ -2294,21 +2293,6 @@ class ur_exp_interop_semaphore_desc_t(Structure): ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure ] -############################################################################### -## @brief Describes layered image properties -## -## @details -## - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp -## or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as -## part of a `pNext` chain. 
-class ur_exp_layered_image_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("numLayers", c_ulong) ## [in] number of layers the image should have - ] - ############################################################################### ## @brief The extension string which defines support for command-buffers which ## is returned when querying device extensions. diff --git a/include/ur_api.h b/include/ur_api.h index 06c64d5184..0403e2b306 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -224,49 +224,48 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - 
UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - 
UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES = 0x2005, ///< ::ur_exp_layered_image_properties_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2006, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, 
///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< 
::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -7152,21 +7151,6 @@ typedef struct ur_exp_interop_semaphore_desc_t { } ur_exp_interop_semaphore_desc_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Describes layered image properties -/// -/// @details -/// - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp -/// or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as -/// part of a `pNext` chain. -typedef struct ur_exp_layered_image_properties_t { - ur_structure_type_t stype; ///< [in] type of this structure, must be - ///< ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES - void *pNext; ///< [in,out][optional] pointer to extension-specific structure - uint32_t numLayers; ///< [in] number of layers the image should have - -} ur_exp_layered_image_properties_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/include/ur_print.hpp b/include/ur_print.hpp index c5076a91e5..70e5b9886d 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -318,7 +318,6 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_layered_image_properties_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); @@ -1015,9 +1014,6 @@ inline std::ostream 
&operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE: os << "UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE"; break; - case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: - os << "UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES"; - break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES"; break; @@ -1243,11 +1239,6 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; - case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: { - const ur_exp_layered_image_properties_t *pstruct = (const ur_exp_layered_image_properties_t *)ptr; - printPtr(os, pstruct); - } break; - case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: { const ur_exp_sampler_addr_modes_t *pstruct = (const ur_exp_sampler_addr_modes_t *)ptr; printPtr(os, pstruct); @@ -9130,31 +9121,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_se return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_layered_image_properties_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_layered_image_properties_t params) { - os << "(struct ur_exp_layered_image_properties_t){"; - - os << ".stype = "; - - os << (params.stype); - - os << ", "; - os << ".pNext = "; - - ur::details::printStruct(os, - (params.pNext)); - - os << ", "; - os << ".numLayers = "; - - os << (params.numLayers); - - os << "}"; - return os; -} -/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_desc_t type /// @returns /// std::ostream & diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index c794c199d9..fe6a1ac32b 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -68,7 +68,6 @@ Enums 
${X}_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC ${X}_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR ${X}_STRUCTURE_TYPE_EXP_WIN32_HANDLE - ${X}_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES ${X}_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES * ${x}_device_info_t @@ -129,7 +128,6 @@ Types * ${x}_exp_interop_semaphore_desc_t * ${x}_exp_file_descriptor_t * ${x}_exp_win32_handle_t -* ${x}_exp_layered_image_properties_t * ${x}_exp_sampler_addr_modes_t Functions @@ -184,6 +182,8 @@ Changelog +----------+-------------------------------------------------------------+ | 8.0 | Added structure for sampler addressing modes per dimension. | +------------------------------------------------------------------------+ +| 9.0 | Remove layered image properties struct. | ++----------+-------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index b5f87a6633..d2e508c4a7 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -107,12 +107,9 @@ etors: - name: EXP_WIN32_HANDLE desc: $x_exp_win32_handle_t value: "0x2004" - - name: EXP_LAYERED_IMAGE_PROPERTIES - desc: $x_exp_layered_image_properties_t - value: "0x2005" - name: EXP_SAMPLER_ADDR_MODES desc: $x_exp_sampler_addr_modes_t - value: "0x2006" + value: "0x2005" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -205,20 +202,6 @@ name: $x_exp_interop_semaphore_desc_t base: $x_base_desc_t members: [] --- #-------------------------------------------------------------------------- -type: struct -desc: "Describes layered image properties" -details: - - Specify these properties in $xBindlessImagesUnsampledImageCreateExp or - $xBindlessImagesSampledImageCreateExp via $x_image_desc_t as part of a - `pNext` chain.
-class: $xBindlessImages -name: $x_exp_layered_image_properties_t -base: $x_base_properties_t -members: - - type: uint32_t - name: numLayers - desc: "[in] number of layers the image should have" ---- #-------------------------------------------------------------------------- type: function desc: "USM allocate pitched memory" class: $xUSM From 526f7e6b150e22c56a4c4312e606e17cb60a3bdc Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 12 Dec 2023 14:08:27 +0000 Subject: [PATCH 056/138] Remove inline from helper --- source/adapters/hip/memory.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 68ded26263..6a220f53c4 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -458,9 +458,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { return UR_RESULT_SUCCESS; } -inline ur_result_t -allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, - const ur_device_handle_t hDevice) { +ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, + const ur_device_handle_t hDevice) { ScopedContext Active(hDevice); ur_lock LockGuard(Mem->MemoryAllocationMutex); From 421043382cdff957fc72cc144e249c31cfe16741 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Tue, 12 Dec 2023 14:49:34 +0100 Subject: [PATCH 057/138] [L0] coverity fixes --- source/adapters/level_zero/adapter.cpp | 11 ++++------- source/adapters/level_zero/device.cpp | 19 ++++++++++--------- source/adapters/level_zero/device.hpp | 3 ++- source/adapters/level_zero/kernel.hpp | 6 ++++-- source/adapters/level_zero/memory.cpp | 10 +++++----- source/adapters/level_zero/memory.hpp | 16 +++++++++------- source/adapters/level_zero/platform.hpp | 4 +++- source/adapters/level_zero/queue.cpp | 6 +++--- source/adapters/level_zero/queue.hpp | 3 ++- 9 files changed, 42 insertions(+), 36 deletions(-) mode change 100755 => 100644 source/adapters/level_zero/queue.cpp diff --git 
a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 1850083caa..d43ae07cdb 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -156,17 +156,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - [[maybe_unused]] ur_adapter_handle_t - AdapterHandle, ///< [in] handle of the platform instance + ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. - [[maybe_unused]] int32_t - *Error ///< [out] pointer to an integer where the adapter specific - ///< error code will be stored. + int32_t *Error ///< [out] pointer to an integer where the adapter specific + ///< error code will be stored. ) { - AdapterHandle = &Adapter; *Message = ErrorMessage; - Error = &ErrorAdapterNativeCode; + *Error = ErrorAdapterNativeCode; return ErrorMessageCode; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index acc7c755f4..05b66e12f4 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -12,6 +12,7 @@ #include "ur_level_zero.hpp" #include #include +#include UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance @@ -353,8 +354,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE)); case UR_DEVICE_INFO_PARTITION_TYPE: { // For root-device there is no partitioning to report. 
- if (pSize && !Device->isSubDevice()) { - *pSize = 0; + if (Device->SubDeviceCreationProperty == std::nullopt || + !Device->isSubDevice()) { + if (pSize) + *pSize = 0; return UR_RESULT_SUCCESS; } @@ -365,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(cslice); } - return ReturnValue(Device->SubDeviceCreationProperty); + return ReturnValue(*Device->SubDeviceCreationProperty); } // Everything under here is not supported yet case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: @@ -1218,16 +1221,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); for (uint32_t I = 0; I < NumDevices; I++) { - Device->SubDevices[I]->SubDeviceCreationProperty = - Properties->pProperties[0]; - if (Properties->pProperties[0].type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { // In case the value is NEXT_PARTITIONABLE, we need to change it to the // chosen domain. This will always be NUMA since that's the only domain // supported by level zero. - Device->SubDevices[I]->SubDeviceCreationProperty.value.affinity_domain = - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; OutDevices[I] = Device->SubDevices[I]; // reusing the same pi_device needs to increment the reference count diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 5f34efab44..3b91b70058 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ struct ur_device_handle_t_ : _ur_object { // If this device is a subdevice, this variable contains the properties that // were used during its creation. 
- ur_device_partition_property_t SubDeviceCreationProperty; + std::optional SubDeviceCreationProperty; // PI platform to which this device belongs. // This field is only set at _ur_device_handle_t creation time, and cannot diff --git a/source/adapters/level_zero/kernel.hpp b/source/adapters/level_zero/kernel.hpp index 64f6e4f939..4ef21ce18b 100644 --- a/source/adapters/level_zero/kernel.hpp +++ b/source/adapters/level_zero/kernel.hpp @@ -16,13 +16,15 @@ struct ur_kernel_handle_t_ : _ur_object { ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_program_handle_t Program) - : Program{Program}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{nullptr}, Program{Program}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_context_handle_t Context) - : Context{Context}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{Context}, Program{nullptr}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index aefa661dac..fa3ef18e47 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -2078,9 +2078,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, auto &Allocation = Allocations[Device]; // Sub-buffers don't maintain own allocations but rely on parent buffer. - if (isSubBuffer()) { - UR_CALL(SubBuffer.Parent->getZeHandle(ZeHandle, AccessMode, Device)); - ZeHandle += SubBuffer.Origin; + if (SubBuffer) { + UR_CALL(SubBuffer->Parent->getZeHandle(ZeHandle, AccessMode, Device)); + ZeHandle += SubBuffer->Origin; // Still store the allocation info in the PI sub-buffer for // getZeHandlePtr to work. At least zeKernelSetArgumentValue needs to // be given a pointer to the allocation handle rather than its value. 
@@ -2312,7 +2312,7 @@ ur_result_t _ur_buffer::free() { // Buffer constructor _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, char *HostPtr, bool ImportedHostPtr = false) - : ur_mem_handle_t_(Context), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context), Size(Size) { // We treat integrated devices (physical memory shared with the CPU) // differently from discrete devices (those with distinct memories). @@ -2347,7 +2347,7 @@ _ur_buffer::_ur_buffer(ur_context_handle_t Context, ur_device_handle_t Device, _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, ur_device_handle_t Device, char *ZeMemHandle, bool OwnZeMemHandle) - : ur_mem_handle_t_(Context, Device), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context, Device), Size(Size) { // Device == nullptr means host allocation Allocations[Device].ZeHandle = ZeMemHandle; diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 54f9a84e6b..8efd5b136e 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,8 @@ struct ur_mem_handle_t_ : _ur_object { virtual ~ur_mem_handle_t_() = default; protected: - ur_mem_handle_t_(ur_context_handle_t Context) : UrContext{Context} {} + ur_mem_handle_t_(ur_context_handle_t Context) + : UrContext{Context}, UrDevice{nullptr} {} ur_mem_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device) : UrContext{Context}, UrDevice(Device) {} @@ -101,7 +103,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { // Sub-buffer constructor _ur_buffer(_ur_buffer *Parent, size_t Origin, size_t Size) : ur_mem_handle_t_(Parent->UrContext), - Size(Size), SubBuffer{Parent, Origin} {} + Size(Size), SubBuffer{{Parent, Origin}} {} // Interop-buffer constructor _ur_buffer(ur_context_handle_t Context, size_t Size, @@ -121,8 +123,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { 
ur_device_handle_t Device = nullptr) override; bool isImage() const override { return false; } - - bool isSubBuffer() const { return SubBuffer.Parent != nullptr; } + bool isSubBuffer() const { return SubBuffer != std::nullopt; } // Frees all allocations made for the buffer. ur_result_t free(); @@ -174,10 +175,11 @@ struct _ur_buffer final : ur_mem_handle_t_ { size_t Size; size_t getAlignment() const; - struct { + struct SubBuffer_t { _ur_buffer *Parent; - size_t Origin; // only valid if Parent != nullptr - } SubBuffer; + size_t Origin; + }; + std::optional SubBuffer; }; struct _ur_image final : ur_mem_handle_t_ { diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index f7b9576189..86aa4ec745 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -10,11 +10,13 @@ #pragma once #include "common.hpp" +#include "ze_api.h" struct ur_device_handle_t_; struct ur_platform_handle_t_ : public _ur_platform { - ur_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver} {} + ur_platform_handle_t_(ze_driver_handle_t Driver) + : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. 
ur_result_t initialize(); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp old mode 100755 new mode 100644 index 994f595a5d..f07e0df675 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -219,7 +219,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( if (ImmCmdList == Queue->CommandListMap.end()) continue; - auto EventList = ImmCmdList->second.EventList; + const auto &EventList = ImmCmdList->second.EventList; for (auto It = EventList.crbegin(); It != EventList.crend(); It++) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, ((*It)->ZeEvent)); @@ -391,11 +391,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // At this point only the thread creating the queue will have associated // command-lists. Other threads have not accessed the queue yet. So we can // only warmup the initial thread's command-lists. - auto QueueGroup = Q->ComputeQueueGroupsByTID.get(); + const auto &QueueGroup = Q->ComputeQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(false, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); if (Q->useCopyEngine()) { - auto QueueGroup = Q->CopyQueueGroupsByTID.get(); + const auto &QueueGroup = Q->CopyQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(true, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 306cec5416..88281925ce 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -424,7 +424,8 @@ struct ur_queue_handle_t_ : _ur_object { // checked. Otherwise, the OpenCommandList containing compute commands is // checked. bool hasOpenCommandList(bool IsCopy) const { - auto CommandBatch = (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; + const auto &CommandBatch = + (IsCopy) ? 
CopyCommandBatch : ComputeCommandBatch; return CommandBatch.OpenCommandList != CommandListMap.end(); } From 642c45442f0b2d6a0a5220e078f11b3ae0b1e803 Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Tue, 14 Nov 2023 11:31:52 +0000 Subject: [PATCH 058/138] Remove Python API bindings --- .github/workflows/cmake.yml | 4 +- .github/workflows/coverage.yml | 2 +- include/ur.py | 4309 -------------------------------- scripts/generate_code.py | 28 - scripts/templates/api.py.mako | 196 -- scripts/templates/helper.py | 116 +- test/CMakeLists.txt | 1 - test/python/CMakeLists.txt | 26 - test/python/basic.py | 19 - 9 files changed, 24 insertions(+), 4677 deletions(-) delete mode 100644 include/ur.py delete mode 100644 scripts/templates/api.py.mako delete mode 100644 test/python/CMakeLists.txt delete mode 100755 test/python/basic.py diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 953c9fb024..e697dd6aaf 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -110,7 +110,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "python|umf|loader|validation|tracing|unit|urtrace" + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" fuzztest-build: name: Build and run quick fuzztest scenarios @@ -346,7 +346,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "python|umf|loader|validation|tracing|unit|urtrace" + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" macos-build: diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index ad1ac23e7a..731f7ea320 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -65,7 +65,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C 
${{matrix.build_type}} --output-on-failure -L "python|umf|loader|validation|tracing|unit|urtrace" + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" - name: Quick Coverage Info working-directory: ${{github.workspace}}/build diff --git a/include/ur.py b/include/ur.py deleted file mode 100644 index b35a259fcd..0000000000 --- a/include/ur.py +++ /dev/null @@ -1,4309 +0,0 @@ -""" - Copyright (C) 2022 Intel Corporation - - Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. - See LICENSE.TXT - SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - @file ur.py - @version v0.9-r0 - - """ -import platform -from ctypes import * -from enum import * - -# ctypes does not define c_intptr_t, so let's define it here manually -c_intptr_t = c_ssize_t - -############################################################################### -__version__ = "1.0" - -############################################################################### -## @brief Defines unique stable identifiers for all functions -class ur_function_v(IntEnum): - CONTEXT_CREATE = 1 ## Enumerator for ::urContextCreate - CONTEXT_RETAIN = 2 ## Enumerator for ::urContextRetain - CONTEXT_RELEASE = 3 ## Enumerator for ::urContextRelease - CONTEXT_GET_INFO = 4 ## Enumerator for ::urContextGetInfo - CONTEXT_GET_NATIVE_HANDLE = 5 ## Enumerator for ::urContextGetNativeHandle - CONTEXT_CREATE_WITH_NATIVE_HANDLE = 6 ## Enumerator for ::urContextCreateWithNativeHandle - CONTEXT_SET_EXTENDED_DELETER = 7 ## Enumerator for ::urContextSetExtendedDeleter - DEVICE_GET = 8 ## Enumerator for ::urDeviceGet - DEVICE_GET_INFO = 9 ## Enumerator for ::urDeviceGetInfo - DEVICE_RETAIN = 10 ## Enumerator for ::urDeviceRetain - DEVICE_RELEASE = 11 ## Enumerator for ::urDeviceRelease - DEVICE_PARTITION = 12 ## Enumerator for ::urDevicePartition - DEVICE_SELECT_BINARY = 13 ## Enumerator for ::urDeviceSelectBinary - DEVICE_GET_NATIVE_HANDLE = 14 ## 
Enumerator for ::urDeviceGetNativeHandle - DEVICE_CREATE_WITH_NATIVE_HANDLE = 15 ## Enumerator for ::urDeviceCreateWithNativeHandle - DEVICE_GET_GLOBAL_TIMESTAMPS = 16 ## Enumerator for ::urDeviceGetGlobalTimestamps - ENQUEUE_KERNEL_LAUNCH = 17 ## Enumerator for ::urEnqueueKernelLaunch - ENQUEUE_EVENTS_WAIT = 18 ## Enumerator for ::urEnqueueEventsWait - ENQUEUE_EVENTS_WAIT_WITH_BARRIER = 19 ## Enumerator for ::urEnqueueEventsWaitWithBarrier - ENQUEUE_MEM_BUFFER_READ = 20 ## Enumerator for ::urEnqueueMemBufferRead - ENQUEUE_MEM_BUFFER_WRITE = 21 ## Enumerator for ::urEnqueueMemBufferWrite - ENQUEUE_MEM_BUFFER_READ_RECT = 22 ## Enumerator for ::urEnqueueMemBufferReadRect - ENQUEUE_MEM_BUFFER_WRITE_RECT = 23 ## Enumerator for ::urEnqueueMemBufferWriteRect - ENQUEUE_MEM_BUFFER_COPY = 24 ## Enumerator for ::urEnqueueMemBufferCopy - ENQUEUE_MEM_BUFFER_COPY_RECT = 25 ## Enumerator for ::urEnqueueMemBufferCopyRect - ENQUEUE_MEM_BUFFER_FILL = 26 ## Enumerator for ::urEnqueueMemBufferFill - ENQUEUE_MEM_IMAGE_READ = 27 ## Enumerator for ::urEnqueueMemImageRead - ENQUEUE_MEM_IMAGE_WRITE = 28 ## Enumerator for ::urEnqueueMemImageWrite - ENQUEUE_MEM_IMAGE_COPY = 29 ## Enumerator for ::urEnqueueMemImageCopy - ENQUEUE_MEM_BUFFER_MAP = 30 ## Enumerator for ::urEnqueueMemBufferMap - ENQUEUE_MEM_UNMAP = 31 ## Enumerator for ::urEnqueueMemUnmap - ENQUEUE_USM_FILL = 32 ## Enumerator for ::urEnqueueUSMFill - ENQUEUE_USM_MEMCPY = 33 ## Enumerator for ::urEnqueueUSMMemcpy - ENQUEUE_USM_PREFETCH = 34 ## Enumerator for ::urEnqueueUSMPrefetch - ENQUEUE_USM_ADVISE = 35 ## Enumerator for ::urEnqueueUSMAdvise - ENQUEUE_DEVICE_GLOBAL_VARIABLE_WRITE = 38 ## Enumerator for ::urEnqueueDeviceGlobalVariableWrite - ENQUEUE_DEVICE_GLOBAL_VARIABLE_READ = 39 ## Enumerator for ::urEnqueueDeviceGlobalVariableRead - EVENT_GET_INFO = 40 ## Enumerator for ::urEventGetInfo - EVENT_GET_PROFILING_INFO = 41 ## Enumerator for ::urEventGetProfilingInfo - EVENT_WAIT = 42 ## Enumerator for ::urEventWait - 
EVENT_RETAIN = 43 ## Enumerator for ::urEventRetain - EVENT_RELEASE = 44 ## Enumerator for ::urEventRelease - EVENT_GET_NATIVE_HANDLE = 45 ## Enumerator for ::urEventGetNativeHandle - EVENT_CREATE_WITH_NATIVE_HANDLE = 46 ## Enumerator for ::urEventCreateWithNativeHandle - EVENT_SET_CALLBACK = 47 ## Enumerator for ::urEventSetCallback - KERNEL_CREATE = 48 ## Enumerator for ::urKernelCreate - KERNEL_SET_ARG_VALUE = 49 ## Enumerator for ::urKernelSetArgValue - KERNEL_SET_ARG_LOCAL = 50 ## Enumerator for ::urKernelSetArgLocal - KERNEL_GET_INFO = 51 ## Enumerator for ::urKernelGetInfo - KERNEL_GET_GROUP_INFO = 52 ## Enumerator for ::urKernelGetGroupInfo - KERNEL_GET_SUB_GROUP_INFO = 53 ## Enumerator for ::urKernelGetSubGroupInfo - KERNEL_RETAIN = 54 ## Enumerator for ::urKernelRetain - KERNEL_RELEASE = 55 ## Enumerator for ::urKernelRelease - KERNEL_SET_ARG_POINTER = 56 ## Enumerator for ::urKernelSetArgPointer - KERNEL_SET_EXEC_INFO = 57 ## Enumerator for ::urKernelSetExecInfo - KERNEL_SET_ARG_SAMPLER = 58 ## Enumerator for ::urKernelSetArgSampler - KERNEL_SET_ARG_MEM_OBJ = 59 ## Enumerator for ::urKernelSetArgMemObj - KERNEL_SET_SPECIALIZATION_CONSTANTS = 60 ## Enumerator for ::urKernelSetSpecializationConstants - KERNEL_GET_NATIVE_HANDLE = 61 ## Enumerator for ::urKernelGetNativeHandle - KERNEL_CREATE_WITH_NATIVE_HANDLE = 62 ## Enumerator for ::urKernelCreateWithNativeHandle - MEM_IMAGE_CREATE = 63 ## Enumerator for ::urMemImageCreate - MEM_BUFFER_CREATE = 64 ## Enumerator for ::urMemBufferCreate - MEM_RETAIN = 65 ## Enumerator for ::urMemRetain - MEM_RELEASE = 66 ## Enumerator for ::urMemRelease - MEM_BUFFER_PARTITION = 67 ## Enumerator for ::urMemBufferPartition - MEM_GET_NATIVE_HANDLE = 68 ## Enumerator for ::urMemGetNativeHandle - ENQUEUE_READ_HOST_PIPE = 69 ## Enumerator for ::urEnqueueReadHostPipe - MEM_GET_INFO = 70 ## Enumerator for ::urMemGetInfo - MEM_IMAGE_GET_INFO = 71 ## Enumerator for ::urMemImageGetInfo - PLATFORM_GET = 72 ## Enumerator for 
::urPlatformGet - PLATFORM_GET_INFO = 73 ## Enumerator for ::urPlatformGetInfo - PLATFORM_GET_API_VERSION = 74 ## Enumerator for ::urPlatformGetApiVersion - PLATFORM_GET_NATIVE_HANDLE = 75 ## Enumerator for ::urPlatformGetNativeHandle - PLATFORM_CREATE_WITH_NATIVE_HANDLE = 76 ## Enumerator for ::urPlatformCreateWithNativeHandle - PROGRAM_CREATE_WITH_IL = 78 ## Enumerator for ::urProgramCreateWithIL - PROGRAM_CREATE_WITH_BINARY = 79 ## Enumerator for ::urProgramCreateWithBinary - PROGRAM_BUILD = 80 ## Enumerator for ::urProgramBuild - PROGRAM_COMPILE = 81 ## Enumerator for ::urProgramCompile - PROGRAM_LINK = 82 ## Enumerator for ::urProgramLink - PROGRAM_RETAIN = 83 ## Enumerator for ::urProgramRetain - PROGRAM_RELEASE = 84 ## Enumerator for ::urProgramRelease - PROGRAM_GET_FUNCTION_POINTER = 85 ## Enumerator for ::urProgramGetFunctionPointer - PROGRAM_GET_INFO = 86 ## Enumerator for ::urProgramGetInfo - PROGRAM_GET_BUILD_INFO = 87 ## Enumerator for ::urProgramGetBuildInfo - PROGRAM_SET_SPECIALIZATION_CONSTANTS = 88 ## Enumerator for ::urProgramSetSpecializationConstants - PROGRAM_GET_NATIVE_HANDLE = 89 ## Enumerator for ::urProgramGetNativeHandle - PROGRAM_CREATE_WITH_NATIVE_HANDLE = 90 ## Enumerator for ::urProgramCreateWithNativeHandle - QUEUE_GET_INFO = 91 ## Enumerator for ::urQueueGetInfo - QUEUE_CREATE = 92 ## Enumerator for ::urQueueCreate - QUEUE_RETAIN = 93 ## Enumerator for ::urQueueRetain - QUEUE_RELEASE = 94 ## Enumerator for ::urQueueRelease - QUEUE_GET_NATIVE_HANDLE = 95 ## Enumerator for ::urQueueGetNativeHandle - QUEUE_CREATE_WITH_NATIVE_HANDLE = 96 ## Enumerator for ::urQueueCreateWithNativeHandle - QUEUE_FINISH = 97 ## Enumerator for ::urQueueFinish - QUEUE_FLUSH = 98 ## Enumerator for ::urQueueFlush - SAMPLER_CREATE = 101 ## Enumerator for ::urSamplerCreate - SAMPLER_RETAIN = 102 ## Enumerator for ::urSamplerRetain - SAMPLER_RELEASE = 103 ## Enumerator for ::urSamplerRelease - SAMPLER_GET_INFO = 104 ## Enumerator for ::urSamplerGetInfo - 
SAMPLER_GET_NATIVE_HANDLE = 105 ## Enumerator for ::urSamplerGetNativeHandle - SAMPLER_CREATE_WITH_NATIVE_HANDLE = 106 ## Enumerator for ::urSamplerCreateWithNativeHandle - USM_HOST_ALLOC = 107 ## Enumerator for ::urUSMHostAlloc - USM_DEVICE_ALLOC = 108 ## Enumerator for ::urUSMDeviceAlloc - USM_SHARED_ALLOC = 109 ## Enumerator for ::urUSMSharedAlloc - USM_FREE = 110 ## Enumerator for ::urUSMFree - USM_GET_MEM_ALLOC_INFO = 111 ## Enumerator for ::urUSMGetMemAllocInfo - USM_POOL_CREATE = 112 ## Enumerator for ::urUSMPoolCreate - COMMAND_BUFFER_CREATE_EXP = 113 ## Enumerator for ::urCommandBufferCreateExp - PLATFORM_GET_BACKEND_OPTION = 114 ## Enumerator for ::urPlatformGetBackendOption - MEM_BUFFER_CREATE_WITH_NATIVE_HANDLE = 115 ## Enumerator for ::urMemBufferCreateWithNativeHandle - MEM_IMAGE_CREATE_WITH_NATIVE_HANDLE = 116 ## Enumerator for ::urMemImageCreateWithNativeHandle - ENQUEUE_WRITE_HOST_PIPE = 117 ## Enumerator for ::urEnqueueWriteHostPipe - USM_POOL_RETAIN = 118 ## Enumerator for ::urUSMPoolRetain - USM_POOL_RELEASE = 119 ## Enumerator for ::urUSMPoolRelease - USM_POOL_GET_INFO = 120 ## Enumerator for ::urUSMPoolGetInfo - COMMAND_BUFFER_RETAIN_EXP = 121 ## Enumerator for ::urCommandBufferRetainExp - COMMAND_BUFFER_RELEASE_EXP = 122 ## Enumerator for ::urCommandBufferReleaseExp - COMMAND_BUFFER_FINALIZE_EXP = 123 ## Enumerator for ::urCommandBufferFinalizeExp - COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125 ## Enumerator for ::urCommandBufferAppendKernelLaunchExp - COMMAND_BUFFER_ENQUEUE_EXP = 128 ## Enumerator for ::urCommandBufferEnqueueExp - USM_PITCHED_ALLOC_EXP = 132 ## Enumerator for ::urUSMPitchedAllocExp - BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133## Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp - BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134 ## Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp - BINDLESS_IMAGES_IMAGE_ALLOCATE_EXP = 135 ## Enumerator for ::urBindlessImagesImageAllocateExp - 
BINDLESS_IMAGES_IMAGE_FREE_EXP = 136 ## Enumerator for ::urBindlessImagesImageFreeExp - BINDLESS_IMAGES_UNSAMPLED_IMAGE_CREATE_EXP = 137## Enumerator for ::urBindlessImagesUnsampledImageCreateExp - BINDLESS_IMAGES_SAMPLED_IMAGE_CREATE_EXP = 138 ## Enumerator for ::urBindlessImagesSampledImageCreateExp - BINDLESS_IMAGES_IMAGE_COPY_EXP = 139 ## Enumerator for ::urBindlessImagesImageCopyExp - BINDLESS_IMAGES_IMAGE_GET_INFO_EXP = 140 ## Enumerator for ::urBindlessImagesImageGetInfoExp - BINDLESS_IMAGES_MIPMAP_GET_LEVEL_EXP = 141 ## Enumerator for ::urBindlessImagesMipmapGetLevelExp - BINDLESS_IMAGES_MIPMAP_FREE_EXP = 142 ## Enumerator for ::urBindlessImagesMipmapFreeExp - BINDLESS_IMAGES_IMPORT_OPAQUE_FD_EXP = 143 ## Enumerator for ::urBindlessImagesImportOpaqueFDExp - BINDLESS_IMAGES_MAP_EXTERNAL_ARRAY_EXP = 144 ## Enumerator for ::urBindlessImagesMapExternalArrayExp - BINDLESS_IMAGES_RELEASE_INTEROP_EXP = 145 ## Enumerator for ::urBindlessImagesReleaseInteropExp - BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXP = 146 ## Enumerator for ::urBindlessImagesImportExternalSemaphoreOpaqueFDExp - BINDLESS_IMAGES_DESTROY_EXTERNAL_SEMAPHORE_EXP = 147## Enumerator for ::urBindlessImagesDestroyExternalSemaphoreExp - BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP = 148 ## Enumerator for ::urBindlessImagesWaitExternalSemaphoreExp - BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP = 149 ## Enumerator for ::urBindlessImagesSignalExternalSemaphoreExp - ENQUEUE_USM_FILL_2D = 151 ## Enumerator for ::urEnqueueUSMFill2D - ENQUEUE_USM_MEMCPY_2D = 152 ## Enumerator for ::urEnqueueUSMMemcpy2D - VIRTUAL_MEM_GRANULARITY_GET_INFO = 153 ## Enumerator for ::urVirtualMemGranularityGetInfo - VIRTUAL_MEM_RESERVE = 154 ## Enumerator for ::urVirtualMemReserve - VIRTUAL_MEM_FREE = 155 ## Enumerator for ::urVirtualMemFree - VIRTUAL_MEM_MAP = 156 ## Enumerator for ::urVirtualMemMap - VIRTUAL_MEM_UNMAP = 157 ## Enumerator for ::urVirtualMemUnmap - VIRTUAL_MEM_SET_ACCESS = 158 ## Enumerator for 
::urVirtualMemSetAccess - VIRTUAL_MEM_GET_INFO = 159 ## Enumerator for ::urVirtualMemGetInfo - PHYSICAL_MEM_CREATE = 160 ## Enumerator for ::urPhysicalMemCreate - PHYSICAL_MEM_RETAIN = 161 ## Enumerator for ::urPhysicalMemRetain - PHYSICAL_MEM_RELEASE = 162 ## Enumerator for ::urPhysicalMemRelease - USM_IMPORT_EXP = 163 ## Enumerator for ::urUSMImportExp - USM_RELEASE_EXP = 164 ## Enumerator for ::urUSMReleaseExp - USM_P2P_ENABLE_PEER_ACCESS_EXP = 165 ## Enumerator for ::urUsmP2PEnablePeerAccessExp - USM_P2P_DISABLE_PEER_ACCESS_EXP = 166 ## Enumerator for ::urUsmP2PDisablePeerAccessExp - USM_P2P_PEER_ACCESS_GET_INFO_EXP = 167 ## Enumerator for ::urUsmP2PPeerAccessGetInfoExp - LOADER_CONFIG_CREATE = 172 ## Enumerator for ::urLoaderConfigCreate - LOADER_CONFIG_RELEASE = 173 ## Enumerator for ::urLoaderConfigRelease - LOADER_CONFIG_RETAIN = 174 ## Enumerator for ::urLoaderConfigRetain - LOADER_CONFIG_GET_INFO = 175 ## Enumerator for ::urLoaderConfigGetInfo - LOADER_CONFIG_ENABLE_LAYER = 176 ## Enumerator for ::urLoaderConfigEnableLayer - ADAPTER_RELEASE = 177 ## Enumerator for ::urAdapterRelease - ADAPTER_GET = 178 ## Enumerator for ::urAdapterGet - ADAPTER_RETAIN = 179 ## Enumerator for ::urAdapterRetain - ADAPTER_GET_LAST_ERROR = 180 ## Enumerator for ::urAdapterGetLastError - ADAPTER_GET_INFO = 181 ## Enumerator for ::urAdapterGetInfo - PROGRAM_BUILD_EXP = 197 ## Enumerator for ::urProgramBuildExp - PROGRAM_COMPILE_EXP = 198 ## Enumerator for ::urProgramCompileExp - PROGRAM_LINK_EXP = 199 ## Enumerator for ::urProgramLinkExp - LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 200 ## Enumerator for ::urLoaderConfigSetCodeLocationCallback - LOADER_INIT = 201 ## Enumerator for ::urLoaderInit - LOADER_TEAR_DOWN = 202 ## Enumerator for ::urLoaderTearDown - COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 203 ## Enumerator for ::urCommandBufferAppendUSMMemcpyExp - COMMAND_BUFFER_APPEND_USM_FILL_EXP = 204 ## Enumerator for ::urCommandBufferAppendUSMFillExp - 
COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 205 ## Enumerator for ::urCommandBufferAppendMemBufferCopyExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 206## Enumerator for ::urCommandBufferAppendMemBufferWriteExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 207 ## Enumerator for ::urCommandBufferAppendMemBufferReadExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 208## Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 209 ## Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 210## Enumerator for ::urCommandBufferAppendMemBufferReadRectExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 211 ## Enumerator for ::urCommandBufferAppendMemBufferFillExp - COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 212 ## Enumerator for ::urCommandBufferAppendUSMPrefetchExp - COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213 ## Enumerator for ::urCommandBufferAppendUSMAdviseExp - ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214 ## Enumerator for ::urEnqueueCooperativeKernelLaunchExp - KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215## Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp - -class ur_function_t(c_int): - def __str__(self): - return str(ur_function_v(self.value)) - - -############################################################################### -## @brief Defines structure types -class ur_structure_type_v(IntEnum): - CONTEXT_PROPERTIES = 0 ## ::ur_context_properties_t - IMAGE_DESC = 1 ## ::ur_image_desc_t - BUFFER_PROPERTIES = 2 ## ::ur_buffer_properties_t - BUFFER_REGION = 3 ## ::ur_buffer_region_t - BUFFER_CHANNEL_PROPERTIES = 4 ## ::ur_buffer_channel_properties_t - BUFFER_ALLOC_LOCATION_PROPERTIES = 5 ## ::ur_buffer_alloc_location_properties_t - PROGRAM_PROPERTIES = 6 ## ::ur_program_properties_t - USM_DESC = 7 ## ::ur_usm_desc_t - USM_HOST_DESC = 8 ## ::ur_usm_host_desc_t - USM_DEVICE_DESC = 9 ## ::ur_usm_device_desc_t - USM_POOL_DESC = 10 ## 
::ur_usm_pool_desc_t - USM_POOL_LIMITS_DESC = 11 ## ::ur_usm_pool_limits_desc_t - DEVICE_BINARY = 12 ## ::ur_device_binary_t - SAMPLER_DESC = 13 ## ::ur_sampler_desc_t - QUEUE_PROPERTIES = 14 ## ::ur_queue_properties_t - QUEUE_INDEX_PROPERTIES = 15 ## ::ur_queue_index_properties_t - CONTEXT_NATIVE_PROPERTIES = 16 ## ::ur_context_native_properties_t - KERNEL_NATIVE_PROPERTIES = 17 ## ::ur_kernel_native_properties_t - QUEUE_NATIVE_PROPERTIES = 18 ## ::ur_queue_native_properties_t - MEM_NATIVE_PROPERTIES = 19 ## ::ur_mem_native_properties_t - EVENT_NATIVE_PROPERTIES = 20 ## ::ur_event_native_properties_t - PLATFORM_NATIVE_PROPERTIES = 21 ## ::ur_platform_native_properties_t - DEVICE_NATIVE_PROPERTIES = 22 ## ::ur_device_native_properties_t - PROGRAM_NATIVE_PROPERTIES = 23 ## ::ur_program_native_properties_t - SAMPLER_NATIVE_PROPERTIES = 24 ## ::ur_sampler_native_properties_t - QUEUE_NATIVE_DESC = 25 ## ::ur_queue_native_desc_t - DEVICE_PARTITION_PROPERTIES = 26 ## ::ur_device_partition_properties_t - KERNEL_ARG_MEM_OBJ_PROPERTIES = 27 ## ::ur_kernel_arg_mem_obj_properties_t - PHYSICAL_MEM_PROPERTIES = 28 ## ::ur_physical_mem_properties_t - KERNEL_ARG_POINTER_PROPERTIES = 29 ## ::ur_kernel_arg_pointer_properties_t - KERNEL_ARG_SAMPLER_PROPERTIES = 30 ## ::ur_kernel_arg_sampler_properties_t - KERNEL_EXEC_INFO_PROPERTIES = 31 ## ::ur_kernel_exec_info_properties_t - KERNEL_ARG_VALUE_PROPERTIES = 32 ## ::ur_kernel_arg_value_properties_t - KERNEL_ARG_LOCAL_PROPERTIES = 33 ## ::ur_kernel_arg_local_properties_t - USM_ALLOC_LOCATION_DESC = 35 ## ::ur_usm_alloc_location_desc_t - EXP_COMMAND_BUFFER_DESC = 0x1000 ## ::ur_exp_command_buffer_desc_t - EXP_SAMPLER_MIP_PROPERTIES = 0x2000 ## ::ur_exp_sampler_mip_properties_t - EXP_INTEROP_MEM_DESC = 0x2001 ## ::ur_exp_interop_mem_desc_t - EXP_INTEROP_SEMAPHORE_DESC = 0x2002 ## ::ur_exp_interop_semaphore_desc_t - EXP_FILE_DESCRIPTOR = 0x2003 ## ::ur_exp_file_descriptor_t - EXP_WIN32_HANDLE = 0x2004 ## ::ur_exp_win32_handle_t - 
EXP_SAMPLER_ADDR_MODES = 0x2005 ## ::ur_exp_sampler_addr_modes_t - -class ur_structure_type_t(c_int): - def __str__(self): - return str(ur_structure_type_v(self.value)) - - -############################################################################### -## @brief Generates generic 'oneAPI' API versions -def UR_MAKE_VERSION( _major, _minor ): - return (( _major << 16 )|( _minor & 0x0000ffff)) - -############################################################################### -## @brief Extracts 'oneAPI' API major version -def UR_MAJOR_VERSION( _ver ): - return ( _ver >> 16 ) - -############################################################################### -## @brief Extracts 'oneAPI' API minor version -def UR_MINOR_VERSION( _ver ): - return ( _ver & 0x0000ffff ) - -############################################################################### -## @brief Calling convention for all API functions -# UR_APICALL not required for python - -############################################################################### -## @brief Microsoft-specific dllexport storage-class attribute -# UR_APIEXPORT not required for python - -############################################################################### -## @brief Microsoft-specific dllexport storage-class attribute -# UR_DLLEXPORT not required for python - -############################################################################### -## @brief GCC-specific dllexport storage-class attribute -# UR_DLLEXPORT not required for python - -############################################################################### -## @brief compiler-independent type -class ur_bool_t(c_ubyte): - pass - -############################################################################### -## @brief Handle of a loader config object -class ur_loader_config_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of an adapter instance -class ur_adapter_handle_t(c_void_p): - 
pass - -############################################################################### -## @brief Handle of a platform instance -class ur_platform_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of platform's device object -class ur_device_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of context object -class ur_context_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of event object -class ur_event_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of Program object -class ur_program_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of program's Kernel object -class ur_kernel_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a queue object -class ur_queue_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a native object -class ur_native_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a Sampler object -class ur_sampler_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of memory object which can either be buffer or image -class ur_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of physical memory object -class ur_physical_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Generic macro for enumerator bit 
masks -def UR_BIT( _i ): - return ( 1 << _i ) - -############################################################################### -## @brief Defines Return/Error codes -class ur_result_v(IntEnum): - SUCCESS = 0 ## Success - ERROR_INVALID_OPERATION = 1 ## Invalid operation - ERROR_INVALID_QUEUE_PROPERTIES = 2 ## Invalid queue properties - ERROR_INVALID_QUEUE = 3 ## Invalid queue - ERROR_INVALID_VALUE = 4 ## Invalid Value - ERROR_INVALID_CONTEXT = 5 ## Invalid context - ERROR_INVALID_PLATFORM = 6 ## Invalid platform - ERROR_INVALID_BINARY = 7 ## Invalid binary - ERROR_INVALID_PROGRAM = 8 ## Invalid program - ERROR_INVALID_SAMPLER = 9 ## Invalid sampler - ERROR_INVALID_BUFFER_SIZE = 10 ## Invalid buffer size - ERROR_INVALID_MEM_OBJECT = 11 ## Invalid memory object - ERROR_INVALID_EVENT = 12 ## Invalid event - ERROR_INVALID_EVENT_WAIT_LIST = 13 ## Returned when the event wait list or the events in the wait list are - ## invalid. - ERROR_MISALIGNED_SUB_BUFFER_OFFSET = 14 ## Misaligned sub buffer offset - ERROR_INVALID_WORK_GROUP_SIZE = 15 ## Invalid work group size - ERROR_COMPILER_NOT_AVAILABLE = 16 ## Compiler not available - ERROR_PROFILING_INFO_NOT_AVAILABLE = 17 ## Profiling info not available - ERROR_DEVICE_NOT_FOUND = 18 ## Device not found - ERROR_INVALID_DEVICE = 19 ## Invalid device - ERROR_DEVICE_LOST = 20 ## Device hung, reset, was removed, or adapter update occurred - ERROR_DEVICE_REQUIRES_RESET = 21 ## Device requires a reset - ERROR_DEVICE_IN_LOW_POWER_STATE = 22 ## Device currently in low power state - ERROR_DEVICE_PARTITION_FAILED = 23 ## Device partitioning failed - ERROR_INVALID_DEVICE_PARTITION_COUNT = 24 ## Invalid counts provided with ::UR_DEVICE_PARTITION_BY_COUNTS - ERROR_INVALID_WORK_ITEM_SIZE = 25 ## Invalid work item size - ERROR_INVALID_WORK_DIMENSION = 26 ## Invalid work dimension - ERROR_INVALID_KERNEL_ARGS = 27 ## Invalid kernel args - ERROR_INVALID_KERNEL = 28 ## Invalid kernel - ERROR_INVALID_KERNEL_NAME = 29 ## [Validation] kernel name 
is not found in the program - ERROR_INVALID_KERNEL_ARGUMENT_INDEX = 30 ## [Validation] kernel argument index is not valid for kernel - ERROR_INVALID_KERNEL_ARGUMENT_SIZE = 31 ## [Validation] kernel argument size does not match kernel - ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE = 32 ## [Validation] value of kernel attribute is not valid for the kernel or - ## device - ERROR_INVALID_IMAGE_SIZE = 33 ## Invalid image size - ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR = 34 ## Invalid image format descriptor - ERROR_IMAGE_FORMAT_NOT_SUPPORTED = 35 ## Image format not supported - ERROR_MEM_OBJECT_ALLOCATION_FAILURE = 36 ## Memory object allocation failure - ERROR_INVALID_PROGRAM_EXECUTABLE = 37 ## Program object parameter is invalid. - ERROR_UNINITIALIZED = 38 ## [Validation] adapter is not initialized or specific entry-point is not - ## implemented - ERROR_OUT_OF_HOST_MEMORY = 39 ## Insufficient host memory to satisfy call - ERROR_OUT_OF_DEVICE_MEMORY = 40 ## Insufficient device memory to satisfy call - ERROR_OUT_OF_RESOURCES = 41 ## Out of resources - ERROR_PROGRAM_BUILD_FAILURE = 42 ## Error occurred when building program, see build log for details - ERROR_PROGRAM_LINK_FAILURE = 43 ## Error occurred when linking programs, see build log for details - ERROR_UNSUPPORTED_VERSION = 44 ## [Validation] generic error code for unsupported versions - ERROR_UNSUPPORTED_FEATURE = 45 ## [Validation] generic error code for unsupported features - ERROR_INVALID_ARGUMENT = 46 ## [Validation] generic error code for invalid arguments - ERROR_INVALID_NULL_HANDLE = 47 ## [Validation] handle argument is not valid - ERROR_HANDLE_OBJECT_IN_USE = 48 ## [Validation] object pointed to by handle still in-use by device - ERROR_INVALID_NULL_POINTER = 49 ## [Validation] pointer argument may not be nullptr - ERROR_INVALID_SIZE = 50 ## [Validation] invalid size or dimensions (e.g., must not be zero, or is - ## out of bounds) - ERROR_UNSUPPORTED_SIZE = 51 ## [Validation] size argument is not supported by the 
device (e.g., too - ## large) - ERROR_UNSUPPORTED_ALIGNMENT = 52 ## [Validation] alignment argument is not supported by the device (e.g., - ## too small) - ERROR_INVALID_SYNCHRONIZATION_OBJECT = 53 ## [Validation] synchronization object in invalid state - ERROR_INVALID_ENUMERATION = 54 ## [Validation] enumerator argument is not valid - ERROR_UNSUPPORTED_ENUMERATION = 55 ## [Validation] enumerator argument is not supported by the device - ERROR_UNSUPPORTED_IMAGE_FORMAT = 56 ## [Validation] image format is not supported by the device - ERROR_INVALID_NATIVE_BINARY = 57 ## [Validation] native binary is not supported by the device - ERROR_INVALID_GLOBAL_NAME = 58 ## [Validation] global variable is not found in the program - ERROR_INVALID_FUNCTION_NAME = 59 ## [Validation] function name is not found in the program - ERROR_INVALID_GROUP_SIZE_DIMENSION = 60 ## [Validation] group size dimension is not valid for the kernel or - ## device - ERROR_INVALID_GLOBAL_WIDTH_DIMENSION = 61 ## [Validation] global width dimension is not valid for the kernel or - ## device - ERROR_PROGRAM_UNLINKED = 62 ## [Validation] compiled program or program with imports needs to be - ## linked before kernels can be created from it. - ERROR_OVERLAPPING_REGIONS = 63 ## [Validation] copy operations do not support overlapping regions of - ## memory - ERROR_INVALID_HOST_PTR = 64 ## Invalid host pointer - ERROR_INVALID_USM_SIZE = 65 ## Invalid USM size - ERROR_OBJECT_ALLOCATION_FAILURE = 66 ## Objection allocation failure - ERROR_ADAPTER_SPECIFIC = 67 ## An adapter specific warning/error has been reported and can be - ## retrieved via the urPlatformGetLastError entry point. - ERROR_LAYER_NOT_PRESENT = 68 ## A requested layer was not found by the loader. 
- ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000 ## Invalid Command-Buffer - ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001## Sync point is not valid for the command-buffer - ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002 ## Sync point wait list is invalid - ERROR_UNKNOWN = 0x7ffffffe ## Unknown or internal error - -class ur_result_t(c_int): - def __str__(self): - return str(ur_result_v(self.value)) - - -############################################################################### -## @brief Base for all properties types -class ur_base_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Base for all descriptor types -class ur_base_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief 3D offset argument passed to buffer rect operations -class ur_rect_offset_t(Structure): - _fields_ = [ - ("x", c_ulonglong), ## [in] x offset (bytes) - ("y", c_ulonglong), ## [in] y offset (scalar) - ("z", c_ulonglong) ## [in] z offset (scalar) - ] - -############################################################################### -## @brief 3D region argument passed to buffer rect operations -class ur_rect_region_t(Structure): - _fields_ = [ - ("width", c_ulonglong), ## [in] width (bytes) - ("height", c_ulonglong), ## [in] height (scalar) - ("depth", c_ulonglong) ## [in] scalar (scalar) - ] - -############################################################################### -## @brief Supported device initialization flags -class ur_device_init_flags_v(IntEnum): - GPU = UR_BIT(0) ## initialize GPU device adapters. 
- CPU = UR_BIT(1) ## initialize CPU device adapters. - FPGA = UR_BIT(2) ## initialize FPGA device adapters. - MCA = UR_BIT(3) ## initialize MCA device adapters. - VPU = UR_BIT(4) ## initialize VPU device adapters. - -class ur_device_init_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Supported loader info -class ur_loader_config_info_v(IntEnum): - AVAILABLE_LAYERS = 0 ## [char[]] Null-terminated, semi-colon separated list of available - ## layers. - REFERENCE_COUNT = 1 ## [uint32_t] Reference count of the loader config object. - -class ur_loader_config_info_t(c_int): - def __str__(self): - return str(ur_loader_config_info_v(self.value)) - - -############################################################################### -## @brief Code location data -class ur_code_location_t(Structure): - _fields_ = [ - ("functionName", c_char_p), ## [in][out] Function name. - ("sourceFile", c_char_p), ## [in][out] Source code file. - ("lineNumber", c_ulong), ## [in][out] Source code line number. - ("columnNumber", c_ulong) ## [in][out] Source code column number. - ] - -############################################################################### -## @brief Code location callback with user data. -def ur_code_location_callback_t(user_defined_callback): - @CFUNCTYPE(ur_code_location_t, c_void_p) - def ur_code_location_callback_t_wrapper(pUserData): - return user_defined_callback(pUserData) - return ur_code_location_callback_t_wrapper - -############################################################################### -## @brief Supported adapter info -class ur_adapter_info_v(IntEnum): - BACKEND = 0 ## [::ur_adapter_backend_t] Identifies the native backend supported by - ## the adapter. - REFERENCE_COUNT = 1 ## [uint32_t] Reference count of the adapter. - ## The reference count returned should be considered immediately stale. 
- ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - -class ur_adapter_info_t(c_int): - def __str__(self): - return str(ur_adapter_info_v(self.value)) - - -############################################################################### -## @brief Identifies backend of the adapter -class ur_adapter_backend_v(IntEnum): - UNKNOWN = 0 ## The backend is not a recognized one - LEVEL_ZERO = 1 ## The backend is Level Zero - OPENCL = 2 ## The backend is OpenCL - CUDA = 3 ## The backend is CUDA - HIP = 4 ## The backend is HIP - NATIVE_CPU = 5 ## The backend is Native CPU - -class ur_adapter_backend_t(c_int): - def __str__(self): - return str(ur_adapter_backend_v(self.value)) - - -############################################################################### -## @brief Supported platform info -class ur_platform_info_v(IntEnum): - NAME = 1 ## [char[]] The string denoting name of the platform. The size of the - ## info needs to be dynamically queried. - VENDOR_NAME = 2 ## [char[]] The string denoting name of the vendor of the platform. The - ## size of the info needs to be dynamically queried. - VERSION = 3 ## [char[]] The string denoting the version of the platform. The size of - ## the info needs to be dynamically queried. - EXTENSIONS = 4 ## [char[]] The string denoting extensions supported by the platform. The - ## size of the info needs to be dynamically queried. - PROFILE = 5 ## [char[]] The string denoting profile of the platform. The size of the - ## info needs to be dynamically queried. - BACKEND = 6 ## [::ur_platform_backend_t] The backend of the platform. Identifies the - ## native backend adapter implementing this platform. 
- -class ur_platform_info_t(c_int): - def __str__(self): - return str(ur_platform_info_v(self.value)) - - -############################################################################### -## @brief Supported API versions -## -## @details -## - API versions contain major and minor attributes, use -## ::UR_MAJOR_VERSION and ::UR_MINOR_VERSION -class ur_api_version_v(IntEnum): - _0_6 = UR_MAKE_VERSION( 0, 6 ) ## version 0.6 - _0_7 = UR_MAKE_VERSION( 0, 7 ) ## version 0.7 - _0_8 = UR_MAKE_VERSION( 0, 8 ) ## version 0.8 - _0_9 = UR_MAKE_VERSION( 0, 9 ) ## version 0.9 - CURRENT = UR_MAKE_VERSION( 0, 9 ) ## latest known version - -class ur_api_version_t(c_int): - def __str__(self): - return str(ur_api_version_v(self.value)) - - -############################################################################### -## @brief Native platform creation properties -class ur_platform_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. 
- ] - -############################################################################### -## @brief Identifies native backend adapters -class ur_platform_backend_v(IntEnum): - UNKNOWN = 0 ## The backend is not a recognized one - LEVEL_ZERO = 1 ## The backend is Level Zero - OPENCL = 2 ## The backend is OpenCL - CUDA = 3 ## The backend is CUDA - HIP = 4 ## The backend is HIP - NATIVE_CPU = 5 ## The backend is Native CPU - -class ur_platform_backend_t(c_int): - def __str__(self): - return str(ur_platform_backend_v(self.value)) - - -############################################################################### -## @brief Target identification strings for -## ::ur_device_binary_t.pDeviceTargetSpec -## A device type represented by a particular target triple requires -## specific -## binary images. We need to map the image type onto the device target triple -UR_DEVICE_BINARY_TARGET_UNKNOWN = "" - -############################################################################### -## @brief SPIR-V 32-bit image <-> "spir", 32-bit OpenCL device -UR_DEVICE_BINARY_TARGET_SPIRV32 = "spir" - -############################################################################### -## @brief SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device -UR_DEVICE_BINARY_TARGET_SPIRV64 = "spir64" - -############################################################################### -## @brief Device-specific binary images produced from SPIR-V 64-bit <-> various -## "spir64_*" triples for specific 64-bit OpenCL CPU devices -UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64 = "spir64_x86_64" - -############################################################################### -## @brief Generic GPU device (64-bit OpenCL) -UR_DEVICE_BINARY_TARGET_SPIRV64_GEN = "spir64_gen" - -############################################################################### -## @brief 64-bit OpenCL FPGA device -UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA = "spir64_fpga" - 
-############################################################################### -## @brief PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device -UR_DEVICE_BINARY_TARGET_NVPTX64 = "nvptx64" - -############################################################################### -## @brief AMD GCN -UR_DEVICE_BINARY_TARGET_AMDGCN = "amdgcn" - -############################################################################### -## @brief Native CPU -UR_DEVICE_BINARY_TARGET_NATIVE_CPU = "native_cpu" - -############################################################################### -## @brief Device Binary Type -class ur_device_binary_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_DEVICE_BINARY - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("pDeviceTargetSpec", c_char_p) ## [in] null-terminated string representation of the device's target architecture. - ## For example: - ## + ::UR_DEVICE_BINARY_TARGET_UNKNOWN - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV32 - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64 - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64 - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64_GEN - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA - ## + ::UR_DEVICE_BINARY_TARGET_NVPTX64 - ## + ::UR_DEVICE_BINARY_TARGET_AMDGCN - ] - -############################################################################### -## @brief Supported device types -class ur_device_type_v(IntEnum): - DEFAULT = 1 ## The default device type as preferred by the runtime - ALL = 2 ## Devices of all types - GPU = 3 ## Graphics Processing Unit - CPU = 4 ## Central Processing Unit - FPGA = 5 ## Field Programmable Gate Array - MCA = 6 ## Memory Copy Accelerator - VPU = 7 ## Vision Processing Unit - -class ur_device_type_t(c_int): - def __str__(self): - return str(ur_device_type_v(self.value)) - - -############################################################################### -## @brief Supported device info 
-class ur_device_info_v(IntEnum): - TYPE = 0 ## [::ur_device_type_t] type of the device - VENDOR_ID = 1 ## [uint32_t] vendor Id of the device - DEVICE_ID = 2 ## [uint32_t] Id of the device - MAX_COMPUTE_UNITS = 3 ## [uint32_t] the number of compute units - MAX_WORK_ITEM_DIMENSIONS = 4 ## [uint32_t] max work item dimensions - MAX_WORK_ITEM_SIZES = 5 ## [size_t[]] return an array of max work item sizes - MAX_WORK_GROUP_SIZE = 6 ## [size_t] max work group size - SINGLE_FP_CONFIG = 7 ## [::ur_device_fp_capability_flags_t] single precision floating point - ## capability - HALF_FP_CONFIG = 8 ## [::ur_device_fp_capability_flags_t] half precision floating point - ## capability - DOUBLE_FP_CONFIG = 9 ## [::ur_device_fp_capability_flags_t] double precision floating point - ## capability - QUEUE_PROPERTIES = 10 ## [::ur_queue_flags_t] command queue properties supported by the device - PREFERRED_VECTOR_WIDTH_CHAR = 11 ## [uint32_t] preferred vector width for char - PREFERRED_VECTOR_WIDTH_SHORT = 12 ## [uint32_t] preferred vector width for short - PREFERRED_VECTOR_WIDTH_INT = 13 ## [uint32_t] preferred vector width for int - PREFERRED_VECTOR_WIDTH_LONG = 14 ## [uint32_t] preferred vector width for long - PREFERRED_VECTOR_WIDTH_FLOAT = 15 ## [uint32_t] preferred vector width for float - PREFERRED_VECTOR_WIDTH_DOUBLE = 16 ## [uint32_t] preferred vector width for double - PREFERRED_VECTOR_WIDTH_HALF = 17 ## [uint32_t] preferred vector width for half float - NATIVE_VECTOR_WIDTH_CHAR = 18 ## [uint32_t] native vector width for char - NATIVE_VECTOR_WIDTH_SHORT = 19 ## [uint32_t] native vector width for short - NATIVE_VECTOR_WIDTH_INT = 20 ## [uint32_t] native vector width for int - NATIVE_VECTOR_WIDTH_LONG = 21 ## [uint32_t] native vector width for long - NATIVE_VECTOR_WIDTH_FLOAT = 22 ## [uint32_t] native vector width for float - NATIVE_VECTOR_WIDTH_DOUBLE = 23 ## [uint32_t] native vector width for double - NATIVE_VECTOR_WIDTH_HALF = 24 ## [uint32_t] native vector width for half 
float - MAX_CLOCK_FREQUENCY = 25 ## [uint32_t] max clock frequency in MHz - MEMORY_CLOCK_RATE = 26 ## [uint32_t] memory clock frequency in MHz - ADDRESS_BITS = 27 ## [uint32_t] address bits - MAX_MEM_ALLOC_SIZE = 28 ## [uint64_t] max memory allocation size - IMAGE_SUPPORTED = 29 ## [::ur_bool_t] images are supported - MAX_READ_IMAGE_ARGS = 30 ## [uint32_t] max number of image objects arguments of a kernel declared - ## with the read_only qualifier - MAX_WRITE_IMAGE_ARGS = 31 ## [uint32_t] max number of image objects arguments of a kernel declared - ## with the write_only qualifier - MAX_READ_WRITE_IMAGE_ARGS = 32 ## [uint32_t] max number of image objects arguments of a kernel declared - ## with the read_write qualifier - IMAGE2D_MAX_WIDTH = 33 ## [size_t] max width of Image2D object - IMAGE2D_MAX_HEIGHT = 34 ## [size_t] max height of Image2D object - IMAGE3D_MAX_WIDTH = 35 ## [size_t] max width of Image3D object - IMAGE3D_MAX_HEIGHT = 36 ## [size_t] max height of Image3D object - IMAGE3D_MAX_DEPTH = 37 ## [size_t] max depth of Image3D object - IMAGE_MAX_BUFFER_SIZE = 38 ## [size_t] max image buffer size - IMAGE_MAX_ARRAY_SIZE = 39 ## [size_t] max image array size - MAX_SAMPLERS = 40 ## [uint32_t] max number of samplers that can be used in a kernel - MAX_PARAMETER_SIZE = 41 ## [size_t] max size in bytes of all arguments passed to a kernel - MEM_BASE_ADDR_ALIGN = 42 ## [uint32_t] memory base address alignment - GLOBAL_MEM_CACHE_TYPE = 43 ## [::ur_device_mem_cache_type_t] global memory cache type - GLOBAL_MEM_CACHELINE_SIZE = 44 ## [uint32_t] global memory cache line size in bytes - GLOBAL_MEM_CACHE_SIZE = 45 ## [uint64_t] size of global memory cache in bytes - GLOBAL_MEM_SIZE = 46 ## [uint64_t] size of global memory in bytes - GLOBAL_MEM_FREE = 47 ## [uint64_t] size of global memory which is free in bytes - MAX_CONSTANT_BUFFER_SIZE = 48 ## [uint64_t] max constant buffer size in bytes - MAX_CONSTANT_ARGS = 49 ## [uint32_t] max number of __const declared arguments in a 
kernel - LOCAL_MEM_TYPE = 50 ## [::ur_device_local_mem_type_t] local memory type - LOCAL_MEM_SIZE = 51 ## [uint64_t] local memory size in bytes - ERROR_CORRECTION_SUPPORT = 52 ## [::ur_bool_t] support error correction to global and local memory - HOST_UNIFIED_MEMORY = 53 ## [::ur_bool_t] unified host device memory - PROFILING_TIMER_RESOLUTION = 54 ## [size_t] profiling timer resolution in nanoseconds - ENDIAN_LITTLE = 55 ## [::ur_bool_t] little endian byte order - AVAILABLE = 56 ## [::ur_bool_t] device is available - COMPILER_AVAILABLE = 57 ## [::ur_bool_t] device compiler is available - LINKER_AVAILABLE = 58 ## [::ur_bool_t] device linker is available - EXECUTION_CAPABILITIES = 59 ## [::ur_device_exec_capability_flags_t] device kernel execution - ## capability bit-field - QUEUE_ON_DEVICE_PROPERTIES = 60 ## [::ur_queue_flags_t] device command queue property bit-field - QUEUE_ON_HOST_PROPERTIES = 61 ## [::ur_queue_flags_t] host queue property bit-field - BUILT_IN_KERNELS = 62 ## [char[]] a semi-colon separated list of built-in kernels - PLATFORM = 63 ## [::ur_platform_handle_t] the platform associated with the device - REFERENCE_COUNT = 64 ## [uint32_t] Reference count of the device object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. 
- IL_VERSION = 65 ## [char[]] IL version - NAME = 66 ## [char[]] Device name - VENDOR = 67 ## [char[]] Device vendor - DRIVER_VERSION = 68 ## [char[]] Driver version - PROFILE = 69 ## [char[]] Device profile - VERSION = 70 ## [char[]] Device version - BACKEND_RUNTIME_VERSION = 71 ## [char[]] Version of backend runtime - EXTENSIONS = 72 ## [char[]] Return a space separated list of extension names - PRINTF_BUFFER_SIZE = 73 ## [size_t] Maximum size in bytes of internal printf buffer - PREFERRED_INTEROP_USER_SYNC = 74 ## [::ur_bool_t] prefer user synchronization when sharing object with - ## other API - PARENT_DEVICE = 75 ## [::ur_device_handle_t] return parent device handle - SUPPORTED_PARTITIONS = 76 ## [::ur_device_partition_t[]] Returns an array of partition types - ## supported by the device - PARTITION_MAX_SUB_DEVICES = 77 ## [uint32_t] maximum number of sub-devices when the device is - ## partitioned - PARTITION_AFFINITY_DOMAIN = 78 ## [::ur_device_affinity_domain_flags_t] Returns a bit-field of the - ## supported affinity domains for partitioning. - ## If the device does not support any affinity domains, then 0 will be returned. 
- PARTITION_TYPE = 79 ## [::ur_device_partition_property_t[]] return an array of - ## ::ur_device_partition_property_t for properties specified in - ## ::urDevicePartition - MAX_NUM_SUB_GROUPS = 80 ## [uint32_t] max number of sub groups - SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81 ## [::ur_bool_t] support sub group independent forward progress - SUB_GROUP_SIZES_INTEL = 82 ## [uint32_t[]] return an array of sub group sizes supported on Intel - ## device - USM_HOST_SUPPORT = 83 ## [::ur_device_usm_access_capability_flags_t] support USM host memory - ## access - USM_DEVICE_SUPPORT = 84 ## [::ur_device_usm_access_capability_flags_t] support USM device memory - ## access - USM_SINGLE_SHARED_SUPPORT = 85 ## [::ur_device_usm_access_capability_flags_t] support USM single device - ## shared memory access - USM_CROSS_SHARED_SUPPORT = 86 ## [::ur_device_usm_access_capability_flags_t] support USM cross device - ## shared memory access - USM_SYSTEM_SHARED_SUPPORT = 87 ## [::ur_device_usm_access_capability_flags_t] support USM system wide - ## shared memory access - UUID = 88 ## [char[]] return device UUID - PCI_ADDRESS = 89 ## [char[]] return device PCI address - GPU_EU_COUNT = 90 ## [uint32_t] return Intel GPU EU count - GPU_EU_SIMD_WIDTH = 91 ## [uint32_t] return Intel GPU EU SIMD width - GPU_EU_SLICES = 92 ## [uint32_t] return Intel GPU number of slices - GPU_EU_COUNT_PER_SUBSLICE = 93 ## [uint32_t] return Intel GPU EU count per subslice - GPU_SUBSLICES_PER_SLICE = 94 ## [uint32_t] return Intel GPU number of subslices per slice - GPU_HW_THREADS_PER_EU = 95 ## [uint32_t] return Intel GPU number of threads per EU - MAX_MEMORY_BANDWIDTH = 96 ## [uint32_t] return max memory bandwidth in Mb/s - IMAGE_SRGB = 97 ## [::ur_bool_t] device supports sRGB images - BUILD_ON_SUBDEVICE = 98 ## [::ur_bool_t] Return true if sub-device should do its own program - ## build - ATOMIC_64 = 99 ## [::ur_bool_t] support 64 bit atomics - ATOMIC_MEMORY_ORDER_CAPABILITIES = 100 ## 
[::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory order capabilities - ATOMIC_MEMORY_SCOPE_CAPABILITIES = 101 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory scope capabilities - ATOMIC_FENCE_ORDER_CAPABILITIES = 102 ## [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory fence order capabilities - ATOMIC_FENCE_SCOPE_CAPABILITIES = 103 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory fence scope capabilities - BFLOAT16 = 104 ## [::ur_bool_t] support for bfloat16 - MAX_COMPUTE_QUEUE_INDICES = 105 ## [uint32_t] Returns 1 if the device doesn't have a notion of a - ## queue index. Otherwise, returns the number of queue indices that are - ## available for this device. - KERNEL_SET_SPECIALIZATION_CONSTANTS = 106 ## [::ur_bool_t] support the ::urKernelSetSpecializationConstants entry - ## point - MEMORY_BUS_WIDTH = 107 ## [uint32_t] return the width in bits of the memory bus interface of the - ## device. - MAX_WORK_GROUPS_3D = 108 ## [size_t[3]] return max 3D work groups - ASYNC_BARRIER = 109 ## [::ur_bool_t] return true if Async Barrier is supported - MEM_CHANNEL_SUPPORT = 110 ## [::ur_bool_t] return true if specifying memory channels is supported - HOST_PIPE_READ_WRITE_SUPPORTED = 111 ## [::ur_bool_t] Return true if the device supports enqueueing commands - ## to read and write pipes from the host. - MAX_REGISTERS_PER_WORK_GROUP = 112 ## [uint32_t] The maximum number of registers available per block. - IP_VERSION = 113 ## [uint32_t] The device IP version. The meaning of the device IP version - ## is implementation-defined, but newer devices should have a higher - ## version than older devices. - VIRTUAL_MEMORY_SUPPORT = 114 ## [::ur_bool_t] return true if the device supports virtual memory. - ESIMD_SUPPORT = 115 ## [::ur_bool_t] return true if the device supports ESIMD. 
- BINDLESS_IMAGES_SUPPORT_EXP = 0x2000 ## [::ur_bool_t] returns true if the device supports the creation of - ## bindless images - BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001 ## [::ur_bool_t] returns true if the device supports the creation of - ## bindless images backed by shared USM - BINDLESS_IMAGES_1D_USM_SUPPORT_EXP = 0x2002 ## [::ur_bool_t] returns true if the device supports the creation of 1D - ## bindless images backed by USM - BINDLESS_IMAGES_2D_USM_SUPPORT_EXP = 0x2003 ## [::ur_bool_t] returns true if the device supports the creation of 2D - ## bindless images backed by USM - IMAGE_PITCH_ALIGN_EXP = 0x2004 ## [uint32_t] returns the required alignment of the pitch between two - ## rows of an image in bytes - MAX_IMAGE_LINEAR_WIDTH_EXP = 0x2005 ## [size_t] returns the maximum linear width allowed for images allocated - ## using USM - MAX_IMAGE_LINEAR_HEIGHT_EXP = 0x2006 ## [size_t] returns the maximum linear height allowed for images - ## allocated using USM - MAX_IMAGE_LINEAR_PITCH_EXP = 0x2007 ## [size_t] returns the maximum linear pitch allowed for images allocated - ## using USM - MIPMAP_SUPPORT_EXP = 0x2008 ## [::ur_bool_t] returns true if the device supports allocating mipmap - ## resources - MIPMAP_ANISOTROPY_SUPPORT_EXP = 0x2009 ## [::ur_bool_t] returns true if the device supports sampling mipmap - ## images with anisotropic filtering - MIPMAP_MAX_ANISOTROPY_EXP = 0x200A ## [uint32_t] returns the maximum anisotropic ratio supported by the - ## device - MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP = 0x200B ## [::ur_bool_t] returns true if the device supports using images created - ## from individual mipmap levels - INTEROP_MEMORY_IMPORT_SUPPORT_EXP = 0x200C ## [::ur_bool_t] returns true if the device supports importing external - ## memory resources - INTEROP_MEMORY_EXPORT_SUPPORT_EXP = 0x200D ## [::ur_bool_t] returns true if the device supports exporting internal - ## memory resources - INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP = 0x200E ## [::ur_bool_t] 
returns true if the device supports importing external - ## semaphore resources - INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP = 0x200F ## [::ur_bool_t] returns true if the device supports exporting internal - ## event resources - -class ur_device_info_t(c_int): - def __str__(self): - return str(ur_device_info_v(self.value)) - - -############################################################################### -## @brief Device affinity domain -class ur_device_affinity_domain_flags_v(IntEnum): - NUMA = UR_BIT(0) ## Split the device into sub devices comprised of compute units that - ## share a NUMA node. - L4_CACHE = UR_BIT(1) ## Split the device into sub devices comprised of compute units that - ## share a level 4 data cache. - L3_CACHE = UR_BIT(2) ## Split the device into sub devices comprised of compute units that - ## share a level 3 data cache. - L2_CACHE = UR_BIT(3) ## Split the device into sub devices comprised of compute units that - ## share a level 2 data cache. - L1_CACHE = UR_BIT(4) ## Split the device into sub devices comprised of compute units that - ## share a level 1 data cache. - NEXT_PARTITIONABLE = UR_BIT(5) ## Split the device along the next partitionable affinity domain. - ## The implementation shall find the first level along which the device - ## or sub device may be further subdivided in the order: - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, - ## and partition the device into sub devices comprised of compute units - ## that share memory subsystems at this level. 
- -class ur_device_affinity_domain_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Partition Properties -class ur_device_partition_v(IntEnum): - EQUALLY = 0x1086 ## Partition Equally - BY_COUNTS = 0x1087 ## Partition by counts - BY_AFFINITY_DOMAIN = 0x1088 ## Partition by affinity domain - BY_CSLICE = 0x1089 ## Partition by c-slice - -class ur_device_partition_t(c_int): - def __str__(self): - return str(ur_device_partition_v(self.value)) - - -############################################################################### -## @brief Device partition value. -class ur_device_partition_value_t(Structure): - _fields_ = [ - ("equally", c_ulong), ## [in] Number of compute units per sub-device when partitioning with - ## ::UR_DEVICE_PARTITION_EQUALLY. - ("count", c_ulong), ## [in] Number of compute units in a sub-device when partitioning with - ## ::UR_DEVICE_PARTITION_BY_COUNTS. - ("affinity_domain", ur_device_affinity_domain_flags_t) ## [in] The affinity domain to partition for when partitioning with - ## ::UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN. - ] - -############################################################################### -## @brief Device partition property -class ur_device_partition_property_t(Structure): - _fields_ = [ - ("type", ur_device_partition_t), ## [in] The partitioning type to be used. - ("value", ur_device_partition_value_t) ## [in][tagged_by(type)] The partitioning value. 
- ] - -############################################################################### -## @brief Device Partition Properties -class ur_device_partition_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("pProperties", POINTER(ur_device_partition_property_t)), ## [in] Pointer to the beginning of the properties array. - ("PropCount", c_size_t) ## [in] The length of properties pointed to by `pProperties`. - ] - -############################################################################### -## @brief FP capabilities -class ur_device_fp_capability_flags_v(IntEnum): - CORRECTLY_ROUNDED_DIVIDE_SQRT = UR_BIT(0) ## Support correctly rounded divide and sqrt - ROUND_TO_NEAREST = UR_BIT(1) ## Support round to nearest - ROUND_TO_ZERO = UR_BIT(2) ## Support round to zero - ROUND_TO_INF = UR_BIT(3) ## Support round to infinity - INF_NAN = UR_BIT(4) ## Support INF to NAN - DENORM = UR_BIT(5) ## Support denorm - FMA = UR_BIT(6) ## Support FMA - SOFT_FLOAT = UR_BIT(7) ## Basic floating point operations implemented in software. 
- -class ur_device_fp_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Device memory cache type -class ur_device_mem_cache_type_v(IntEnum): - NONE = 0 ## Has none cache - READ_ONLY_CACHE = 1 ## Has read only cache - READ_WRITE_CACHE = 2 ## Has read write cache - -class ur_device_mem_cache_type_t(c_int): - def __str__(self): - return str(ur_device_mem_cache_type_v(self.value)) - - -############################################################################### -## @brief Device local memory type -class ur_device_local_mem_type_v(IntEnum): - NONE = 0 ## No local memory support - LOCAL = 1 ## Dedicated local memory - GLOBAL = 2 ## Global memory - -class ur_device_local_mem_type_t(c_int): - def __str__(self): - return str(ur_device_local_mem_type_v(self.value)) - - -############################################################################### -## @brief Device kernel execution capability -class ur_device_exec_capability_flags_v(IntEnum): - KERNEL = UR_BIT(0) ## Support kernel execution - NATIVE_KERNEL = UR_BIT(1) ## Support native kernel execution - -class ur_device_exec_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Native device creation properties -class ur_device_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. 
- ] - -############################################################################### -## @brief Memory order capabilities -class ur_memory_order_capability_flags_v(IntEnum): - RELAXED = UR_BIT(0) ## Relaxed memory ordering - ACQUIRE = UR_BIT(1) ## Acquire memory ordering - RELEASE = UR_BIT(2) ## Release memory ordering - ACQ_REL = UR_BIT(3) ## Acquire/release memory ordering - SEQ_CST = UR_BIT(4) ## Sequentially consistent memory ordering - -class ur_memory_order_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Memory scope capabilities -class ur_memory_scope_capability_flags_v(IntEnum): - WORK_ITEM = UR_BIT(0) ## Work item scope - SUB_GROUP = UR_BIT(1) ## Sub group scope - WORK_GROUP = UR_BIT(2) ## Work group scope - DEVICE = UR_BIT(3) ## Device scope - SYSTEM = UR_BIT(4) ## System scope - -class ur_memory_scope_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM access capabilities -class ur_device_usm_access_capability_flags_v(IntEnum): - ACCESS = UR_BIT(0) ## Memory can be accessed - ATOMIC_ACCESS = UR_BIT(1) ## Memory can be accessed atomically - CONCURRENT_ACCESS = UR_BIT(2) ## Memory can be accessed concurrently - ATOMIC_CONCURRENT_ACCESS = UR_BIT(3) ## Memory can be accessed atomically and concurrently - -class ur_device_usm_access_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Context property type -class ur_context_flags_v(IntEnum): - TBD = UR_BIT(0) ## reserved for future use - -class ur_context_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Context creation properties -class 
ur_context_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("flags", ur_context_flags_t) ## [in] context creation flags. - ] - -############################################################################### -## @brief Supported context info -class ur_context_info_v(IntEnum): - NUM_DEVICES = 0 ## [uint32_t] The number of the devices in the context - DEVICES = 1 ## [::ur_device_handle_t[]] The array of the device handles in the - ## context - REFERENCE_COUNT = 2 ## [uint32_t] Reference count of the context object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - USM_MEMCPY2D_SUPPORT = 3 ## [::ur_bool_t] to indicate if the ::urEnqueueUSMMemcpy2D entrypoint is - ## supported. - USM_FILL2D_SUPPORT = 4 ## [::ur_bool_t] to indicate if the ::urEnqueueUSMFill2D entrypoint is - ## supported. - ATOMIC_MEMORY_ORDER_CAPABILITIES = 5 ## [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory order capabilities. - ATOMIC_MEMORY_SCOPE_CAPABILITIES = 6 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory scope capabilities. - ATOMIC_FENCE_ORDER_CAPABILITIES = 7 ## [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory fence order capabilities. - ## Zero is returned if the backend does not support context-level fences. - ATOMIC_FENCE_SCOPE_CAPABILITIES = 8 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory fence scope capabilities. - ## Zero is returned if the backend does not support context-level fences. 
- -class ur_context_info_t(c_int): - def __str__(self): - return str(ur_context_info_v(self.value)) - - -############################################################################### -## @brief Properties for for ::urContextCreateWithNativeHandle. -class ur_context_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Context's extended deleter callback function with user data. -def ur_context_extended_deleter_t(user_defined_callback): - @CFUNCTYPE(None, c_void_p) - def ur_context_extended_deleter_t_wrapper(pUserData): - return user_defined_callback(pUserData) - return ur_context_extended_deleter_t_wrapper - -############################################################################### -## @brief Memory flags -class ur_mem_flags_v(IntEnum): - READ_WRITE = UR_BIT(0) ## The memory object will be read and written by a kernel. 
This is the - ## default - WRITE_ONLY = UR_BIT(1) ## The memory object will be written but not read by a kernel - READ_ONLY = UR_BIT(2) ## The memory object is a read-only inside a kernel - USE_HOST_POINTER = UR_BIT(3) ## Use memory pointed by a host pointer parameter as the storage bits for - ## the memory object - ALLOC_HOST_POINTER = UR_BIT(4) ## Allocate memory object from host accessible memory - ALLOC_COPY_HOST_POINTER = UR_BIT(5) ## Allocate memory and copy the data from host pointer pointed memory - -class ur_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Memory types -class ur_mem_type_v(IntEnum): - BUFFER = 0 ## Buffer object - IMAGE2D = 1 ## 2D image object - IMAGE3D = 2 ## 3D image object - IMAGE2D_ARRAY = 3 ## 2D image array object - IMAGE1D = 4 ## 1D image object - IMAGE1D_ARRAY = 5 ## 1D image array object - IMAGE1D_BUFFER = 6 ## 1D image buffer object - -class ur_mem_type_t(c_int): - def __str__(self): - return str(ur_mem_type_v(self.value)) - - -############################################################################### -## @brief Memory Information type -class ur_mem_info_v(IntEnum): - SIZE = 0 ## [size_t] actual size of of memory object in bytes - CONTEXT = 1 ## [::ur_context_handle_t] context in which the memory object was created - -class ur_mem_info_t(c_int): - def __str__(self): - return str(ur_mem_info_v(self.value)) - - -############################################################################### -## @brief Image channel order info: number of channels and the channel layout -class ur_image_channel_order_v(IntEnum): - A = 0 ## channel order A - R = 1 ## channel order R - RG = 2 ## channel order RG - RA = 3 ## channel order RA - RGB = 4 ## channel order RGB - RGBA = 5 ## channel order RGBA - BGRA = 6 ## channel order BGRA - ARGB = 7 ## channel order ARGB - ABGR = 8 ## channel order ABGR - INTENSITY = 9 ## channel order 
intensity - LUMINANCE = 10 ## channel order luminance - RX = 11 ## channel order Rx - RGX = 12 ## channel order RGx - RGBX = 13 ## channel order RGBx - SRGBA = 14 ## channel order sRGBA - -class ur_image_channel_order_t(c_int): - def __str__(self): - return str(ur_image_channel_order_v(self.value)) - - -############################################################################### -## @brief Image channel type info: describe the size of the channel data type -class ur_image_channel_type_v(IntEnum): - SNORM_INT8 = 0 ## channel type snorm int8 - SNORM_INT16 = 1 ## channel type snorm int16 - UNORM_INT8 = 2 ## channel type unorm int8 - UNORM_INT16 = 3 ## channel type unorm int16 - UNORM_SHORT_565 = 4 ## channel type unorm short 565 - UNORM_SHORT_555 = 5 ## channel type unorm short 555 - INT_101010 = 6 ## channel type int 101010 - SIGNED_INT8 = 7 ## channel type signed int8 - SIGNED_INT16 = 8 ## channel type signed int16 - SIGNED_INT32 = 9 ## channel type signed int32 - UNSIGNED_INT8 = 10 ## channel type unsigned int8 - UNSIGNED_INT16 = 11 ## channel type unsigned int16 - UNSIGNED_INT32 = 12 ## channel type unsigned int32 - HALF_FLOAT = 13 ## channel type half float - FLOAT = 14 ## channel type float - -class ur_image_channel_type_t(c_int): - def __str__(self): - return str(ur_image_channel_type_v(self.value)) - - -############################################################################### -## @brief Image information types -class ur_image_info_v(IntEnum): - FORMAT = 0 ## [::ur_image_format_t] image format - ELEMENT_SIZE = 1 ## [size_t] element size - ROW_PITCH = 2 ## [size_t] row pitch - SLICE_PITCH = 3 ## [size_t] slice pitch - WIDTH = 4 ## [size_t] image width - HEIGHT = 5 ## [size_t] image height - DEPTH = 6 ## [size_t] image depth - -class ur_image_info_t(c_int): - def __str__(self): - return str(ur_image_info_v(self.value)) - - -############################################################################### -## @brief Image format including channel layout and 
data type -class ur_image_format_t(Structure): - _fields_ = [ - ("channelOrder", ur_image_channel_order_t), ## [in] image channel order - ("channelType", ur_image_channel_type_t) ## [in] image channel type - ] - -############################################################################### -## @brief Image descriptor type. -class ur_image_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_IMAGE_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("type", ur_mem_type_t), ## [in][nocheck] memory object type - ("width", c_size_t), ## [in] image width - ("height", c_size_t), ## [in] image height - ("depth", c_size_t), ## [in] image depth - ("arraySize", c_size_t), ## [in] image array size - ("rowPitch", c_size_t), ## [in] image row pitch - ("slicePitch", c_size_t), ## [in] image slice pitch - ("numMipLevel", c_ulong), ## [in] number of MIP levels - ("numSamples", c_ulong) ## [in] number of samples - ] - -############################################################################### -## @brief Buffer creation properties -class ur_buffer_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_BUFFER_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("pHost", c_void_p) ## [in][optional] pointer to the buffer data - ] - -############################################################################### -## @brief Buffer memory channel creation properties -## -## @details -## - Specify these properties in ::urMemBufferCreate via -## ::ur_buffer_properties_t as part of a `pNext` chain. 
-## -## @remarks -## _Analogues_ -## - cl_intel_mem_channel_property -class ur_buffer_channel_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("channel", c_ulong) ## [in] Identifies the channel/region to which the buffer should be mapped. - ] - -############################################################################### -## @brief Buffer allocation location creation properties -## -## @details -## - Specify these properties in ::urMemBufferCreate via -## ::ur_buffer_properties_t as part of a `pNext` chain. -## -## @remarks -## _Analogues_ -## - cl_intel_mem_alloc_buffer_location -class ur_buffer_alloc_location_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("location", c_ulong) ## [in] Identifies the ID of global memory partition to which the memory - ## should be allocated. 
- ] - -############################################################################### -## @brief Buffer region type, used to describe a sub buffer -class ur_buffer_region_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_BUFFER_REGION - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("origin", c_size_t), ## [in] buffer origin offset - ("size", c_size_t) ## [in] size of the buffer region - ] - -############################################################################### -## @brief Buffer creation type -class ur_buffer_create_type_v(IntEnum): - REGION = 0 ## buffer create type is region - -class ur_buffer_create_type_t(c_int): - def __str__(self): - return str(ur_buffer_create_type_v(self.value)) - - -############################################################################### -## @brief Native memory object creation properties -class ur_mem_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. - ] - -############################################################################### -## @brief Sampler Filter Mode -class ur_sampler_filter_mode_v(IntEnum): - NEAREST = 0 ## Filter mode nearest. - LINEAR = 1 ## Filter mode linear. 
- -class ur_sampler_filter_mode_t(c_int): - def __str__(self): - return str(ur_sampler_filter_mode_v(self.value)) - - -############################################################################### -## @brief Sampler addressing mode -class ur_sampler_addressing_mode_v(IntEnum): - NONE = 0 ## None - CLAMP_TO_EDGE = 1 ## Clamp to edge - CLAMP = 2 ## Clamp - REPEAT = 3 ## Repeat - MIRRORED_REPEAT = 4 ## Mirrored Repeat - -class ur_sampler_addressing_mode_t(c_int): - def __str__(self): - return str(ur_sampler_addressing_mode_v(self.value)) - - -############################################################################### -## @brief Get sample object information -class ur_sampler_info_v(IntEnum): - REFERENCE_COUNT = 0 ## [uint32_t] Reference count of the sampler object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 1 ## [::ur_context_handle_t] Sampler context info - NORMALIZED_COORDS = 2 ## [::ur_bool_t] Sampler normalized coordinate setting - ADDRESSING_MODE = 3 ## [::ur_sampler_addressing_mode_t] Sampler addressing mode setting - FILTER_MODE = 4 ## [::ur_sampler_filter_mode_t] Sampler filter mode setting - -class ur_sampler_info_t(c_int): - def __str__(self): - return str(ur_sampler_info_v(self.value)) - - -############################################################################### -## @brief Sampler description. 
-class ur_sampler_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_SAMPLER_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("normalizedCoords", c_bool), ## [in] Specify if image coordinates are normalized (true) or not (false) - ("addressingMode", ur_sampler_addressing_mode_t), ## [in] Specify the address mode of the sampler - ("filterMode", ur_sampler_filter_mode_t) ## [in] Specify the filter mode of the sampler - ] - -############################################################################### -## @brief Native sampler creation properties -class ur_sampler_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. 
- ] - -############################################################################### -## @brief USM host memory property flags -class ur_usm_host_mem_flags_v(IntEnum): - INITIAL_PLACEMENT = UR_BIT(0) ## Optimize shared allocation for first access on the host - -class ur_usm_host_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM device memory property flags -class ur_usm_device_mem_flags_v(IntEnum): - WRITE_COMBINED = UR_BIT(0) ## Memory should be allocated write-combined (WC) - INITIAL_PLACEMENT = UR_BIT(1) ## Optimize shared allocation for first access on the device - DEVICE_READ_ONLY = UR_BIT(2) ## Memory is only possibly modified from the host, but read-only in all - ## device code - -class ur_usm_device_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM memory property flags -class ur_usm_pool_flags_v(IntEnum): - ZERO_INITIALIZE_BLOCK = UR_BIT(0) ## All coarse-grain allocations (allocations from the driver) will be - ## zero-initialized. 
- -class ur_usm_pool_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM allocation type -class ur_usm_type_v(IntEnum): - UNKNOWN = 0 ## Unknown USM type - HOST = 1 ## Host USM type - DEVICE = 2 ## Device USM type - SHARED = 3 ## Shared USM type - -class ur_usm_type_t(c_int): - def __str__(self): - return str(ur_usm_type_v(self.value)) - - -############################################################################### -## @brief USM memory allocation information type -class ur_usm_alloc_info_v(IntEnum): - TYPE = 0 ## [::ur_usm_type_t] Memory allocation type info - BASE_PTR = 1 ## [void *] Memory allocation base pointer info - SIZE = 2 ## [size_t] Memory allocation size info - DEVICE = 3 ## [::ur_device_handle_t] Memory allocation device info - POOL = 4 ## [::ur_usm_pool_handle_t] Memory allocation pool info - -class ur_usm_alloc_info_t(c_int): - def __str__(self): - return str(ur_usm_alloc_info_v(self.value)) - - -############################################################################### -## @brief USM memory advice -class ur_usm_advice_flags_v(IntEnum): - DEFAULT = UR_BIT(0) ## The USM memory advice is default - SET_READ_MOSTLY = UR_BIT(1) ## Hint that memory will be read from frequently and written to rarely - CLEAR_READ_MOSTLY = UR_BIT(2) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_READ_MOSTLY - SET_PREFERRED_LOCATION = UR_BIT(3) ## Hint that the preferred memory location is the specified device - CLEAR_PREFERRED_LOCATION = UR_BIT(4) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION - SET_NON_ATOMIC_MOSTLY = UR_BIT(5) ## Hint that memory will mostly be accessed non-atomically - CLEAR_NON_ATOMIC_MOSTLY = UR_BIT(6) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY - BIAS_CACHED = UR_BIT(7) ## Hint that memory should be cached - BIAS_UNCACHED = UR_BIT(8) ## Hint that memory should be not be cached - 
SET_ACCESSED_BY_DEVICE = UR_BIT(9) ## Hint that memory will be mostly accessed by the specified device - CLEAR_ACCESSED_BY_DEVICE = UR_BIT(10) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE - SET_ACCESSED_BY_HOST = UR_BIT(11) ## Hint that memory will be mostly accessed by the host - CLEAR_ACCESSED_BY_HOST = UR_BIT(12) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST - SET_PREFERRED_LOCATION_HOST = UR_BIT(13) ## Hint that the preferred memory location is the host - CLEAR_PREFERRED_LOCATION_HOST = UR_BIT(14) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST - -class ur_usm_advice_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Handle of USM pool -class ur_usm_pool_handle_t(c_void_p): - pass - -############################################################################### -## @brief USM allocation descriptor type. -class ur_usm_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_USM_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("hints", ur_usm_advice_flags_t), ## [in] Memory advice hints - ("align", c_ulong) ## [in] alignment of the USM memory object - ## Must be zero or a power of 2. - ## Must be equal to or smaller than the size of the largest data type - ## supported by `hDevice`. - ] - -############################################################################### -## @brief USM host allocation descriptor type. -## -## @details -## - Specify these properties in ::urUSMHostAlloc and ::urUSMSharedAlloc -## via ::ur_usm_desc_t as part of a `pNext` chain. 
-class ur_usm_host_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_USM_HOST_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("flags", ur_usm_host_mem_flags_t) ## [in] host memory allocation flags - ] - -############################################################################### -## @brief USM device allocation descriptor type. -## -## @details -## - Specify these properties in ::urUSMDeviceAlloc and ::urUSMSharedAlloc -## via ::ur_usm_desc_t as part of a `pNext` chain. -class ur_usm_device_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_USM_DEVICE_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("flags", ur_usm_device_mem_flags_t) ## [in] device memory allocation flags. - ] - -############################################################################### -## @brief USM allocation location desc -## -## @details -## - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and -## ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain. -## -## @remarks -## _Analogues_ -## - cl_intel_mem_alloc_buffer_location -class ur_usm_alloc_location_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("location", c_ulong) ## [in] Identifies the ID of global memory partition to which the memory - ## should be allocated. 
- ] - -############################################################################### -## @brief USM pool descriptor type -class ur_usm_pool_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_USM_POOL_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("flags", ur_usm_pool_flags_t) ## [in] memory allocation flags - ] - -############################################################################### -## @brief USM pool limits descriptor type -## -## @details -## - Specify these properties in ::urUSMPoolCreate via ::ur_usm_pool_desc_t -## as part of a `pNext` chain. -class ur_usm_pool_limits_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("maxPoolableSize", c_size_t), ## [in] Allocations up to this limit will be subject to pooling - ("minDriverAllocSize", c_size_t) ## [in] Minimum allocation size that will be requested from the driver - ] - -############################################################################### -## @brief Get USM memory pool information -class ur_usm_pool_info_v(IntEnum): - REFERENCE_COUNT = 0 ## [uint32_t] Reference count of the pool object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. 
- CONTEXT = 1 ## [::ur_context_handle_t] USM memory pool context info - -class ur_usm_pool_info_t(c_int): - def __str__(self): - return str(ur_usm_pool_info_v(self.value)) - - -############################################################################### -## @brief Virtual memory granularity info -class ur_virtual_mem_granularity_info_v(IntEnum): - MINIMUM = 0x30100 ## [size_t] size in bytes of the minimum virtual memory granularity. - RECOMMENDED = 0x30101 ## [size_t] size in bytes of the recommended virtual memory granularity. - -class ur_virtual_mem_granularity_info_t(c_int): - def __str__(self): - return str(ur_virtual_mem_granularity_info_v(self.value)) - - -############################################################################### -## @brief Virtual memory access mode flags. -class ur_virtual_mem_access_flags_v(IntEnum): - NONE = UR_BIT(0) ## Virtual memory has no access. - READ_WRITE = UR_BIT(1) ## Virtual memory has both read and write access. - READ_ONLY = UR_BIT(2) ## Virtual memory has read only access. - -class ur_virtual_mem_access_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Virtual memory range info queries. -class ur_virtual_mem_info_v(IntEnum): - ACCESS_MODE = 0 ## [::ur_virtual_mem_access_flags_t] access flags of a mapped virtual - ## memory range. - -class ur_virtual_mem_info_t(c_int): - def __str__(self): - return str(ur_virtual_mem_info_v(self.value)) - - -############################################################################### -## @brief Physical memory creation properties. -class ur_physical_mem_flags_v(IntEnum): - TBD = UR_BIT(0) ## reserved for future use. - -class ur_physical_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Physical memory creation properties. 
-class ur_physical_mem_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("flags", ur_physical_mem_flags_t) ## [in] physical memory creation flags - ] - -############################################################################### -## @brief Program metadata property type. -class ur_program_metadata_type_v(IntEnum): - UINT32 = 0 ## type is a 32-bit integer. - UINT64 = 1 ## type is a 64-bit integer. - BYTE_ARRAY = 2 ## type is a byte array. - STRING = 3 ## type is a null-terminated string. - -class ur_program_metadata_type_t(c_int): - def __str__(self): - return str(ur_program_metadata_type_v(self.value)) - - -############################################################################### -## @brief Program metadata value union. -class ur_program_metadata_value_t(Structure): - _fields_ = [ - ("data32", c_ulong), ## [in] inline storage for the 32-bit data, type - ## ::UR_PROGRAM_METADATA_TYPE_UINT32. - ("data64", c_ulonglong), ## [in] inline storage for the 64-bit data, type - ## ::UR_PROGRAM_METADATA_TYPE_UINT64. - ("pString", c_char_p), ## [in] pointer to null-terminated string data, type - ## ::UR_PROGRAM_METADATA_TYPE_STRING. - ("pData", c_void_p) ## [in] pointer to binary data, type - ## ::UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY. - ] - -############################################################################### -## @brief Program metadata property. -class ur_program_metadata_t(Structure): - _fields_ = [ - ("pName", c_char_p), ## [in] null-terminated metadata name. - ("type", ur_program_metadata_type_t), ## [in] the type of metadata value. - ("size", c_size_t), ## [in] size in bytes of the data pointed to by value.pData, or 0 when - ## value size is less than 64-bits and is stored directly in value.data. 
- ("value", ur_program_metadata_value_t) ## [in][tagged_by(type)] the metadata value storage. - ] - -############################################################################### -## @brief Program creation properties. -class ur_program_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("count", c_ulong), ## [in] the number of entries in pMetadatas, if count is greater than - ## zero then pMetadatas must not be null. - ("pMetadatas", POINTER(ur_program_metadata_t)) ## [in][optional][range(0,count)] pointer to array of metadata entries. - ] - -############################################################################### -## @brief Get Program object information -class ur_program_info_v(IntEnum): - REFERENCE_COUNT = 0 ## [uint32_t] Reference count of the program object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 1 ## [::ur_context_handle_t] Program context info. - NUM_DEVICES = 2 ## [uint32_t] Return number of devices associated with Program. - DEVICES = 3 ## [::ur_device_handle_t[]] Return list of devices associated with - ## Program. - SOURCE = 4 ## [char[]] Return program source associated with Program. - BINARY_SIZES = 5 ## [size_t[]] Return program binary sizes for each device. - BINARIES = 6 ## [unsigned char[]] Return program binaries for all devices for this - ## Program. - NUM_KERNELS = 7 ## [size_t] Number of kernels in Program, return type size_t. - KERNEL_NAMES = 8 ## [char[]] Return a null-terminated, semi-colon separated list of kernel - ## names in Program. 
- -class ur_program_info_t(c_int): - def __str__(self): - return str(ur_program_info_v(self.value)) - - -############################################################################### -## @brief Program object build status -class ur_program_build_status_v(IntEnum): - NONE = 0 ## Program build status none - ERROR = 1 ## Program build error - SUCCESS = 2 ## Program build success - IN_PROGRESS = 3 ## Program build in progress - -class ur_program_build_status_t(c_int): - def __str__(self): - return str(ur_program_build_status_v(self.value)) - - -############################################################################### -## @brief Program object binary type -class ur_program_binary_type_v(IntEnum): - NONE = 0 ## No program binary is associated with device - COMPILED_OBJECT = 1 ## Program binary is compiled object - LIBRARY = 2 ## Program binary is library object - EXECUTABLE = 3 ## Program binary is executable - -class ur_program_binary_type_t(c_int): - def __str__(self): - return str(ur_program_binary_type_v(self.value)) - - -############################################################################### -## @brief Get Program object build information -class ur_program_build_info_v(IntEnum): - STATUS = 0 ## [::ur_program_build_status_t] Program build status. - OPTIONS = 1 ## [char[]] Null-terminated options string specified by last build, - ## compile or link operation performed on the program. - LOG = 2 ## [char[]] Null-terminated program build log. - BINARY_TYPE = 3 ## [::ur_program_binary_type_t] Program binary type. 
- -class ur_program_build_info_t(c_int): - def __str__(self): - return str(ur_program_build_info_v(self.value)) - - -############################################################################### -## @brief Specialization constant information -class ur_specialization_constant_info_t(Structure): - _fields_ = [ - ("id", c_ulong), ## [in] specialization constant Id - ("size", c_size_t), ## [in] size of the specialization constant value - ("pValue", c_void_p) ## [in] pointer to the specialization constant value bytes - ] - -############################################################################### -## @brief Native program creation properties -class ur_program_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgValue. -class ur_kernel_arg_value_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgLocal. 
-class ur_kernel_arg_local_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Get Kernel object information -class ur_kernel_info_v(IntEnum): - FUNCTION_NAME = 0 ## [char[]] Return null-terminated kernel function name. - NUM_ARGS = 1 ## [size_t] Return Kernel number of arguments. - REFERENCE_COUNT = 2 ## [uint32_t] Reference count of the kernel object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 3 ## [::ur_context_handle_t] Return Context object associated with Kernel. - PROGRAM = 4 ## [::ur_program_handle_t] Return Program object associated with Kernel. - ATTRIBUTES = 5 ## [char[]] Return null-terminated kernel attributes string. - NUM_REGS = 6 ## [uint32_t] Return the number of registers used by the compiled kernel - ## (device specific). 
- -class ur_kernel_info_t(c_int): - def __str__(self): - return str(ur_kernel_info_v(self.value)) - - -############################################################################### -## @brief Get Kernel Work Group information -class ur_kernel_group_info_v(IntEnum): - GLOBAL_WORK_SIZE = 0 ## [size_t[3]] Return Work Group maximum global size - WORK_GROUP_SIZE = 1 ## [size_t] Return maximum Work Group size - COMPILE_WORK_GROUP_SIZE = 2 ## [size_t[3]] Return Work Group size required by the source code, such - ## as __attribute__((required_work_group_size(X,Y,Z)) - LOCAL_MEM_SIZE = 3 ## [size_t] Return local memory required by the Kernel - PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4 ## [size_t] Return preferred multiple of Work Group size for launch - PRIVATE_MEM_SIZE = 5 ## [size_t] Return minimum amount of private memory in bytes used by each - ## work item in the Kernel - -class ur_kernel_group_info_t(c_int): - def __str__(self): - return str(ur_kernel_group_info_v(self.value)) - - -############################################################################### -## @brief Get Kernel SubGroup information -class ur_kernel_sub_group_info_v(IntEnum): - MAX_SUB_GROUP_SIZE = 0 ## [uint32_t] Return maximum SubGroup size - MAX_NUM_SUB_GROUPS = 1 ## [uint32_t] Return maximum number of SubGroup - COMPILE_NUM_SUB_GROUPS = 2 ## [uint32_t] Return number of SubGroup required by the source code - SUB_GROUP_SIZE_INTEL = 3 ## [uint32_t] Return SubGroup size required by Intel - -class ur_kernel_sub_group_info_t(c_int): - def __str__(self): - return str(ur_kernel_sub_group_info_v(self.value)) - - -############################################################################### -## @brief Kernel Cache Configuration. -class ur_kernel_cache_config_v(IntEnum): - DEFAULT = 0 ## No preference for SLM or data cache. - LARGE_SLM = 1 ## Large Shared Local Memory (SLM) size. - LARGE_DATA = 2 ## Large General Data size. 
- -class ur_kernel_cache_config_t(c_int): - def __str__(self): - return str(ur_kernel_cache_config_v(self.value)) - - -############################################################################### -## @brief Set additional Kernel execution information -class ur_kernel_exec_info_v(IntEnum): - USM_INDIRECT_ACCESS = 0 ## [::ur_bool_t] Kernel might access data through USM pointer. - USM_PTRS = 1 ## [void *[]] Provide an explicit array of USM pointers that the kernel - ## will access. - CACHE_CONFIG = 2 ## [::ur_kernel_cache_config_t] Provide the preferred cache configuration - -class ur_kernel_exec_info_t(c_int): - def __str__(self): - return str(ur_kernel_exec_info_v(self.value)) - - -############################################################################### -## @brief Properties for for ::urKernelSetArgPointer. -class ur_kernel_arg_pointer_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetExecInfo. -class ur_kernel_exec_info_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgSampler. 
-class ur_kernel_arg_sampler_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgMemObj. -class ur_kernel_arg_mem_obj_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("memoryAccess", ur_mem_flags_t) ## [in] Memory access flag. Allowed values are: ::UR_MEM_FLAG_READ_WRITE, - ## ::UR_MEM_FLAG_WRITE_ONLY, ::UR_MEM_FLAG_READ_ONLY. - ] - -############################################################################### -## @brief Properties for for ::urKernelCreateWithNativeHandle. -class ur_kernel_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Query queue info -class ur_queue_info_v(IntEnum): - CONTEXT = 0 ## [::ur_queue_handle_t] context associated with this queue. - DEVICE = 1 ## [::ur_device_handle_t] device associated with this queue. - DEVICE_DEFAULT = 2 ## [::ur_queue_handle_t] the current default queue of the underlying - ## device. 
- FLAGS = 3 ## [::ur_queue_flags_t] the properties associated with - ## ::ur_queue_properties_t::flags. - REFERENCE_COUNT = 4 ## [uint32_t] Reference count of the queue object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - SIZE = 5 ## [uint32_t] The size of the queue - EMPTY = 6 ## [::ur_bool_t] return true if the queue was empty at the time of the - ## query - -class ur_queue_info_t(c_int): - def __str__(self): - return str(ur_queue_info_v(self.value)) - - -############################################################################### -## @brief Queue property flags -class ur_queue_flags_v(IntEnum): - OUT_OF_ORDER_EXEC_MODE_ENABLE = UR_BIT(0) ## Enable/disable out of order execution - PROFILING_ENABLE = UR_BIT(1) ## Enable/disable profiling - ON_DEVICE = UR_BIT(2) ## Is a device queue - ON_DEVICE_DEFAULT = UR_BIT(3) ## Is the default queue for a device - DISCARD_EVENTS = UR_BIT(4) ## Events will be discarded - PRIORITY_LOW = UR_BIT(5) ## Low priority queue - PRIORITY_HIGH = UR_BIT(6) ## High priority queue - SUBMISSION_BATCHED = UR_BIT(7) ## Hint: enqueue and submit in a batch later. No change in queue - ## semantics. Implementation chooses submission mode. - SUBMISSION_IMMEDIATE = UR_BIT(8) ## Hint: enqueue and submit immediately. No change in queue semantics. - ## Implementation chooses submission mode. - USE_DEFAULT_STREAM = UR_BIT(9) ## Use the default stream. Only meaningful for CUDA. Other platforms may - ## ignore this flag. - SYNC_WITH_DEFAULT_STREAM = UR_BIT(10) ## Synchronize with the default stream. Only meaningful for CUDA. Other - ## platforms may ignore this flag. 
- -class ur_queue_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Queue creation properties -class ur_queue_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("flags", ur_queue_flags_t) ## [in] Bitfield of queue creation flags - ] - -############################################################################### -## @brief Queue index creation properties -## -## @details -## - Specify these properties in ::urQueueCreate via -## ::ur_queue_properties_t as part of a `pNext` chain. -class ur_queue_index_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("computeIndex", c_ulong) ## [in] Specifies the compute index as described in the - ## sycl_ext_intel_queue_index extension. - ] - -############################################################################### -## @brief Descriptor for ::urQueueGetNativeHandle and -## ::urQueueCreateWithNativeHandle. -## -## @details -## - Specify this descriptor in ::urQueueGetNativeHandle directly or -## ::urQueueCreateWithNativeHandle via ::ur_queue_native_properties_t as -## part of a `pNext` chain. -class ur_queue_native_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("pNativeData", c_void_p) ## [in][optional] Adapter-specific metadata needed to create the handle. 
- ] - -############################################################################### -## @brief Properties for for ::urQueueCreateWithNativeHandle. -class ur_queue_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Command type -class ur_command_v(IntEnum): - KERNEL_LAUNCH = 0 ## Event created by ::urEnqueueKernelLaunch - EVENTS_WAIT = 1 ## Event created by ::urEnqueueEventsWait - EVENTS_WAIT_WITH_BARRIER = 2 ## Event created by ::urEnqueueEventsWaitWithBarrier - MEM_BUFFER_READ = 3 ## Event created by ::urEnqueueMemBufferRead - MEM_BUFFER_WRITE = 4 ## Event created by ::urEnqueueMemBufferWrite - MEM_BUFFER_READ_RECT = 5 ## Event created by ::urEnqueueMemBufferReadRect - MEM_BUFFER_WRITE_RECT = 6 ## Event created by ::urEnqueueMemBufferWriteRect - MEM_BUFFER_COPY = 7 ## Event created by ::urEnqueueMemBufferCopy - MEM_BUFFER_COPY_RECT = 8 ## Event created by ::urEnqueueMemBufferCopyRect - MEM_BUFFER_FILL = 9 ## Event created by ::urEnqueueMemBufferFill - MEM_IMAGE_READ = 10 ## Event created by ::urEnqueueMemImageRead - MEM_IMAGE_WRITE = 11 ## Event created by ::urEnqueueMemImageWrite - MEM_IMAGE_COPY = 12 ## Event created by ::urEnqueueMemImageCopy - MEM_BUFFER_MAP = 14 ## Event created by ::urEnqueueMemBufferMap - MEM_UNMAP = 16 ## Event created by ::urEnqueueMemUnmap - USM_FILL = 17 ## Event created by ::urEnqueueUSMFill - USM_MEMCPY = 18 ## Event created by ::urEnqueueUSMMemcpy - USM_PREFETCH = 19 ## Event created by ::urEnqueueUSMPrefetch - 
USM_ADVISE = 20 ## Event created by ::urEnqueueUSMAdvise - USM_FILL_2D = 21 ## Event created by ::urEnqueueUSMFill2D - USM_MEMCPY_2D = 22 ## Event created by ::urEnqueueUSMMemcpy2D - DEVICE_GLOBAL_VARIABLE_WRITE = 23 ## Event created by ::urEnqueueDeviceGlobalVariableWrite - DEVICE_GLOBAL_VARIABLE_READ = 24 ## Event created by ::urEnqueueDeviceGlobalVariableRead - READ_HOST_PIPE = 25 ## Event created by ::urEnqueueReadHostPipe - WRITE_HOST_PIPE = 26 ## Event created by ::urEnqueueWriteHostPipe - COMMAND_BUFFER_ENQUEUE_EXP = 0x1000 ## Event created by ::urCommandBufferEnqueueExp - INTEROP_SEMAPHORE_WAIT_EXP = 0x2000 ## Event created by ::urBindlessImagesWaitExternalSemaphoreExp - INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001 ## Event created by ::urBindlessImagesSignalExternalSemaphoreExp - -class ur_command_t(c_int): - def __str__(self): - return str(ur_command_v(self.value)) - - -############################################################################### -## @brief Event Status -class ur_event_status_v(IntEnum): - COMPLETE = 0 ## Command is complete - RUNNING = 1 ## Command is running - SUBMITTED = 2 ## Command is submitted - QUEUED = 3 ## Command is queued - -class ur_event_status_t(c_int): - def __str__(self): - return str(ur_event_status_v(self.value)) - - -############################################################################### -## @brief Event query information type -class ur_event_info_v(IntEnum): - COMMAND_QUEUE = 0 ## [::ur_queue_handle_t] Command queue information of an event object - CONTEXT = 1 ## [::ur_context_handle_t] Context information of an event object - COMMAND_TYPE = 2 ## [::ur_command_t] Command type information of an event object - COMMAND_EXECUTION_STATUS = 3 ## [::ur_event_status_t] Command execution status of an event object - REFERENCE_COUNT = 4 ## [uint32_t] Reference count of the event object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. 
This feature is - ## provided for identifying memory leaks. - -class ur_event_info_t(c_int): - def __str__(self): - return str(ur_event_info_v(self.value)) - - -############################################################################### -## @brief Profiling query information type -class ur_profiling_info_v(IntEnum): - COMMAND_QUEUED = 0 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event is enqueued - COMMAND_SUBMIT = 1 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event is submitted - COMMAND_START = 2 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event starts execution - COMMAND_END = 3 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event has finished execution - COMMAND_COMPLETE = 4 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event and any child events enqueued by this event on the - ## device have finished execution - -class ur_profiling_info_t(c_int): - def __str__(self): - return str(ur_profiling_info_v(self.value)) - - -############################################################################### -## @brief Properties for for ::urEventCreateWithNativeHandle. -class ur_event_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Event states for all events. -class ur_execution_info_v(IntEnum): - COMPLETE = 0 ## Indicates that the event has completed. 
- RUNNING = 1 ## Indicates that the device has started processing this event. - SUBMITTED = 2 ## Indicates that the event has been submitted by the host to the device. - QUEUED = 3 ## Indicates that the event has been queued, this is the initial state of - ## events. - -class ur_execution_info_t(c_int): - def __str__(self): - return str(ur_execution_info_v(self.value)) - - -############################################################################### -## @brief Event callback function that can be registered by the application. -def ur_event_callback_t(user_defined_callback): - @CFUNCTYPE(None, ur_event_handle_t, ur_execution_info_t, c_void_p) - def ur_event_callback_t_wrapper(hEvent, execStatus, pUserData): - return user_defined_callback(hEvent, execStatus, pUserData) - return ur_event_callback_t_wrapper - -############################################################################### -## @brief Map flags -class ur_map_flags_v(IntEnum): - READ = UR_BIT(0) ## Map for read access - WRITE = UR_BIT(1) ## Map for write access - WRITE_INVALIDATE_REGION = UR_BIT(2) ## Map for discard_write access - -class ur_map_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Map flags -class ur_usm_migration_flags_v(IntEnum): - DEFAULT = UR_BIT(0) ## Default migration TODO: Add more enums! 
- -class ur_usm_migration_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Handle of bindless image -class ur_exp_image_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of bindless image memory -class ur_exp_image_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of interop memory -class ur_exp_interop_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of interop semaphore -class ur_exp_interop_semaphore_handle_t(c_void_p): - pass - -############################################################################### -## @brief Dictates the type of memory copy. -class ur_exp_image_copy_flags_v(IntEnum): - HOST_TO_DEVICE = UR_BIT(0) ## Host to device - DEVICE_TO_HOST = UR_BIT(1) ## Device to host - DEVICE_TO_DEVICE = UR_BIT(2) ## Device to device - -class ur_exp_image_copy_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief File descriptor -class ur_exp_file_descriptor_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("fd", c_int) ## [in] A file descriptor used for Linux and & MacOS operating systems. 
- ] - -############################################################################### -## @brief Windows specific file handle -class ur_exp_win32_handle_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("handle", c_void_p) ## [in] A win32 file handle. - ] - -############################################################################### -## @brief Describes mipmap sampler properties -## -## @details -## - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t -## as part of a `pNext` chain. -class ur_exp_sampler_mip_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("minMipmapLevelClamp", c_float), ## [in] minimum mipmap level from which we can sample, minimum value - ## being 0 - ("maxMipmapLevelClamp", c_float), ## [in] maximum mipmap level from which we can sample, maximum value - ## being the number of levels - ("maxAnisotropy", c_float), ## [in] anisotropic ratio used when samplling the mipmap with anisotropic - ## filtering - ("mipFilterMode", ur_sampler_filter_mode_t) ## [in] mipmap filter mode used for filtering between mipmap levels - ] - -############################################################################### -## @brief Describes unique sampler addressing mode per dimension -## -## @details -## - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t -## as part of a `pNext` chain. 
-class ur_exp_sampler_addr_modes_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("addrModes", ur_sampler_addressing_mode_t * 3) ## [in] Specify the address mode of the sampler per dimension - ] - -############################################################################### -## @brief Describes an interop memory resource descriptor -class ur_exp_interop_mem_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Describes an interop semaphore resource descriptor -class ur_exp_interop_semaphore_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief The extension string which defines support for command-buffers which -## is returned when querying device extensions. 
-UR_COMMAND_BUFFER_EXTENSION_STRING_EXP = "ur_exp_command_buffer" - -############################################################################### -## @brief Command-Buffer Descriptor Type -class ur_exp_command_buffer_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief A value that identifies a command inside of a command-buffer, used for -## defining dependencies between commands in the same command-buffer. -class ur_exp_command_buffer_sync_point_t(c_ulong): - pass - -############################################################################### -## @brief Handle of Command-Buffer object -class ur_exp_command_buffer_handle_t(c_void_p): - pass - -############################################################################### -## @brief The extension string which defines support for cooperative-kernels -## which is returned when querying device extensions. -UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP = "ur_exp_cooperative_kernels" - -############################################################################### -## @brief The extension string which defines support for test -## which is returned when querying device extensions. -UR_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP = "ur_exp_multi_device_compile" - -############################################################################### -## @brief Supported peer info -class ur_exp_peer_info_v(IntEnum): - UR_PEER_ACCESS_SUPPORTED = 0 ## [uint32_t] 1 if P2P access is supported otherwise P2P access is not - ## supported. - UR_PEER_ATOMICS_SUPPORTED = 1 ## [uint32_t] 1 if atomic operations are supported over the P2P link, - ## otherwise such operations are not supported. 
- -class ur_exp_peer_info_t(c_int): - def __str__(self): - return str(ur_exp_peer_info_v(self.value)) - - -############################################################################### -__use_win_types = "Windows" == platform.uname()[0] - -############################################################################### -## @brief Function-pointer for urPlatformGet -if __use_win_types: - _urPlatformGet_t = WINFUNCTYPE( ur_result_t, POINTER(ur_adapter_handle_t), c_ulong, c_ulong, POINTER(ur_platform_handle_t), POINTER(c_ulong) ) -else: - _urPlatformGet_t = CFUNCTYPE( ur_result_t, POINTER(ur_adapter_handle_t), c_ulong, c_ulong, POINTER(ur_platform_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetInfo -if __use_win_types: - _urPlatformGetInfo_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_platform_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urPlatformGetInfo_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_platform_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetNativeHandle -if __use_win_types: - _urPlatformGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_native_handle_t) ) -else: - _urPlatformGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urPlatformCreateWithNativeHandle -if __use_win_types: - _urPlatformCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, POINTER(ur_platform_native_properties_t), POINTER(ur_platform_handle_t) ) -else: - _urPlatformCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, POINTER(ur_platform_native_properties_t), POINTER(ur_platform_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urPlatformGetApiVersion -if __use_win_types: - _urPlatformGetApiVersion_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_api_version_t) ) -else: - _urPlatformGetApiVersion_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_api_version_t) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetBackendOption -if __use_win_types: - _urPlatformGetBackendOption_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, c_char_p, POINTER(c_char_p) ) -else: - _urPlatformGetBackendOption_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, c_char_p, POINTER(c_char_p) ) - - -############################################################################### -## @brief Table of Platform functions pointers -class ur_platform_dditable_t(Structure): - _fields_ = [ - ("pfnGet", c_void_p), ## _urPlatformGet_t - ("pfnGetInfo", c_void_p), ## _urPlatformGetInfo_t - ("pfnGetNativeHandle", c_void_p), ## _urPlatformGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urPlatformCreateWithNativeHandle_t - ("pfnGetApiVersion", c_void_p), ## _urPlatformGetApiVersion_t - ("pfnGetBackendOption", c_void_p) ## _urPlatformGetBackendOption_t - ] - -############################################################################### -## @brief Function-pointer for urContextCreate -if __use_win_types: - _urContextCreate_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_properties_t), POINTER(ur_context_handle_t) ) -else: - _urContextCreate_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_properties_t), POINTER(ur_context_handle_t) ) - -############################################################################### -## @brief Function-pointer for urContextRetain -if __use_win_types: - _urContextRetain_t = WINFUNCTYPE( 
ur_result_t, ur_context_handle_t ) -else: - _urContextRetain_t = CFUNCTYPE( ur_result_t, ur_context_handle_t ) - -############################################################################### -## @brief Function-pointer for urContextRelease -if __use_win_types: - _urContextRelease_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t ) -else: - _urContextRelease_t = CFUNCTYPE( ur_result_t, ur_context_handle_t ) - -############################################################################### -## @brief Function-pointer for urContextGetInfo -if __use_win_types: - _urContextGetInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_context_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urContextGetInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_context_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urContextGetNativeHandle -if __use_win_types: - _urContextGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_native_handle_t) ) -else: - _urContextGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urContextCreateWithNativeHandle -if __use_win_types: - _urContextCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_native_properties_t), POINTER(ur_context_handle_t) ) -else: - _urContextCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_native_properties_t), POINTER(ur_context_handle_t) ) - -############################################################################### -## @brief Function-pointer for urContextSetExtendedDeleter -if __use_win_types: - _urContextSetExtendedDeleter_t = WINFUNCTYPE( 
ur_result_t, ur_context_handle_t, c_void_p, c_void_p ) -else: - _urContextSetExtendedDeleter_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_void_p ) - - -############################################################################### -## @brief Table of Context functions pointers -class ur_context_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urContextCreate_t - ("pfnRetain", c_void_p), ## _urContextRetain_t - ("pfnRelease", c_void_p), ## _urContextRelease_t - ("pfnGetInfo", c_void_p), ## _urContextGetInfo_t - ("pfnGetNativeHandle", c_void_p), ## _urContextGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urContextCreateWithNativeHandle_t - ("pfnSetExtendedDeleter", c_void_p) ## _urContextSetExtendedDeleter_t - ] - -############################################################################### -## @brief Function-pointer for urEventGetInfo -if __use_win_types: - _urEventGetInfo_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, ur_event_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urEventGetInfo_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, ur_event_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urEventGetProfilingInfo -if __use_win_types: - _urEventGetProfilingInfo_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, ur_profiling_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urEventGetProfilingInfo_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, ur_profiling_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urEventWait -if __use_win_types: - _urEventWait_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_event_handle_t) ) -else: - _urEventWait_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_event_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urEventRetain -if __use_win_types: - _urEventRetain_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t ) -else: - _urEventRetain_t = CFUNCTYPE( ur_result_t, ur_event_handle_t ) - -############################################################################### -## @brief Function-pointer for urEventRelease -if __use_win_types: - _urEventRelease_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t ) -else: - _urEventRelease_t = CFUNCTYPE( ur_result_t, ur_event_handle_t ) - -############################################################################### -## @brief Function-pointer for urEventGetNativeHandle -if __use_win_types: - _urEventGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, POINTER(ur_native_handle_t) ) -else: - _urEventGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEventCreateWithNativeHandle -if __use_win_types: - _urEventCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_event_native_properties_t), POINTER(ur_event_handle_t) ) -else: - _urEventCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_event_native_properties_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEventSetCallback -if __use_win_types: - _urEventSetCallback_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, ur_execution_info_t, c_void_p, c_void_p ) -else: - _urEventSetCallback_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, ur_execution_info_t, c_void_p, c_void_p ) - - -############################################################################### -## @brief Table of Event functions pointers -class 
ur_event_dditable_t(Structure): - _fields_ = [ - ("pfnGetInfo", c_void_p), ## _urEventGetInfo_t - ("pfnGetProfilingInfo", c_void_p), ## _urEventGetProfilingInfo_t - ("pfnWait", c_void_p), ## _urEventWait_t - ("pfnRetain", c_void_p), ## _urEventRetain_t - ("pfnRelease", c_void_p), ## _urEventRelease_t - ("pfnGetNativeHandle", c_void_p), ## _urEventGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urEventCreateWithNativeHandle_t - ("pfnSetCallback", c_void_p) ## _urEventSetCallback_t - ] - -############################################################################### -## @brief Function-pointer for urProgramCreateWithIL -if __use_win_types: - _urProgramCreateWithIL_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) -else: - _urProgramCreateWithIL_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramCreateWithBinary -if __use_win_types: - _urProgramCreateWithBinary_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(c_ubyte), POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) -else: - _urProgramCreateWithBinary_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(c_ubyte), POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramBuild -if __use_win_types: - _urProgramBuild_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) -else: - _urProgramBuild_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) - -############################################################################### -## @brief 
Function-pointer for urProgramCompile -if __use_win_types: - _urProgramCompile_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) -else: - _urProgramCompile_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) - -############################################################################### -## @brief Function-pointer for urProgramLink -if __use_win_types: - _urProgramLink_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) -else: - _urProgramLink_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramRetain -if __use_win_types: - _urProgramRetain_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t ) -else: - _urProgramRetain_t = CFUNCTYPE( ur_result_t, ur_program_handle_t ) - -############################################################################### -## @brief Function-pointer for urProgramRelease -if __use_win_types: - _urProgramRelease_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t ) -else: - _urProgramRelease_t = CFUNCTYPE( ur_result_t, ur_program_handle_t ) - -############################################################################### -## @brief Function-pointer for urProgramGetFunctionPointer -if __use_win_types: - _urProgramGetFunctionPointer_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_program_handle_t, c_char_p, POINTER(c_void_p) ) -else: - _urProgramGetFunctionPointer_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_program_handle_t, c_char_p, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urProgramGetInfo -if __use_win_types: - _urProgramGetInfo_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, ur_program_info_t, 
c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urProgramGetInfo_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, ur_program_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urProgramGetBuildInfo -if __use_win_types: - _urProgramGetBuildInfo_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, ur_device_handle_t, ur_program_build_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urProgramGetBuildInfo_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, ur_device_handle_t, ur_program_build_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urProgramSetSpecializationConstants -if __use_win_types: - _urProgramSetSpecializationConstants_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) -else: - _urProgramSetSpecializationConstants_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) - -############################################################################### -## @brief Function-pointer for urProgramGetNativeHandle -if __use_win_types: - _urProgramGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, POINTER(ur_native_handle_t) ) -else: - _urProgramGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramCreateWithNativeHandle -if __use_win_types: - _urProgramCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_program_native_properties_t), POINTER(ur_program_handle_t) ) -else: - _urProgramCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, 
POINTER(ur_program_native_properties_t), POINTER(ur_program_handle_t) ) - - -############################################################################### -## @brief Table of Program functions pointers -class ur_program_dditable_t(Structure): - _fields_ = [ - ("pfnCreateWithIL", c_void_p), ## _urProgramCreateWithIL_t - ("pfnCreateWithBinary", c_void_p), ## _urProgramCreateWithBinary_t - ("pfnBuild", c_void_p), ## _urProgramBuild_t - ("pfnCompile", c_void_p), ## _urProgramCompile_t - ("pfnLink", c_void_p), ## _urProgramLink_t - ("pfnRetain", c_void_p), ## _urProgramRetain_t - ("pfnRelease", c_void_p), ## _urProgramRelease_t - ("pfnGetFunctionPointer", c_void_p), ## _urProgramGetFunctionPointer_t - ("pfnGetInfo", c_void_p), ## _urProgramGetInfo_t - ("pfnGetBuildInfo", c_void_p), ## _urProgramGetBuildInfo_t - ("pfnSetSpecializationConstants", c_void_p), ## _urProgramSetSpecializationConstants_t - ("pfnGetNativeHandle", c_void_p), ## _urProgramGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p) ## _urProgramCreateWithNativeHandle_t - ] - -############################################################################### -## @brief Function-pointer for urProgramBuildExp -if __use_win_types: - _urProgramBuildExp_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) -else: - _urProgramBuildExp_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) - -############################################################################### -## @brief Function-pointer for urProgramCompileExp -if __use_win_types: - _urProgramCompileExp_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) -else: - _urProgramCompileExp_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) - -############################################################################### -## @brief Function-pointer for urProgramLinkExp 
-if __use_win_types: - _urProgramLinkExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_device_handle_t), c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) -else: - _urProgramLinkExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_device_handle_t), c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) - - -############################################################################### -## @brief Table of ProgramExp functions pointers -class ur_program_exp_dditable_t(Structure): - _fields_ = [ - ("pfnBuildExp", c_void_p), ## _urProgramBuildExp_t - ("pfnCompileExp", c_void_p), ## _urProgramCompileExp_t - ("pfnLinkExp", c_void_p) ## _urProgramLinkExp_t - ] - -############################################################################### -## @brief Function-pointer for urKernelCreate -if __use_win_types: - _urKernelCreate_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_char_p, POINTER(ur_kernel_handle_t) ) -else: - _urKernelCreate_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_char_p, POINTER(ur_kernel_handle_t) ) - -############################################################################### -## @brief Function-pointer for urKernelGetInfo -if __use_win_types: - _urKernelGetInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urKernelGetInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urKernelGetGroupInfo -if __use_win_types: - _urKernelGetGroupInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urKernelGetGroupInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_group_info_t, c_size_t, 
c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urKernelGetSubGroupInfo -if __use_win_types: - _urKernelGetSubGroupInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_sub_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urKernelGetSubGroupInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_sub_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urKernelRetain -if __use_win_types: - _urKernelRetain_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t ) -else: - _urKernelRetain_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelRelease -if __use_win_types: - _urKernelRelease_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t ) -else: - _urKernelRelease_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelGetNativeHandle -if __use_win_types: - _urKernelGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(ur_native_handle_t) ) -else: - _urKernelGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urKernelCreateWithNativeHandle -if __use_win_types: - _urKernelCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_program_handle_t, POINTER(ur_kernel_native_properties_t), POINTER(ur_kernel_handle_t) ) -else: - _urKernelCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_program_handle_t, 
POINTER(ur_kernel_native_properties_t), POINTER(ur_kernel_handle_t) ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgValue -if __use_win_types: - _urKernelSetArgValue_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_value_properties_t), c_void_p ) -else: - _urKernelSetArgValue_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_value_properties_t), c_void_p ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgLocal -if __use_win_types: - _urKernelSetArgLocal_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_local_properties_t) ) -else: - _urKernelSetArgLocal_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_local_properties_t) ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgPointer -if __use_win_types: - _urKernelSetArgPointer_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_pointer_properties_t), c_void_p ) -else: - _urKernelSetArgPointer_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_pointer_properties_t), c_void_p ) - -############################################################################### -## @brief Function-pointer for urKernelSetExecInfo -if __use_win_types: - _urKernelSetExecInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_exec_info_t, c_size_t, POINTER(ur_kernel_exec_info_properties_t), c_void_p ) -else: - _urKernelSetExecInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_exec_info_t, c_size_t, POINTER(ur_kernel_exec_info_properties_t), c_void_p ) - -############################################################################### -## @brief Function-pointer for 
urKernelSetArgSampler -if __use_win_types: - _urKernelSetArgSampler_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_sampler_properties_t), ur_sampler_handle_t ) -else: - _urKernelSetArgSampler_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_sampler_properties_t), ur_sampler_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgMemObj -if __use_win_types: - _urKernelSetArgMemObj_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_mem_obj_properties_t), ur_mem_handle_t ) -else: - _urKernelSetArgMemObj_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_mem_obj_properties_t), ur_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelSetSpecializationConstants -if __use_win_types: - _urKernelSetSpecializationConstants_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) -else: - _urKernelSetSpecializationConstants_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) - - -############################################################################### -## @brief Table of Kernel functions pointers -class ur_kernel_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urKernelCreate_t - ("pfnGetInfo", c_void_p), ## _urKernelGetInfo_t - ("pfnGetGroupInfo", c_void_p), ## _urKernelGetGroupInfo_t - ("pfnGetSubGroupInfo", c_void_p), ## _urKernelGetSubGroupInfo_t - ("pfnRetain", c_void_p), ## _urKernelRetain_t - ("pfnRelease", c_void_p), ## _urKernelRelease_t - ("pfnGetNativeHandle", c_void_p), ## _urKernelGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urKernelCreateWithNativeHandle_t - ("pfnSetArgValue", c_void_p), ## _urKernelSetArgValue_t - ("pfnSetArgLocal", 
c_void_p), ## _urKernelSetArgLocal_t - ("pfnSetArgPointer", c_void_p), ## _urKernelSetArgPointer_t - ("pfnSetExecInfo", c_void_p), ## _urKernelSetExecInfo_t - ("pfnSetArgSampler", c_void_p), ## _urKernelSetArgSampler_t - ("pfnSetArgMemObj", c_void_p), ## _urKernelSetArgMemObj_t - ("pfnSetSpecializationConstants", c_void_p) ## _urKernelSetSpecializationConstants_t - ] - -############################################################################### -## @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp -if __use_win_types: - _urKernelSuggestMaxCooperativeGroupCountExp_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) -else: - _urKernelSuggestMaxCooperativeGroupCountExp_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) - - -############################################################################### -## @brief Table of KernelExp functions pointers -class ur_kernel_exp_dditable_t(Structure): - _fields_ = [ - ("pfnSuggestMaxCooperativeGroupCountExp", c_void_p) ## _urKernelSuggestMaxCooperativeGroupCountExp_t - ] - -############################################################################### -## @brief Function-pointer for urSamplerCreate -if __use_win_types: - _urSamplerCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_sampler_desc_t), POINTER(ur_sampler_handle_t) ) -else: - _urSamplerCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_sampler_desc_t), POINTER(ur_sampler_handle_t) ) - -############################################################################### -## @brief Function-pointer for urSamplerRetain -if __use_win_types: - _urSamplerRetain_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t ) -else: - _urSamplerRetain_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t ) - -############################################################################### -## @brief Function-pointer for urSamplerRelease -if __use_win_types: - _urSamplerRelease_t = WINFUNCTYPE( 
ur_result_t, ur_sampler_handle_t ) -else: - _urSamplerRelease_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t ) - -############################################################################### -## @brief Function-pointer for urSamplerGetInfo -if __use_win_types: - _urSamplerGetInfo_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t, ur_sampler_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urSamplerGetInfo_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t, ur_sampler_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urSamplerGetNativeHandle -if __use_win_types: - _urSamplerGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t, POINTER(ur_native_handle_t) ) -else: - _urSamplerGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urSamplerCreateWithNativeHandle -if __use_win_types: - _urSamplerCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_sampler_native_properties_t), POINTER(ur_sampler_handle_t) ) -else: - _urSamplerCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_sampler_native_properties_t), POINTER(ur_sampler_handle_t) ) - - -############################################################################### -## @brief Table of Sampler functions pointers -class ur_sampler_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urSamplerCreate_t - ("pfnRetain", c_void_p), ## _urSamplerRetain_t - ("pfnRelease", c_void_p), ## _urSamplerRelease_t - ("pfnGetInfo", c_void_p), ## _urSamplerGetInfo_t - ("pfnGetNativeHandle", c_void_p), ## _urSamplerGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p) ## _urSamplerCreateWithNativeHandle_t - ] - 
-############################################################################### -## @brief Function-pointer for urMemImageCreate -if __use_win_types: - _urMemImageCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), c_void_p, POINTER(ur_mem_handle_t) ) -else: - _urMemImageCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), c_void_p, POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemBufferCreate -if __use_win_types: - _urMemBufferCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, c_size_t, POINTER(ur_buffer_properties_t), POINTER(ur_mem_handle_t) ) -else: - _urMemBufferCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, c_size_t, POINTER(ur_buffer_properties_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemRetain -if __use_win_types: - _urMemRetain_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t ) -else: - _urMemRetain_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urMemRelease -if __use_win_types: - _urMemRelease_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t ) -else: - _urMemRelease_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urMemBufferPartition -if __use_win_types: - _urMemBufferPartition_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_flags_t, ur_buffer_create_type_t, POINTER(ur_buffer_region_t), POINTER(ur_mem_handle_t) ) -else: - _urMemBufferPartition_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_flags_t, ur_buffer_create_type_t, 
POINTER(ur_buffer_region_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemGetNativeHandle -if __use_win_types: - _urMemGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, POINTER(ur_native_handle_t) ) -else: - _urMemGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemBufferCreateWithNativeHandle -if __use_win_types: - _urMemBufferCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) -else: - _urMemBufferCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemImageCreateWithNativeHandle -if __use_win_types: - _urMemImageCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) -else: - _urMemImageCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemGetInfo -if __use_win_types: - _urMemGetInfo_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urMemGetInfo_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - 
-############################################################################### -## @brief Function-pointer for urMemImageGetInfo -if __use_win_types: - _urMemImageGetInfo_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_image_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urMemImageGetInfo_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_image_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of Mem functions pointers -class ur_mem_dditable_t(Structure): - _fields_ = [ - ("pfnImageCreate", c_void_p), ## _urMemImageCreate_t - ("pfnBufferCreate", c_void_p), ## _urMemBufferCreate_t - ("pfnRetain", c_void_p), ## _urMemRetain_t - ("pfnRelease", c_void_p), ## _urMemRelease_t - ("pfnBufferPartition", c_void_p), ## _urMemBufferPartition_t - ("pfnGetNativeHandle", c_void_p), ## _urMemGetNativeHandle_t - ("pfnBufferCreateWithNativeHandle", c_void_p), ## _urMemBufferCreateWithNativeHandle_t - ("pfnImageCreateWithNativeHandle", c_void_p), ## _urMemImageCreateWithNativeHandle_t - ("pfnGetInfo", c_void_p), ## _urMemGetInfo_t - ("pfnImageGetInfo", c_void_p) ## _urMemImageGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urPhysicalMemCreate -if __use_win_types: - _urPhysicalMemCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_physical_mem_properties_t), POINTER(ur_physical_mem_handle_t) ) -else: - _urPhysicalMemCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_physical_mem_properties_t), POINTER(ur_physical_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urPhysicalMemRetain -if __use_win_types: - _urPhysicalMemRetain_t = WINFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) -else: - _urPhysicalMemRetain_t = CFUNCTYPE( 
ur_result_t, ur_physical_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urPhysicalMemRelease -if __use_win_types: - _urPhysicalMemRelease_t = WINFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) -else: - _urPhysicalMemRelease_t = CFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) - - -############################################################################### -## @brief Table of PhysicalMem functions pointers -class ur_physical_mem_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urPhysicalMemCreate_t - ("pfnRetain", c_void_p), ## _urPhysicalMemRetain_t - ("pfnRelease", c_void_p) ## _urPhysicalMemRelease_t - ] - -############################################################################### -## @brief Function-pointer for urAdapterGet -if __use_win_types: - _urAdapterGet_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) -else: - _urAdapterGet_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urAdapterRelease -if __use_win_types: - _urAdapterRelease_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRelease_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterRetain -if __use_win_types: - _urAdapterRetain_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRetain_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterGetLastError -if __use_win_types: - _urAdapterGetLastError_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) -else: - _urAdapterGetLastError_t = 
CFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) - -############################################################################### -## @brief Function-pointer for urAdapterGetInfo -if __use_win_types: - _urAdapterGetInfo_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urAdapterGetInfo_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of Global functions pointers -class ur_global_dditable_t(Structure): - _fields_ = [ - ("pfnAdapterGet", c_void_p), ## _urAdapterGet_t - ("pfnAdapterRelease", c_void_p), ## _urAdapterRelease_t - ("pfnAdapterRetain", c_void_p), ## _urAdapterRetain_t - ("pfnAdapterGetLastError", c_void_p), ## _urAdapterGetLastError_t - ("pfnAdapterGetInfo", c_void_p) ## _urAdapterGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urEnqueueKernelLaunch -if __use_win_types: - _urEnqueueKernelLaunch_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueKernelLaunch_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueEventsWait -if __use_win_types: - _urEnqueueEventsWait_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueEventsWait_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, 
POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueEventsWaitWithBarrier -if __use_win_types: - _urEnqueueEventsWaitWithBarrier_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueEventsWaitWithBarrier_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferRead -if __use_win_types: - _urEnqueueMemBufferRead_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferRead_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferWrite -if __use_win_types: - _urEnqueueMemBufferWrite_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferWrite_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferReadRect -if __use_win_types: - _urEnqueueMemBufferReadRect_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, 
c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferReadRect_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferWriteRect -if __use_win_types: - _urEnqueueMemBufferWriteRect_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferWriteRect_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferCopy -if __use_win_types: - _urEnqueueMemBufferCopy_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferCopy_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferCopyRect -if __use_win_types: - _urEnqueueMemBufferCopyRect_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, 
c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferCopyRect_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferFill -if __use_win_types: - _urEnqueueMemBufferFill_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferFill_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemImageRead -if __use_win_types: - _urEnqueueMemImageRead_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemImageRead_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemImageWrite -if __use_win_types: - _urEnqueueMemImageWrite_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemImageWrite_t = CFUNCTYPE( 
ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemImageCopy -if __use_win_types: - _urEnqueueMemImageCopy_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemImageCopy_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferMap -if __use_win_types: - _urEnqueueMemBufferMap_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_map_flags_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t), POINTER(c_void_p) ) -else: - _urEnqueueMemBufferMap_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_map_flags_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t), POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemUnmap -if __use_win_types: - _urEnqueueMemUnmap_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemUnmap_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief 
Function-pointer for urEnqueueUSMFill -if __use_win_types: - _urEnqueueUSMFill_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMFill_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMMemcpy -if __use_win_types: - _urEnqueueUSMMemcpy_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMMemcpy_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMPrefetch -if __use_win_types: - _urEnqueueUSMPrefetch_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMPrefetch_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMAdvise -if __use_win_types: - _urEnqueueUSMAdvise_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMAdvise_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief 
Function-pointer for urEnqueueUSMFill2D -if __use_win_types: - _urEnqueueUSMFill2D_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_size_t, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMFill2D_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_size_t, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMMemcpy2D -if __use_win_types: - _urEnqueueUSMMemcpy2D_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_size_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMMemcpy2D_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_size_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueDeviceGlobalVariableWrite -if __use_win_types: - _urEnqueueDeviceGlobalVariableWrite_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueDeviceGlobalVariableWrite_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueDeviceGlobalVariableRead -if __use_win_types: - _urEnqueueDeviceGlobalVariableRead_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, 
POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueDeviceGlobalVariableRead_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueReadHostPipe -if __use_win_types: - _urEnqueueReadHostPipe_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueReadHostPipe_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueWriteHostPipe -if __use_win_types: - _urEnqueueWriteHostPipe_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueWriteHostPipe_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of Enqueue functions pointers -class ur_enqueue_dditable_t(Structure): - _fields_ = [ - ("pfnKernelLaunch", c_void_p), ## _urEnqueueKernelLaunch_t - ("pfnEventsWait", c_void_p), ## _urEnqueueEventsWait_t - ("pfnEventsWaitWithBarrier", c_void_p), ## _urEnqueueEventsWaitWithBarrier_t - ("pfnMemBufferRead", c_void_p), ## _urEnqueueMemBufferRead_t - ("pfnMemBufferWrite", c_void_p), ## _urEnqueueMemBufferWrite_t - ("pfnMemBufferReadRect", c_void_p), ## _urEnqueueMemBufferReadRect_t - 
("pfnMemBufferWriteRect", c_void_p), ## _urEnqueueMemBufferWriteRect_t - ("pfnMemBufferCopy", c_void_p), ## _urEnqueueMemBufferCopy_t - ("pfnMemBufferCopyRect", c_void_p), ## _urEnqueueMemBufferCopyRect_t - ("pfnMemBufferFill", c_void_p), ## _urEnqueueMemBufferFill_t - ("pfnMemImageRead", c_void_p), ## _urEnqueueMemImageRead_t - ("pfnMemImageWrite", c_void_p), ## _urEnqueueMemImageWrite_t - ("pfnMemImageCopy", c_void_p), ## _urEnqueueMemImageCopy_t - ("pfnMemBufferMap", c_void_p), ## _urEnqueueMemBufferMap_t - ("pfnMemUnmap", c_void_p), ## _urEnqueueMemUnmap_t - ("pfnUSMFill", c_void_p), ## _urEnqueueUSMFill_t - ("pfnUSMMemcpy", c_void_p), ## _urEnqueueUSMMemcpy_t - ("pfnUSMPrefetch", c_void_p), ## _urEnqueueUSMPrefetch_t - ("pfnUSMAdvise", c_void_p), ## _urEnqueueUSMAdvise_t - ("pfnUSMFill2D", c_void_p), ## _urEnqueueUSMFill2D_t - ("pfnUSMMemcpy2D", c_void_p), ## _urEnqueueUSMMemcpy2D_t - ("pfnDeviceGlobalVariableWrite", c_void_p), ## _urEnqueueDeviceGlobalVariableWrite_t - ("pfnDeviceGlobalVariableRead", c_void_p), ## _urEnqueueDeviceGlobalVariableRead_t - ("pfnReadHostPipe", c_void_p), ## _urEnqueueReadHostPipe_t - ("pfnWriteHostPipe", c_void_p) ## _urEnqueueWriteHostPipe_t - ] - -############################################################################### -## @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp -if __use_win_types: - _urEnqueueCooperativeKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueCooperativeKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of EnqueueExp functions pointers -class 
ur_enqueue_exp_dditable_t(Structure): - _fields_ = [ - ("pfnCooperativeKernelLaunchExp", c_void_p) ## _urEnqueueCooperativeKernelLaunchExp_t - ] - -############################################################################### -## @brief Function-pointer for urQueueGetInfo -if __use_win_types: - _urQueueGetInfo_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_queue_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urQueueGetInfo_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_queue_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urQueueCreate -if __use_win_types: - _urQueueCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_properties_t), POINTER(ur_queue_handle_t) ) -else: - _urQueueCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_properties_t), POINTER(ur_queue_handle_t) ) - -############################################################################### -## @brief Function-pointer for urQueueRetain -if __use_win_types: - _urQueueRetain_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueRetain_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - -############################################################################### -## @brief Function-pointer for urQueueRelease -if __use_win_types: - _urQueueRelease_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueRelease_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - -############################################################################### -## @brief Function-pointer for urQueueGetNativeHandle -if __use_win_types: - _urQueueGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, POINTER(ur_queue_native_desc_t), POINTER(ur_native_handle_t) ) -else: - _urQueueGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, POINTER(ur_queue_native_desc_t), 
POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urQueueCreateWithNativeHandle -if __use_win_types: - _urQueueCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_native_properties_t), POINTER(ur_queue_handle_t) ) -else: - _urQueueCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_native_properties_t), POINTER(ur_queue_handle_t) ) - -############################################################################### -## @brief Function-pointer for urQueueFinish -if __use_win_types: - _urQueueFinish_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueFinish_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - -############################################################################### -## @brief Function-pointer for urQueueFlush -if __use_win_types: - _urQueueFlush_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueFlush_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - - -############################################################################### -## @brief Table of Queue functions pointers -class ur_queue_dditable_t(Structure): - _fields_ = [ - ("pfnGetInfo", c_void_p), ## _urQueueGetInfo_t - ("pfnCreate", c_void_p), ## _urQueueCreate_t - ("pfnRetain", c_void_p), ## _urQueueRetain_t - ("pfnRelease", c_void_p), ## _urQueueRelease_t - ("pfnGetNativeHandle", c_void_p), ## _urQueueGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urQueueCreateWithNativeHandle_t - ("pfnFinish", c_void_p), ## _urQueueFinish_t - ("pfnFlush", c_void_p) ## _urQueueFlush_t - ] - -############################################################################### -## @brief Function-pointer for urBindlessImagesUnsampledImageHandleDestroyExp -if __use_win_types: - 
_urBindlessImagesUnsampledImageHandleDestroyExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) -else: - _urBindlessImagesUnsampledImageHandleDestroyExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesSampledImageHandleDestroyExp -if __use_win_types: - _urBindlessImagesSampledImageHandleDestroyExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) -else: - _urBindlessImagesSampledImageHandleDestroyExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageAllocateExp -if __use_win_types: - _urBindlessImagesImageAllocateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_exp_image_mem_handle_t) ) -else: - _urBindlessImagesImageAllocateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_exp_image_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageFreeExp -if __use_win_types: - _urBindlessImagesImageFreeExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) -else: - _urBindlessImagesImageFreeExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesUnsampledImageCreateExp -if __use_win_types: - _urBindlessImagesUnsampledImageCreateExp_t = 
WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) -else: - _urBindlessImagesUnsampledImageCreateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesSampledImageCreateExp -if __use_win_types: - _urBindlessImagesSampledImageCreateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_sampler_handle_t, POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) -else: - _urBindlessImagesSampledImageCreateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_sampler_handle_t, POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageCopyExp -if __use_win_types: - _urBindlessImagesImageCopyExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_void_p, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_image_copy_flags_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urBindlessImagesImageCopyExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_void_p, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_image_copy_flags_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) 
) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageGetInfoExp -if __use_win_types: - _urBindlessImagesImageGetInfoExp_t = WINFUNCTYPE( ur_result_t, ur_exp_image_mem_handle_t, ur_image_info_t, c_void_p, POINTER(c_size_t) ) -else: - _urBindlessImagesImageGetInfoExp_t = CFUNCTYPE( ur_result_t, ur_exp_image_mem_handle_t, ur_image_info_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesMipmapGetLevelExp -if __use_win_types: - _urBindlessImagesMipmapGetLevelExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, c_ulong, POINTER(ur_exp_image_mem_handle_t) ) -else: - _urBindlessImagesMipmapGetLevelExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, c_ulong, POINTER(ur_exp_image_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesMipmapFreeExp -if __use_win_types: - _urBindlessImagesMipmapFreeExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) -else: - _urBindlessImagesMipmapFreeExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImportOpaqueFDExp -if __use_win_types: - _urBindlessImagesImportOpaqueFDExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_exp_interop_mem_desc_t), POINTER(ur_exp_interop_mem_handle_t) ) -else: - _urBindlessImagesImportOpaqueFDExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_exp_interop_mem_desc_t), POINTER(ur_exp_interop_mem_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urBindlessImagesMapExternalArrayExp -if __use_win_types: - _urBindlessImagesMapExternalArrayExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_interop_mem_handle_t, POINTER(ur_exp_image_mem_handle_t) ) -else: - _urBindlessImagesMapExternalArrayExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_interop_mem_handle_t, POINTER(ur_exp_image_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesReleaseInteropExp -if __use_win_types: - _urBindlessImagesReleaseInteropExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_mem_handle_t ) -else: - _urBindlessImagesReleaseInteropExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImportExternalSemaphoreOpaqueFDExp -if __use_win_types: - _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_interop_semaphore_desc_t), POINTER(ur_exp_interop_semaphore_handle_t) ) -else: - _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_interop_semaphore_desc_t), POINTER(ur_exp_interop_semaphore_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesDestroyExternalSemaphoreExp -if __use_win_types: - _urBindlessImagesDestroyExternalSemaphoreExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, 
ur_exp_interop_semaphore_handle_t ) -else: - _urBindlessImagesDestroyExternalSemaphoreExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_semaphore_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesWaitExternalSemaphoreExp -if __use_win_types: - _urBindlessImagesWaitExternalSemaphoreExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urBindlessImagesWaitExternalSemaphoreExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesSignalExternalSemaphoreExp -if __use_win_types: - _urBindlessImagesSignalExternalSemaphoreExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urBindlessImagesSignalExternalSemaphoreExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of BindlessImagesExp functions pointers -class ur_bindless_images_exp_dditable_t(Structure): - _fields_ = [ - ("pfnUnsampledImageHandleDestroyExp", c_void_p), ## _urBindlessImagesUnsampledImageHandleDestroyExp_t - ("pfnSampledImageHandleDestroyExp", c_void_p), ## _urBindlessImagesSampledImageHandleDestroyExp_t - ("pfnImageAllocateExp", c_void_p), ## _urBindlessImagesImageAllocateExp_t - ("pfnImageFreeExp", c_void_p), ## _urBindlessImagesImageFreeExp_t - ("pfnUnsampledImageCreateExp", c_void_p), ## _urBindlessImagesUnsampledImageCreateExp_t - 
("pfnSampledImageCreateExp", c_void_p), ## _urBindlessImagesSampledImageCreateExp_t - ("pfnImageCopyExp", c_void_p), ## _urBindlessImagesImageCopyExp_t - ("pfnImageGetInfoExp", c_void_p), ## _urBindlessImagesImageGetInfoExp_t - ("pfnMipmapGetLevelExp", c_void_p), ## _urBindlessImagesMipmapGetLevelExp_t - ("pfnMipmapFreeExp", c_void_p), ## _urBindlessImagesMipmapFreeExp_t - ("pfnImportOpaqueFDExp", c_void_p), ## _urBindlessImagesImportOpaqueFDExp_t - ("pfnMapExternalArrayExp", c_void_p), ## _urBindlessImagesMapExternalArrayExp_t - ("pfnReleaseInteropExp", c_void_p), ## _urBindlessImagesReleaseInteropExp_t - ("pfnImportExternalSemaphoreOpaqueFDExp", c_void_p), ## _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t - ("pfnDestroyExternalSemaphoreExp", c_void_p), ## _urBindlessImagesDestroyExternalSemaphoreExp_t - ("pfnWaitExternalSemaphoreExp", c_void_p), ## _urBindlessImagesWaitExternalSemaphoreExp_t - ("pfnSignalExternalSemaphoreExp", c_void_p) ## _urBindlessImagesSignalExternalSemaphoreExp_t - ] - -############################################################################### -## @brief Function-pointer for urUSMHostAlloc -if __use_win_types: - _urUSMHostAlloc_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) -else: - _urUSMHostAlloc_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urUSMDeviceAlloc -if __use_win_types: - _urUSMDeviceAlloc_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) -else: - _urUSMDeviceAlloc_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) - 
-############################################################################### -## @brief Function-pointer for urUSMSharedAlloc -if __use_win_types: - _urUSMSharedAlloc_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) -else: - _urUSMSharedAlloc_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urUSMFree -if __use_win_types: - _urUSMFree_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) -else: - _urUSMFree_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) - -############################################################################### -## @brief Function-pointer for urUSMGetMemAllocInfo -if __use_win_types: - _urUSMGetMemAllocInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, ur_usm_alloc_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urUSMGetMemAllocInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, ur_usm_alloc_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urUSMPoolCreate -if __use_win_types: - _urUSMPoolCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_pool_desc_t), POINTER(ur_usm_pool_handle_t) ) -else: - _urUSMPoolCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_pool_desc_t), POINTER(ur_usm_pool_handle_t) ) - -############################################################################### -## @brief Function-pointer for urUSMPoolRetain -if __use_win_types: - _urUSMPoolRetain_t = WINFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) -else: - _urUSMPoolRetain_t = CFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) - 
-############################################################################### -## @brief Function-pointer for urUSMPoolRelease -if __use_win_types: - _urUSMPoolRelease_t = WINFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) -else: - _urUSMPoolRelease_t = CFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) - -############################################################################### -## @brief Function-pointer for urUSMPoolGetInfo -if __use_win_types: - _urUSMPoolGetInfo_t = WINFUNCTYPE( ur_result_t, ur_usm_pool_handle_t, ur_usm_pool_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urUSMPoolGetInfo_t = CFUNCTYPE( ur_result_t, ur_usm_pool_handle_t, ur_usm_pool_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of USM functions pointers -class ur_usm_dditable_t(Structure): - _fields_ = [ - ("pfnHostAlloc", c_void_p), ## _urUSMHostAlloc_t - ("pfnDeviceAlloc", c_void_p), ## _urUSMDeviceAlloc_t - ("pfnSharedAlloc", c_void_p), ## _urUSMSharedAlloc_t - ("pfnFree", c_void_p), ## _urUSMFree_t - ("pfnGetMemAllocInfo", c_void_p), ## _urUSMGetMemAllocInfo_t - ("pfnPoolCreate", c_void_p), ## _urUSMPoolCreate_t - ("pfnPoolRetain", c_void_p), ## _urUSMPoolRetain_t - ("pfnPoolRelease", c_void_p), ## _urUSMPoolRelease_t - ("pfnPoolGetInfo", c_void_p) ## _urUSMPoolGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urUSMPitchedAllocExp -if __use_win_types: - _urUSMPitchedAllocExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, c_size_t, c_size_t, POINTER(c_void_p), POINTER(c_size_t) ) -else: - _urUSMPitchedAllocExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, c_size_t, c_size_t, POINTER(c_void_p), POINTER(c_size_t) ) - 
-############################################################################### -## @brief Function-pointer for urUSMImportExp -if __use_win_types: - _urUSMImportExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) -else: - _urUSMImportExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) - -############################################################################### -## @brief Function-pointer for urUSMReleaseExp -if __use_win_types: - _urUSMReleaseExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) -else: - _urUSMReleaseExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) - - -############################################################################### -## @brief Table of USMExp functions pointers -class ur_usm_exp_dditable_t(Structure): - _fields_ = [ - ("pfnPitchedAllocExp", c_void_p), ## _urUSMPitchedAllocExp_t - ("pfnImportExp", c_void_p), ## _urUSMImportExp_t - ("pfnReleaseExp", c_void_p) ## _urUSMReleaseExp_t - ] - -############################################################################### -## @brief Function-pointer for urCommandBufferCreateExp -if __use_win_types: - _urCommandBufferCreateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_command_buffer_desc_t), POINTER(ur_exp_command_buffer_handle_t) ) -else: - _urCommandBufferCreateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_command_buffer_desc_t), POINTER(ur_exp_command_buffer_handle_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferRetainExp -if __use_win_types: - _urCommandBufferRetainExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) -else: - _urCommandBufferRetainExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) - -############################################################################### -## @brief Function-pointer for 
urCommandBufferReleaseExp -if __use_win_types: - _urCommandBufferReleaseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) -else: - _urCommandBufferReleaseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) - -############################################################################### -## @brief Function-pointer for urCommandBufferFinalizeExp -if __use_win_types: - _urCommandBufferFinalizeExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) -else: - _urCommandBufferFinalizeExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendKernelLaunchExp -if __use_win_types: - _urCommandBufferAppendKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp -if __use_win_types: - _urCommandBufferAppendUSMMemcpyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMMemcpyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - 
-############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMFillExp -if __use_win_types: - _urCommandBufferAppendUSMFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferCopyExp -if __use_win_types: - _urCommandBufferAppendMemBufferCopyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferCopyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp -if __use_win_types: - _urCommandBufferAppendMemBufferWriteExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferWriteExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), 
POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferReadExp -if __use_win_types: - _urCommandBufferAppendMemBufferReadExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferReadExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferCopyRectExp -if __use_win_types: - _urCommandBufferAppendMemBufferCopyRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferCopyRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp -if __use_win_types: - _urCommandBufferAppendMemBufferWriteRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, 
POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferWriteRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp -if __use_win_types: - _urCommandBufferAppendMemBufferReadRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferReadRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferFillExp -if __use_win_types: - _urCommandBufferAppendMemBufferFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - 
-############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp -if __use_win_types: - _urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp -if __use_win_types: - _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferEnqueueExp -if __use_win_types: - _urCommandBufferEnqueueExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urCommandBufferEnqueueExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of CommandBufferExp functions pointers -class 
ur_command_buffer_exp_dditable_t(Structure): - _fields_ = [ - ("pfnCreateExp", c_void_p), ## _urCommandBufferCreateExp_t - ("pfnRetainExp", c_void_p), ## _urCommandBufferRetainExp_t - ("pfnReleaseExp", c_void_p), ## _urCommandBufferReleaseExp_t - ("pfnFinalizeExp", c_void_p), ## _urCommandBufferFinalizeExp_t - ("pfnAppendKernelLaunchExp", c_void_p), ## _urCommandBufferAppendKernelLaunchExp_t - ("pfnAppendUSMMemcpyExp", c_void_p), ## _urCommandBufferAppendUSMMemcpyExp_t - ("pfnAppendUSMFillExp", c_void_p), ## _urCommandBufferAppendUSMFillExp_t - ("pfnAppendMemBufferCopyExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyExp_t - ("pfnAppendMemBufferWriteExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteExp_t - ("pfnAppendMemBufferReadExp", c_void_p), ## _urCommandBufferAppendMemBufferReadExp_t - ("pfnAppendMemBufferCopyRectExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyRectExp_t - ("pfnAppendMemBufferWriteRectExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteRectExp_t - ("pfnAppendMemBufferReadRectExp", c_void_p), ## _urCommandBufferAppendMemBufferReadRectExp_t - ("pfnAppendMemBufferFillExp", c_void_p), ## _urCommandBufferAppendMemBufferFillExp_t - ("pfnAppendUSMPrefetchExp", c_void_p), ## _urCommandBufferAppendUSMPrefetchExp_t - ("pfnAppendUSMAdviseExp", c_void_p), ## _urCommandBufferAppendUSMAdviseExp_t - ("pfnEnqueueExp", c_void_p) ## _urCommandBufferEnqueueExp_t - ] - -############################################################################### -## @brief Function-pointer for urUsmP2PEnablePeerAccessExp -if __use_win_types: - _urUsmP2PEnablePeerAccessExp_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) -else: - _urUsmP2PEnablePeerAccessExp_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urUsmP2PDisablePeerAccessExp -if __use_win_types: - _urUsmP2PDisablePeerAccessExp_t = 
WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) -else: - _urUsmP2PDisablePeerAccessExp_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urUsmP2PPeerAccessGetInfoExp -if __use_win_types: - _urUsmP2PPeerAccessGetInfoExp_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urUsmP2PPeerAccessGetInfoExp_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of UsmP2PExp functions pointers -class ur_usm_p2p_exp_dditable_t(Structure): - _fields_ = [ - ("pfnEnablePeerAccessExp", c_void_p), ## _urUsmP2PEnablePeerAccessExp_t - ("pfnDisablePeerAccessExp", c_void_p), ## _urUsmP2PDisablePeerAccessExp_t - ("pfnPeerAccessGetInfoExp", c_void_p) ## _urUsmP2PPeerAccessGetInfoExp_t - ] - -############################################################################### -## @brief Function-pointer for urVirtualMemGranularityGetInfo -if __use_win_types: - _urVirtualMemGranularityGetInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urVirtualMemGranularityGetInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urVirtualMemReserve -if __use_win_types: - _urVirtualMemReserve_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(c_void_p) ) -else: - _urVirtualMemReserve_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, 
POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urVirtualMemFree -if __use_win_types: - _urVirtualMemFree_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) -else: - _urVirtualMemFree_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemMap -if __use_win_types: - _urVirtualMemMap_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_physical_mem_handle_t, c_size_t, ur_virtual_mem_access_flags_t ) -else: - _urVirtualMemMap_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_physical_mem_handle_t, c_size_t, ur_virtual_mem_access_flags_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemUnmap -if __use_win_types: - _urVirtualMemUnmap_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) -else: - _urVirtualMemUnmap_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemSetAccess -if __use_win_types: - _urVirtualMemSetAccess_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_access_flags_t ) -else: - _urVirtualMemSetAccess_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_access_flags_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemGetInfo -if __use_win_types: - _urVirtualMemGetInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urVirtualMemGetInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, 
c_void_p, c_size_t, ur_virtual_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of VirtualMem functions pointers -class ur_virtual_mem_dditable_t(Structure): - _fields_ = [ - ("pfnGranularityGetInfo", c_void_p), ## _urVirtualMemGranularityGetInfo_t - ("pfnReserve", c_void_p), ## _urVirtualMemReserve_t - ("pfnFree", c_void_p), ## _urVirtualMemFree_t - ("pfnMap", c_void_p), ## _urVirtualMemMap_t - ("pfnUnmap", c_void_p), ## _urVirtualMemUnmap_t - ("pfnSetAccess", c_void_p), ## _urVirtualMemSetAccess_t - ("pfnGetInfo", c_void_p) ## _urVirtualMemGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urDeviceGet -if __use_win_types: - _urDeviceGet_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_device_type_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) -else: - _urDeviceGet_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_device_type_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urDeviceGetInfo -if __use_win_types: - _urDeviceGetInfo_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urDeviceGetInfo_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urDeviceRetain -if __use_win_types: - _urDeviceRetain_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t ) -else: - _urDeviceRetain_t = CFUNCTYPE( ur_result_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urDeviceRelease -if __use_win_types: - _urDeviceRelease_t = WINFUNCTYPE( 
ur_result_t, ur_device_handle_t ) -else: - _urDeviceRelease_t = CFUNCTYPE( ur_result_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urDevicePartition -if __use_win_types: - _urDevicePartition_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_properties_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) -else: - _urDevicePartition_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_properties_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urDeviceSelectBinary -if __use_win_types: - _urDeviceSelectBinary_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_binary_t), c_ulong, POINTER(c_ulong) ) -else: - _urDeviceSelectBinary_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_binary_t), c_ulong, POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urDeviceGetNativeHandle -if __use_win_types: - _urDeviceGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_native_handle_t) ) -else: - _urDeviceGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urDeviceCreateWithNativeHandle -if __use_win_types: - _urDeviceCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_platform_handle_t, POINTER(ur_device_native_properties_t), POINTER(ur_device_handle_t) ) -else: - _urDeviceCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_platform_handle_t, POINTER(ur_device_native_properties_t), POINTER(ur_device_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urDeviceGetGlobalTimestamps -if __use_win_types: - _urDeviceGetGlobalTimestamps_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(c_ulonglong), POINTER(c_ulonglong) ) -else: - _urDeviceGetGlobalTimestamps_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(c_ulonglong), POINTER(c_ulonglong) ) - - -############################################################################### -## @brief Table of Device functions pointers -class ur_device_dditable_t(Structure): - _fields_ = [ - ("pfnGet", c_void_p), ## _urDeviceGet_t - ("pfnGetInfo", c_void_p), ## _urDeviceGetInfo_t - ("pfnRetain", c_void_p), ## _urDeviceRetain_t - ("pfnRelease", c_void_p), ## _urDeviceRelease_t - ("pfnPartition", c_void_p), ## _urDevicePartition_t - ("pfnSelectBinary", c_void_p), ## _urDeviceSelectBinary_t - ("pfnGetNativeHandle", c_void_p), ## _urDeviceGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urDeviceCreateWithNativeHandle_t - ("pfnGetGlobalTimestamps", c_void_p) ## _urDeviceGetGlobalTimestamps_t - ] - -############################################################################### -class ur_dditable_t(Structure): - _fields_ = [ - ("Platform", ur_platform_dditable_t), - ("Context", ur_context_dditable_t), - ("Event", ur_event_dditable_t), - ("Program", ur_program_dditable_t), - ("ProgramExp", ur_program_exp_dditable_t), - ("Kernel", ur_kernel_dditable_t), - ("KernelExp", ur_kernel_exp_dditable_t), - ("Sampler", ur_sampler_dditable_t), - ("Mem", ur_mem_dditable_t), - ("PhysicalMem", ur_physical_mem_dditable_t), - ("Global", ur_global_dditable_t), - ("Enqueue", ur_enqueue_dditable_t), - ("EnqueueExp", ur_enqueue_exp_dditable_t), - ("Queue", ur_queue_dditable_t), - ("BindlessImagesExp", ur_bindless_images_exp_dditable_t), - ("USM", ur_usm_dditable_t), - ("USMExp", ur_usm_exp_dditable_t), - ("CommandBufferExp", ur_command_buffer_exp_dditable_t), - 
("UsmP2PExp", ur_usm_p2p_exp_dditable_t), - ("VirtualMem", ur_virtual_mem_dditable_t), - ("Device", ur_device_dditable_t) - ] - -############################################################################### -## @brief ur device-driver interfaces -class UR_DDI: - def __init__(self, version : ur_api_version_t): - # load the ur_loader library - if "Windows" == platform.uname()[0]: - self.__dll = WinDLL("ur_loader.dll", winmode=0) - else: - self.__dll = CDLL("libur_loader.so") - - # fill the ddi tables - self.__dditable = ur_dditable_t() - - # initialize the UR - self.__dll.urLoaderInit(0, 0) - - # call driver to get function pointers - Platform = ur_platform_dditable_t() - r = ur_result_v(self.__dll.urGetPlatformProcAddrTable(version, byref(Platform))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Platform = Platform - - # attach function interface to function address - self.urPlatformGet = _urPlatformGet_t(self.__dditable.Platform.pfnGet) - self.urPlatformGetInfo = _urPlatformGetInfo_t(self.__dditable.Platform.pfnGetInfo) - self.urPlatformGetNativeHandle = _urPlatformGetNativeHandle_t(self.__dditable.Platform.pfnGetNativeHandle) - self.urPlatformCreateWithNativeHandle = _urPlatformCreateWithNativeHandle_t(self.__dditable.Platform.pfnCreateWithNativeHandle) - self.urPlatformGetApiVersion = _urPlatformGetApiVersion_t(self.__dditable.Platform.pfnGetApiVersion) - self.urPlatformGetBackendOption = _urPlatformGetBackendOption_t(self.__dditable.Platform.pfnGetBackendOption) - - # call driver to get function pointers - Context = ur_context_dditable_t() - r = ur_result_v(self.__dll.urGetContextProcAddrTable(version, byref(Context))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Context = Context - - # attach function interface to function address - self.urContextCreate = _urContextCreate_t(self.__dditable.Context.pfnCreate) - self.urContextRetain = _urContextRetain_t(self.__dditable.Context.pfnRetain) - self.urContextRelease = 
_urContextRelease_t(self.__dditable.Context.pfnRelease) - self.urContextGetInfo = _urContextGetInfo_t(self.__dditable.Context.pfnGetInfo) - self.urContextGetNativeHandle = _urContextGetNativeHandle_t(self.__dditable.Context.pfnGetNativeHandle) - self.urContextCreateWithNativeHandle = _urContextCreateWithNativeHandle_t(self.__dditable.Context.pfnCreateWithNativeHandle) - self.urContextSetExtendedDeleter = _urContextSetExtendedDeleter_t(self.__dditable.Context.pfnSetExtendedDeleter) - - # call driver to get function pointers - Event = ur_event_dditable_t() - r = ur_result_v(self.__dll.urGetEventProcAddrTable(version, byref(Event))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Event = Event - - # attach function interface to function address - self.urEventGetInfo = _urEventGetInfo_t(self.__dditable.Event.pfnGetInfo) - self.urEventGetProfilingInfo = _urEventGetProfilingInfo_t(self.__dditable.Event.pfnGetProfilingInfo) - self.urEventWait = _urEventWait_t(self.__dditable.Event.pfnWait) - self.urEventRetain = _urEventRetain_t(self.__dditable.Event.pfnRetain) - self.urEventRelease = _urEventRelease_t(self.__dditable.Event.pfnRelease) - self.urEventGetNativeHandle = _urEventGetNativeHandle_t(self.__dditable.Event.pfnGetNativeHandle) - self.urEventCreateWithNativeHandle = _urEventCreateWithNativeHandle_t(self.__dditable.Event.pfnCreateWithNativeHandle) - self.urEventSetCallback = _urEventSetCallback_t(self.__dditable.Event.pfnSetCallback) - - # call driver to get function pointers - Program = ur_program_dditable_t() - r = ur_result_v(self.__dll.urGetProgramProcAddrTable(version, byref(Program))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Program = Program - - # attach function interface to function address - self.urProgramCreateWithIL = _urProgramCreateWithIL_t(self.__dditable.Program.pfnCreateWithIL) - self.urProgramCreateWithBinary = _urProgramCreateWithBinary_t(self.__dditable.Program.pfnCreateWithBinary) - 
self.urProgramBuild = _urProgramBuild_t(self.__dditable.Program.pfnBuild) - self.urProgramCompile = _urProgramCompile_t(self.__dditable.Program.pfnCompile) - self.urProgramLink = _urProgramLink_t(self.__dditable.Program.pfnLink) - self.urProgramRetain = _urProgramRetain_t(self.__dditable.Program.pfnRetain) - self.urProgramRelease = _urProgramRelease_t(self.__dditable.Program.pfnRelease) - self.urProgramGetFunctionPointer = _urProgramGetFunctionPointer_t(self.__dditable.Program.pfnGetFunctionPointer) - self.urProgramGetInfo = _urProgramGetInfo_t(self.__dditable.Program.pfnGetInfo) - self.urProgramGetBuildInfo = _urProgramGetBuildInfo_t(self.__dditable.Program.pfnGetBuildInfo) - self.urProgramSetSpecializationConstants = _urProgramSetSpecializationConstants_t(self.__dditable.Program.pfnSetSpecializationConstants) - self.urProgramGetNativeHandle = _urProgramGetNativeHandle_t(self.__dditable.Program.pfnGetNativeHandle) - self.urProgramCreateWithNativeHandle = _urProgramCreateWithNativeHandle_t(self.__dditable.Program.pfnCreateWithNativeHandle) - - # call driver to get function pointers - ProgramExp = ur_program_exp_dditable_t() - r = ur_result_v(self.__dll.urGetProgramExpProcAddrTable(version, byref(ProgramExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.ProgramExp = ProgramExp - - # attach function interface to function address - self.urProgramBuildExp = _urProgramBuildExp_t(self.__dditable.ProgramExp.pfnBuildExp) - self.urProgramCompileExp = _urProgramCompileExp_t(self.__dditable.ProgramExp.pfnCompileExp) - self.urProgramLinkExp = _urProgramLinkExp_t(self.__dditable.ProgramExp.pfnLinkExp) - - # call driver to get function pointers - Kernel = ur_kernel_dditable_t() - r = ur_result_v(self.__dll.urGetKernelProcAddrTable(version, byref(Kernel))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Kernel = Kernel - - # attach function interface to function address - self.urKernelCreate = 
_urKernelCreate_t(self.__dditable.Kernel.pfnCreate) - self.urKernelGetInfo = _urKernelGetInfo_t(self.__dditable.Kernel.pfnGetInfo) - self.urKernelGetGroupInfo = _urKernelGetGroupInfo_t(self.__dditable.Kernel.pfnGetGroupInfo) - self.urKernelGetSubGroupInfo = _urKernelGetSubGroupInfo_t(self.__dditable.Kernel.pfnGetSubGroupInfo) - self.urKernelRetain = _urKernelRetain_t(self.__dditable.Kernel.pfnRetain) - self.urKernelRelease = _urKernelRelease_t(self.__dditable.Kernel.pfnRelease) - self.urKernelGetNativeHandle = _urKernelGetNativeHandle_t(self.__dditable.Kernel.pfnGetNativeHandle) - self.urKernelCreateWithNativeHandle = _urKernelCreateWithNativeHandle_t(self.__dditable.Kernel.pfnCreateWithNativeHandle) - self.urKernelSetArgValue = _urKernelSetArgValue_t(self.__dditable.Kernel.pfnSetArgValue) - self.urKernelSetArgLocal = _urKernelSetArgLocal_t(self.__dditable.Kernel.pfnSetArgLocal) - self.urKernelSetArgPointer = _urKernelSetArgPointer_t(self.__dditable.Kernel.pfnSetArgPointer) - self.urKernelSetExecInfo = _urKernelSetExecInfo_t(self.__dditable.Kernel.pfnSetExecInfo) - self.urKernelSetArgSampler = _urKernelSetArgSampler_t(self.__dditable.Kernel.pfnSetArgSampler) - self.urKernelSetArgMemObj = _urKernelSetArgMemObj_t(self.__dditable.Kernel.pfnSetArgMemObj) - self.urKernelSetSpecializationConstants = _urKernelSetSpecializationConstants_t(self.__dditable.Kernel.pfnSetSpecializationConstants) - - # call driver to get function pointers - KernelExp = ur_kernel_exp_dditable_t() - r = ur_result_v(self.__dll.urGetKernelExpProcAddrTable(version, byref(KernelExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.KernelExp = KernelExp - - # attach function interface to function address - self.urKernelSuggestMaxCooperativeGroupCountExp = _urKernelSuggestMaxCooperativeGroupCountExp_t(self.__dditable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp) - - # call driver to get function pointers - Sampler = ur_sampler_dditable_t() - r = 
ur_result_v(self.__dll.urGetSamplerProcAddrTable(version, byref(Sampler))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Sampler = Sampler - - # attach function interface to function address - self.urSamplerCreate = _urSamplerCreate_t(self.__dditable.Sampler.pfnCreate) - self.urSamplerRetain = _urSamplerRetain_t(self.__dditable.Sampler.pfnRetain) - self.urSamplerRelease = _urSamplerRelease_t(self.__dditable.Sampler.pfnRelease) - self.urSamplerGetInfo = _urSamplerGetInfo_t(self.__dditable.Sampler.pfnGetInfo) - self.urSamplerGetNativeHandle = _urSamplerGetNativeHandle_t(self.__dditable.Sampler.pfnGetNativeHandle) - self.urSamplerCreateWithNativeHandle = _urSamplerCreateWithNativeHandle_t(self.__dditable.Sampler.pfnCreateWithNativeHandle) - - # call driver to get function pointers - Mem = ur_mem_dditable_t() - r = ur_result_v(self.__dll.urGetMemProcAddrTable(version, byref(Mem))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Mem = Mem - - # attach function interface to function address - self.urMemImageCreate = _urMemImageCreate_t(self.__dditable.Mem.pfnImageCreate) - self.urMemBufferCreate = _urMemBufferCreate_t(self.__dditable.Mem.pfnBufferCreate) - self.urMemRetain = _urMemRetain_t(self.__dditable.Mem.pfnRetain) - self.urMemRelease = _urMemRelease_t(self.__dditable.Mem.pfnRelease) - self.urMemBufferPartition = _urMemBufferPartition_t(self.__dditable.Mem.pfnBufferPartition) - self.urMemGetNativeHandle = _urMemGetNativeHandle_t(self.__dditable.Mem.pfnGetNativeHandle) - self.urMemBufferCreateWithNativeHandle = _urMemBufferCreateWithNativeHandle_t(self.__dditable.Mem.pfnBufferCreateWithNativeHandle) - self.urMemImageCreateWithNativeHandle = _urMemImageCreateWithNativeHandle_t(self.__dditable.Mem.pfnImageCreateWithNativeHandle) - self.urMemGetInfo = _urMemGetInfo_t(self.__dditable.Mem.pfnGetInfo) - self.urMemImageGetInfo = _urMemImageGetInfo_t(self.__dditable.Mem.pfnImageGetInfo) - - # call driver to get function pointers 
- PhysicalMem = ur_physical_mem_dditable_t() - r = ur_result_v(self.__dll.urGetPhysicalMemProcAddrTable(version, byref(PhysicalMem))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.PhysicalMem = PhysicalMem - - # attach function interface to function address - self.urPhysicalMemCreate = _urPhysicalMemCreate_t(self.__dditable.PhysicalMem.pfnCreate) - self.urPhysicalMemRetain = _urPhysicalMemRetain_t(self.__dditable.PhysicalMem.pfnRetain) - self.urPhysicalMemRelease = _urPhysicalMemRelease_t(self.__dditable.PhysicalMem.pfnRelease) - - # call driver to get function pointers - Global = ur_global_dditable_t() - r = ur_result_v(self.__dll.urGetGlobalProcAddrTable(version, byref(Global))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Global = Global - - # attach function interface to function address - self.urAdapterGet = _urAdapterGet_t(self.__dditable.Global.pfnAdapterGet) - self.urAdapterRelease = _urAdapterRelease_t(self.__dditable.Global.pfnAdapterRelease) - self.urAdapterRetain = _urAdapterRetain_t(self.__dditable.Global.pfnAdapterRetain) - self.urAdapterGetLastError = _urAdapterGetLastError_t(self.__dditable.Global.pfnAdapterGetLastError) - self.urAdapterGetInfo = _urAdapterGetInfo_t(self.__dditable.Global.pfnAdapterGetInfo) - - # call driver to get function pointers - Enqueue = ur_enqueue_dditable_t() - r = ur_result_v(self.__dll.urGetEnqueueProcAddrTable(version, byref(Enqueue))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Enqueue = Enqueue - - # attach function interface to function address - self.urEnqueueKernelLaunch = _urEnqueueKernelLaunch_t(self.__dditable.Enqueue.pfnKernelLaunch) - self.urEnqueueEventsWait = _urEnqueueEventsWait_t(self.__dditable.Enqueue.pfnEventsWait) - self.urEnqueueEventsWaitWithBarrier = _urEnqueueEventsWaitWithBarrier_t(self.__dditable.Enqueue.pfnEventsWaitWithBarrier) - self.urEnqueueMemBufferRead = 
_urEnqueueMemBufferRead_t(self.__dditable.Enqueue.pfnMemBufferRead) - self.urEnqueueMemBufferWrite = _urEnqueueMemBufferWrite_t(self.__dditable.Enqueue.pfnMemBufferWrite) - self.urEnqueueMemBufferReadRect = _urEnqueueMemBufferReadRect_t(self.__dditable.Enqueue.pfnMemBufferReadRect) - self.urEnqueueMemBufferWriteRect = _urEnqueueMemBufferWriteRect_t(self.__dditable.Enqueue.pfnMemBufferWriteRect) - self.urEnqueueMemBufferCopy = _urEnqueueMemBufferCopy_t(self.__dditable.Enqueue.pfnMemBufferCopy) - self.urEnqueueMemBufferCopyRect = _urEnqueueMemBufferCopyRect_t(self.__dditable.Enqueue.pfnMemBufferCopyRect) - self.urEnqueueMemBufferFill = _urEnqueueMemBufferFill_t(self.__dditable.Enqueue.pfnMemBufferFill) - self.urEnqueueMemImageRead = _urEnqueueMemImageRead_t(self.__dditable.Enqueue.pfnMemImageRead) - self.urEnqueueMemImageWrite = _urEnqueueMemImageWrite_t(self.__dditable.Enqueue.pfnMemImageWrite) - self.urEnqueueMemImageCopy = _urEnqueueMemImageCopy_t(self.__dditable.Enqueue.pfnMemImageCopy) - self.urEnqueueMemBufferMap = _urEnqueueMemBufferMap_t(self.__dditable.Enqueue.pfnMemBufferMap) - self.urEnqueueMemUnmap = _urEnqueueMemUnmap_t(self.__dditable.Enqueue.pfnMemUnmap) - self.urEnqueueUSMFill = _urEnqueueUSMFill_t(self.__dditable.Enqueue.pfnUSMFill) - self.urEnqueueUSMMemcpy = _urEnqueueUSMMemcpy_t(self.__dditable.Enqueue.pfnUSMMemcpy) - self.urEnqueueUSMPrefetch = _urEnqueueUSMPrefetch_t(self.__dditable.Enqueue.pfnUSMPrefetch) - self.urEnqueueUSMAdvise = _urEnqueueUSMAdvise_t(self.__dditable.Enqueue.pfnUSMAdvise) - self.urEnqueueUSMFill2D = _urEnqueueUSMFill2D_t(self.__dditable.Enqueue.pfnUSMFill2D) - self.urEnqueueUSMMemcpy2D = _urEnqueueUSMMemcpy2D_t(self.__dditable.Enqueue.pfnUSMMemcpy2D) - self.urEnqueueDeviceGlobalVariableWrite = _urEnqueueDeviceGlobalVariableWrite_t(self.__dditable.Enqueue.pfnDeviceGlobalVariableWrite) - self.urEnqueueDeviceGlobalVariableRead = _urEnqueueDeviceGlobalVariableRead_t(self.__dditable.Enqueue.pfnDeviceGlobalVariableRead) - 
self.urEnqueueReadHostPipe = _urEnqueueReadHostPipe_t(self.__dditable.Enqueue.pfnReadHostPipe) - self.urEnqueueWriteHostPipe = _urEnqueueWriteHostPipe_t(self.__dditable.Enqueue.pfnWriteHostPipe) - - # call driver to get function pointers - EnqueueExp = ur_enqueue_exp_dditable_t() - r = ur_result_v(self.__dll.urGetEnqueueExpProcAddrTable(version, byref(EnqueueExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.EnqueueExp = EnqueueExp - - # attach function interface to function address - self.urEnqueueCooperativeKernelLaunchExp = _urEnqueueCooperativeKernelLaunchExp_t(self.__dditable.EnqueueExp.pfnCooperativeKernelLaunchExp) - - # call driver to get function pointers - Queue = ur_queue_dditable_t() - r = ur_result_v(self.__dll.urGetQueueProcAddrTable(version, byref(Queue))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Queue = Queue - - # attach function interface to function address - self.urQueueGetInfo = _urQueueGetInfo_t(self.__dditable.Queue.pfnGetInfo) - self.urQueueCreate = _urQueueCreate_t(self.__dditable.Queue.pfnCreate) - self.urQueueRetain = _urQueueRetain_t(self.__dditable.Queue.pfnRetain) - self.urQueueRelease = _urQueueRelease_t(self.__dditable.Queue.pfnRelease) - self.urQueueGetNativeHandle = _urQueueGetNativeHandle_t(self.__dditable.Queue.pfnGetNativeHandle) - self.urQueueCreateWithNativeHandle = _urQueueCreateWithNativeHandle_t(self.__dditable.Queue.pfnCreateWithNativeHandle) - self.urQueueFinish = _urQueueFinish_t(self.__dditable.Queue.pfnFinish) - self.urQueueFlush = _urQueueFlush_t(self.__dditable.Queue.pfnFlush) - - # call driver to get function pointers - BindlessImagesExp = ur_bindless_images_exp_dditable_t() - r = ur_result_v(self.__dll.urGetBindlessImagesExpProcAddrTable(version, byref(BindlessImagesExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.BindlessImagesExp = BindlessImagesExp - - # attach function interface to function address - 
self.urBindlessImagesUnsampledImageHandleDestroyExp = _urBindlessImagesUnsampledImageHandleDestroyExp_t(self.__dditable.BindlessImagesExp.pfnUnsampledImageHandleDestroyExp) - self.urBindlessImagesSampledImageHandleDestroyExp = _urBindlessImagesSampledImageHandleDestroyExp_t(self.__dditable.BindlessImagesExp.pfnSampledImageHandleDestroyExp) - self.urBindlessImagesImageAllocateExp = _urBindlessImagesImageAllocateExp_t(self.__dditable.BindlessImagesExp.pfnImageAllocateExp) - self.urBindlessImagesImageFreeExp = _urBindlessImagesImageFreeExp_t(self.__dditable.BindlessImagesExp.pfnImageFreeExp) - self.urBindlessImagesUnsampledImageCreateExp = _urBindlessImagesUnsampledImageCreateExp_t(self.__dditable.BindlessImagesExp.pfnUnsampledImageCreateExp) - self.urBindlessImagesSampledImageCreateExp = _urBindlessImagesSampledImageCreateExp_t(self.__dditable.BindlessImagesExp.pfnSampledImageCreateExp) - self.urBindlessImagesImageCopyExp = _urBindlessImagesImageCopyExp_t(self.__dditable.BindlessImagesExp.pfnImageCopyExp) - self.urBindlessImagesImageGetInfoExp = _urBindlessImagesImageGetInfoExp_t(self.__dditable.BindlessImagesExp.pfnImageGetInfoExp) - self.urBindlessImagesMipmapGetLevelExp = _urBindlessImagesMipmapGetLevelExp_t(self.__dditable.BindlessImagesExp.pfnMipmapGetLevelExp) - self.urBindlessImagesMipmapFreeExp = _urBindlessImagesMipmapFreeExp_t(self.__dditable.BindlessImagesExp.pfnMipmapFreeExp) - self.urBindlessImagesImportOpaqueFDExp = _urBindlessImagesImportOpaqueFDExp_t(self.__dditable.BindlessImagesExp.pfnImportOpaqueFDExp) - self.urBindlessImagesMapExternalArrayExp = _urBindlessImagesMapExternalArrayExp_t(self.__dditable.BindlessImagesExp.pfnMapExternalArrayExp) - self.urBindlessImagesReleaseInteropExp = _urBindlessImagesReleaseInteropExp_t(self.__dditable.BindlessImagesExp.pfnReleaseInteropExp) - self.urBindlessImagesImportExternalSemaphoreOpaqueFDExp = 
_urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t(self.__dditable.BindlessImagesExp.pfnImportExternalSemaphoreOpaqueFDExp) - self.urBindlessImagesDestroyExternalSemaphoreExp = _urBindlessImagesDestroyExternalSemaphoreExp_t(self.__dditable.BindlessImagesExp.pfnDestroyExternalSemaphoreExp) - self.urBindlessImagesWaitExternalSemaphoreExp = _urBindlessImagesWaitExternalSemaphoreExp_t(self.__dditable.BindlessImagesExp.pfnWaitExternalSemaphoreExp) - self.urBindlessImagesSignalExternalSemaphoreExp = _urBindlessImagesSignalExternalSemaphoreExp_t(self.__dditable.BindlessImagesExp.pfnSignalExternalSemaphoreExp) - - # call driver to get function pointers - USM = ur_usm_dditable_t() - r = ur_result_v(self.__dll.urGetUSMProcAddrTable(version, byref(USM))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.USM = USM - - # attach function interface to function address - self.urUSMHostAlloc = _urUSMHostAlloc_t(self.__dditable.USM.pfnHostAlloc) - self.urUSMDeviceAlloc = _urUSMDeviceAlloc_t(self.__dditable.USM.pfnDeviceAlloc) - self.urUSMSharedAlloc = _urUSMSharedAlloc_t(self.__dditable.USM.pfnSharedAlloc) - self.urUSMFree = _urUSMFree_t(self.__dditable.USM.pfnFree) - self.urUSMGetMemAllocInfo = _urUSMGetMemAllocInfo_t(self.__dditable.USM.pfnGetMemAllocInfo) - self.urUSMPoolCreate = _urUSMPoolCreate_t(self.__dditable.USM.pfnPoolCreate) - self.urUSMPoolRetain = _urUSMPoolRetain_t(self.__dditable.USM.pfnPoolRetain) - self.urUSMPoolRelease = _urUSMPoolRelease_t(self.__dditable.USM.pfnPoolRelease) - self.urUSMPoolGetInfo = _urUSMPoolGetInfo_t(self.__dditable.USM.pfnPoolGetInfo) - - # call driver to get function pointers - USMExp = ur_usm_exp_dditable_t() - r = ur_result_v(self.__dll.urGetUSMExpProcAddrTable(version, byref(USMExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.USMExp = USMExp - - # attach function interface to function address - self.urUSMPitchedAllocExp = _urUSMPitchedAllocExp_t(self.__dditable.USMExp.pfnPitchedAllocExp) - 
self.urUSMImportExp = _urUSMImportExp_t(self.__dditable.USMExp.pfnImportExp) - self.urUSMReleaseExp = _urUSMReleaseExp_t(self.__dditable.USMExp.pfnReleaseExp) - - # call driver to get function pointers - CommandBufferExp = ur_command_buffer_exp_dditable_t() - r = ur_result_v(self.__dll.urGetCommandBufferExpProcAddrTable(version, byref(CommandBufferExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.CommandBufferExp = CommandBufferExp - - # attach function interface to function address - self.urCommandBufferCreateExp = _urCommandBufferCreateExp_t(self.__dditable.CommandBufferExp.pfnCreateExp) - self.urCommandBufferRetainExp = _urCommandBufferRetainExp_t(self.__dditable.CommandBufferExp.pfnRetainExp) - self.urCommandBufferReleaseExp = _urCommandBufferReleaseExp_t(self.__dditable.CommandBufferExp.pfnReleaseExp) - self.urCommandBufferFinalizeExp = _urCommandBufferFinalizeExp_t(self.__dditable.CommandBufferExp.pfnFinalizeExp) - self.urCommandBufferAppendKernelLaunchExp = _urCommandBufferAppendKernelLaunchExp_t(self.__dditable.CommandBufferExp.pfnAppendKernelLaunchExp) - self.urCommandBufferAppendUSMMemcpyExp = _urCommandBufferAppendUSMMemcpyExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMMemcpyExp) - self.urCommandBufferAppendUSMFillExp = _urCommandBufferAppendUSMFillExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMFillExp) - self.urCommandBufferAppendMemBufferCopyExp = _urCommandBufferAppendMemBufferCopyExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyExp) - self.urCommandBufferAppendMemBufferWriteExp = _urCommandBufferAppendMemBufferWriteExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteExp) - self.urCommandBufferAppendMemBufferReadExp = _urCommandBufferAppendMemBufferReadExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadExp) - self.urCommandBufferAppendMemBufferCopyRectExp = _urCommandBufferAppendMemBufferCopyRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyRectExp) - 
self.urCommandBufferAppendMemBufferWriteRectExp = _urCommandBufferAppendMemBufferWriteRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteRectExp) - self.urCommandBufferAppendMemBufferReadRectExp = _urCommandBufferAppendMemBufferReadRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadRectExp) - self.urCommandBufferAppendMemBufferFillExp = _urCommandBufferAppendMemBufferFillExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferFillExp) - self.urCommandBufferAppendUSMPrefetchExp = _urCommandBufferAppendUSMPrefetchExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMPrefetchExp) - self.urCommandBufferAppendUSMAdviseExp = _urCommandBufferAppendUSMAdviseExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMAdviseExp) - self.urCommandBufferEnqueueExp = _urCommandBufferEnqueueExp_t(self.__dditable.CommandBufferExp.pfnEnqueueExp) - - # call driver to get function pointers - UsmP2PExp = ur_usm_p2p_exp_dditable_t() - r = ur_result_v(self.__dll.urGetUsmP2PExpProcAddrTable(version, byref(UsmP2PExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.UsmP2PExp = UsmP2PExp - - # attach function interface to function address - self.urUsmP2PEnablePeerAccessExp = _urUsmP2PEnablePeerAccessExp_t(self.__dditable.UsmP2PExp.pfnEnablePeerAccessExp) - self.urUsmP2PDisablePeerAccessExp = _urUsmP2PDisablePeerAccessExp_t(self.__dditable.UsmP2PExp.pfnDisablePeerAccessExp) - self.urUsmP2PPeerAccessGetInfoExp = _urUsmP2PPeerAccessGetInfoExp_t(self.__dditable.UsmP2PExp.pfnPeerAccessGetInfoExp) - - # call driver to get function pointers - VirtualMem = ur_virtual_mem_dditable_t() - r = ur_result_v(self.__dll.urGetVirtualMemProcAddrTable(version, byref(VirtualMem))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.VirtualMem = VirtualMem - - # attach function interface to function address - self.urVirtualMemGranularityGetInfo = _urVirtualMemGranularityGetInfo_t(self.__dditable.VirtualMem.pfnGranularityGetInfo) - 
self.urVirtualMemReserve = _urVirtualMemReserve_t(self.__dditable.VirtualMem.pfnReserve) - self.urVirtualMemFree = _urVirtualMemFree_t(self.__dditable.VirtualMem.pfnFree) - self.urVirtualMemMap = _urVirtualMemMap_t(self.__dditable.VirtualMem.pfnMap) - self.urVirtualMemUnmap = _urVirtualMemUnmap_t(self.__dditable.VirtualMem.pfnUnmap) - self.urVirtualMemSetAccess = _urVirtualMemSetAccess_t(self.__dditable.VirtualMem.pfnSetAccess) - self.urVirtualMemGetInfo = _urVirtualMemGetInfo_t(self.__dditable.VirtualMem.pfnGetInfo) - - # call driver to get function pointers - Device = ur_device_dditable_t() - r = ur_result_v(self.__dll.urGetDeviceProcAddrTable(version, byref(Device))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Device = Device - - # attach function interface to function address - self.urDeviceGet = _urDeviceGet_t(self.__dditable.Device.pfnGet) - self.urDeviceGetInfo = _urDeviceGetInfo_t(self.__dditable.Device.pfnGetInfo) - self.urDeviceRetain = _urDeviceRetain_t(self.__dditable.Device.pfnRetain) - self.urDeviceRelease = _urDeviceRelease_t(self.__dditable.Device.pfnRelease) - self.urDevicePartition = _urDevicePartition_t(self.__dditable.Device.pfnPartition) - self.urDeviceSelectBinary = _urDeviceSelectBinary_t(self.__dditable.Device.pfnSelectBinary) - self.urDeviceGetNativeHandle = _urDeviceGetNativeHandle_t(self.__dditable.Device.pfnGetNativeHandle) - self.urDeviceCreateWithNativeHandle = _urDeviceCreateWithNativeHandle_t(self.__dditable.Device.pfnCreateWithNativeHandle) - self.urDeviceGetGlobalTimestamps = _urDeviceGetGlobalTimestamps_t(self.__dditable.Device.pfnGetGlobalTimestamps) - - # success! 
diff --git a/scripts/generate_code.py b/scripts/generate_code.py index 492ff88df8..eebb954487 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -70,26 +70,6 @@ def _mako_ddi_h(path, namespace, tags, version, revision, specs, meta): specs=specs, meta=meta) -""" - generates python files from the specification documents -""" -def _mako_api_py(path, namespace, tags, version, revision, specs, meta): - template = "api.py.mako" - fin = os.path.join("templates", template) - - filename = "%s.py"%(namespace) - fout = os.path.join(path, filename) - - print("Generating %s..."%fout) - return util.makoWrite( - fin, fout, - ver=version, - rev=revision, - namespace=namespace, - tags=tags, - specs=specs, - meta=meta) - """ generates c/c++ files from the specification documents """ @@ -101,13 +81,6 @@ def _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, spec return loc -""" - generates python files from the specification documents -""" -def _generate_api_py(incpath, namespace, tags, version, revision, specs, meta): - loc = _mako_api_py(incpath, namespace, tags, version, revision, specs, meta) - return loc - """ Entry-point: generates api code @@ -118,7 +91,6 @@ def generate_api(incpath, srcpath, namespace, tags, version, revision, specs, me loc = 0 loc += _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, specs, meta) - loc += _generate_api_py(incpath, namespace, tags, version, revision, specs, meta) print("Generated %s lines of code.\n"%loc) templates_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates") diff --git a/scripts/templates/api.py.mako b/scripts/templates/api.py.mako deleted file mode 100644 index 7815f2cf53..0000000000 --- a/scripts/templates/api.py.mako +++ /dev/null @@ -1,196 +0,0 @@ -<% -import re -from templates import helper as th -%><% - n=namespace - N=n.upper() - - x=tags['$x'] - X=x.upper() -%>""" - Copyright (C) 2022 Intel Corporation - - Part of the Unified-Runtime 
Project, under the Apache License v2.0 with LLVM Exceptions. - See LICENSE.TXT - SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - @file ${n}.py - @version v${ver}-r${rev} - - """ -import platform -from ctypes import * -from enum import * - -# ctypes does not define c_intptr_t, so let's define it here manually -c_intptr_t = c_ssize_t - -${"###############################################################################"} -__version__ = "1.0" - -%for s in specs: -%for obj in s['objects']: -%if not re.match(r"class", obj['type']) and not re.match(r"function", obj['type']): -${"###############################################################################"} -%for line in th.make_desc_lines(n, tags, obj): -${"##"} ${line} -%endfor -%for line in th.make_details_lines(n, tags, obj): -${"##"} ${line} -%endfor -## MACRO ###################################################################### -%if re.match(r"macro", obj['type']): -%if re.match(r".*\(.*\)", obj['name']): -def ${th.make_macro_name(n, tags, obj)}: - return ${th.subt(n, tags, obj['value'])} -%elif 'altvalue' not in obj and not obj['value'].startswith("__"): -${th.make_macro_name(n, tags, obj)} = ${th.subt(n, tags, obj['value'])} -%else: -# ${th.make_macro_name(n, tags, obj)} not required for python -%endif -## TYPEDEF #################################################################### -%elif re.match(r"typedef", obj['type']): -class ${th.make_type_name(n, tags, obj)}(${th.get_ctype_name(n, tags, {'type': obj['value']})}): - pass -## FPTR TYPEDEF ############################################################### -%elif re.match(r"fptr_typedef", obj['type']): -def ${th.make_type_name(n, tags, obj)}(user_defined_callback): - @CFUNCTYPE(${th.get_ctype_name(n, tags, {'type': obj['return']})}\ -%if 'params' in obj: -%for param in obj['params']: -, ${th.get_ctype_name(n, tags, {'type': param['type']})}\ -%endfor -%endif -) - def ${th.make_type_name(n, tags, obj)}_wrapper(\ -%if 'params' in obj: -%for index, item 
in enumerate(obj['params']): -${item['name']}\ -%if index < (len(obj['params']) - 1): -, \ -%endif -%endfor -%endif -): - return user_defined_callback(\ -%if 'params' in obj: -%for index, item in enumerate(obj['params']): -${item['name']}\ -%if index < (len(obj['params']) - 1): -, \ -%endif -%endfor -%endif -) - return ${th.make_type_name(n, tags, obj)}_wrapper -## ENUM ####################################################################### -%elif re.match(r"enum", obj['type']): -class ${re.sub(r"(\w+)_t", r"\1_v", th.make_type_name(n, tags, obj))}(IntEnum): - %for line in th.make_etor_lines(n, tags, obj, py=True, meta=meta): - ${line} - %endfor - -class ${th.make_type_name(n, tags, obj)}(c_int): - def __str__(self): - %if th.type_traits.is_flags(obj['name']): - return hex(self.value) - %else: - return str(${re.sub(r"(\w+)_t", r"\1_v", th.make_type_name(n, tags, obj))}(self.value)) - %endif - -## STRUCT/UNION ############################################################### -%elif re.match(r"struct|union", obj['type']): -class ${th.make_type_name(n, tags, obj)}(Structure): - _fields_ = [ - %for line in th.make_member_lines(n, tags, obj, py=True, meta=meta): - ${line} - %endfor - ] -## HANDLE ##################################################################### -%elif re.match(r"handle", obj['type']): -class ${th.make_type_name(n, tags, obj)}(c_void_p): - pass -%endif - -%endif # !class && !function -%endfor # objects -%endfor # specs -${"###############################################################################"} -__use_win_types = "Windows" == platform.uname()[0] -<% - tables = th.get_pfntables(specs, meta, n, tags) -%> -%for tbl in tables: -%for obj in tbl['functions']: -${"###############################################################################"} -${"##"} @brief Function-pointer for ${th.make_func_name(n, tags, obj)} -%if 'condition' not in obj: -if __use_win_types: - _${th.make_func_name(n, tags, obj)}_t = WINFUNCTYPE( ${x}_result_t, ${", 
".join(th.make_param_lines(n, tags, obj, py=True, meta=meta, format=["type"]))} ) -else: - _${th.make_func_name(n, tags, obj)}_t = CFUNCTYPE( ${x}_result_t, ${", ".join(th.make_param_lines(n, tags, obj, py=True, meta=meta, format=["type"]))} ) -%endif # condition - -%endfor # functions - -${"###############################################################################"} -${"##"} @brief Table of ${tbl['name']} functions pointers -class ${tbl['type']}(Structure): - _fields_ = [ - %for obj in tbl['functions']: - %if 'condition' not in obj: - %if loop.index < len(tbl['functions'])-1: - ${th.append_ws("(\""+th.make_pfn_name(n, tags, obj)+"\", c_void_p),", 63)} ## _${th.make_func_name(n, tags, obj)}_t - %else: - ${th.append_ws("(\""+th.make_pfn_name(n, tags, obj)+"\", c_void_p)", 63)} ## _${th.make_func_name(n, tags, obj)}_t - %endif - %endif # condition - %endfor - ] - -%endfor # tables -${"###############################################################################"} -class ${n}_dditable_t(Structure): - _fields_ = [ - %for tbl in tables: - %if loop.index < len(tables)-1: - ("${tbl['name']}", ${tbl['type']}), - %else: - ("${tbl['name']}", ${tbl['type']}) - %endif - %endfor - ] - -${"###############################################################################"} -${"##"} @brief ${n} device-driver interfaces -class ${N}_DDI: - def __init__(self, version : ${x}_api_version_t): - # load the ${x}_loader library - if "Windows" == platform.uname()[0]: - self.__dll = WinDLL("${x}_loader.dll", winmode=0) - else: - self.__dll = CDLL("lib${x}_loader.so") - - # fill the ddi tables - self.__dditable = ${n}_dditable_t() - - # initialize the UR - self.__dll.${x}LoaderInit(0, 0) - - %for tbl in tables: - # call driver to get function pointers - ${tbl['name']} = ${tbl['type']}() - r = ${x}_result_v(self.__dll.${tbl['export']['name']}(version, byref(${tbl['name']}))) - if r != ${x}_result_v.SUCCESS: - raise Exception(r) - self.__dditable.${tbl['name']} = ${tbl['name']} - - # 
attach function interface to function address - %for obj in tbl['functions']: - %if 'condition' not in obj: - self.${th.make_func_name(n, tags, obj)} = _${th.make_func_name(n, tags, obj)}_t(self.__dditable.${tbl['name']}.${th.make_pfn_name(n, tags, obj)}) - %endif - %endfor # functions - - %endfor # tables - # success! diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index 928db1675c..d7d29dc0a8 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -705,18 +705,8 @@ def make_flags_bitmask(namespace, tags, obj, meta): Public: returns c/c++ name of etor """ -def make_etor_name(namespace, tags, enum, etor, py=False, meta=None): - if py: - # e.g., "ENUM_NAME_ETOR_NAME" -> "ETOR_NAME" - if type_traits.is_flags(enum): - prefix = re.sub(r"(\w+)_flags_t", r"\1_flag", subt(namespace, tags, enum)).upper() - else: - prefix = re.sub(r"(\w+)_t", r"\1", subt(namespace, tags, enum)).upper() - name = re.sub(r"%s_(\w+)"%prefix, r"\1", subt(namespace, tags, etor)) - name = re.sub(r"^(\d+\w*)", r"_\1", name) - else: - name = subt(namespace, tags, etor) - return name +def make_etor_name(namespace, tags, enum, etor, meta=None): + return subt(namespace, tags, etor) """ Private: @@ -742,33 +732,28 @@ def _get_value_name(namespace, tags, value): Public: returns a list of strings for declaring each enumerator in an enumeration c++ format: "ETOR_NAME = VALUE, ///< DESCRIPTION" - python format: "ETOR_NAME = VALUE, ## DESCRIPTION" """ -def make_etor_lines(namespace, tags, obj, py=False, meta=None): +def make_etor_lines(namespace, tags, obj, meta=None): lines = [] for item in obj['etors']: - name = make_etor_name(namespace, tags, obj['name'], item['name'], py, meta) + name = make_etor_name(namespace, tags, obj['name'], item['name'], meta) if 'value' in item: - delim = "," if not py else "" + delim = "," value = _get_value_name(namespace, tags, item['value']) prologue = "%s = %s%s"%(name, value, delim) - elif py: - prologue = "%s = auto()"%(name) 
else: prologue = "%s,"%(name) - comment_style = "##" if py else "///<" for line in split_line(subt(namespace, tags, item['desc'], True), 70): - lines.append("%s%s %s"%(append_ws(prologue, 48), comment_style, line)) + lines.append("%s%s %s"%(append_ws(prologue, 48), "///<", line)) prologue = "" - if not py: - lines += [ - "/// @cond", - "%sFORCE_UINT32 = 0x7fffffff"%make_enum_name(namespace, tags, obj)[:-1].upper(), - "/// @endcond", - ] + lines += [ + "/// @cond", + "%sFORCE_UINT32 = 0x7fffffff"%make_enum_name(namespace, tags, obj)[:-1].upper(), + "/// @endcond", + ] return lines @@ -783,43 +768,6 @@ def _get_type_name(namespace, tags, obj, item): name = subt(namespace, tags, type,) return name -""" -Private: - returns python c_type name of any type -""" -def get_ctype_name(namespace, tags, item): - name = subt(namespace, tags, item['type']) - name = _remove_const(name) - name = re.sub(r"void\*", "c_void_p", name) - name = re.sub(r"char\*", "c_char_p", name) - name = re.sub(r"bool", "c_bool", name) - name = re.sub(r"uint8_t", "c_ubyte", name) - name = re.sub(r"uint16_t", "c_ushort", name) - name = re.sub(r"uint32_t", "c_ulong", name) - name = re.sub(r"uint64_t", "c_ulonglong", name) - name = re.sub(r"int8_t", "c_byte", name) - name = re.sub(r"int16_t", "c_short", name) - name = re.sub(r"int32_t", "c_long", name) - name = re.sub(r"int64_t", "c_longlong", name) - name = re.sub(r"size_t", "c_size_t", name) - name = re.sub(r"float", "c_float", name) - name = re.sub(r"double", "c_double", name) - name = re.sub(r"\bchar", "c_char", name) - name = re.sub(r"\bint", "c_int", name) - # Handle void - if re.match(r"void", name): - if not re.match(r"_void_", name): # its not c_void_p - name = re.sub(r"void", "None", name) - - while type_traits.is_pointer(name): - name = "POINTER(%s)"%_remove_ptr(name) - - if 'name' in item and type_traits.is_array(item['type']): - length = subt(namespace, tags, type_traits.get_array_length(item['type'])) - name = "%s * 
%s"%(type_traits.get_array_element_type(name), length) - - return name - """ Public: returns c/c++ name of member of struct/class @@ -836,32 +784,21 @@ def make_member_name(namespace, tags, item, prefix="", remove_array=False): Public: returns a list of strings for each member of a structure or class c++ format: "TYPE NAME = INIT, ///< DESCRIPTION" - python format: "("NAME", TYPE)" ## DESCRIPTION" """ -def make_member_lines(namespace, tags, obj, prefix="", py=False, meta=None): +def make_member_lines(namespace, tags, obj, prefix="", meta=None): lines = [] if 'members' not in obj: return lines for i, item in enumerate(obj['members']): - name = make_member_name(namespace, tags, item, prefix, remove_array=py) - - if py: - tname = get_ctype_name(namespace, tags, item) - else: - tname = _get_type_name(namespace, tags, obj, item) + name = make_member_name(namespace, tags, item, prefix) + tname = _get_type_name(namespace, tags, obj, item) - if py: - delim = "," if i < (len(obj['members'])-1) else "" - prologue = "(\"%s\", %s)%s"%(name, tname, delim) - else: - array_suffix = f"[{type_traits.get_array_length(item['type'])}]" if type_traits.is_array(item['type']) else "" - prologue = "%s %s %s;"%(tname, name, array_suffix) + array_suffix = f"[{type_traits.get_array_length(item['type'])}]" if type_traits.is_array(item['type']) else "" + prologue = "%s %s %s;"%(tname, name, array_suffix) - comment_style = "##" if py else "///<" - ws_count = 64 if py else 48 for line in split_line(subt(namespace, tags, item['desc'], True), 70): - lines.append("%s%s %s"%(append_ws(prologue, ws_count), comment_style, line)) + lines.append("%s%s %s"%(append_ws(prologue, 48), "///<", line)) prologue = "" return lines @@ -878,7 +815,7 @@ def _get_param_name(namespace, tags, item): returns a list of c++ strings for each parameter of a function format: "TYPE NAME = INIT, ///< DESCRIPTION" """ -def make_param_lines(namespace, tags, obj, py=False, decl=False, meta=None, format=["type", "name", "delim", 
"desc"], delim=",", replacements={}): +def make_param_lines(namespace, tags, obj, decl=False, meta=None, format=["type", "name", "delim", "desc"], delim=",", replacements={}): lines = [] params = obj['params'] @@ -890,19 +827,8 @@ def make_param_lines(namespace, tags, obj, py=False, decl=False, meta=None, form name = _get_param_name(namespace, tags, item) if replacements.get(name): name = replacements[name] - if py: - tname = get_ctype_name(namespace, tags, item) - # Handle fptr_typedef - # On Python side, passing a function pointer to a CFUNCTYPE is a bit awkward - # So solve this, if we encounter a function pointer type, we relpace it with - # c_void_p - a generic void pointer - if len(fptr_types) > 0: - for fptr_type in fptr_types: - if tname == subt(namespace, tags, fptr_type): - tname = 'c_void_p' # Substitute function pointers to c_void_p - break - else: - tname = _get_type_name(namespace, tags, obj, item) + + tname = _get_type_name(namespace, tags, obj, item) words = [] if "type*" in format: @@ -926,7 +852,7 @@ def make_param_lines(namespace, tags, obj, py=False, decl=False, meta=None, form else: lines.append(prologue) - if "type" in format and len(lines) == 0 and not py: + if "type" in format and len(lines) == 0: lines = ["void"] return lines diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a9fdf2ba37..335fae5e1d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,7 +14,6 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) enable_testing() -add_subdirectory(python) add_subdirectory(loader) add_subdirectory(adapters) add_subdirectory(conformance) diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt deleted file mode 100644 index 396d5722b8..0000000000 --- a/test/python/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -function(add_python_test name) - set(TEST_NAME python-${name}) - add_test(NAME ${TEST_NAME} - COMMAND ${Python3_EXECUTABLE} -B -m pytest ${CMAKE_CURRENT_SOURCE_DIR}/${name}.py - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_tests_properties(${TEST_NAME} PROPERTIES LABELS "python") - # python uses LD_LIBRARY_PATH (PATH on Windows) to search for dynamic libraries, - # so set it to the location where it can find the loader. - if(UNIX) - set_property(TEST ${TEST_NAME} PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - else() - set_property(TEST ${TEST_NAME} PROPERTY - ENVIRONMENT_MODIFICATION "PATH=cmake_list_prepend:${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") - endif() - # this is for importing the include/ur.py module in other python files - set_property(TEST ${TEST_NAME} APPEND PROPERTY - ENVIRONMENT "PYTHONPATH=${PROJECT_SOURCE_DIR}" "UR_ADAPTERS_FORCE_LOAD=\"$\"") -endfunction() - -add_python_test(basic) diff --git a/test/python/basic.py b/test/python/basic.py deleted file mode 100755 index 0d33d235db..0000000000 --- a/test/python/basic.py +++ /dev/null @@ -1,19 +0,0 @@ -#! /usr/bin/env python3 -""" - Copyright (C) 2022 Intel Corporation - - Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. - See LICENSE.TXT - SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -""" - -import pytest -import sys -import os - -import include.ur as ur - -def test_ddi(): - ddi = ur.UR_DDI(ur.ur_api_version_v.CURRENT) - assert True From a314dd8cc8f573d7937adef74f7da71cb8379166 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Fri, 8 Dec 2023 20:51:12 +0100 Subject: [PATCH 059/138] [UR][L0] Fix tracing param in disjoint_pool_parser The disjoint_pool implementation distinguishes between a few different levels of logging (0-3). The argument should be an int, not bool. 
--- source/adapters/level_zero/usm.cpp | 2 +- source/common/umf_pools/disjoint_pool_config_parser.cpp | 2 +- source/common/umf_pools/disjoint_pool_config_parser.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index c6d98855e7..0d696a5524 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -64,7 +64,7 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig() { ? PoolUrTraceVal : (PoolPiTraceVal ? PoolPiTraceVal : nullptr); - bool PoolTrace = false; + int PoolTrace = 0; if (PoolTraceVal != nullptr) { PoolTrace = std::atoi(PoolTraceVal); } diff --git a/source/common/umf_pools/disjoint_pool_config_parser.cpp b/source/common/umf_pools/disjoint_pool_config_parser.cpp index e7fabf07df..92ffd1197d 100644 --- a/source/common/umf_pools/disjoint_pool_config_parser.cpp +++ b/source/common/umf_pools/disjoint_pool_config_parser.cpp @@ -61,7 +61,7 @@ DisjointPoolAllConfigs::DisjointPoolAllConfigs() { } DisjointPoolAllConfigs parseDisjointPoolConfig(const std::string &config, - bool trace) { + int trace) { DisjointPoolAllConfigs AllConfigs; // TODO: replace with UR ENV var parser and avoid creating a copy of 'config' diff --git a/source/common/umf_pools/disjoint_pool_config_parser.hpp b/source/common/umf_pools/disjoint_pool_config_parser.hpp index 55f1242db1..2e9b07deba 100644 --- a/source/common/umf_pools/disjoint_pool_config_parser.hpp +++ b/source/common/umf_pools/disjoint_pool_config_parser.hpp @@ -51,7 +51,7 @@ class DisjointPoolAllConfigs { // Example of usage: // "1;32M;host:1M,4,64K;device:1M,4,64K;shared:0,0,2M" DisjointPoolAllConfigs parseDisjointPoolConfig(const std::string &config, - bool trace = 1); + int trace = 1); } // namespace usm #endif From 8ee650de3f92ca55b8411ce73612654779b8cdbd Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 13 Dec 2023 19:38:30 +0100 Subject: [PATCH 060/138] [UR][L0] Make allocator tracing 
independent of setting the limits Without this change, setting UR_L0_USM_ALLOCATOR_TRACE or SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR_TRACE only had an effect if UR_L0_USM_ALLOCATOR or SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR was also set. --- source/adapters/level_zero/usm.cpp | 16 ++++++++-------- .../umf_pools/disjoint_pool_config_parser.cpp | 6 +++++- .../umf_pools/disjoint_pool_config_parser.hpp | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 0d696a5524..febea17d32 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -49,14 +49,6 @@ ur_result_t umf2urResult(umf_result_t umfResult) { } usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig() { - const char *PoolUrConfigVal = std::getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR"); - const char *PoolPiConfigVal = std::getenv("UR_L0_USM_ALLOCATOR"); - const char *PoolConfigVal = - PoolUrConfigVal ? PoolUrConfigVal : PoolPiConfigVal; - if (PoolConfigVal == nullptr) { - return usm::DisjointPoolAllConfigs(); - } - const char *PoolUrTraceVal = std::getenv("UR_L0_USM_ALLOCATOR_TRACE"); const char *PoolPiTraceVal = std::getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR_TRACE"); @@ -69,6 +61,14 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig() { PoolTrace = std::atoi(PoolTraceVal); } + const char *PoolUrConfigVal = std::getenv("SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR"); + const char *PoolPiConfigVal = std::getenv("UR_L0_USM_ALLOCATOR"); + const char *PoolConfigVal = + PoolUrConfigVal ? 
PoolUrConfigVal : PoolPiConfigVal; + if (PoolConfigVal == nullptr) { + return usm::DisjointPoolAllConfigs(PoolTrace); + } + return usm::parseDisjointPoolConfig(PoolConfigVal, PoolTrace); } diff --git a/source/common/umf_pools/disjoint_pool_config_parser.cpp b/source/common/umf_pools/disjoint_pool_config_parser.cpp index 92ffd1197d..539529fb90 100644 --- a/source/common/umf_pools/disjoint_pool_config_parser.cpp +++ b/source/common/umf_pools/disjoint_pool_config_parser.cpp @@ -24,7 +24,7 @@ constexpr auto operator""_GB(unsigned long long x) -> size_t { return x * 1024 * 1024 * 1024; } -DisjointPoolAllConfigs::DisjointPoolAllConfigs() { +DisjointPoolAllConfigs::DisjointPoolAllConfigs(int trace) { Configs[DisjointPoolMemType::Host].name = "Host"; Configs[DisjointPoolMemType::Device].name = "Device"; Configs[DisjointPoolMemType::Shared].name = "Shared"; @@ -58,6 +58,10 @@ DisjointPoolAllConfigs::DisjointPoolAllConfigs() { Configs[DisjointPoolMemType::SharedReadOnly].MaxPoolableSize = 4_MB; Configs[DisjointPoolMemType::SharedReadOnly].Capacity = 4; Configs[DisjointPoolMemType::SharedReadOnly].SlabMinSize = 2_MB; + + for (auto &Config : Configs) { + Config.PoolTrace = trace; + } } DisjointPoolAllConfigs parseDisjointPoolConfig(const std::string &config, diff --git a/source/common/umf_pools/disjoint_pool_config_parser.hpp b/source/common/umf_pools/disjoint_pool_config_parser.hpp index 2e9b07deba..98455fba0e 100644 --- a/source/common/umf_pools/disjoint_pool_config_parser.hpp +++ b/source/common/umf_pools/disjoint_pool_config_parser.hpp @@ -23,7 +23,7 @@ class DisjointPoolAllConfigs { size_t EnableBuffers = 1; DisjointPoolConfig Configs[DisjointPoolMemType::All]; - DisjointPoolAllConfigs(); + DisjointPoolAllConfigs(int trace = 0); }; // Parse optional config parameters of this form: From 776a647e572a842316cf72d27fbf67d078f8647f Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Tue, 21 Nov 2023 16:54:04 -0800 Subject: [PATCH 061/138] [UR][L0] Add several fixes to L0 adapter 
for test-usm Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/context.hpp | 3 + source/adapters/level_zero/usm.cpp | 93 ++++++++++++++++--- source/adapters/level_zero/usm.hpp | 2 + .../usm/usm_adapter_level_zero.match | 11 --- 4 files changed, 85 insertions(+), 24 deletions(-) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 94935ee59e..96935d470e 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -115,6 +115,9 @@ struct ur_context_handle_t_ : _ur_object { SharedReadOnlyMemProxyPools; umf::pool_unique_handle_t HostMemProxyPool; + // Map associating pools created with urUsmPoolCreate and internal pools + std::list UsmPoolHandles{}; + // We need to store all memory allocations in the context because there could // be kernels with indirect access. Kernels with indirect access start to // reference all existing memory allocations at the time when they are diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index c6d98855e7..ed4c0f40b3 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -187,8 +187,15 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.pNext = &RelaxedDesc; } - ZE2UR_CALL(zeMemAllocDevice, (Context->ZeContext, &ZeDesc, Size, Alignment, - Device->ZeDevice, ResultPtr)); + ze_result_t ZeResult = + zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, Alignment, + Device->ZeDevice, ResultPtr); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -226,8 +233,15 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, ZeDevDesc.pNext = &RelaxedDesc; } - ZE2UR_CALL(zeMemAllocShared, (Context->ZeContext, &ZeDevDesc, &ZeHostDesc, - Size, Alignment, Device->ZeDevice, 
ResultPtr)); + ze_result_t ZeResult = + zeMemAllocShared(Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, + Alignment, Device->ZeDevice, ResultPtr); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -254,8 +268,14 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; ZeHostDesc.flags = 0; - ZE2UR_CALL(zeMemAllocHost, - (Context->ZeContext, &ZeHostDesc, Size, Alignment, ResultPtr)); + ze_result_t ZeResult = zeMemAllocHost(Context->ZeContext, &ZeHostDesc, Size, + Alignment, ResultPtr); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -599,6 +619,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( ZE2UR_CALL(zeMemGetAddressRange, (Context->ZeContext, Ptr, nullptr, &Size)); return ReturnValue(Size); } + case UR_USM_ALLOC_INFO_POOL: { + auto UMFPool = umfPoolByPtr(Ptr); + if (!UMFPool) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + std::shared_lock ContextLock(Context->Mutex); + + auto SearchMatchingPool = + [](std::unordered_map + &PoolMap, + umf_memory_pool_handle_t UMFPool) { + for (auto &PoolPair : PoolMap) { + if (PoolPair.second.get() == UMFPool) { + return true; + } + } + return false; + }; + + for (auto &Pool : Context->UsmPoolHandles) { + if (SearchMatchingPool(Pool->DeviceMemPools, UMFPool)) { + return ReturnValue(Pool); + } + if (SearchMatchingPool(Pool->SharedMemPools, UMFPool)) { + return ReturnValue(Pool); + } + if (Pool->HostMemPool.get() == UMFPool) { + return ReturnValue(Pool); + } + } + + return UR_RESULT_ERROR_INVALID_VALUE; + } default: 
urPrint("urUSMGetMemAllocInfo: unsupported ParamName\n"); return UR_RESULT_ERROR_INVALID_VALUE; @@ -748,6 +802,7 @@ ur_result_t L0HostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) { + this->Context = Context; zeroInit = static_cast(PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK); @@ -831,6 +886,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( try { *Pool = reinterpret_cast( new ur_usm_pool_handle_t_(Context, PoolDesc)); + + std::shared_lock ContextLock(Context->Mutex); + Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); + } catch (const UsmAllocationException &Ex) { return Ex.getError(); } @@ -848,6 +907,8 @@ ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool ) { if (Pool->RefCount.decrementAndTest()) { + std::shared_lock ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); delete Pool; } return UR_RESULT_SUCCESS; @@ -861,13 +922,19 @@ ur_result_t urUSMPoolGetInfo( ///< property size_t *PropSizeRet ///< [out] size in bytes returned in pool property value ) { - std::ignore = Pool; - std::ignore = PropName; - std::ignore = PropSize; - std::ignore = PropValue; - std::ignore = PropSizeRet; - urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case UR_USM_POOL_INFO_REFERENCE_COUNT: { + return ReturnValue(Pool->RefCount.load()); + } + case UR_USM_POOL_INFO_CONTEXT: { + return ReturnValue(Pool->Context); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } } // If indirect access tracking is not enabled then this functions just performs diff --git a/source/adapters/level_zero/usm.hpp b/source/adapters/level_zero/usm.hpp index 01e215c578..958fca9354 100644 --- 
a/source/adapters/level_zero/usm.hpp +++ b/source/adapters/level_zero/usm.hpp @@ -29,6 +29,8 @@ struct ur_usm_pool_handle_t_ : _ur_object { SharedReadOnlyMemPools; umf::pool_unique_handle_t HostMemPool; + ur_context_handle_t Context{}; + ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc); }; diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index bf45b83ec2..e69de29bb2 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,11 +0,0 @@ -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_CONTEXT -urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMPoolRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled From ad970bff9ab2d893797808f545836d4ceeb8e960 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Tue, 12 Dec 2023 14:11:04 +0000 Subject: [PATCH 062/138] [L0] 
Make two USM tests optionally pass There is a discrepancy between Debug and Release build pass rates in the USM test suite on Level Zero. Make those tests optional until the fix is introduced. --- test/conformance/usm/usm_adapter_level_zero.match | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index e69de29bb2..c036fa785c 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -0,0 +1,2 @@ +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled From a51f6aa9e82763aace8ac1196336def48bbb6d2e Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Wed, 13 Dec 2023 18:13:12 -0800 Subject: [PATCH 063/138] [L0] Fix Calls to L0 apis to use ZE_CALL_NOCHECK Signed-off-by: Spruit, Neil R --- source/adapters/level_zero/usm.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index ed4c0f40b3..06cfe22dae 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -188,8 +188,8 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, } ze_result_t ZeResult = - zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, Alignment, - Device->ZeDevice, ResultPtr); + ZE_CALL_NOCHECK(zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, Alignment, + Device->ZeDevice, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; @@ -234,8 +234,8 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, } ze_result_t ZeResult = - zeMemAllocShared(Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, - Alignment, Device->ZeDevice, 
ResultPtr); + ZE_CALL_NOCHECK(zeMemAllocShared(Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, + Alignment, Device->ZeDevice, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; @@ -268,8 +268,8 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; ZeHostDesc.flags = 0; - ze_result_t ZeResult = zeMemAllocHost(Context->ZeContext, &ZeHostDesc, Size, - Alignment, ResultPtr); + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeMemAllocHost(Context->ZeContext, &ZeHostDesc, Size, + Alignment, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; From fdfdb1acd1d69d02416918b9dffe6c4555e4ab14 Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Wed, 13 Dec 2023 18:28:13 -0800 Subject: [PATCH 064/138] [L0] Fix formatting of new api calls Signed-off-by: Spruit, Neil R --- source/adapters/level_zero/usm.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 06cfe22dae..6adab96f97 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -188,8 +188,8 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, } ze_result_t ZeResult = - ZE_CALL_NOCHECK(zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, Alignment, - Device->ZeDevice, ResultPtr)); + ZE_CALL_NOCHECK(zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, + Alignment, Device->ZeDevice, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; @@ -233,8 +233,8 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, ZeDevDesc.pNext = &RelaxedDesc; } - ze_result_t ZeResult = - ZE_CALL_NOCHECK(zeMemAllocShared(Context->ZeContext, &ZeDevDesc, 
&ZeHostDesc, Size, + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeMemAllocShared(Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, Alignment, Device->ZeDevice, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { @@ -268,8 +268,8 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; ZeHostDesc.flags = 0; - ze_result_t ZeResult = ZE_CALL_NOCHECK(zeMemAllocHost(Context->ZeContext, &ZeHostDesc, Size, - Alignment, ResultPtr)); + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeMemAllocHost( + Context->ZeContext, &ZeHostDesc, Size, Alignment, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; From 815a28691ccf25b7db09d2fd01a9fff6271097ad Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Wed, 13 Dec 2023 18:36:00 -0800 Subject: [PATCH 065/138] [L0] Fix reformatted calls to no check Signed-off-by: Spruit, Neil R --- source/adapters/level_zero/usm.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 6adab96f97..d2dfc9b37d 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -187,9 +187,9 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.pNext = &RelaxedDesc; } - ze_result_t ZeResult = - ZE_CALL_NOCHECK(zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, - Alignment, Device->ZeDevice, ResultPtr)); + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeMemAllocDevice, (Context->ZeContext, &ZeDesc, Size, Alignment, + Device->ZeDevice, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; @@ -234,8 +234,8 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, } ze_result_t ZeResult = ZE_CALL_NOCHECK( - 
zeMemAllocShared(Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, - Alignment, Device->ZeDevice, ResultPtr)); + zeMemAllocShared, (Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, + Alignment, Device->ZeDevice, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; @@ -268,8 +268,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; ZeHostDesc.flags = 0; - ze_result_t ZeResult = ZE_CALL_NOCHECK(zeMemAllocHost( - Context->ZeContext, &ZeHostDesc, Size, Alignment, ResultPtr)); + ze_result_t ZeResult = + ZE_CALL_NOCHECK(zeMemAllocHost, (Context->ZeContext, &ZeHostDesc, Size, + Alignment, ResultPtr)); if (ZeResult != ZE_RESULT_SUCCESS) { if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; From 3f94f6aa60619e1bf1f125f9f82b674a22a9da65 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Tue, 12 Dec 2023 19:55:16 +0100 Subject: [PATCH 066/138] Enable adapters for coverity build --- .github/workflows/coverity.yml | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index a72b7caf77..ab065ee77e 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -9,10 +9,10 @@ name: coverity-unified-runtime # It runs static analysis build - Coverity. It requires special token (set in CI's secret). 
on: - schedule: - # Run every day at 23:00 UTC - - cron: '0 23 * * *' workflow_dispatch: + schedule: + # Run every day at 22:00 UTC + - cron: '0 22 * * *' env: WORKDIR: ${{ github.workspace }} @@ -23,10 +23,11 @@ env: COVERITY_SCAN_BRANCH_PATTERN: "main" TRAVIS_BRANCH: ${{ github.ref_name }} + jobs: linux: name: Coverity - runs-on: ubuntu-latest + runs-on: coverity steps: - name: Clone the git repo @@ -36,7 +37,20 @@ jobs: run: pip install -r third_party/requirements.txt - name: Configure CMake - run: cmake -B $WORKDIR/build -DUR_ENABLE_TRACING=ON -DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON -DUMF_ENABLE_POOL_TRACKING=ON + run: > + cmake + -B $WORKDIR/build + -DUR_ENABLE_TRACING=ON + -DUR_DEVELOPER_MODE=ON + -DUR_BUILD_TESTS=ON + -DUMF_ENABLE_POOL_TRACKING=ON + -DUR_FORMAT_CPP_STYLE=ON + -DCMAKE_BUILD_TYPE=Debug + -DUR_BUILD_ADAPTER_L0=ON + -DUR_BUILD_ADAPTER_CUDA=ON + -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so + -DUR_BUILD_ADAPTER_NATIVE_CPU=ON + -DUR_BUILD_ADAPTER_HIP=ON - name: Run Coverity run: | From 771b743165df905882ce4f6adf6d51af71ba4bc0 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 13 Dec 2023 16:56:40 +0000 Subject: [PATCH 067/138] Fix coverity issues in OpenCL, cuda and hip adapters. 
--- source/adapters/cuda/command_buffer.cpp | 4 ++-- source/adapters/cuda/command_buffer.hpp | 6 ++--- source/adapters/cuda/device.cpp | 15 ++++++------ source/adapters/cuda/event.cpp | 29 +++++++---------------- source/adapters/cuda/event.hpp | 17 +++++++++++--- source/adapters/cuda/image.cpp | 2 +- source/adapters/cuda/memory.hpp | 4 ++-- source/adapters/cuda/program.cpp | 2 +- source/adapters/cuda/sampler.cpp | 2 +- source/adapters/hip/device.hpp | 2 +- source/adapters/hip/enqueue.cpp | 31 +++++++------------------ source/adapters/hip/event.cpp | 22 +++++++++++------- source/adapters/hip/event.hpp | 5 ++-- source/adapters/hip/memory.hpp | 14 ++++------- source/adapters/hip/program.cpp | 1 - source/adapters/hip/sampler.cpp | 1 - source/adapters/opencl/program.cpp | 11 +++++---- 17 files changed, 76 insertions(+), 92 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 24a5d9497c..1919b61cdb 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -21,8 +21,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice) - : Context(hContext), - Device(hDevice), CudaGraph{nullptr}, CudaGraphExec{nullptr}, RefCount{1} { + : Context(hContext), Device(hDevice), CudaGraph{nullptr}, + CudaGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 4ceab42062..18264410c4 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -184,7 +184,7 @@ struct ur_exp_command_buffer_handle_t_ { void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, std::shared_ptr CuNode) { - SyncPoints[SyncPoint] = CuNode; + SyncPoints[SyncPoint] = std::move(CuNode); NextSyncPoint++; } @@ -193,12 +193,12 @@ struct 
ur_exp_command_buffer_handle_t_ { } // Helper to register next sync point - // @param CuNode Node to register as next sycn point + // @param CuNode Node to register as next sync point // @return Pointer to the sync that registers the Node ur_exp_command_buffer_sync_point_t AddSyncPoint(std::shared_ptr CuNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - RegisterSyncPoint(SyncPoint, CuNode); + RegisterSyncPoint(SyncPoint, std::move(CuNode)); return SyncPoint; } diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index a4877236ae..fb67b887ba 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1143,17 +1143,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( if (Result != UR_RESULT_SUCCESS) return Result; - ur_platform_handle_t *Plat = static_cast( - malloc(NumPlatforms * sizeof(ur_platform_handle_t))); - Result = urPlatformGet(&AdapterHandle, 1, NumPlatforms, Plat, nullptr); + std::vector Platforms(NumPlatforms); + + Result = urPlatformGet(&AdapterHandle, 1, Platforms.size(), Platforms.data(), + nullptr); if (Result != UR_RESULT_SUCCESS) return Result; // Iterate through platforms to find device that matches nativeHandle - for (uint32_t j = 0; j < NumPlatforms; ++j) { - auto SearchRes = - std::find_if(begin(Plat[j]->Devices), end(Plat[j]->Devices), IsDevice); - if (SearchRes != end(Plat[j]->Devices)) { + for (auto Platform : Platforms) { + auto SearchRes = std::find_if(begin(Platform->Devices), + end(Platform->Devices), IsDevice); + if (SearchRes != end(Platform->Devices)) { *phDevice = static_cast((*SearchRes).get()); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/cuda/event.cpp b/source/adapters/cuda/event.cpp index 6137f0ecce..2cbfcbc39b 100644 --- a/source/adapters/cuda/event.cpp +++ b/source/adapters/cuda/event.cpp @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #include "event.hpp" -#include 
"common.hpp" #include "context.hpp" #include "device.hpp" #include "queue.hpp" @@ -19,26 +18,15 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context, - ur_queue_handle_t Queue, CUstream Stream, + ur_queue_handle_t Queue, + native_type EvEnd, native_type EvQueued, + native_type EvStart, CUstream Stream, uint32_t StreamToken) : CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr}, - EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} { - - bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; - - UR_CHECK_ERROR(cuEventCreate( - &EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); - - if (ProfilingEnabled) { - UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT)); - UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT)); - } - - if (Queue != nullptr) { - urQueueRetain(Queue); - } + StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart}, + EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} { + urQueueRetain(Queue); urContextRetain(Context); } @@ -46,8 +34,9 @@ ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, CUevent EventNative) : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{std::numeric_limits::max()}, EvEnd{EventNative}, - EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} { + StreamToken{std::numeric_limits::max()}, EventID{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } diff --git a/source/adapters/cuda/event.hpp b/source/adapters/cuda/event.hpp index 3e5f466716..c583bf0735 100644 --- a/source/adapters/cuda/event.hpp +++ b/source/adapters/cuda/event.hpp @@ -12,6 +12,7 @@ #include #include 
+#include "common.hpp" #include "queue.hpp" /// UR Event mapping to CUevent @@ -82,8 +83,17 @@ struct ur_event_handle_t_ { static ur_event_handle_t makeNative(ur_command_t Type, ur_queue_handle_t Queue, CUstream Stream, uint32_t StreamToken = std::numeric_limits::max()) { - return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream, - StreamToken); + bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; + native_type EvEnd = nullptr, EvQueued = nullptr, EvStart = nullptr; + UR_CHECK_ERROR(cuEventCreate( + &EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); + + if (ProfilingEnabled) { + UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT)); + UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT)); + } + return new ur_event_handle_t_(Type, Queue->getContext(), Queue, EvEnd, + EvQueued, EvStart, Stream, StreamToken); } static ur_event_handle_t makeWithNative(ur_context_handle_t context, @@ -99,7 +109,8 @@ struct ur_event_handle_t_ { // This constructor is private to force programmers to use the makeNative / // make_user static members in order to create a pi_event for CUDA. 
ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context, - ur_queue_handle_t Queue, CUstream Stream, + ur_queue_handle_t Queue, native_type EvEnd, + native_type EvQueued, native_type EvStart, CUstream Stream, uint32_t StreamToken); // This constructor is private to force programmers to use the diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 7ec53bd8bc..1f336dd2d7 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -234,7 +234,7 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format, ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, const ur_image_desc_t *pImageDesc, - CUDA_RESOURCE_DESC ResourceDesc, + const CUDA_RESOURCE_DESC &ResourceDesc, ur_exp_image_handle_t *phRetImage) { try { diff --git a/source/adapters/cuda/memory.hpp b/source/adapters/cuda/memory.hpp index e60e415d39..3a9a7e2d6b 100644 --- a/source/adapters/cuda/memory.hpp +++ b/source/adapters/cuda/memory.hpp @@ -190,7 +190,7 @@ struct ur_mem_handle_t_ { /// Constructs the UR allocation for an unsampled image object ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUsurfObject Surf, ur_mem_type_t ImageType) - : Context{Context}, RefCount{1}, MemType{Type::Surface}, + : Context{Context}, RefCount{1}, MemType{Type::Surface}, MemFlags{0}, Mem{ImageMem{Array, (void *)Surf, ImageType, nullptr}} { urContextRetain(Context); } @@ -198,7 +198,7 @@ struct ur_mem_handle_t_ { /// Constructs the UR allocation for a sampled image object ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUtexObject Tex, ur_sampler_handle_t Sampler, ur_mem_type_t ImageType) - : Context{Context}, RefCount{1}, MemType{Type::Texture}, + : Context{Context}, RefCount{1}, MemType{Type::Texture}, MemFlags{0}, Mem{ImageMem{Array, (void *)Tex, ImageType, Sampler}} { urContextRetain(Context); } diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 6660c20d06..23e4844c72 100644 --- 
a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -137,7 +137,7 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { if (!this->BuildOptions.empty()) { unsigned int MaxRegs; - bool Valid = getMaxRegistersJitOptionValue(BuildOptions, MaxRegs); + bool Valid = getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs); if (Valid) { Options.push_back(CU_JIT_MAX_REGISTERS); OptionVals.push_back(reinterpret_cast(MaxRegs)); diff --git a/source/adapters/cuda/sampler.cpp b/source/adapters/cuda/sampler.cpp index 5c6b91de65..0e1305da23 100644 --- a/source/adapters/cuda/sampler.cpp +++ b/source/adapters/cuda/sampler.cpp @@ -17,7 +17,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, std::unique_ptr Sampler{ new ur_sampler_handle_t_(hContext)}; - if (pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) { + if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) { Sampler->Props |= pDesc->normalizedCoords; Sampler->Props |= pDesc->filterMode << 1; Sampler->Props |= pDesc->addressingMode << 2; diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index bea2c46fb5..181c5a7bdb 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -33,7 +33,7 @@ struct ur_device_handle_t_ { : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), HIPContext(Context), DeviceIndex(DeviceIndex) {} - ~ur_device_handle_t_() { + ~ur_device_handle_t_() noexcept(false) { UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice)); } diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 078d3ae399..5f7fffba35 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -987,8 +987,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_result_t Result = UR_RESULT_SUCCESS; - 
ur_lock MemoryMigrationLock{hImage->MemoryMigrationMutex}; auto Device = hQueue->getDevice(); hipStream_t HIPStream = hQueue->getNextTransferStream(); @@ -1039,13 +1037,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( UR_CHECK_ERROR(RetImplEvent->start()); } - Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion, - Array, hipMemoryTypeArray, SrcOffset, - pDst, hipMemoryTypeHost, nullptr); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } + UR_CHECK_ERROR(commonEnqueueMemImageNDCopy( + HIPStream, ImgType, AdjustedRegion, Array, hipMemoryTypeArray, + SrcOffset, pDst, hipMemoryTypeHost, nullptr)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -1061,7 +1055,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( return UR_RESULT_ERROR_UNKNOWN; } return UR_RESULT_SUCCESS; - return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -1071,15 +1064,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_result_t Result = UR_RESULT_SUCCESS; - try { ScopedContext Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); } hipArray *Array = @@ -1107,13 +1098,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( UR_CHECK_ERROR(RetImplEvent->start()); } - Result = commonEnqueueMemImageNDCopy(HIPStream, ImgType, AdjustedRegion, - pSrc, hipMemoryTypeHost, nullptr, - Array, hipMemoryTypeArray, DstOffset); - - if (Result != UR_RESULT_SUCCESS) { - return Result; - } + UR_CHECK_ERROR(commonEnqueueMemImageNDCopy( + HIPStream, ImgType, AdjustedRegion, pSrc, hipMemoryTypeHost, nullptr, + Array, 
hipMemoryTypeArray, DstOffset)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -1126,8 +1113,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( } return UR_RESULT_SUCCESS; - - return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index 2af6c5e910..313212724a 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -19,7 +19,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, hipStream_t Stream, uint32_t StreamToken) : CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr}, + StreamToken{StreamToken}, EventId{0}, EvEnd{nullptr}, EvStart{nullptr}, EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} { bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; @@ -32,9 +32,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, UR_CHECK_ERROR(hipEventCreateWithFlags(&EvStart, hipEventDefault)); } - if (Queue != nullptr) { - urQueueRetain(Queue); - } + urQueueRetain(Queue); urContextRetain(Context); } @@ -42,8 +40,9 @@ ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, hipEvent_t EventNative) : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - StreamToken{std::numeric_limits::max()}, EvEnd{EventNative}, - EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} { + StreamToken{std::numeric_limits::max()}, EventId{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } @@ -72,7 +71,7 @@ ur_result_t ur_event_handle_t_::start() { return Result; } -bool ur_event_handle_t_::isCompleted() const noexcept { +bool ur_event_handle_t_::isCompleted() const { if (!IsRecorded) 
{ return false; } @@ -225,8 +224,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, return ReturnValue(hEvent->getCommandType()); case UR_EVENT_INFO_REFERENCE_COUNT: return ReturnValue(hEvent->getReferenceCount()); - case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: - return ReturnValue(hEvent->getExecutionStatus()); + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: { + try { + return ReturnValue(hEvent->getExecutionStatus()); + } catch (ur_result_t Error) { + return Error; + } + } case UR_EVENT_INFO_CONTEXT: return ReturnValue(hEvent->getContext()); default: diff --git a/source/adapters/hip/event.hpp b/source/adapters/hip/event.hpp index ecb995dfbe..50de73b14f 100644 --- a/source/adapters/hip/event.hpp +++ b/source/adapters/hip/event.hpp @@ -42,10 +42,9 @@ struct ur_event_handle_t_ { bool isStarted() const noexcept { return IsStarted; } - bool isCompleted() const noexcept; - - uint32_t getExecutionStatus() const noexcept { + bool isCompleted() const; + uint32_t getExecutionStatus() const { if (!isRecorded()) { return UR_EVENT_STATUS_SUBMITTED; } diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index d36b9ee001..7707794b3c 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -187,20 +187,16 @@ struct SurfaceMem { void *HostPtr) : Arrays(Context->Devices.size(), nullptr), SurfObjs(Context->Devices.size(), nullptr), - OuterMemStruct{OuterMemStruct}, - ImageFormat{ImageFormat}, ImageDesc{ImageDesc}, HostPtr{HostPtr} { + OuterMemStruct{OuterMemStruct}, ImageFormat{ImageFormat}, + ImageDesc{ImageDesc}, ArrayDesc{}, HostPtr{HostPtr} { // We have to use hipArray3DCreate, which has some caveats. The height and // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc // gives a minimum value of 1, so we need to convert the answer. 
ArrayDesc.NumChannels = 4; // Only support 4 channel image ArrayDesc.Flags = 0; // No flags required ArrayDesc.Width = ImageDesc.width; - if (ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { - ArrayDesc.Height = 0; - ArrayDesc.Depth = 0; - } else if (ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + if (ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { ArrayDesc.Height = ImageDesc.height; - ArrayDesc.Depth = 0; } else if (ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { ArrayDesc.Height = ImageDesc.height; ArrayDesc.Depth = ImageDesc.depth; @@ -456,7 +452,7 @@ struct ur_mem_handle_t_ { urContextRetain(Context); } - ~ur_mem_handle_t_() { + ~ur_mem_handle_t_() noexcept(false) { if (isBuffer() && isSubBuffer()) { urMemRelease(std::get(Mem).Parent); return; @@ -468,7 +464,7 @@ struct ur_mem_handle_t_ { return std::holds_alternative(Mem); } - bool isSubBuffer() const noexcept { + bool isSubBuffer() const { return (isBuffer() && (std::get(Mem).Parent != nullptr)); } diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 0cf539602b..4d0351eb5b 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -408,7 +408,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, const uint8_t *pBinary, const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { - UR_ASSERT(pBinary != nullptr && size != 0, UR_RESULT_ERROR_INVALID_BINARY); UR_ASSERT(std::find(hContext->getDevices().begin(), hContext->getDevices().end(), hDevice) != hContext->getDevices().end(), diff --git a/source/adapters/hip/sampler.cpp b/source/adapters/hip/sampler.cpp index 5a177d6a9f..1ee1996164 100644 --- a/source/adapters/hip/sampler.cpp +++ b/source/adapters/hip/sampler.cpp @@ -58,7 +58,6 @@ ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, default: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - return {}; } ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) { diff 
--git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index c6e11fe06c..f628c8152b 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -347,12 +347,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( &Ctx, &RetSize)); std::unique_ptr> DevicesInCtx; - cl_adapter::getDevicesFromContext(cl_adapter::cast(Ctx), - DevicesInCtx); + UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext( + cl_adapter::cast(Ctx), DevicesInCtx)); cl_platform_id CurPlatform; - clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr); + CL_RETURN_ON_FAILURE(clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, + nullptr)); oclv::OpenCLVersion PlatVer; cl_adapter::getPlatformVersion(CurPlatform, PlatVer); @@ -364,7 +365,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( for (cl_device_id Dev : *DevicesInCtx) { oclv::OpenCLVersion DevVer; - cl_adapter::getDeviceVersion(Dev, DevVer); + UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(Dev, DevVer)); if (DevVer < oclv::V2_2) { UseExtensionLookup = true; From 6a83d3e76be4a0eb1f2fcca82cfbd65924108d3e Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Thu, 14 Dec 2023 14:12:59 +0000 Subject: [PATCH 068/138] PR feedback --- source/adapters/cuda/device.cpp | 10 +++++----- source/adapters/cuda/event.hpp | 3 ++- source/adapters/cuda/program.cpp | 3 ++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index fb67b887ba..8d95ad05e8 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1145,15 +1145,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( std::vector Platforms(NumPlatforms); - Result = urPlatformGet(&AdapterHandle, 1, Platforms.size(), Platforms.data(), - nullptr); + Result = + urPlatformGet(&AdapterHandle, 1, 
NumPlatforms, Platforms.data(), nullptr); if (Result != UR_RESULT_SUCCESS) return Result; // Iterate through platforms to find device that matches nativeHandle - for (auto Platform : Platforms) { - auto SearchRes = std::find_if(begin(Platform->Devices), - end(Platform->Devices), IsDevice); + for (const auto Platform : Platforms) { + auto SearchRes = std::find_if(std::begin(Platform->Devices), + std::end(Platform->Devices), IsDevice); if (SearchRes != end(Platform->Devices)) { *phDevice = static_cast((*SearchRes).get()); return UR_RESULT_SUCCESS; diff --git a/source/adapters/cuda/event.hpp b/source/adapters/cuda/event.hpp index c583bf0735..390fd7833a 100644 --- a/source/adapters/cuda/event.hpp +++ b/source/adapters/cuda/event.hpp @@ -83,7 +83,8 @@ struct ur_event_handle_t_ { static ur_event_handle_t makeNative(ur_command_t Type, ur_queue_handle_t Queue, CUstream Stream, uint32_t StreamToken = std::numeric_limits::max()) { - bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; + const bool ProfilingEnabled = + Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE; native_type EvEnd = nullptr, EvQueued = nullptr, EvStart = nullptr; UR_CHECK_ERROR(cuEventCreate( &EvEnd, ProfilingEnabled ? 
CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 23e4844c72..9b7959eb85 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -137,7 +137,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { if (!this->BuildOptions.empty()) { unsigned int MaxRegs; - bool Valid = getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs); + const bool Valid = + getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs); if (Valid) { Options.push_back(CU_JIT_MAX_REGISTERS); OptionVals.push_back(reinterpret_cast(MaxRegs)); From b4acd546b9f493b8aa4852a9f1213e56296c562a Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Thu, 9 Nov 2023 11:50:07 -0800 Subject: [PATCH 069/138] [UR][L0] Upgrade L0 loader to v1.15.1 This is to use the latest features present in the L0 spec. Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/CMakeLists.txt | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 223692e109..dfbacec377 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -22,8 +22,20 @@ endif() if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) message(STATUS "Download Level Zero loader and headers from github.com") + # Workaround warnings/errors for Level Zero build + set(CMAKE_CXX_FLAGS_BAK "${CMAKE_CXX_FLAGS}") + if (UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-truncation") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++98-compat-extra-semi") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-warning-option") + set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -Wno-non-virtual-dtor") + endif() + set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") - set(LEVEL_ZERO_LOADER_TAG v1.11.0) + set(LEVEL_ZERO_LOADER_TAG v1.15.1) # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 set(CMAKE_INCLUDE_CURRENT_DIR OFF) @@ -42,6 +54,9 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) FetchContent_MakeAvailable(level-zero-loader) FetchContent_GetProperties(level-zero-loader) + # Restore original flags + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_BAK}") + target_compile_options(ze_loader PRIVATE $<$,GNU;Clang;Intel;IntelLLVM>:-Wno-error> $<$:/WX- /UUNICODE> From 9fc2c2ebd60b97f1b8f4a5c3a6ac51eed5a5c460 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Mon, 27 Nov 2023 15:38:53 -0800 Subject: [PATCH 070/138] Address comments Signed-off-by: Jaime Arteaga --- source/adapters/level_zero/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index dfbacec377..7b24223b95 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -31,7 +31,6 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++98-compat-extra-semi") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-warning-option") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-non-virtual-dtor") endif() set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") From 01cd56d2523c1f545955f86b98b637b25ef08d03 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 15 Dec 2023 11:12:53 +0000 Subject: [PATCH 071/138] Adds warning message to CUDA + removes calls to `die()` in Hip --- source/adapters/cuda/command_buffer.cpp | 10 ++++++++++ source/adapters/hip/command_buffer.cpp | 4 ---- 2 files changed, 10 
insertions(+), 4 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 695ff03ce2..4a3f3da60e 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -549,6 +549,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( // Get sync point and register the cuNode with it. *pSyncPoint = hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + + setErrorMessage("Prefetch hint ignored and replaced with empty node as " + "prefetch is not supported by CUDA Graph backend", + UR_RESULT_SUCCESS); + Result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; } catch (ur_result_t Err) { Result = Err; } @@ -579,6 +584,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( // Get sync point and register the cuNode with it. *pSyncPoint = hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + + setErrorMessage("Memory advice ignored and replaced with empty node as " + "memory advice is not supported by CUDA Graph backend", + UR_RESULT_SUCCESS); + Result = UR_RESULT_ERROR_ADAPTER_SPECIFIC; } catch (ur_result_t Err) { Result = Err; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index c85b3e9216..c7609b6110 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -127,8 +127,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_usm_migration_flags_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -136,8 +134,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, 
ur_exp_command_buffer_sync_point_t *) { - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 01ce7faf7553a4443b62e88d55712bb9855466f8 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 20 Sep 2023 17:34:51 +0100 Subject: [PATCH 072/138] CTS fixes for ProgramLink and ProgramCreateWithBinary We now no longer attempt to link two programs with duplicate definitions together (the SPIR-V dpc++ generates for us always contains a wrapper function with the same name and function signature) We also now correctly query program binaries --- .../program/urProgramCreateWithBinary.cpp | 9 +++-- test/conformance/program/urProgramLink.cpp | 40 ++++++------------- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/test/conformance/program/urProgramCreateWithBinary.cpp b/test/conformance/program/urProgramCreateWithBinary.cpp index b0857e6007..fb6c8b128c 100644 --- a/test/conformance/program/urProgramCreateWithBinary.cpp +++ b/test/conformance/program/urProgramCreateWithBinary.cpp @@ -10,11 +10,14 @@ struct urProgramCreateWithBinaryTest : uur::urProgramTest { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); size_t binary_size = 0; - ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, 0, - nullptr, &binary_size)); + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + sizeof(binary_size), &binary_size, + nullptr)); binary.resize(binary_size); + uint8_t *binary_ptr = binary.data(); ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, - binary_size, binary.data(), nullptr)); + sizeof(binary_ptr), &binary_ptr, + nullptr)); } void TearDown() override { diff --git a/test/conformance/program/urProgramLink.cpp b/test/conformance/program/urProgramLink.cpp index 14bb9ef864..72e2678df1 100644 --- a/test/conformance/program/urProgramLink.cpp +++ 
b/test/conformance/program/urProgramLink.cpp @@ -9,62 +9,48 @@ struct urProgramLinkTest : uur::urProgramTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); ASSERT_SUCCESS(urProgramCompile(context, program, nullptr)); - programs.push_back(program); - - uur::KernelsEnvironment::instance->LoadSource("bar", 0, bar_binary); - ASSERT_SUCCESS(urProgramCreateWithIL(context, bar_binary->data(), - bar_binary->size(), nullptr, - &bar_program)); - ASSERT_SUCCESS(urProgramCompile(context, bar_program, nullptr)); - programs.push_back(bar_program); } void TearDown() override { - if (bar_program) { - EXPECT_SUCCESS(urProgramRelease(bar_program)); - } if (linked_program) { EXPECT_SUCCESS(urProgramRelease(linked_program)); } UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::TearDown()); } - ur_program_handle_t bar_program = nullptr; ur_program_handle_t linked_program = nullptr; - std::shared_ptr> bar_binary; - std::vector programs; }; UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urProgramLinkTest); TEST_P(urProgramLinkTest, Success) { - ASSERT_SUCCESS(urProgramLink(context, programs.size(), programs.data(), - nullptr, &linked_program)); + ASSERT_SUCCESS( + urProgramLink(context, 1, &program, nullptr, &linked_program)); ur_program_binary_type_t binary_type = UR_PROGRAM_BINARY_TYPE_NONE; ASSERT_SUCCESS(urProgramGetBuildInfo( - program, device, UR_PROGRAM_BUILD_INFO_BINARY_TYPE, sizeof(binary_type), - &binary_type, nullptr)); + linked_program, device, UR_PROGRAM_BUILD_INFO_BINARY_TYPE, + sizeof(binary_type), &binary_type, nullptr)); + ASSERT_EQ(binary_type, UR_PROGRAM_BINARY_TYPE_EXECUTABLE); } TEST_P(urProgramLinkTest, InvalidNullHandleContext) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urProgramLink(nullptr, programs.size(), programs.data(), - nullptr, &linked_program)); + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_HANDLE, + urProgramLink(nullptr, 1, &program, nullptr, &linked_program)); } TEST_P(urProgramLinkTest, InvalidNullPointerProgram) { 
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramLink(context, programs.size(), programs.data(), - nullptr, nullptr)); + urProgramLink(context, 1, &program, nullptr, nullptr)); } TEST_P(urProgramLinkTest, InvalidNullPointerInputPrograms) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urProgramLink(context, programs.size(), nullptr, nullptr, - &linked_program)); + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urProgramLink(context, 1, nullptr, nullptr, &linked_program)); } TEST_P(urProgramLinkTest, InvalidSizeCount) { ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_SIZE, - urProgramLink(context, 0, programs.data(), nullptr, &linked_program)); + urProgramLink(context, 0, &program, nullptr, &linked_program)); } From 36492ad01acca2cbbef64438f18566d5dd63bf2d Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 15 Dec 2023 14:47:23 +0000 Subject: [PATCH 073/138] Update level zero match file --- test/conformance/program/program_adapter_level_zero.match | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/conformance/program/program_adapter_level_zero.match b/test/conformance/program/program_adapter_level_zero.match index 7a1c0d5b8e..5bbdfd554c 100644 --- a/test/conformance/program/program_adapter_level_zero.match +++ b/test/conformance/program/program_adapter_level_zero.match @@ -1 +1,6 @@ -Segmentation fault +urProgramCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramGetBuildInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_UR_PROGRAM_BUILD_INFO_STATUS +urProgramGetFunctionPointerTest.InvalidFunctionName/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +Aborted From 
d0d40de3aa6d4c952142fa6313810ae100a1bf02 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 15 Dec 2023 16:55:08 +0000 Subject: [PATCH 074/138] PR feedback and update CL match file. --- test/conformance/program/program_adapter_opencl.match | 7 ------- test/conformance/program/urProgramCreateWithBinary.cpp | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/test/conformance/program/program_adapter_opencl.match b/test/conformance/program/program_adapter_opencl.match index 716bf27d9d..0d429016ee 100644 --- a/test/conformance/program/program_adapter_opencl.match +++ b/test/conformance/program/program_adapter_opencl.match @@ -1,10 +1,3 @@ -urProgramCreateWithBinaryTest.Success/Intel_R__OpenCL___{{.*}}_ -urProgramCreateWithBinaryTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ -urProgramCreateWithBinaryTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}_ -urProgramCreateWithBinaryTest.InvalidNullPointerBinary/Intel_R__OpenCL___{{.*}}_ -urProgramCreateWithBinaryTest.InvalidNullPointerProgram/Intel_R__OpenCL___{{.*}}_ -urProgramCreateWithBinaryTest.InvalidNullPointerMetadata/Intel_R__OpenCL___{{.*}}_ -urProgramCreateWithBinaryTest.InvalidSizePropertyCount/Intel_R__OpenCL___{{.*}}_ urProgramGetFunctionPointerTest.InvalidFunctionName/Intel_R__OpenCL___{{.*}}_ urProgramGetInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_PROGRAM_INFO_SOURCE urProgramGetInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_PROGRAM_INFO_BINARIES diff --git a/test/conformance/program/urProgramCreateWithBinary.cpp b/test/conformance/program/urProgramCreateWithBinary.cpp index fb6c8b128c..3fb6e3a268 100644 --- a/test/conformance/program/urProgramCreateWithBinary.cpp +++ b/test/conformance/program/urProgramCreateWithBinary.cpp @@ -9,6 +9,11 @@ struct urProgramCreateWithBinaryTest : uur::urProgramTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); + size_t binary_sizes_len = 0; + 
ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + 0, nullptr, &binary_sizes_len)); + // We're expecting one binary + ASSERT_EQ(binary_sizes_len / sizeof(size_t), 1); size_t binary_size = 0; ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, sizeof(binary_size), &binary_size, From 66b2ac38a6710c678da3be90ff160e1fff41b226 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Mon, 6 Nov 2023 13:58:17 +0100 Subject: [PATCH 075/138] [CI][OpenCL] adapt e2e workflow to opencl target --- .github/workflows/e2e_nightly.yml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml index eebb1f7bfa..e3cda49245 100644 --- a/.github/workflows/e2e_nightly.yml +++ b/.github/workflows/e2e_nightly.yml @@ -11,7 +11,8 @@ jobs: strategy: matrix: adapter: [ - {name: CUDA} + {name: CUDA, str_name: cuda, prefix: "ext_oneapi_", config: "--cuda --hip", unit: "gpu"}, + {name: OPENCL, str_name: opencl, prefix: "", config: "", unit: "cpu"} ] build_type: [Release] compiler: [{c: clang, cxx: clang++}] @@ -59,12 +60,18 @@ jobs: run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc) - - name: Set env vars & pre setup + - name: Set prefer UR + run: echo "SYCL_PREFER_UR=1" >> $GITHUB_ENV + + - name: Set CUDA env vars + if: matrix.adapter.name == 'CUDA' run: | - echo "SYCL_PREFER_UR=1" >> $GITHUB_ENV echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV - source /opt/intel/oneapi/setvars.sh + + - name: Run pre setup + run: | + source /opt/intel/oneapi/setvars.sh --force sycl-ls - name: Configure SYCL @@ -73,7 +80,7 @@ jobs: -t ${{matrix.build_type}} -o ${{github.workspace}}/sycl_build --cmake-gen "Unix Makefiles" - --ci-defaults --cuda --hip + --ci-defaults 
${{matrix.adapter.config}} --cmake-opt="-DLLVM_INSTALL_UTILS=ON" --cmake-opt="-DSYCL_PI_TESTS=OFF" --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache @@ -91,7 +98,7 @@ jobs: - name: Swap UR loader and adapters run: | cp ${{github.workspace}}/ur-repo/build/lib/libur_loader.so* ${{github.workspace}}/sycl_build/lib/ - cp ${{github.workspace}}/ur-repo/build/lib/libur_adapter_cuda.so* ${{github.workspace}}/sycl_build/lib/ + cp ${{github.workspace}}/ur-repo/build/lib/libur_adapter_${{matrix.adapter.str_name}}.so* ${{github.workspace}}/sycl_build/lib/ - name: Set additional env. vars run: | @@ -110,7 +117,7 @@ jobs: -GNinja -B ${{github.workspace}}/build-e2e/ -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/ - -DSYCL_TEST_E2E_TARGETS="ext_oneapi_cuda:gpu" + -DSYCL_TEST_E2E_TARGETS="${{matrix.adapter.prefix}}${{matrix.adapter.str_name}}:${{matrix.adapter.unit}}" -DCMAKE_CXX_COMPILER="$(which clang++)" -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py" From 0942e74cf3a78d2d624023d271234b388d1df9d8 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Sat, 16 Dec 2023 21:01:40 +0100 Subject: [PATCH 076/138] Fix getArrayDesc on ROCm 6 and make it return error codes --- source/adapters/hip/common.hpp | 35 ++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/source/adapters/hip/common.hpp b/source/adapters/hip/common.hpp index 2649657f47..d7eea780a5 100644 --- a/source/adapters/hip/common.hpp +++ b/source/adapters/hip/common.hpp @@ -15,24 +15,39 @@ #include #include -// Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be -// indexed, but on NVidia it is an opaque type and needs to go through -// cuArrayGetDescriptor so implement a utility function to get the array -// properties -inline void getArrayDesc(hipArray *Array, hipArray_Format &Format, - size_t &Channels) { +// Before ROCm 6, hipify doesn't support cuArrayGetDescriptor, on AMD the +// hipArray can just be indexed, but on NVidia it is an 
opaque type and needs to +// go through cuArrayGetDescriptor so implement a utility function to get the +// array properties +inline static hipError_t getArrayDesc(hipArray *Array, hipArray_Format &Format, + size_t &Channels) { +#if HIP_VERSION_MAJOR >= 6 + HIP_ARRAY_DESCRIPTOR ArrayDesc; + hipError_t err = hipArrayGetDescriptor(&ArrayDesc, Array); + if (err == hipSuccess) { + Format = ArrayDesc.Format; + Channels = ArrayDesc.NumChannels; + } + return err; +#else #if defined(__HIP_PLATFORM_AMD__) Format = Array->Format; Channels = Array->NumChannels; + return hipSuccess; #elif defined(__HIP_PLATFORM_NVIDIA__) CUDA_ARRAY_DESCRIPTOR ArrayDesc; - cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array); - - Format = ArrayDesc.Format; - Channels = ArrayDesc.NumChannels; + CUresult err = cuArrayGetDescriptor(&ArrayDesc, (CUarray)Array); + if (err == CUDA_SUCCESS) { + Format = ArrayDesc.Format; + Channels = ArrayDesc.NumChannels; + return hipSuccess; + } else { + return hipErrorUnknown; // No easy way to map CUerror to hipError + } #else #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif +#endif } // HIP on NVIDIA headers guard hipArray3DCreate behind __CUDACC__, this does not From 205953dad7d0be3963888c84541128a8ce9dd8da Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Sat, 16 Dec 2023 21:02:07 +0100 Subject: [PATCH 077/138] Check error codes from getArrayDesc --- source/adapters/hip/enqueue.cpp | 8 ++++---- source/adapters/hip/kernel.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 5f7fffba35..7875650b85 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1017,7 +1017,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hipArray_Format Format; size_t NumChannels; - getArrayDesc(Array, Format, NumChannels); + UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels)); int ElementByteSize = 
imageElementByteSize(Format); @@ -1078,7 +1078,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hipArray_Format Format; size_t NumChannels; - getArrayDesc(Array, Format, NumChannels); + UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels)); int ElementByteSize = imageElementByteSize(Format); @@ -1141,13 +1141,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( std::get(hImageSrc->Mem).getArray(hQueue->getDevice()); hipArray_Format SrcFormat; size_t SrcNumChannels; - getArrayDesc(SrcArray, SrcFormat, SrcNumChannels); + UR_CHECK_ERROR(getArrayDesc(SrcArray, SrcFormat, SrcNumChannels)); hipArray *DstArray = std::get(hImageDst->Mem).getArray(hQueue->getDevice()); hipArray_Format DstFormat; size_t DstNumChannels; - getArrayDesc(DstArray, DstFormat, DstNumChannels); + UR_CHECK_ERROR(getArrayDesc(DstArray, DstFormat, DstNumChannels)); UR_ASSERT(SrcFormat == DstFormat, UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp index ec58bafcc6..e3eb37dc88 100644 --- a/source/adapters/hip/kernel.cpp +++ b/source/adapters/hip/kernel.cpp @@ -282,7 +282,7 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, auto array = std::get(hArgValue->Mem).getArray(Device); hipArray_Format Format; size_t NumChannels; - getArrayDesc(array, Format, NumChannels); + UR_CHECK_ERROR(getArrayDesc(array, Format, NumChannels)); if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 && Format != HIP_AD_FORMAT_SIGNED_INT32 && Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) { From 34831f4bf1db420c2f993a998a434782b42961b3 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Sat, 16 Dec 2023 21:02:22 +0100 Subject: [PATCH 078/138] ROCm 6: use hipPointerAttributeType.type when supported --- source/adapters/hip/usm.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index e63379d13b..abd8c2e97f 
100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -72,7 +72,11 @@ USMFreeImpl([[maybe_unused]] ur_context_handle_t hContext, void *pMem) { try { hipPointerAttribute_t hipPointerAttributeType; UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); - unsigned int Type = hipPointerAttributeType.memoryType; +#if HIP_VERSION >= 50600000 + const auto Type = hipPointerAttributeType.type; +#else + const auto Type = hipPointerAttributeType.memoryType; +#endif UR_ASSERT(Type == hipMemoryTypeDevice || Type == hipMemoryTypeHost, UR_RESULT_ERROR_INVALID_MEM_OBJECT); if (Type == hipMemoryTypeDevice) { @@ -170,7 +174,11 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return ReturnValue(UR_USM_TYPE_SHARED); } UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); +#if HIP_VERSION >= 50600000 + Value = hipPointerAttributeType.type; +#else Value = hipPointerAttributeType.memoryType; +#endif UR_ASSERT(Value == hipMemoryTypeDevice || Value == hipMemoryTypeHost, UR_RESULT_ERROR_INVALID_MEM_OBJECT); if (Value == hipMemoryTypeDevice) { From 0b6538715b387f33e5d54022d8b9cc1fa24ad687 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Tue, 19 Dec 2023 12:40:06 +0100 Subject: [PATCH 079/138] remove outdated README section about adapters --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 5eea3c7570..226dbfbfe5 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,6 @@ [![Coverity](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) [![codecov.io](https://codecov.io/github/oneapi-src/unified-runtime/coverage.svg?branch=main)](https://codecov.io/github/oneapi-src/unified-runtime?branch=master) -## Adapters -Adapter implementations for Unified Runtime currently reside in the [SYCL repository](https://github.com/intel/llvm/tree/sycl/sycl/plugins/unified_runtime/ur). 
This branch contains scripts to automatically -fetch and build them directly in the UR tree. The adapters are disabled by default, -see cmake options for details. - ## Table of contents From 7f5dfcc61605d28a53ed50f49406e43d5f5116bd Mon Sep 17 00:00:00 2001 From: pbalcer Date: Tue, 19 Dec 2023 15:09:30 +0100 Subject: [PATCH 080/138] [cuda][null][common] fix a few coverity issues --- source/adapters/cuda/device.cpp | 5 ++++- source/adapters/cuda/event.cpp | 6 +++++- source/adapters/cuda/sampler.cpp | 1 - source/adapters/null/ur_null.cpp | 4 ++-- source/common/ur_util.hpp | 2 ++ 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 45297dea56..0723cfe4e7 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -16,6 +16,7 @@ #include "context.hpp" #include "device.hpp" #include "platform.hpp" +#include "ur_util.hpp" int getAttribute(ur_device_handle_t device, CUdevice_attribute attribute) { int value; @@ -40,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, ur_device_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { + size_t *pPropSizeRet) try { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); static constexpr uint32_t MaxWorkItemDimensions = 3u; @@ -1034,6 +1035,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, break; } return UR_RESULT_ERROR_INVALID_ENUMERATION; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); } /// \return PI_SUCCESS if the function is executed successfully diff --git a/source/adapters/cuda/event.cpp b/source/adapters/cuda/event.cpp index 2cbfcbc39b..804b35a9b7 100644 --- a/source/adapters/cuda/event.cpp +++ b/source/adapters/cuda/event.cpp @@ -12,6 +12,8 @@ #include "context.hpp" #include "device.hpp" #include "queue.hpp" +#include "ur_api.h" +#include "ur_util.hpp" #include #include @@ -65,7 +67,7 @@ ur_result_t ur_event_handle_t_::start() { return Result; } -bool ur_event_handle_t_::isCompleted() const noexcept { +bool ur_event_handle_t_::isCompleted() const noexcept try { if (!IsRecorded) { return false; } @@ -80,6 +82,8 @@ bool ur_event_handle_t_::isCompleted() const noexcept { } } return true; +} catch (...) { + return exceptionToResult(std::current_exception()) == UR_RESULT_SUCCESS; } uint64_t ur_event_handle_t_::getQueuedTime() const { diff --git a/source/adapters/cuda/sampler.cpp b/source/adapters/cuda/sampler.cpp index 0e1305da23..ce4283edd3 100644 --- a/source/adapters/cuda/sampler.cpp +++ b/source/adapters/cuda/sampler.cpp @@ -71,7 +71,6 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, default: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - return {}; } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/source/adapters/null/ur_null.cpp b/source/adapters/null/ur_null.cpp index d79b607ed1..094f28c8fb 100644 --- a/source/adapters/null/ur_null.cpp +++ b/source/adapters/null/ur_null.cpp @@ -172,7 +172,7 @@ context_t::context_t() { return UR_RESULT_ERROR_UNSUPPORTED_SIZE; } *ppMem = malloc(size); - if (ppMem == nullptr) { + if (*ppMem == nullptr) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } return UR_RESULT_SUCCESS; @@ -187,7 +187,7 @@ context_t::context_t() { return UR_RESULT_ERROR_UNSUPPORTED_SIZE; } *ppMem = malloc(size); - if (ppMem == nullptr) { + if (*ppMem == nullptr) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } return UR_RESULT_SUCCESS; diff --git 
a/source/common/ur_util.hpp b/source/common/ur_util.hpp index a73f348b52..00aaf8eee2 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -288,6 +288,8 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) { return UR_RESULT_SUCCESS; } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (const ur_result_t &e) { + return e; } catch (...) { return UR_RESULT_ERROR_UNKNOWN; } From f16d31d7ec4fd05b2a1beee616c76582ba0b0465 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Tue, 19 Dec 2023 11:51:10 +0100 Subject: [PATCH 081/138] [common] remove the use of std::getenv from common code MSVC complains about it being insecure. --- source/common/ur_util.hpp | 3 ++- source/ur/ur.hpp | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index a73f348b52..e0fd820cae 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -22,6 +22,8 @@ #include #ifdef _WIN32 +#define NOMINMAX + #include inline int ur_getpid(void) { return static_cast(GetCurrentProcessId()); } #else @@ -59,7 +61,6 @@ inline int ur_getpid(void) { return static_cast(getpid()); } #endif /////////////////////////////////////////////////////////////////////////////// #if defined(_WIN32) -#include #define MAKE_LIBRARY_NAME(NAME, VERSION) NAME ".dll" #else #define HMODULE void * diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index da5ef0d81f..11d619ea04 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -23,6 +23,8 @@ #include +#include "ur_util.hpp" + template To ur_cast(From Value) { // TODO: see if more sanity checks are possible. assert(sizeof(From) == sizeof(To)); @@ -61,9 +63,10 @@ const ur_command_t UR_EXT_COMMAND_TYPE_USER = // overhead from mutex locking. Default value is 0 which means that single // thread mode is disabled. 
static const bool SingleThreadMode = [] { - const char *UrRet = std::getenv("UR_L0_SINGLE_THREAD_MODE"); - const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE"); - const bool RetVal = UrRet ? std::stoi(UrRet) : (PiRet ? std::stoi(PiRet) : 0); + auto UrRet = ur_getenv("UR_L0_SINGLE_THREAD_MODE"); + auto PiRet = ur_getenv("SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE"); + const bool RetVal = + UrRet ? std::stoi(*UrRet) : (PiRet ? std::stoi(*PiRet) : 0); return RetVal; }(); From 3b1ef22ab24a1e62d63c548787c5c843d24ccc6d Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Thu, 21 Dec 2023 13:07:11 +0000 Subject: [PATCH 082/138] Remove event.hpp inclusion --- source/adapters/native_cpu/physical_mem.cpp | 1 - source/adapters/native_cpu/virtual_mem.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/source/adapters/native_cpu/physical_mem.cpp b/source/adapters/native_cpu/physical_mem.cpp index 0593ff9403..7c535bfcca 100644 --- a/source/adapters/native_cpu/physical_mem.cpp +++ b/source/adapters/native_cpu/physical_mem.cpp @@ -11,7 +11,6 @@ #include "physical_mem.hpp" #include "common.hpp" #include "context.hpp" -#include "event.hpp" UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( ur_context_handle_t, ur_device_handle_t, size_t, diff --git a/source/adapters/native_cpu/virtual_mem.cpp b/source/adapters/native_cpu/virtual_mem.cpp index ffdbb15810..131b480ac1 100644 --- a/source/adapters/native_cpu/virtual_mem.cpp +++ b/source/adapters/native_cpu/virtual_mem.cpp @@ -10,7 +10,6 @@ #include "common.hpp" #include "context.hpp" -#include "event.hpp" #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( From c5c129e8094eef0befa30d9281b32276263cf265 Mon Sep 17 00:00:00 2001 From: Luke Drummond Date: Thu, 21 Dec 2023 14:46:41 +0000 Subject: [PATCH 083/138] [hip] Fix HSA headers lookup ROCm installations prior to v6 don't respect the traditional installation layout and install the HSA headers to `$PREFIX/hsa/include/hsa` 
whereas in rocm6 it looks like they're putting it in the right place at `$PREFIX/include/hsa`. That cleanup from AMD is good news, but it means that our workarounds now break and we need to check both places. --- source/adapters/hip/CMakeLists.txt | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 1ed9d52c2b..6db430dffd 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -13,7 +13,8 @@ set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include") -set(UR_HIP_HSA_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/hsa/include") +set(UR_HIP_HSA_INCLUDE_DIRS + "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include") # Set HIP lib dir set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib") @@ -31,9 +32,16 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD") endif() # Check if HSA include path exists - if(NOT EXISTS "${UR_HIP_HSA_INCLUDE_DIR}") - message(FATAL_ERROR "Couldn't find the HSA include directory at '${UR_HIP_HSA_INCLUDE_DIR}'," - " please check ROCm installation.") + foreach(D IN LISTS UR_HIP_HSA_INCLUDE_DIRS) + if(EXISTS "${D}") + set(UR_HIP_HSA_INCLUDE_DIR "${D}") + break() + endif() + endforeach() + if(NOT UR_HIP_HSA_INCLUDE_DIR) + message(FATAL_ERROR "Couldn't find the HSA include directory in any of " + "these paths: '${UR_HIP_HSA_INCLUDE_DIRS}'. Please check ROCm " + "installation.") endif() endif() From da145aaa355e5a5c9a36ca6b373d33f121eef825 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 1 Nov 2023 15:02:33 +0000 Subject: [PATCH 084/138] Add UR_ENABLE_ASSERTIONS flag for enabling asserts on all build types. 
--- CMakeLists.txt | 3 +++ cmake/Assertions.cmake | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 cmake/Assertions.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 80a9f64ea7..fbf9947688 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,6 +47,9 @@ option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable") set(UR_SYCL_LIBRARY_DIR "" CACHE PATH "Path of the SYCL runtime library directory") +option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF) + +include(Assertions) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) diff --git a/cmake/Assertions.cmake b/cmake/Assertions.cmake new file mode 100644 index 0000000000..9d8f6c0f26 --- /dev/null +++ b/cmake/Assertions.cmake @@ -0,0 +1,30 @@ +# From the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This is lifted from llvm's LLVM_ENABLE_ASSERTIONS implementation +# https://github.com/llvm/llvm-project/blob/6be0e979896f7dd610abf263f845c532f1be3762/llvm/cmake/modules/HandleLLVMOptions.cmake#L89 +if(UR_ENABLE_ASSERTIONS) + # MSVC doesn't like _DEBUG on release builds + if( NOT MSVC ) + add_compile_definitions(_DEBUG) + endif() + # On non-Debug builds cmake automatically defines NDEBUG, so we + # explicitly undefine it: + if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) + add_compile_options($<$,$>:-UNDEBUG>) + if (MSVC) + # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines. 
+ foreach (flags_var_to_scrub + CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS_MINSIZEREL) + string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " " + "${flags_var_to_scrub}" "${${flags_var_to_scrub}}") + endforeach() + endif() + endif() +endif() From 557a3a17669712d5826fc77705b1c339f5b66a76 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Mon, 21 Aug 2023 17:56:48 +0100 Subject: [PATCH 085/138] [CMDBUF] Add fill commands to cmd buffer exp feature - Adds USM and Buffer fill append commands - Update feature spec for new commands - Align naming conventions for Append* commands with core equivalents - Also includes stubs for CUDA and HIP adapters --- scripts/core/EXP-COMMAND-BUFFER.rst | 1 + source/adapters/cuda/command_buffer.cpp | 43 +++++++++++ source/adapters/cuda/ur_interface_loader.cpp | 2 + source/adapters/hip/command_buffer.cpp | 18 +++++ source/adapters/hip/ur_interface_loader.cpp | 2 + source/adapters/level_zero/command_buffer.cpp | 77 +++++++++++++++++++ .../level_zero/ur_interface_loader.cpp | 2 + 7 files changed, 145 insertions(+) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..7b1f1d54b9 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -103,6 +103,7 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp +>>>>>>> 118f696b ([CMDBUF] Add fill commands to cmd buffer exp feature) It is planned to eventually support any command type from the Core API which can actually be appended to the equiavalent adapter native constructs. 
diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index dd97f48d6a..1b1faa870a 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -596,6 +596,49 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return Result; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)hBuffer; + (void)pPattern; + (void)patternSize; + (void)offset; + (void)size; + + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pPtr, + const void *pPattern, size_t patternSize, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + (void)hCommandBuffer; + (void)pPtr; + (void)pPattern; + (void)patternSize; + (void)size; + + (void)numSyncPointsInWaitList; + (void)pSyncPointWaitList; + (void)pSyncPoint; + + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for CUDA adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, diff --git a/source/adapters/cuda/ur_interface_loader.cpp 
b/source/adapters/cuda/ur_interface_loader.cpp index af18d96017..f31ffe6d87 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -279,6 +279,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; pDdiTable->pfnAppendMemBufferCopyRectExp = urCommandBufferAppendMemBufferCopyRectExp; @@ -291,6 +292,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index c7609b6110..54a6fa2f4e 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -137,6 +137,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t, ur_mem_handle_t, const void *, size_t, + size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + 
ur_exp_command_buffer_handle_t, void *, const void *, size_t, size_t, + uint32_t, const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t, ur_queue_handle_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index f23d395d1a..7707e78425 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -276,6 +276,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; pDdiTable->pfnAppendMemBufferCopyRectExp = urCommandBufferAppendMemBufferCopyRectExp; @@ -289,6 +290,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bb081f9b2d..4b811ab033 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -379,6 +379,48 @@ static ur_result_t enqueueCommandBufferMemCopyRectHelper( return UR_RESULT_SUCCESS; } 
+// Helper function for enqueuing memory fills +static ur_result_t enqueueCommandBufferFillHelper( + ur_command_t CommandType, ur_exp_command_buffer_handle_t CommandBuffer, + void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, + uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + // Pattern size must be a power of two. + UR_ASSERT((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0), + UR_RESULT_ERROR_INVALID_VALUE); + + // Pattern size must fit the compute queue capabilities. + UR_ASSERT( + PatternSize <= + CommandBuffer->Device + ->QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] + .ZeProperties.maxMemoryFillPatternSize, + UR_RESULT_ERROR_INVALID_VALUE); + + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent)); + LaunchEvent->CommandType = CommandType; + + // Get sync point and register the event with it. 
+ *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + + ZE2UR_CALL(zeCommandListAppendMemoryFill, + (CommandBuffer->ZeCommandList, Ptr, Pattern, PatternSize, Size, + LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); + + urPrint("calling zeCommandListAppendMemoryFill() with" + " ZeEvent %#lx\n", + ur_cast(LaunchEvent->ZeEvent)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, @@ -783,6 +825,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, + const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, + uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + + std::scoped_lock Lock(Buffer->Mutex); + + char *ZeHandleDst = nullptr; + _ur_buffer *UrBuffer = reinterpret_cast<_ur_buffer *>(Buffer); + UR_CALL(UrBuffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, + CommandBuffer->Device)); + + return enqueueCommandBufferFillHelper( + UR_COMMAND_MEM_BUFFER_FILL, CommandBuffer, ZeHandleDst + Offset, + Pattern, // It will be interpreted as an 8-bit value, + PatternSize, // which is indicated with this pattern_size==1 + Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t CommandBuffer, void *Ptr, + const void *Pattern, size_t PatternSize, size_t Size, + uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + + return 
enqueueCommandBufferFillHelper( + UR_COMMAND_MEM_BUFFER_FILL, CommandBuffer, Ptr, + Pattern, // It will be interpreted as an 8-bit value, + PatternSize, // which is indicated with this pattern_size==1 + Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 5371fac082..74d0706b31 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -326,6 +326,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; pDdiTable->pfnAppendMemBufferCopyRectExp = urCommandBufferAppendMemBufferCopyRectExp; @@ -338,6 +339,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 3c4d445fee5e536d85bc7c66d08c57980d5946a4 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Tue, 31 Oct 2023 11:02:45 +0000 Subject: [PATCH 086/138] Add OpenCL fill stubs and fix naming --- source/adapters/opencl/command_buffer.cpp | 11 +++++++++++ source/adapters/opencl/ur_interface_loader.cpp | 2 ++ 
2 files changed, 13 insertions(+) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 25d3311b79..0d9356644a 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -331,6 +331,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] void *pPtr, [[maybe_unused]] const void *pPattern, + [[maybe_unused]] size_t PatternSize, [[maybe_unused]] size_t Size, + [[maybe_unused]] uint32_t NumSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index b9887b1b1a..ac2c33475b 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -286,6 +286,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; pDdiTable->pfnAppendMemBufferCopyRectExp = urCommandBufferAppendMemBufferCopyRectExp; @@ -298,6 +299,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetCommandBufferExpProcAddrTable( urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; return retVal; From 4813bd080aad4e4b87b16d57726a81e932e2c5bf Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 1 Dec 2023 16:47:51 +0000 Subject: [PATCH 087/138] fixes rebase issue --- source/adapters/opencl/command_buffer.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 0d9356644a..c0c2bfd915 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -136,17 +136,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] void *pMemory, [[maybe_unused]] const void *pPattern, - [[maybe_unused]] size_t patternSize, [[maybe_unused]] size_t size, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, From e4950aa9bcfee8d346a665d5bad7489683a37e3b Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 1 Dec 2023 16:55:38 +0000 Subject: [PATCH 088/138] mend --- source/adapters/opencl/command_buffer.cpp | 11 
+++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index c0c2bfd915..0d9356644a 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -136,6 +136,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] void *pMemory, [[maybe_unused]] const void *pPattern, + [[maybe_unused]] size_t patternSize, [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, From 2882e1f1dbbde66f336cf5cd71c48c41c092ff0d Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 1 Dec 2023 17:33:19 +0000 Subject: [PATCH 089/138] fixes naming issue --- source/adapters/opencl/command_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 0d9356644a..b880f94053 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -273,7 +273,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferFillExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t 
hCommandBuffer, ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, From a9a325dec54d46e42f5de84049d89597c0a17c4d Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 4 Dec 2023 10:13:32 +0000 Subject: [PATCH 090/138] Adds CUDA support --- source/adapters/cuda/command_buffer.cpp | 137 +++++++++++++++++++----- 1 file changed, 110 insertions(+), 27 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 1b1faa870a..379afeb687 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -99,6 +99,90 @@ static void setCopyParams(const void *SrcPtr, const CUmemorytype_enum SrcType, Params.Depth = 1; } +// Helper function for enqueuing memory fills +static ur_result_t enqueueCommandBufferFillHelper( + ur_exp_command_buffer_handle_t CommandBuffer, void *DstDevice, + const CUmemorytype_enum DstType, const void *Pattern, size_t PatternSize, + size_t Size, uint32_t NumSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint) { + ur_result_t Result; + std::vector DepsList; + UR_CALL(getNodesFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, DepsList)); + + try { + size_t N = Size / PatternSize; + auto Value = *static_cast(Pattern); + auto DstPtr = DstType == CU_MEMORYTYPE_DEVICE + ? 
*static_cast(DstDevice) + : (CUdeviceptr)DstDevice; + + if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) { + // Create a new node + CUgraphNode GraphNode; + CUDA_MEMSET_NODE_PARAMS NodeParams = {}; + NodeParams.dst = DstPtr; + NodeParams.elementSize = PatternSize; + NodeParams.height = N; + NodeParams.pitch = PatternSize; + NodeParams.value = Value; + NodeParams.width = 1; + + Result = UR_CHECK_ERROR(cuGraphAddMemsetNode( + &GraphNode, CommandBuffer->CudaGraph, DepsList.data(), + DepsList.size(), &NodeParams, CommandBuffer->Device->getContext())); + + // Get sync point and register the cuNode with it. + *SyncPoint = + CommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + + } else { + // CUDA has no memset functions that allow setting values more than 4 + // bytes. UR API lets you pass an arbitrary "pattern" to the buffer + // fill, which can be more than 4 bytes. We must break up the pattern + // into 4 byte values, and set the buffer using multiple strided calls. + // This means that one cuGraphAddMemsetNode call is made for every 4 bytes + // in the pattern. + + size_t NumberOfSteps = PatternSize / sizeof(uint32_t); + + // we walk up the pattern in 4-byte steps, and call cuMemset for each + // 4-byte chunk of the pattern. 
+ for (auto Step = 0u; Step < NumberOfSteps; ++Step) { + // take 4 bytes of the pattern + auto Value = *(static_cast(Pattern) + Step); + + // offset the pointer to the part of the buffer we want to write to + auto OffsetPtr = DstPtr + (Step * sizeof(uint32_t)); + + // Create a new node + CUgraphNode GraphNode; + // Update NodeParam + CUDA_MEMSET_NODE_PARAMS NodeParamsStep = {}; + NodeParamsStep.dst = (CUdeviceptr)OffsetPtr; + NodeParamsStep.elementSize = 4; + NodeParamsStep.height = N; + NodeParamsStep.pitch = PatternSize; + NodeParamsStep.value = Value; + NodeParamsStep.width = 1; + + Result = UR_CHECK_ERROR(cuGraphAddMemsetNode( + &GraphNode, CommandBuffer->CudaGraph, DepsList.data(), + DepsList.size(), &NodeParamsStep, + CommandBuffer->Device->getContext())); + + // Get sync point and register the cuNode with it. + *SyncPoint = CommandBuffer->AddSyncPoint( + std::make_shared(GraphNode)); + } + } + } catch (ur_result_t Err) { + Result = Err; + } + return Result; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_exp_command_buffer_desc_t *pCommandBufferDesc, @@ -602,20 +686,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)hBuffer; - (void)pPattern; - (void)patternSize; - (void)offset; - (void)size; - - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + auto ArgsAreMultiplesOfPatternSize = + (offset % patternSize == 0) || (size % patternSize == 0); + + auto PatternIsValid = (pPattern != nullptr); + + auto PatternSizeIsValid = ((patternSize & (patternSize - 1)) == 0) && + (patternSize > 0); // is a 
positive power of two + UR_ASSERT(ArgsAreMultiplesOfPatternSize && PatternIsValid && + PatternSizeIsValid, + UR_RESULT_ERROR_INVALID_SIZE); + + auto DstDevice = std::get(hBuffer->Mem).get() + offset; + + return enqueueCommandBufferFillHelper( + hCommandBuffer, &DstDevice, CU_MEMORYTYPE_DEVICE, pPattern, patternSize, + size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( @@ -624,19 +710,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - (void)hCommandBuffer; - (void)pPtr; - (void)pPattern; - (void)patternSize; - (void)size; - - (void)numSyncPointsInWaitList; - (void)pSyncPointWaitList; - (void)pSyncPoint; - - detail::ur::die("Experimental Command-buffer feature is not " - "implemented for CUDA adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + auto PatternIsValid = (pPattern != nullptr); + + auto PatternSizeIsValid = ((patternSize & (patternSize - 1)) == 0) && + (patternSize > 0); // is a positive power of two + + UR_ASSERT(PatternIsValid && PatternSizeIsValid, UR_RESULT_ERROR_INVALID_SIZE); + return enqueueCommandBufferFillHelper( + hCommandBuffer, pPtr, CU_MEMORYTYPE_UNIFIED, pPattern, patternSize, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( From 67b9061f42e8acc072b8ddbcb3c2d0835818563c Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 4 Dec 2023 11:05:55 +0000 Subject: [PATCH 091/138] fixes cuda support merge issues --- source/adapters/cuda/command_buffer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 379afeb687..6b62b32b70 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ 
b/source/adapters/cuda/command_buffer.cpp @@ -106,10 +106,11 @@ static ur_result_t enqueueCommandBufferFillHelper( size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - ur_result_t Result; + ur_result_t Result = UR_RESULT_SUCCESS; std::vector DepsList; UR_CALL(getNodesFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, DepsList)); + SyncPointWaitList, DepsList), + Result); try { size_t N = Size / PatternSize; @@ -129,7 +130,7 @@ static ur_result_t enqueueCommandBufferFillHelper( NodeParams.value = Value; NodeParams.width = 1; - Result = UR_CHECK_ERROR(cuGraphAddMemsetNode( + UR_CHECK_ERROR(cuGraphAddMemsetNode( &GraphNode, CommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, CommandBuffer->Device->getContext())); @@ -167,7 +168,7 @@ static ur_result_t enqueueCommandBufferFillHelper( NodeParamsStep.value = Value; NodeParamsStep.width = 1; - Result = UR_CHECK_ERROR(cuGraphAddMemsetNode( + UR_CHECK_ERROR(cuGraphAddMemsetNode( &GraphNode, CommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParamsStep, CommandBuffer->Device->getContext())); From a8ea015c908f0de7f24268e63161cf82ebb98d14 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 6 Dec 2023 15:31:51 +0000 Subject: [PATCH 092/138] Removes code artefact --- scripts/core/EXP-COMMAND-BUFFER.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 7b1f1d54b9..a6a32a66a1 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -103,7 +103,6 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp ->>>>>>> 118f696b ([CMDBUF] Add fill commands to cmd buffer exp feature) It is planned to eventually support any command type 
from the Core API which can actually be appended to the equiavalent adapter native constructs. From 03c270d14c24c96302f5d7e021b1b2c613d496f2 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 8 Dec 2023 14:14:33 +0000 Subject: [PATCH 093/138] Add const variable --- source/adapters/cuda/command_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 6b62b32b70..a65530a1f1 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -113,7 +113,7 @@ static ur_result_t enqueueCommandBufferFillHelper( Result); try { - size_t N = Size / PatternSize; + const size_t N = Size / PatternSize; auto Value = *static_cast(Pattern); auto DstPtr = DstType == CU_MEMORYTYPE_DEVICE ? *static_cast(DstDevice) From 3ee71a71da6863c51bf731fb76c08a7c8afaa026 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 3 Jan 2024 17:10:56 +0000 Subject: [PATCH 094/138] fixup rebase --- source/adapters/opencl/command_buffer.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index b880f94053..74cdd8a03d 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -331,17 +331,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] void *pPtr, [[maybe_unused]] const void *pPattern, - [[maybe_unused]] size_t PatternSize, [[maybe_unused]] size_t Size, - [[maybe_unused]] uint32_t NumSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, From 31db0d8077efbc5691081ec6748f9f80c6c1a056 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Thu, 4 Jan 2024 00:25:42 +0100 Subject: [PATCH 095/138] Sort dependencies in third_party/requirements.txt (to trigger dependabot rescan) --- third_party/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 9aff32b1a4..5308c3554a 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -6,6 +6,7 @@ breathe==4.33.1 bs4==0.0.1 certifi==2019.11.28 chardet==3.0.4 +clang-format==15.0.7 colorama==0.4.1 docutils==0.15.2 exhale==0.3.0 @@ -18,6 +19,7 @@ MarkupSafe==1.1.1 packaging==19.2 Pygments==2.5.2 pyparsing==2.4.5 +pytest>=7.0 pytz==2019.3 PyYAML==5.2 requests==2.22.0 @@ -26,7 +28,7 @@ six==1.13.0 snowballstemmer==2.0.0 soupsieve==1.9.5 Sphinx==4.5.0 -sphinx-rtd-theme==1.0.0 +sphinx-book-theme==0.3.3 sphinxcontrib-applehelp==1.0.2 sphinxcontrib-devhelp==1.0.2 sphinxcontrib-htmlhelp==2.0.0 @@ -34,7 +36,5 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 sphinxcontrib-websupport==1.2.4 -sphinx-book-theme==0.3.3 +sphinx-rtd-theme==1.0.0 urllib3==1.25.7 -pytest>=7.0 -clang-format==15.0.7 From e8be15f3084076568e5a77688b4f117ff36f8b2a Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Wed, 3 Jan 2024 16:32:47 +0000 Subject: [PATCH 096/138] [UR][HIP] Fix include for AMD COMGR --- source/adapters/hip/CMakeLists.txt | 4 ++-- source/adapters/hip/common.hpp | 4 ++++ source/adapters/hip/program.cpp | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 1ed9d52c2b..b7e06f0c63 
100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -108,8 +108,8 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD") INTERFACE_INCLUDE_DIRECTORIES "${HIP_HEADERS}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_HEADERS}" ) - target_link_libraries(pi_hip PUBLIC amd_comgr) - target_compile_definitions(pi_hip PRIVATE SYCL_ENABLE_KERNEL_FUSION) + target_link_libraries(${TARGET_NAME} PUBLIC amd_comgr) + target_compile_definitions(${TARGET_NAME} PRIVATE SYCL_ENABLE_KERNEL_FUSION) endif(UR_ENABLE_COMGR) target_link_libraries(${TARGET_NAME} PRIVATE diff --git a/source/adapters/hip/common.hpp b/source/adapters/hip/common.hpp index d7eea780a5..6459b947b9 100644 --- a/source/adapters/hip/common.hpp +++ b/source/adapters/hip/common.hpp @@ -10,7 +10,11 @@ #pragma once #ifdef SYCL_ENABLE_KERNEL_FUSION +#if (ROCM_VERSION_MAJOR >= 5) #include +#else +#include +#endif #endif #include #include diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 4d0351eb5b..8b8a8407d1 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -11,7 +11,11 @@ #include "program.hpp" #ifdef SYCL_ENABLE_KERNEL_FUSION +#if (ROCM_VERSION_MAJOR >= 5) #include +#else +#include +#endif namespace { template struct COMgrObjCleanUp { From 6847c5d644583102725e286f3429534e70fe1f10 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Wed, 3 Jan 2024 18:28:03 +0000 Subject: [PATCH 097/138] [UR][HIP] Include ROCm version header Signed-off-by: Lukas Sommer --- source/adapters/hip/common.hpp | 1 + source/adapters/hip/program.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/source/adapters/hip/common.hpp b/source/adapters/hip/common.hpp index 6459b947b9..12575e6cae 100644 --- a/source/adapters/hip/common.hpp +++ b/source/adapters/hip/common.hpp @@ -10,6 +10,7 @@ #pragma once #ifdef SYCL_ENABLE_KERNEL_FUSION +#include #if (ROCM_VERSION_MAJOR >= 5) #include #else diff --git a/source/adapters/hip/program.cpp 
b/source/adapters/hip/program.cpp index 8b8a8407d1..9d807d4bb2 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -11,6 +11,7 @@ #include "program.hpp" #ifdef SYCL_ENABLE_KERNEL_FUSION +#include #if (ROCM_VERSION_MAJOR >= 5) #include #else From d398d4aec1f9c2397433dc5a4619d5c30269c622 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Thu, 4 Jan 2024 09:40:49 +0000 Subject: [PATCH 098/138] [UR][HIP] Detect COMGR version in CMake Signed-off-by: Lukas Sommer --- source/adapters/hip/CMakeLists.txt | 15 +++++++++++++++ source/adapters/hip/common.hpp | 7 +++---- source/adapters/hip/program.cpp | 7 +++---- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index b7e06f0c63..e95dcded41 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -101,6 +101,21 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD") ) if(UR_ENABLE_COMGR) + set(UR_COMGR_VERSION5_HEADER "${UR_HIP_INCLUDE_DIR}/amd_comgr/amd_comgr.h") + set(UR_COMGR_VERSION4_HEADER "${UR_HIP_INCLUDE_DIR}/amd_comgr.h") + # The COMGR header changed location between ROCm versions 4 and 5. 
+ # Check for existence in the version 5 location or fallback to version 4 + if(NOT EXISTS "${UR_COMGR_VERSION5_HEADER}") + if(NOT EXISTS "${UR_COMGR_VERSION4_HEADER}") + message(FATAL_ERROR "Could not find AMD COMGR header at " + "${UR_COMGR_VERSION5_HEADER} or" + "${UR_COMGR_VERSION4_HEADER}, " + "check ROCm installation") + else() + target_compile_definitions(${TARGET_NAME} PRIVATE UR_COMGR_VERSION4_INCLUDE) + endif() + endif() + add_library(amd_comgr SHARED IMPORTED GLOBAL) set_target_properties( amd_comgr PROPERTIES diff --git a/source/adapters/hip/common.hpp b/source/adapters/hip/common.hpp index 12575e6cae..be332c280b 100644 --- a/source/adapters/hip/common.hpp +++ b/source/adapters/hip/common.hpp @@ -10,11 +10,10 @@ #pragma once #ifdef SYCL_ENABLE_KERNEL_FUSION -#include -#if (ROCM_VERSION_MAJOR >= 5) -#include -#else +#ifdef UR_COMGR_VERSION4_INCLUDE #include +#else +#include #endif #endif #include diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 9d807d4bb2..9aa64151e0 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -11,11 +11,10 @@ #include "program.hpp" #ifdef SYCL_ENABLE_KERNEL_FUSION -#include -#if (ROCM_VERSION_MAJOR >= 5) -#include -#else +#ifdef UR_COMGR_VERSION4_INCLUDE #include +#else +#include #endif namespace { template From eee75a2920889b0d95b47c361904c9cba89fbbac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio?= Date: Thu, 4 Jan 2024 14:40:00 +0000 Subject: [PATCH 099/138] [SPEC] Clarify the BinaryType output of urProgramCreateWithBinary (#991) --- include/ur_api.h | 3 +++ scripts/core/program.yml | 1 + source/loader/ur_libapi.cpp | 3 +++ source/ur_api.cpp | 3 +++ 4 files changed, 10 insertions(+) diff --git a/include/ur_api.h b/include/ur_api.h index 0403e2b306..5c9c7af5da 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -4037,6 +4037,9 @@ urProgramCreateWithIL( /// /// @details /// - The application may call this function from simultaneous threads. 
+/// - Following a successful call to this entry point, `phProgram` will +/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. /// /// @remarks /// _Analogues_ diff --git a/scripts/core/program.yml b/scripts/core/program.yml index acab24c3bd..88b652210b 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -127,6 +127,7 @@ analogue: - "**clCreateProgramWithBinary**" details: - "The application may call this function from simultaneous threads." + - "Following a successful call to this entry point, `phProgram` will contain a binary of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`." params: - type: $x_context_handle_t name: hContext diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 4b7525d92f..0a69fcd1e2 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2885,6 +2885,9 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// /// @details /// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, `phProgram` will +/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. /// /// @remarks /// _Analogues_ diff --git a/source/ur_api.cpp b/source/ur_api.cpp index eeca6c0c95..2bcc229f29 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2450,6 +2450,9 @@ ur_result_t UR_APICALL urProgramCreateWithIL( /// /// @details /// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, `phProgram` will +/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or +/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`. 
/// /// @remarks /// _Analogues_ From 7344ec4764e1842947a477630de7011773201404 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Fri, 5 Jan 2024 15:58:59 +0000 Subject: [PATCH 100/138] Revert "[HIP] Implement ext_oneapi_queue_priority" --- source/adapters/hip/queue.cpp | 24 +++++------------------- source/adapters/hip/queue.hpp | 5 ++--- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index bf4b636c38..f01fc0e180 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -38,8 +38,8 @@ hipStream_t ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) { // The second check is done after mutex is locked so other threads can not // change NumComputeStreams after that if (NumComputeStreams < ComputeStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithPriority( - &ComputeStreams[NumComputeStreams++], Flags, Priority)); + UR_CHECK_ERROR(hipStreamCreateWithFlags( + &ComputeStreams[NumComputeStreams++], Flags)); } } Token = ComputeStreamIdx++; @@ -97,8 +97,8 @@ hipStream_t ur_queue_handle_t_::getNextTransferStream() { // The second check is done after mutex is locked so other threads can not // change NumTransferStreams after that if (NumTransferStreams < TransferStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithPriority( - &TransferStreams[NumTransferStreams++], Flags, Priority)); + UR_CHECK_ERROR(hipStreamCreateWithFlags( + &TransferStreams[NumTransferStreams++], Flags)); } } uint32_t Stream_i = TransferStreamIdx++ % TransferStreams.size(); @@ -118,19 +118,6 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, std::unique_ptr QueueImpl{nullptr}; unsigned int Flags = 0; - ur_queue_flags_t URFlags = 0; - int Priority = 0; // Not guaranteed, but, in ROCm 5.7, 0 is the default - - if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) { - URFlags = pProps->flags; - if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { - 
ScopedContext Active(hContext->getDevice()); - UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(nullptr, &Priority)); - } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { - ScopedContext Active(hContext->getDevice()); - UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(&Priority, nullptr)); - } - } const bool IsOutOfOrder = pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE @@ -143,7 +130,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, QueueImpl = std::unique_ptr(new ur_queue_handle_t_{ std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext, - hDevice, Flags, pProps ? pProps->flags : 0, Priority}); + hDevice, Flags, pProps ? pProps->flags : 0}); *phQueue = QueueImpl.release(); @@ -306,7 +293,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( hDevice, HIPFlags, Flags, - /*priority*/ 0, /*backend_owns*/ pProperties->isNativeHandleOwned}; (*phQueue)->NumComputeStreams = 1; diff --git a/source/adapters/hip/queue.hpp b/source/adapters/hip/queue.hpp index ad2f0f016e..c79bd293a3 100644 --- a/source/adapters/hip/queue.hpp +++ b/source/adapters/hip/queue.hpp @@ -44,7 +44,6 @@ struct ur_queue_handle_t_ { unsigned int LastSyncTransferStreams; unsigned int Flags; ur_queue_flags_t URFlags; - int Priority; // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be // locked at the same time, ComputeStreamSyncMutex should be locked first // to avoid deadlocks @@ -57,7 +56,7 @@ struct ur_queue_handle_t_ { ur_queue_handle_t_(std::vector &&ComputeStreams, std::vector &&TransferStreams, ur_context_handle_t Context, ur_device_handle_t Device, - unsigned int Flags, ur_queue_flags_t URFlags, int Priority, + unsigned int Flags, ur_queue_flags_t URFlags, bool BackendOwns = true) : ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move( TransferStreams)}, @@ -67,7 +66,7 @@ struct ur_queue_handle_t_ { Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIdx{0}, TransferStreamIdx{0}, 
NumComputeStreams{0}, NumTransferStreams{0}, LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags), - URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} { + URFlags(URFlags), HasOwnership{BackendOwns} { urContextRetain(Context); urDeviceRetain(Device); } From 2c5c237a5b3c49f4f7dcbea4ca83cb9b9b52ecc0 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Fri, 5 Jan 2024 17:08:19 +0000 Subject: [PATCH 101/138] Revert "Revert "[HIP] Implement ext_oneapi_queue_priority"" --- source/adapters/hip/queue.cpp | 24 +++++++++++++++++++----- source/adapters/hip/queue.hpp | 5 +++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index f01fc0e180..bf4b636c38 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -38,8 +38,8 @@ hipStream_t ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) { // The second check is done after mutex is locked so other threads can not // change NumComputeStreams after that if (NumComputeStreams < ComputeStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &ComputeStreams[NumComputeStreams++], Flags)); + UR_CHECK_ERROR(hipStreamCreateWithPriority( + &ComputeStreams[NumComputeStreams++], Flags, Priority)); } } Token = ComputeStreamIdx++; @@ -97,8 +97,8 @@ hipStream_t ur_queue_handle_t_::getNextTransferStream() { // The second check is done after mutex is locked so other threads can not // change NumTransferStreams after that if (NumTransferStreams < TransferStreams.size()) { - UR_CHECK_ERROR(hipStreamCreateWithFlags( - &TransferStreams[NumTransferStreams++], Flags)); + UR_CHECK_ERROR(hipStreamCreateWithPriority( + &TransferStreams[NumTransferStreams++], Flags, Priority)); } } uint32_t Stream_i = TransferStreamIdx++ % TransferStreams.size(); @@ -118,6 +118,19 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, std::unique_ptr QueueImpl{nullptr}; unsigned int Flags = 0; + 
ur_queue_flags_t URFlags = 0; + int Priority = 0; // Not guaranteed, but, in ROCm 5.7, 0 is the default + + if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) { + URFlags = pProps->flags; + if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { + ScopedContext Active(hContext->getDevice()); + UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(nullptr, &Priority)); + } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { + ScopedContext Active(hContext->getDevice()); + UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(&Priority, nullptr)); + } + } const bool IsOutOfOrder = pProps ? pProps->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE @@ -130,7 +143,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, QueueImpl = std::unique_ptr(new ur_queue_handle_t_{ std::move(ComputeHipStreams), std::move(TransferHipStreams), hContext, - hDevice, Flags, pProps ? pProps->flags : 0}); + hDevice, Flags, pProps ? pProps->flags : 0, Priority}); *phQueue = QueueImpl.release(); @@ -293,6 +306,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( hDevice, HIPFlags, Flags, + /*priority*/ 0, /*backend_owns*/ pProperties->isNativeHandleOwned}; (*phQueue)->NumComputeStreams = 1; diff --git a/source/adapters/hip/queue.hpp b/source/adapters/hip/queue.hpp index c79bd293a3..ad2f0f016e 100644 --- a/source/adapters/hip/queue.hpp +++ b/source/adapters/hip/queue.hpp @@ -44,6 +44,7 @@ struct ur_queue_handle_t_ { unsigned int LastSyncTransferStreams; unsigned int Flags; ur_queue_flags_t URFlags; + int Priority; // When ComputeStreamSyncMutex and ComputeStreamMutex both need to be // locked at the same time, ComputeStreamSyncMutex should be locked first // to avoid deadlocks @@ -56,7 +57,7 @@ struct ur_queue_handle_t_ { ur_queue_handle_t_(std::vector &&ComputeStreams, std::vector &&TransferStreams, ur_context_handle_t Context, ur_device_handle_t Device, - unsigned int Flags, ur_queue_flags_t URFlags, + unsigned int Flags, ur_queue_flags_t URFlags, int 
Priority, bool BackendOwns = true) : ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move( TransferStreams)}, @@ -66,7 +67,7 @@ struct ur_queue_handle_t_ { Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIdx{0}, TransferStreamIdx{0}, NumComputeStreams{0}, NumTransferStreams{0}, LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags), - URFlags(URFlags), HasOwnership{BackendOwns} { + URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} { urContextRetain(Context); urDeviceRetain(Device); } From 2a960bafb0f0bfe3d2d8fb8be842bf02f6d02943 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Mon, 8 Jan 2024 10:37:06 +0000 Subject: [PATCH 102/138] [HIP] Update urQueueCreate to fix build --- source/adapters/hip/queue.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index bf4b636c38..6e6496fec1 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -119,15 +119,15 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, unsigned int Flags = 0; ur_queue_flags_t URFlags = 0; - int Priority = 0; // Not guaranteed, but, in ROCm 5.7, 0 is the default + int Priority = 0; // Not guaranteed, but, in ROCm 5.0-6.0, 0 is the default if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) { URFlags = pProps->flags; if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) { - ScopedContext Active(hContext->getDevice()); + ScopedContext Active(hDevice); UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(nullptr, &Priority)); } else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) { - ScopedContext Active(hContext->getDevice()); + ScopedContext Active(hDevice); UR_CHECK_ERROR(hipDeviceGetStreamPriorityRange(&Priority, nullptr)); } } From 67c3779cbb13ca68f339519ff15f46faa4d71fbd Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 12 Dec 2023 09:43:09 +0000 Subject: [PATCH 103/138] AMDGPU enable global variable 
read write --- source/adapters/hip/enqueue.cpp | 68 ++++++++++++++++++++++++++++++--- source/adapters/hip/program.cpp | 18 +++++++++ source/adapters/hip/program.hpp | 2 + 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 7875650b85..ff49e5506a 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1545,15 +1545,71 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t, - const void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + // Since HIP requires a the global variable to be referenced by name, we use + // metadata to find the correct name to access it by. 
+ auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); + if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) + return UR_RESULT_ERROR_INVALID_VALUE; + std::string DeviceGlobalName = DeviceGlobalNameIt->second; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + hipDeviceptr_t DeviceGlobal = 0; + size_t DeviceGlobalSize = 0; + UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, + hProgram->get(), + DeviceGlobalName.c_str())); + + if (offset + count > DeviceGlobalSize) + return UR_RESULT_ERROR_INVALID_VALUE; + + return urEnqueueUSMMemcpy( + hQueue, blockingWrite, + reinterpret_cast(reinterpret_cast(DeviceGlobal) + + offset), + pSrc, count, numEventsInWaitList, phEventWaitList, phEvent); + } catch (ur_result_t Err) { + Result = Err; + } + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t, - void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + // Since HIP requires a the global variable to be referenced by name, we use + // metadata to find the correct name to access it by. 
+ auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); + if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) + return UR_RESULT_ERROR_INVALID_VALUE; + std::string DeviceGlobalName = DeviceGlobalNameIt->second; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + hipDeviceptr_t DeviceGlobal = 0; + size_t DeviceGlobalSize = 0; + UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, + hProgram->get(), + DeviceGlobalName.c_str())); + + if (offset + count > DeviceGlobalSize) + return UR_RESULT_ERROR_INVALID_VALUE; + + return urEnqueueUSMMemcpy( + hQueue, blockingRead, pDst, + reinterpret_cast( + reinterpret_cast(DeviceGlobal) + offset), + count, numEventsInWaitList, phEventWaitList, phEvent); + } catch (ur_result_t Err) { + Result = Err; + } + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 9aa64151e0..81f1be1194 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -78,6 +78,15 @@ void getCoMgrBuildLog(const amd_comgr_data_set_t BuildDataSet, char *BuildLog, } // namespace #endif +std::pair +splitMetadataName(const std::string &metadataName) { + size_t splitPos = metadataName.rfind('@'); + if (splitPos == std::string::npos) + return std::make_pair(metadataName, std::string{}); + return std::make_pair(metadataName.substr(0, splitPos), + metadataName.substr(splitPos, metadataName.length())); +} + ur_result_t ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, size_t Length) { @@ -85,10 +94,19 @@ ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, const ur_program_metadata_t MetadataElement = Metadata[i]; std::string MetadataElementName{MetadataElement.pName}; + auto [Prefix, Tag] = splitMetadataName(MetadataElementName); + if (MetadataElementName == __SYCL_UR_PROGRAM_METADATA_TAG_NEED_FINALIZATION) { assert(MetadataElement.type == UR_PROGRAM_METADATA_TYPE_UINT32); 
IsRelocatable = MetadataElement.value.data32; + } else if (Tag == __SYCL_UR_PROGRAM_METADATA_GLOBAL_ID_MAPPING) { + const char *MetadataValPtr = + reinterpret_cast(MetadataElement.value.pData) + + sizeof(std::uint64_t); + const char *MetadataValPtrEnd = + MetadataValPtr + MetadataElement.size - sizeof(std::uint64_t); + GlobalIDMD[Prefix] = std::string{MetadataValPtr, MetadataValPtrEnd}; } } return UR_RESULT_SUCCESS; diff --git a/source/adapters/hip/program.hpp b/source/adapters/hip/program.hpp index 4b4e5ec878..dbdf9c55c6 100644 --- a/source/adapters/hip/program.hpp +++ b/source/adapters/hip/program.hpp @@ -29,6 +29,8 @@ struct ur_program_handle_t_ { // Metadata bool IsRelocatable = false; + std::unordered_map GlobalIDMD; + constexpr static size_t MAX_LOG_SIZE = 8192u; char ErrorLog[MAX_LOG_SIZE], InfoLog[MAX_LOG_SIZE]; From c9fba562feb5ea5acfbcc0dbbc5d8a15e9383a2b Mon Sep 17 00:00:00 2001 From: Hugh Delaney <46290137+hdelan@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:07:36 +0000 Subject: [PATCH 104/138] Update source/adapters/hip/enqueue.cpp Co-authored-by: Jakub Chlanda --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index ff49e5506a..56dfd20948 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1549,7 +1549,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Since HIP requires a the global variable to be referenced by name, we use + // Since HIP requires the global variable to be referenced by name, we use // metadata to find the correct name to access it by. 
auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) From de02e990ea8876db0d54903c6b3e3af8ce88ce6d Mon Sep 17 00:00:00 2001 From: Hugh Delaney <46290137+hdelan@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:07:58 +0000 Subject: [PATCH 105/138] Update source/adapters/hip/enqueue.cpp Co-authored-by: Jakub Chlanda --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 56dfd20948..144191bd35 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1583,7 +1583,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Since HIP requires a the global variable to be referenced by name, we use + // Since HIP requires the global variable to be referenced by name, we use // metadata to find the correct name to access it by. 
auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) From 45d76b7817c9654a8ebbbd0a02744f7ceb753227 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 20 Dec 2023 11:42:37 +0000 Subject: [PATCH 106/138] Refactor read write funcs --- source/adapters/hip/enqueue.cpp | 73 +++++++++++++++------------------ 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 144191bd35..0e7e04fc45 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1544,19 +1544,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +namespace { + +enum class GlobalVariableCopy { Read, Write }; + +ur_result_t deviceGlobalCopyHelper( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingWrite, size_t count, size_t offset, const void *pSrc, + bool blocking, size_t count, size_t offset, void *ptr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - // Since HIP requires the global variable to be referenced by name, we use + ur_event_handle_t *phEvent, GlobalVariableCopy CopyType) { + // Since HIP requires a the global variable to be referenced by name, we use // metadata to find the correct name to access it by. 
auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) return UR_RESULT_ERROR_INVALID_VALUE; std::string DeviceGlobalName = DeviceGlobalNameIt->second; - ur_result_t Result = UR_RESULT_SUCCESS; try { hipDeviceptr_t DeviceGlobal = 0; size_t DeviceGlobalSize = 0; @@ -1567,15 +1570,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( if (offset + count > DeviceGlobalSize) return UR_RESULT_ERROR_INVALID_VALUE; - return urEnqueueUSMMemcpy( - hQueue, blockingWrite, - reinterpret_cast(reinterpret_cast(DeviceGlobal) + - offset), - pSrc, count, numEventsInWaitList, phEventWaitList, phEvent); + void *pSrc, *pDst; + if (CopyType == GlobalVariableCopy::Write) { + pSrc = ptr; + pDst = reinterpret_cast(DeviceGlobal) + offset; + } else { + pSrc = reinterpret_cast(DeviceGlobal) + offset; + pDst = ptr; + } + return urEnqueueUSMMemcpy(hQueue, blocking, pDst, pSrc, count, + numEventsInWaitList, phEventWaitList, phEvent); } catch (ur_result_t Err) { - Result = Err; + return Err; } - return Result; +} +} // namespace + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return deviceGlobalCopyHelper(hQueue, hProgram, name, blockingWrite, count, + offset, const_cast(pSrc), + numEventsInWaitList, phEventWaitList, phEvent, + GlobalVariableCopy::Write); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( @@ -1583,33 +1602,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Since HIP requires the global variable to be referenced by 
name, we use - // metadata to find the correct name to access it by. - auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name); - if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end()) - return UR_RESULT_ERROR_INVALID_VALUE; - std::string DeviceGlobalName = DeviceGlobalNameIt->second; - - ur_result_t Result = UR_RESULT_SUCCESS; - try { - hipDeviceptr_t DeviceGlobal = 0; - size_t DeviceGlobalSize = 0; - UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize, - hProgram->get(), - DeviceGlobalName.c_str())); - - if (offset + count > DeviceGlobalSize) - return UR_RESULT_ERROR_INVALID_VALUE; - - return urEnqueueUSMMemcpy( - hQueue, blockingRead, pDst, - reinterpret_cast( - reinterpret_cast(DeviceGlobal) + offset), - count, numEventsInWaitList, phEventWaitList, phEvent); - } catch (ur_result_t Err) { - Result = Err; - } - return Result; + return deviceGlobalCopyHelper( + hQueue, hProgram, name, blockingRead, count, offset, pDst, + numEventsInWaitList, phEventWaitList, phEvent, GlobalVariableCopy::Read); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( From 6061b8a4097736627be3b197628620a9c930211b Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Mon, 8 Jan 2024 13:33:17 +0100 Subject: [PATCH 107/138] [Security] update third party packages versions --- third_party/requirements.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 5308c3554a..e2bb3bdcd3 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -1,10 +1,10 @@ alabaster==0.7.12 -Babel==2.7.0 +Babel==2.14.0 bandit==1.6.2 beautifulsoup4==4.11.1 breathe==4.33.1 bs4==0.0.1 -certifi==2019.11.28 +certifi==2023.07.22 chardet==3.0.4 clang-format==15.0.7 colorama==0.4.1 @@ -14,15 +14,15 @@ idna==2.8 imagesize==1.1.0 Jinja2==2.11.3 lxml==4.9.3 -Mako==1.1.0 +Mako==1.3.0 MarkupSafe==1.1.1 packaging==19.2 -Pygments==2.5.2 +Pygments==2.17.2 pyparsing==2.4.5 pytest>=7.0 
pytz==2019.3 -PyYAML==5.2 -requests==2.22.0 +PyYAML==6.0.1 +requests==2.31.0 rst2pdf==0.98 six==1.13.0 snowballstemmer==2.0.0 @@ -37,4 +37,4 @@ sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 sphinxcontrib-websupport==1.2.4 sphinx-rtd-theme==1.0.0 -urllib3==1.25.7 +urllib3==2.1.0 From 212da4d0922574567827b8d27dd7da4192e10d84 Mon Sep 17 00:00:00 2001 From: StepSecurity Bot Date: Mon, 8 Jan 2024 12:54:37 +0000 Subject: [PATCH 108/138] [StepSecurity] ci: Harden GitHub Actions Signed-off-by: StepSecurity Bot --- .github/workflows/bandit.yml | 2 +- .github/workflows/cmake.yml | 18 +++++++++--------- .github/workflows/codeql.yml | 12 ++++++------ .github/workflows/coverage.yml | 4 ++-- .github/workflows/coverity.yml | 2 +- .github/workflows/docs.yml | 10 +++++----- .github/workflows/e2e_nightly.yml | 4 ++-- .github/workflows/nightly.yml | 2 +- .github/workflows/prerelease.yml | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index e356e0bdb4..67f9b3a7ee 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Clone the git repo - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e697dd6aaf..1c9f740b91 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -36,7 +36,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install apt packages run: | @@ -122,7 +122,7 @@ jobs: runs-on: 'ubuntu-22.04' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt @@ 
-174,7 +174,7 @@ jobs: runs-on: ${{matrix.adapter.name}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt @@ -240,13 +240,13 @@ jobs: runs-on: ${{matrix.adapter.name}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt - name: Init conda env - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@9f54435e0e72c53962ee863144e47a4b094bfd35 # v2.3.0 with: miniconda-version: "latest" activate-environment: examples @@ -306,9 +306,9 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: 3.9 @@ -357,9 +357,9 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: 3.9 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index bf312be396..5e00191ced 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -18,10 +18,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 with: languages: cpp, python @@ -35,7 +35,7 @@ jobs: run: cmake --build ${{github.workspace}}/build -j $(nproc) - name: Perform CodeQL Analysis - uses: 
github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 analyze-windows: name: Analyze on Windows @@ -48,10 +48,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 with: languages: cpp, python @@ -65,4 +65,4 @@ jobs: run: cmake --build ${{github.workspace}}/build -j $(nproc) --config Release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@1500a131381b66de0c52ac28abb13cd79f4b7ecc # v2.22.12 diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 731f7ea320..6f2cb38aab 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -16,7 +16,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install apt packages run: | @@ -72,7 +72,7 @@ jobs: run: ctest -T Coverage - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d # v3.1.4 with: gcov: true gcov_include: source diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ab065ee77e..7e3dae32dd 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Clone the git repo - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9e51af24bc..53734a1d80 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,9 +26,9 
@@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: 3.9 @@ -41,14 +41,14 @@ jobs: run: python3 -m pip install -r third_party/requirements.txt - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@c5a3e1159e0cbdf0845eb8811bd39e39fc3099c2 # v2.1.3 - name: Build Documentation working-directory: ${{github.workspace}}/scripts run: python3 run.py --core - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@84bb4cd4b733d5c320c9c9cfbc354937524f4d64 # v1.0.10 with: path: ${{github.workspace}}/docs/html @@ -62,4 +62,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@f27bcc15848fdcdcc02f01754eb838e44bcf389b # v1.2.9 diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml index e3cda49245..4a3999fc5c 100644 --- a/.github/workflows/e2e_nightly.yml +++ b/.github/workflows/e2e_nightly.yml @@ -29,12 +29,12 @@ jobs: rm -rf ./* || true - name: Checkout UR - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: path: ur-repo - name: Checkout SYCL - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: repository: intel/llvm ref: sycl diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 4a81c94e8f..38d3dcef04 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -16,7 +16,7 @@ jobs: runs-on: 'ubuntu-22.04' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git 
a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 882b06985a..fe0790cc46 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -12,7 +12,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: Create weekly prerelease run: From 278b6037ff1e3198f894187ec1cca62dffa9a323 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Wed, 1 Nov 2023 17:42:44 +0000 Subject: [PATCH 109/138] [SYCL][HIP] Implement mem_advise for HIP --- source/adapters/hip/enqueue.cpp | 152 ++++++++++++++++++++++++++++++-- 1 file changed, 143 insertions(+), 9 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 7875650b85..e1f2097a07 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -84,6 +84,62 @@ void simpleGuessLocalWorkSize(size_t *ThreadsPerBlock, --ThreadsPerBlock[0]; } } + +ur_result_t setHipMemAdvise(const void *DevPtr, size_t Size, + ur_usm_advice_flags_t URAdviceFlags, + hipDevice_t Device) { + using ur_to_hip_advice_t = std::pair; + + static constexpr std::array + URToHIPMemAdviseDeviceFlags{ + std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY, + hipMemAdviseSetReadMostly), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY, + hipMemAdviseUnsetReadMostly), + std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION, + hipMemAdviseSetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION, + hipMemAdviseUnsetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE, + hipMemAdviseSetAccessedBy), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE, + hipMemAdviseUnsetAccessedBy), + }; + for (auto &FlagPair : URToHIPMemAdviseDeviceFlags) { + if (URAdviceFlags & FlagPair.first) { + UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, FlagPair.second, Device)); + } + } + + static constexpr std::array 
URToHIPMemAdviseHostFlags{ + std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST, + hipMemAdviseSetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST, + hipMemAdviseUnsetPreferredLocation), + std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST, + hipMemAdviseSetAccessedBy), + std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST, + hipMemAdviseUnsetAccessedBy), + }; + + for (auto &FlagPair : URToHIPMemAdviseHostFlags) { + if (URAdviceFlags & FlagPair.first) { + UR_CHECK_ERROR( + hipMemAdvise(DevPtr, Size, FlagPair.second, hipCpuDeviceId)); + } + } + + // Handle unmapped memory advice flags + if (URAdviceFlags & + (UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY | + UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY | + UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + return UR_RESULT_SUCCESS; +} + } // namespace UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -1468,22 +1524,100 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( return Result; } +/// USM: memadvise API to govern behavior of automatic migration mechanisms UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t, ur_event_handle_t *phEvent) { + ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { + UR_ASSERT(pMem && size > 0, UR_RESULT_ERROR_INVALID_VALUE); void *HIPDevicePtr = const_cast(pMem); -// HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, -// so we can't perform this check for such cases. + ur_device_handle_t Device = hQueue->getContext()->getDevice(); + + // If the device does not support managed memory access, we can't set + // mem_advise. 
+ if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { + setErrorMessage("mem_advise ignored as device does not support " + " managed memory access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + // Passing MEM_ADVICE_SET/MEM_ADVICE_CLEAR_PREFERRED_LOCATION to hipMemAdvise + // on a GPU device requires the GPU device to report a non-zero value for + // hipDeviceAttributeConcurrentManagedAccess. Therefore, ignore the mem advice + // if concurrent managed memory access is not available. + if (advice & (UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION | + UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION | + UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE | + UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE | + UR_USM_ADVICE_FLAG_DEFAULT)) { + if (!getAttribute(Device, hipDeviceAttributeConcurrentManagedAccess)) { + setErrorMessage("mem_advise ignored as device does not support " + "concurrent managed access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + // TODO: If pMem points to valid system-allocated pageable memory, we should + // check that the device also has the hipDeviceAttributePageableMemoryAccess + // property, so that a valid read-only copy can be created on the device. + // This also applies for UR_USM_MEM_ADVICE_SET/MEM_ADVICE_CLEAR_READ_MOSTLY. + } + #if HIP_VERSION_MAJOR >= 5 + // NOTE: The hipPointerGetAttribute API is marked as beta, meaning, while this + // is feature complete, it is still open to changes and outstanding issues. 
unsigned int PointerRangeSize = 0; - UR_CHECK_ERROR(hipPointerGetAttribute(&PointerRangeSize, - HIP_POINTER_ATTRIBUTE_RANGE_SIZE, - (hipDeviceptr_t)HIPDevicePtr)); + UR_CHECK_ERROR(hipPointerGetAttribute( + &PointerRangeSize, HIP_POINTER_ATTRIBUTE_RANGE_SIZE, + static_cast(HIPDevicePtr))); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); #endif - // TODO implement a mapping to hipMemAdvise once the expected behaviour - // of urEnqueueUSMAdvise is detailed in the USM extension - return urEnqueueEventsWait(hQueue, 0, nullptr, phEvent); + + ur_result_t Result = UR_RESULT_SUCCESS; + std::unique_ptr EventPtr{nullptr}; + + try { + ScopedContext Active(Device); + + if (phEvent) { + EventPtr = + std::unique_ptr(ur_event_handle_t_::makeNative( + UR_COMMAND_USM_ADVISE, hQueue, hQueue->getNextTransferStream())); + EventPtr->start(); + } + + const auto DeviceID = Device->get(); + if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { + UR_CHECK_ERROR( + hipMemAdvise(pMem, size, hipMemAdviseUnsetReadMostly, DeviceID)); + UR_CHECK_ERROR(hipMemAdvise( + pMem, size, hipMemAdviseUnsetPreferredLocation, DeviceID)); + UR_CHECK_ERROR( + hipMemAdvise(pMem, size, hipMemAdviseUnsetAccessedBy, DeviceID)); + } else { + Result = setHipMemAdvise(HIPDevicePtr, size, advice, DeviceID); + // UR_RESULT_ERROR_INVALID_ENUMERATION is returned when using a valid but + // currently unmapped advice arguments as not supported by this platform. + // Therefore, warn the user instead of throwing and aborting the runtime. + if (Result == UR_RESULT_ERROR_INVALID_ENUMERATION) { + setErrorMessage("mem_advise is ignored as the advice argument is not " + " supported by this device.", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + } + + if (phEvent) { + Result = EventPtr->record(); + *phEvent = EventPtr.release(); + } + } catch (ur_result_t err) { + Result = err; + } catch (...) 
{ + Result = UR_RESULT_ERROR_UNKNOWN; + } + + return Result; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( From 86e86f920fa50db4cf8b6597c975eb099ae3abd4 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Tue, 7 Nov 2023 13:52:32 +0000 Subject: [PATCH 110/138] Change the type of PointerRangeSize which was causing a stack corruption --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index e1f2097a07..413017a3ad 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1566,7 +1566,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, #if HIP_VERSION_MAJOR >= 5 // NOTE: The hipPointerGetAttribute API is marked as beta, meaning, while this // is feature complete, it is still open to changes and outstanding issues. - unsigned int PointerRangeSize = 0; + size_t PointerRangeSize = 0; UR_CHECK_ERROR(hipPointerGetAttribute( &PointerRangeSize, HIP_POINTER_ATTRIBUTE_RANGE_SIZE, static_cast(HIPDevicePtr))); From c9e6c1a5c3910435a2a6ecc3404c70f6740b477e Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Tue, 7 Nov 2023 16:31:35 +0000 Subject: [PATCH 111/138] Update unsupported advice flag warning message --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 413017a3ad..9ada8c7391 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1601,7 +1601,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, // Therefore, warn the user instead of throwing and aborting the runtime. 
if (Result == UR_RESULT_ERROR_INVALID_ENUMERATION) { setErrorMessage("mem_advise is ignored as the advice argument is not " - " supported by this device.", + "supported by this device", UR_RESULT_SUCCESS); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } From c1ae3377678f880315e686e21800200d831bacb7 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Wed, 8 Nov 2023 11:21:06 +0000 Subject: [PATCH 112/138] Move unsupported advice flags check at the start --- source/adapters/hip/enqueue.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 9ada8c7391..903891423a 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -85,9 +85,17 @@ void simpleGuessLocalWorkSize(size_t *ThreadsPerBlock, } } -ur_result_t setHipMemAdvise(const void *DevPtr, size_t Size, +ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size, ur_usm_advice_flags_t URAdviceFlags, hipDevice_t Device) { + // Handle unmapped memory advice flags + if (URAdviceFlags & + (UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY | + UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY | + UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + using ur_to_hip_advice_t = std::pair; static constexpr std::array @@ -129,14 +137,6 @@ ur_result_t setHipMemAdvise(const void *DevPtr, size_t Size, } } - // Handle unmapped memory advice flags - if (URAdviceFlags & - (UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY | - UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY | - UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) { - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - return UR_RESULT_SUCCESS; } From b20a8777d27eab8094290690255600f2d731ed80 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Fri, 8 Dec 2023 16:42:56 +0000 Subject: [PATCH 113/138] Update urEnqueueUSMAdvise entry point to always return a valid event --- 
source/adapters/hip/enqueue.cpp | 81 ++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 903891423a..2f03af95df 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1532,37 +1532,6 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, void *HIPDevicePtr = const_cast(pMem); ur_device_handle_t Device = hQueue->getContext()->getDevice(); - // If the device does not support managed memory access, we can't set - // mem_advise. - if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { - setErrorMessage("mem_advise ignored as device does not support " - " managed memory access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // Passing MEM_ADVICE_SET/MEM_ADVICE_CLEAR_PREFERRED_LOCATION to hipMemAdvise - // on a GPU device requires the GPU device to report a non-zero value for - // hipDeviceAttributeConcurrentManagedAccess. Therefore, ignore the mem advice - // if concurrent managed memory access is not available. - if (advice & (UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION | - UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION | - UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE | - UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE | - UR_USM_ADVICE_FLAG_DEFAULT)) { - if (!getAttribute(Device, hipDeviceAttributeConcurrentManagedAccess)) { - setErrorMessage("mem_advise ignored as device does not support " - "concurrent managed access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // TODO: If pMem points to valid system-allocated pageable memory, we should - // check that the device also has the hipDeviceAttributePageableMemoryAccess - // property, so that a valid read-only copy can be created on the device. - // This also applies for UR_USM_MEM_ADVICE_SET/MEM_ADVICE_CLEAR_READ_MOSTLY. 
- } - #if HIP_VERSION_MAJOR >= 5 // NOTE: The hipPointerGetAttribute API is marked as beta, meaning, while this // is feature complete, it is still open to changes and outstanding issues. @@ -1574,10 +1543,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, #endif ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr EventPtr{nullptr}; try { ScopedContext Active(Device); + std::unique_ptr EventPtr{nullptr}; if (phEvent) { EventPtr = @@ -1586,6 +1555,48 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, EventPtr->start(); } + // Helper to ensure returning a valid event on early exit. + auto releaseEvent = [&EventPtr, &phEvent]() -> void { + if (phEvent) { + UR_CHECK_ERROR(EventPtr->record()); + *phEvent = EventPtr.release(); + } + }; + + // If the device does not support managed memory access, we can't set + // mem_advise. + if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { + releaseEvent(); + setErrorMessage("mem_advise ignored as device does not support " + "managed memory access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + // Passing MEM_ADVICE_SET/MEM_ADVICE_CLEAR_PREFERRED_LOCATION to + // hipMemAdvise on a GPU device requires the GPU device to report a non-zero + // value for hipDeviceAttributeConcurrentManagedAccess. Therefore, ignore + // the mem advice if concurrent managed memory access is not available. 
+ if (advice & (UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION | + UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION | + UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE | + UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE | + UR_USM_ADVICE_FLAG_DEFAULT)) { + if (!getAttribute(Device, hipDeviceAttributeConcurrentManagedAccess)) { + releaseEvent(); + setErrorMessage("mem_advise ignored as device does not support " + "concurrent managed access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + // TODO: If pMem points to valid system-allocated pageable memory, we + // should check that the device also has the + // hipDeviceAttributePageableMemoryAccess property, so that a valid + // read-only copy can be created on the device. This also applies for + // UR_USM_MEM_ADVICE_SET/MEM_ADVICE_CLEAR_READ_MOSTLY. + } + const auto DeviceID = Device->get(); if (advice & UR_USM_ADVICE_FLAG_DEFAULT) { UR_CHECK_ERROR( @@ -1600,6 +1611,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, // currently unmapped advice arguments as not supported by this platform. // Therefore, warn the user instead of throwing and aborting the runtime. if (Result == UR_RESULT_ERROR_INVALID_ENUMERATION) { + releaseEvent(); setErrorMessage("mem_advise is ignored as the advice argument is not " "supported by this device", UR_RESULT_SUCCESS); @@ -1607,10 +1619,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, } } - if (phEvent) { - Result = EventPtr->record(); - *phEvent = EventPtr.release(); - } + releaseEvent(); } catch (ur_result_t err) { Result = err; } catch (...) 
{ From 71dd495e0135fbb41fd7caed9e625d0d1c039a51 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Fri, 8 Dec 2023 16:45:10 +0000 Subject: [PATCH 114/138] Update urEnqueueUSMPrefetch entry point to always return a valid event and ignore flags --- source/adapters/hip/enqueue.cpp | 76 +++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 2f03af95df..a51e0bc18c 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1459,34 +1459,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = flags; + void *HIPDevicePtr = const_cast(pMem); ur_device_handle_t Device = hQueue->getDevice(); - // If the device does not support managed memory access, we can't set - // mem_advise. - if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { - setErrorMessage("mem_advise ignored as device does not support " - " managed memory access", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - hipPointerAttribute_t attribs; - // TODO: hipPointerGetAttributes will fail if pMem is non-HIP allocated - // memory, as it is neither registered as host memory, nor into the address - // space for the current device, meaning the pMem ptr points to a - // system-allocated memory. This means we may need to check system-alloacted - // memory and handle the failure more gracefully. - UR_CHECK_ERROR(hipPointerGetAttributes(&attribs, pMem)); - // async prefetch requires USM pointer (or hip SVM) to work. 
- if (!attribs.isManaged) { - setErrorMessage("Prefetch hint ignored as prefetch only works with USM", - UR_RESULT_SUCCESS); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; - } - - // HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, - // so we can't perform this check for such cases. +// HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, +// so we can't perform this check for such cases. #if HIP_VERSION_MAJOR >= 5 unsigned int PointerRangeSize = 0; UR_CHECK_ERROR(hipPointerGetAttribute(&PointerRangeSize, @@ -1494,29 +1473,60 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( (hipDeviceptr_t)HIPDevicePtr)); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); #endif - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; + ur_result_t Result = UR_RESULT_SUCCESS; - std::unique_ptr EventPtr{nullptr}; try { ScopedContext Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, phEventWaitList); + + std::unique_ptr EventPtr{nullptr}; + if (phEvent) { EventPtr = std::unique_ptr(ur_event_handle_t_::makeNative( UR_COMMAND_USM_PREFETCH, hQueue, HIPStream)); UR_CHECK_ERROR(EventPtr->start()); } + + // Helper to ensure returning a valid event on early exit. + auto releaseEvent = [&EventPtr, &phEvent]() -> void { + if (phEvent) { + UR_CHECK_ERROR(EventPtr->record()); + *phEvent = EventPtr.release(); + } + }; + + // If the device does not support managed memory access, we can't set + // mem_advise. 
+ if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) { + releaseEvent(); + setErrorMessage("mem_advise ignored as device does not support " + "managed memory access", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + + hipPointerAttribute_t attribs; + // TODO: hipPointerGetAttributes will fail if pMem is non-HIP allocated + // memory, as it is neither registered as host memory, nor into the address + // space for the current device, meaning the pMem ptr points to a + // system-allocated memory. This means we may need to check system-allocated + // memory and handle the failure more gracefully. + UR_CHECK_ERROR(hipPointerGetAttributes(&attribs, pMem)); + // async prefetch requires USM pointer (or hip SVM) to work. + if (!attribs.isManaged) { + releaseEvent(); + setErrorMessage("Prefetch hint ignored as prefetch only works with USM", + UR_RESULT_SUCCESS); + return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + } + UR_CHECK_ERROR( hipMemPrefetchAsync(pMem, size, hQueue->getDevice()->get(), HIPStream)); - if (phEvent) { - UR_CHECK_ERROR(EventPtr->record()); - *phEvent = EventPtr.release(); - } + releaseEvent(); } catch (ur_result_t Err) { Result = Err; } From c10968f5691c8bc28fc8314ef209589a5c5c6f23 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Fri, 8 Dec 2023 17:09:08 +0000 Subject: [PATCH 115/138] Address issue with getting device handle to follow the new multi-device-ctx changes --- source/adapters/hip/enqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index a51e0bc18c..68e3e665d2 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1540,7 +1540,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { UR_ASSERT(pMem && size > 0, UR_RESULT_ERROR_INVALID_VALUE); void *HIPDevicePtr = const_cast(pMem); - ur_device_handle_t Device =
hQueue->getContext()->getDevice(); + ur_device_handle_t Device = hQueue->getDevice(); #if HIP_VERSION_MAJOR >= 5 // NOTE: The hipPointerGetAttribute API is marked as beta, meaning, while this From 0e7285807fdeca2a51de68dffe07b6182428abca Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Tue, 9 Jan 2024 11:59:41 +0100 Subject: [PATCH 116/138] [Security] create scorecard.yml --- .github/workflows/scorecard.yml | 73 +++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 .github/workflows/scorecard.yml diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000000..033e796b24 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,73 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + workflow_dispatch: + schedule: + - cron: '45 22 * * 4' + push: + branches: [ "main" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. 
+ # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # v2.1.2 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard. 
+ - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # v2.2.4 + with: + sarif_file: results.sarif From 2e156f790c2e8473abba198f91b544c1db0ca526 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 3 Nov 2023 10:25:52 +0000 Subject: [PATCH 117/138] [EXP][CMDBUF] Add extra event to get CommandBuffer start time Adds an extra event in the first command list associated to the CommandBuffer execution to obtain the start time of the graph execution. --- source/adapters/level_zero/command_buffer.cpp | 19 +++++++++++++++++++ source/adapters/level_zero/event.cpp | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 4b811ab033..af5fa73077 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -941,6 +941,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ZE2UR_CALL(zeCommandListAppendBarrier, (SignalCommandList->first, RetEvent->ZeEvent, 1, &(CommandBuffer->SignalEvent->ZeEvent))); + + if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { + // We create an additional signal specific to the current execution of the + // CommandBuffer. This signal is needed for profiling the execution time + // of the CommandBuffer. It waits for the WaitEvent to be signaled + // which indicates the start of the CommandBuffer actual execution. + // This event is embedded into the Event return to the user to allow + // the profiling engine to retrieve it. 
+ ur_event_handle_t StartEvent{}; + UR_CALL(createEventAndAssociateQueue( + Queue, &StartEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, + WaitCommandList, false)); + + ZE2UR_CALL(zeCommandListAppendBarrier, + (WaitCommandList->first, StartEvent->ZeEvent, 1, + &(CommandBuffer->WaitEvent->ZeEvent))); + + RetEvent->CommandData = StartEvent; + } } // Execution our command-lists asynchronously diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index d8af1e674d..8dfef4f099 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -13,6 +13,7 @@ #include #include +#include "command_buffer.hpp" #include "common.hpp" #include "event.hpp" #include "ur_level_zero.hpp" @@ -454,6 +455,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ///< bytes returned in propValue ) { std::shared_lock EventLock(Event->Mutex); + + // A Command-buffer consists of three command-lists. + // The start time should therefore be taken from an event associated + // to the first command-list. + if ((Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) && + (PropName == UR_PROFILING_INFO_COMMAND_START) && (Event->CommandData)) { + auto StartEvent = static_cast(Event->CommandData); + return urEventGetProfilingInfo(StartEvent, UR_PROFILING_INFO_COMMAND_END, + PropValueSize, PropValue, PropValueSizeRet); + } + if (Event->UrQueue && (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; @@ -763,6 +775,13 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { return Res; Event->CommandData = nullptr; } + if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP && + Event->CommandData) { + // Free the memory extra event allocated for profiling purposed. 
+ auto AssociateEvent = static_cast(Event->CommandData); + urEventRelease(AssociateEvent); + Event->CommandData = nullptr; + } if (Event->OwnNativeHandle) { if (DisableEventsCaching) { auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent)); From e8b78408b0b2d6547b7aa07ed3599a54484d6a2e Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 9 Nov 2023 16:44:33 +0000 Subject: [PATCH 118/138] Copy command-buffer event timestamps into a dedicated USM memory region. Get the command-buffer start and end timestamps from this memory. Move events reset from the middle command list to the first to allow the copy of the profiling info in the last command list and relax command list order. --- source/adapters/level_zero/command_buffer.cpp | 92 ++++++++++++------- source/adapters/level_zero/command_buffer.hpp | 5 + source/adapters/level_zero/event.cpp | 80 +++++++++++++--- 3 files changed, 131 insertions(+), 46 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index af5fa73077..d63630c456 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -45,13 +45,13 @@ │ Prefix │ Commands added to UR command-buffer by UR user │ Suffix │ └──────────┴────────────────────────────────────────────────┴─────────┘ - ┌───────────────────┬──────────────────────────────┐ - Prefix │Reset signal event │ Barrier waiting on wait event│ - └───────────────────┴──────────────────────────────┘ + ┌───────────────────┬──────────────┐──────────────────────────────┐ + Prefix │Reset signal event │ Reset events │ Barrier waiting on wait event│ + └───────────────────┴──────────────┘──────────────────────────────┘ ┌─────────────────────────────────────────────┐──────────────┐ - Suffix │Barrier waiting on sync-point event, │ Reset events │ - │signalling the UR command-buffer signal event│ │ + Suffix │Barrier waiting on sync-point event, │ Query CMD │ + │signalling the 
UR command-buffer signal event│ Timestamps │ └─────────────────────────────────────────────┘──────────────┘ For a call to `urCommandBufferEnqueueExp` with an event_list `EL`, @@ -433,6 +433,10 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + // Dependencies between commands are explicitly enforced by sync points when + // enqueuing. Consequently, relax the command ordering in the command list + // can enable the backend to further optimize the workload + ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING; ze_command_list_handle_t ZeCommandList; // TODO We could optimize this by pooling both Level Zero command-lists and UR @@ -499,13 +503,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { ZE2UR_CALL(zeCommandListAppendEventReset, (CommandBuffer->ZeCommandList, CommandBuffer->WaitEvent->ZeEvent)); - // Reset the L0 events we use for command-buffer internal sync-points to the - // non-signalled state - for (auto Event : WaitEventList) { - ZE2UR_CALL(zeCommandListAppendEventReset, - (CommandBuffer->ZeCommandList, Event)); - } - // Close the command list and have it ready for dispatch. ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandList)); return UR_RESULT_SUCCESS; @@ -899,14 +896,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create command-list to execute before `CommandListPtr` and will signal // when `EventWaitList` dependencies are complete. ur_command_list_ptr_t WaitCommandList{}; + UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList, false, + false)); + + // Create a list of events of all the events that compose the command buffer + // workload. + // This loop also resets the L0 events we use for command-buffer internal + // sync-points to the non-signalled state. + // This is required for multiple submissions. 
+ const size_t NumEvents = CommandBuffer->SyncPoints.size(); + std::vector WaitEventList{NumEvents}; + for (size_t i = 0; i < NumEvents; i++) { + auto ZeEvent = CommandBuffer->SyncPoints[i]->ZeEvent; + WaitEventList[i] = ZeEvent; + ZE2UR_CALL(zeCommandListAppendEventReset, + (WaitCommandList->first, ZeEvent)); + } + if (NumEventsInWaitList) { _ur_ze_event_list_t TmpWaitList; UR_CALL(TmpWaitList.createAndRetainUrZeEventList( NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); - UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList, - false, false)) - // Update the WaitList of the Wait Event // Events are appended to the WaitList if the WaitList is not empty if (CommandBuffer->WaitEvent->WaitList.isEmpty()) @@ -919,9 +930,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( CommandBuffer->WaitEvent->WaitList.Length, CommandBuffer->WaitEvent->WaitList.ZeEventList)); } else { - UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList, - false, false)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, (WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent)); } @@ -943,22 +951,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( &(CommandBuffer->SignalEvent->ZeEvent))); if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { - // We create an additional signal specific to the current execution of the - // CommandBuffer. This signal is needed for profiling the execution time - // of the CommandBuffer. It waits for the WaitEvent to be signaled - // which indicates the start of the CommandBuffer actual execution. - // This event is embedded into the Event return to the user to allow - // the profiling engine to retrieve it. 
- ur_event_handle_t StartEvent{}; - UR_CALL(createEventAndAssociateQueue( - Queue, &StartEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, - WaitCommandList, false)); - - ZE2UR_CALL(zeCommandListAppendBarrier, - (WaitCommandList->first, StartEvent->ZeEvent, 1, - &(CommandBuffer->WaitEvent->ZeEvent))); - - RetEvent->CommandData = StartEvent; + // Multiple submissions of a command buffer implies that we need to save + // the event timestamps before resubmiting the command buffer. We + // therefore copy the these timestamps in a dedicated USM memory section + // before completing the command buffer execution, and then attach this + // memory to the event returned to users to allow to allow the profiling + // engine to recover these timestamps. + ur_usm_desc_t USMDesc{}; + ur_usm_device_desc_t UsmDeviceDesc{}; + UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; + ur_usm_host_desc_t UsmHostDesc{}; + UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC; + UsmDeviceDesc.pNext = &UsmHostDesc; + USMDesc.pNext = &UsmDeviceDesc; + USMDesc.align = 4; // 4byte-aligned + + size_t Size = WaitEventList.size() * sizeof(ze_kernel_timestamp_result_t); + + struct command_buffer_profiling_t *Profiling = + new command_buffer_profiling_t(); + + Profiling->NumEvents = WaitEventList.size(); + + urUSMSharedAlloc(RetEvent->Context, CommandBuffer->Device, &USMDesc, + nullptr, Size, (void **)&Profiling->Timestamps); + + ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, + (SignalCommandList->first, WaitEventList.size(), + WaitEventList.data(), Profiling->Timestamps, 0, + RetEvent->ZeEvent, 1, + &(CommandBuffer->SignalEvent->ZeEvent))); + + RetEvent->CommandData = static_cast(Profiling); } } diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index b18f1c3497..a43e9e4c52 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -19,6 +19,11 @@ #include "context.hpp" #include 
"queue.hpp" +struct command_buffer_profiling_t { + ur_exp_command_buffer_sync_point_t NumEvents; + ze_kernel_timestamp_result_t *Timestamps; +}; + struct ur_exp_command_buffer_handle_t_ : public _ur_object { ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device, diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 8dfef4f099..5e9397e4e7 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -456,16 +456,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ) { std::shared_lock EventLock(Event->Mutex); - // A Command-buffer consists of three command-lists. - // The start time should therefore be taken from an event associated - // to the first command-list. - if ((Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) && - (PropName == UR_PROFILING_INFO_COMMAND_START) && (Event->CommandData)) { - auto StartEvent = static_cast(Event->CommandData); - return urEventGetProfilingInfo(StartEvent, UR_PROFILING_INFO_COMMAND_END, - PropValueSize, PropValue, PropValueSizeRet); - } - if (Event->UrQueue && (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; @@ -482,6 +472,70 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ze_kernel_timestamp_result_t tsResult; + // A Command-buffer consists of three command-lists for which only a single + // event is returned to users. The actual profiling information related to the + // command-buffer should therefore be extracted from graph events themselves. + // The timestamps of these events are saved in a memory region attached to + // event using CommandData field. The timings must therefore be recovered + // from this memory.
+ if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { + if (Event->CommandData) { + struct command_buffer_profiling_t *ProfilingsPtr; + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_START: { + ProfilingsPtr = static_cast( + Event->CommandData); + // Sync-point order does not necessarily match to the order of + // execution. We therefore look for the first command executed. + uint64_t MinStart = ProfilingsPtr->Timestamps->global.kernelStart; + for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { + uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelStart; + if (Timestamp < MinStart) { + MinStart = Timestamp; + } + } + uint64_t ContextStartTime = + (MinStart & TimestampMaxValue) * ZeTimerResolution; + return ReturnValue(ContextStartTime); + } + case UR_PROFILING_INFO_COMMAND_END: { + ProfilingsPtr = static_cast( + Event->CommandData); + // Sync-point order does not necessarily match to the order of + // execution. We therefore look for the last command executed. + uint64_t MaxEnd = ProfilingsPtr->Timestamps->global.kernelEnd; + uint64_t LastStart = ProfilingsPtr->Timestamps->global.kernelStart; + for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { + uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelEnd; + if (Timestamp > MaxEnd) { + MaxEnd = Timestamp; + LastStart = ProfilingsPtr->Timestamps[i].global.kernelStart; + } + } + uint64_t ContextStartTime = (LastStart & TimestampMaxValue); + uint64_t ContextEndTime = (MaxEnd & TimestampMaxValue); + + // + // Handle a possible wrap-around (the underlying HW counter is < + // 64-bit). Note, it will not report correct time if there were multiple + // wrap arounds, and the longer term plan is to enlarge the capacity of + // the HW timestamps. 
+ // + if (ContextEndTime <= ContextStartTime) { + ContextEndTime += TimestampMaxValue; + } + ContextEndTime *= ZeTimerResolution; + return ReturnValue(ContextEndTime); + } + default: + urPrint("urEventGetProfilingInfo: not supported ParamName\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } + } + switch (PropName) { case UR_PROFILING_INFO_COMMAND_START: { ZE2UR_CALL(zeEventQueryKernelTimestamp, (Event->ZeEvent, &tsResult)); @@ -778,8 +832,10 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP && Event->CommandData) { // Free the memory extra event allocated for profiling purposed. - auto AssociateEvent = static_cast(Event->CommandData); - urEventRelease(AssociateEvent); + struct command_buffer_profiling_t *ProfilingPtr = + static_cast(Event->CommandData); + urUSMFree(Event->Context, (void *)ProfilingPtr->Timestamps); + delete ProfilingPtr; Event->CommandData = nullptr; } if (Event->OwnNativeHandle) { From bd25d685cde0c23f66d1b4e8e499e3da7bdafddd Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 10 Nov 2023 17:29:42 +0000 Subject: [PATCH 119/138] Changes USMShared memory allocation for host only allocation --- source/adapters/level_zero/command_buffer.cpp | 33 +++++++------------ source/adapters/level_zero/event.cpp | 22 ++++++------- 2 files changed, 23 insertions(+), 32 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index d63630c456..7f3f514d9d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -939,6 +939,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create a command-list to signal RetEvent on completion ur_command_list_ptr_t SignalCommandList{}; if (Event) { + ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent; 
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, false, false)); @@ -946,10 +947,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, SignalCommandList, false)); - ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(CommandBuffer->SignalEvent->ZeEvent))); - if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { // Multiple submissions of a command buffer implies that we need to save // the event timestamps before resubmiting the command buffer. We @@ -957,33 +954,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // before completing the command buffer execution, and then attach this // memory to the event returned to users to allow to allow the profiling // engine to recover these timestamps. - ur_usm_desc_t USMDesc{}; - ur_usm_device_desc_t UsmDeviceDesc{}; - UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC; - ur_usm_host_desc_t UsmHostDesc{}; - UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC; - UsmDeviceDesc.pNext = &UsmHostDesc; - USMDesc.pNext = &UsmDeviceDesc; - USMDesc.align = 4; // 4byte-aligned - - size_t Size = WaitEventList.size() * sizeof(ze_kernel_timestamp_result_t); + UR_CALL(createEventAndAssociateQueue( + Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false)); - struct command_buffer_profiling_t *Profiling = - new command_buffer_profiling_t(); + command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); Profiling->NumEvents = WaitEventList.size(); - - urUSMSharedAlloc(RetEvent->Context, CommandBuffer->Device, &USMDesc, - nullptr, Size, (void **)&Profiling->Timestamps); + Profiling->Timestamps = + new ze_kernel_timestamp_result_t[Profiling->NumEvents]; ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, (SignalCommandList->first, WaitEventList.size(), - WaitEventList.data(), Profiling->Timestamps, 0, - RetEvent->ZeEvent, 1, + WaitEventList.data(), (void 
*)Profiling->Timestamps, 0, + SyncEvent->ZeEvent, 1, &(CommandBuffer->SignalEvent->ZeEvent))); RetEvent->CommandData = static_cast(Profiling); } + + ZE2UR_CALL(zeCommandListAppendBarrier, + (SignalCommandList->first, RetEvent->ZeEvent, 1, + &(SyncEvent->ZeEvent))); } // Execution our command-lists asynchronously diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 5e9397e4e7..2dc74ff5ac 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -480,14 +480,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( // from this memory. if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { if (Event->CommandData) { - struct command_buffer_profiling_t *ProfilingsPtr; + command_buffer_profiling_t *ProfilingsPtr; switch (PropName) { case UR_PROFILING_INFO_COMMAND_START: { - ProfilingsPtr = static_cast( - Event->CommandData); + ProfilingsPtr = + static_cast(Event->CommandData); // Sync-point order does not necessarily match to the order of // execution. We therefore look for the first command executed. - uint64_t MinStart = ProfilingsPtr->Timestamps->global.kernelStart; + uint64_t MinStart = ProfilingsPtr->Timestamps[0].global.kernelStart; for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelStart; if (Timestamp < MinStart) { @@ -499,12 +499,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return ReturnValue(ContextStartTime); } case UR_PROFILING_INFO_COMMAND_END: { - ProfilingsPtr = static_cast( - Event->CommandData); + ProfilingsPtr = + static_cast(Event->CommandData); // Sync-point order does not necessarily match to the order of // execution. We therefore look for the last command executed. 
- uint64_t MaxEnd = ProfilingsPtr->Timestamps->global.kernelEnd; - uint64_t LastStart = ProfilingsPtr->Timestamps->global.kernelStart; + uint64_t MaxEnd = ProfilingsPtr->Timestamps[0].global.kernelEnd; + uint64_t LastStart = ProfilingsPtr->Timestamps[0].global.kernelStart; for (uint64_t i = 1; i < ProfilingsPtr->NumEvents; i++) { uint64_t Timestamp = ProfilingsPtr->Timestamps[i].global.kernelEnd; if (Timestamp > MaxEnd) { @@ -832,9 +832,9 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP && Event->CommandData) { // Free the memory extra event allocated for profiling purposed. - struct command_buffer_profiling_t *ProfilingPtr = - static_cast(Event->CommandData); - urUSMFree(Event->Context, (void *)ProfilingPtr->Timestamps); + command_buffer_profiling_t *ProfilingPtr = + static_cast(Event->CommandData); + delete[] ProfilingPtr->Timestamps; delete ProfilingPtr; Event->CommandData = nullptr; } From 1db8fbf4f0c775719cdd2b42de58ddb978ea4e47 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 13 Nov 2023 12:02:23 +0000 Subject: [PATCH 120/138] Fixes event leak --- source/adapters/level_zero/command_buffer.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 7f3f514d9d..579be4447d 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -939,7 +939,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create a command-list to signal RetEvent on completion ur_command_list_ptr_t SignalCommandList{}; if (Event) { - ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent; UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, false, false)); @@ -954,9 +953,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // before completing the command buffer 
execution, and then attach this // memory to the event returned to users to allow to allow the profiling // engine to recover these timestamps. - UR_CALL(createEventAndAssociateQueue( - Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false)); - command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); Profiling->NumEvents = WaitEventList.size(); @@ -966,15 +962,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ZE2UR_CALL(zeCommandListAppendQueryKernelTimestamps, (SignalCommandList->first, WaitEventList.size(), WaitEventList.data(), (void *)Profiling->Timestamps, 0, - SyncEvent->ZeEvent, 1, + RetEvent->ZeEvent, 1, &(CommandBuffer->SignalEvent->ZeEvent))); RetEvent->CommandData = static_cast(Profiling); + } else { + ZE2UR_CALL(zeCommandListAppendBarrier, + (SignalCommandList->first, RetEvent->ZeEvent, 1, + &(CommandBuffer->SignalEvent->ZeEvent))); } - - ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(SyncEvent->ZeEvent))); } // Execution our command-lists asynchronously From 35b6a5eab04d57040eb0cf88866c8ef475f27bc9 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 15 Nov 2023 12:12:29 +0000 Subject: [PATCH 121/138] Moves wait-event reset from main CL to suffix CL --- source/adapters/level_zero/command_buffer.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 579be4447d..bbe49cb705 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -498,11 +498,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { (CommandBuffer->ZeCommandList, CommandBuffer->SignalEvent->ZeEvent, NumEvents, WaitEventList.data())); - // Reset the wait-event for the UR command-buffer that is signalled when its - // submission dependencies have been satisfied. 
- ZE2UR_CALL(zeCommandListAppendEventReset, - (CommandBuffer->ZeCommandList, CommandBuffer->WaitEvent->ZeEvent)); - // Close the command list and have it ready for dispatch. ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandList)); return UR_RESULT_SUCCESS; @@ -938,10 +933,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_event_handle_t RetEvent{}; // Create a command-list to signal RetEvent on completion ur_command_list_ptr_t SignalCommandList{}; - if (Event) { - UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, - false, false)); + UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, + false, false)); + // Reset the wait-event for the UR command-buffer that is signalled when its + // submission dependencies have been satisfied. + ZE2UR_CALL(zeCommandListAppendEventReset, + (SignalCommandList->first, CommandBuffer->WaitEvent->ZeEvent)); + if (Event) { UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, SignalCommandList, false)); @@ -968,8 +967,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( RetEvent->CommandData = static_cast(Profiling); } else { ZE2UR_CALL(zeCommandListAppendBarrier, - (SignalCommandList->first, RetEvent->ZeEvent, 1, - &(CommandBuffer->SignalEvent->ZeEvent))); + (SignalCommandList->first, RetEvent->ZeEvent, 1, + &(CommandBuffer->SignalEvent->ZeEvent))); } } From d4dca529538c209114d4f03b288dd7bf5ba68ea2 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Tue, 9 Jan 2024 15:23:50 +0100 Subject: [PATCH 122/138] Add OpenSSF Scorecard badge to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 226dbfbfe5..1e9c47b755 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![Bandit](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml) 
[![Coverity](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) [![codecov.io](https://codecov.io/github/oneapi-src/unified-runtime/coverage.svg?branch=main)](https://codecov.io/github/oneapi-src/unified-runtime?branch=master) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-runtime/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-runtime) From 929c9789c354d71ecf1b1fe0091767d075298ee4 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Tue, 9 Jan 2024 17:12:30 +0100 Subject: [PATCH 123/138] [Security] add permissions to workflows --- .github/workflows/bandit.yml | 3 +++ .github/workflows/cmake.yml | 3 +++ .github/workflows/codeql.yml | 3 +++ .github/workflows/coverage.yml | 3 +++ .github/workflows/coverity.yml | 2 ++ .github/workflows/e2e_nightly.yml | 3 +++ .github/workflows/nightly.yml | 3 +++ .github/workflows/prerelease.yml | 3 +++ 8 files changed, 23 insertions(+) diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index e356e0bdb4..bb04c5056c 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -7,6 +7,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: linux: name: Bandit diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e697dd6aaf..0976922723 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -6,6 +6,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: ubuntu-build: name: Build - Ubuntu diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index bf312be396..4806c23d3c 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -6,6 +6,9 @@ concurrency: group: ${{ 
github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: analyze-ubuntu: name: Analyze on Ubuntu diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 731f7ea320..50f28151ac 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -2,6 +2,9 @@ name: Coverage on: [push, pull_request] +permissions: + contents: read + jobs: ubuntu-build: name: Build - Ubuntu diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ab065ee77e..3b3853c604 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -23,6 +23,8 @@ env: COVERITY_SCAN_BRANCH_PATTERN: "main" TRAVIS_BRANCH: ${{ github.ref_name }} +permissions: + contents: read jobs: linux: diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml index e3cda49245..c987b1f0ef 100644 --- a/.github/workflows/e2e_nightly.yml +++ b/.github/workflows/e2e_nightly.yml @@ -5,6 +5,9 @@ on: # Run every day at 23:00 UTC - cron: '0 23 * * *' +permissions: + contents: read + jobs: e2e-build-hw: name: Build SYCL, UR, run E2E diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 4a81c94e8f..bcff394049 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -5,6 +5,9 @@ on: # Run every day at 23:00 UTC - cron: '0 23 * * *' +permissions: + contents: read + jobs: long-fuzz-test: name: Run long fuzz tests diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 882b06985a..80330b2b4d 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -6,6 +6,9 @@ on: # At 23:00 on Friday, GitHub actions schedule is in UTC time. 
- cron: 0 23 * * 5 +permissions: + contents: read + jobs: weekly-prerelease: runs-on: ubuntu-latest From d358b972dc99a77001084bd97d92e71ae48e6a87 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Mon, 27 Nov 2023 09:26:20 -0800 Subject: [PATCH 124/138] [UR][L0] Make urPlatformGetBackendOption return -ze-opt-level=2 for -O2 Signed-off-by: Sarnie, Nick --- source/adapters/level_zero/platform.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index b7680b1638..e67c84f3e4 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -433,8 +433,8 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { // Returns plugin specific backend option. // Current support is only for optimization options. // Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=1' for frontend_option = -O1 or -O2. -// Return '-ze-opt-level=2' for frontend_option = -O3. +// Return '-ze-opt-level=1' for frontend_option = -O1. +// Return '-ze-opt-level=2' for frontend_option = -O2 or -O3. // Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for // frontend_option=-ftarget-compile-fast. 
UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( @@ -457,11 +457,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *PlatformOption = "-ze-opt-disable"; return UR_RESULT_SUCCESS; } - if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv) { + if (FrontendOption == "-O1"sv) { *PlatformOption = "-ze-opt-level=1"; return UR_RESULT_SUCCESS; } - if (FrontendOption == "-O3"sv) { + if (FrontendOption == "-O2"sv || FrontendOption == "-O3"sv) { *PlatformOption = "-ze-opt-level=2"; return UR_RESULT_SUCCESS; } From b3a1d52d830f5c7533ce3d861eada6035fad5d98 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Tue, 28 Nov 2023 07:00:04 -0800 Subject: [PATCH 125/138] also map O1 to -ze-opt-level=2 Signed-off-by: Sarnie, Nick --- source/adapters/level_zero/platform.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index e67c84f3e4..335a920294 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -433,8 +433,7 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { // Returns plugin specific backend option. // Current support is only for optimization options. // Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=1' for frontend_option = -O1. -// Return '-ze-opt-level=2' for frontend_option = -O2 or -O3. +// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. // Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for // frontend_option=-ftarget-compile-fast. 
UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( @@ -457,11 +456,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( *PlatformOption = "-ze-opt-disable"; return UR_RESULT_SUCCESS; } - if (FrontendOption == "-O1"sv) { - *PlatformOption = "-ze-opt-level=1"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O2"sv || FrontendOption == "-O3"sv) { + if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || + FrontendOption == "-O3"sv) { *PlatformOption = "-ze-opt-level=2"; return UR_RESULT_SUCCESS; } From da50cd775f7ba34c5340da07a45c1e5aa83c5099 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 10 Jan 2024 12:16:30 +0000 Subject: [PATCH 126/138] [CL] Fix Windows CMake, remove explicit Linux path Fixes #1171 by replacing an explicit Linux path used as the value for the internal `OpenCLICDLoaderLibrary` CMake variable and replacing it with the `OpenCL::OpenCL` target name. This is a target alias provided by the https://github.com/KhronosGroup/OpenCL-ICD-Loader.
--- source/adapters/opencl/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index 96d85bf117..c78e75e94f 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -67,16 +67,14 @@ if(UR_OPENCL_ICD_LOADER_LIBRARY) set(OpenCLICDLoaderLibrary ${UR_OPENCL_ICD_LOADER_LIBRARY}) else() find_package(OpenCL 3.0) - if(OpenCL_FOUND) - set(OpenCLICDLoaderLibrary OpenCL::OpenCL) - else() + if(NOT OpenCL_FOUND) FetchContent_Declare(OpenCL-ICD-Loader GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-ICD-Loader.git" GIT_TAG main ) FetchContent_MakeAvailable(OpenCL-ICD-Loader) - set(OpenCLICDLoaderLibrary ${PROJECT_BINARY_DIR}/lib/libOpenCL.so) endif() + set(OpenCLICDLoaderLibrary OpenCL::OpenCL) endif() message(STATUS "OpenCL Include Directory: ${OpenCLIncludeDirectory}") From 0a2b96a0a393f025d1a23736101968f6174dbd29 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Fri, 8 Dec 2023 13:02:39 +0000 Subject: [PATCH 127/138] [HIP] Allow custom location of ROCm components Allow custom location of HIP/HSA include and HIP library directories using the following CMake variables: * `SYCL_BUILD_PI_HIP_INCLUDE_DIR`, * `SYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR`, * `SYCL_BUILD_PI_HIP_LIB_DIR`.
--- README.md | 4 ++++ source/adapters/hip/CMakeLists.txt | 14 +++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1e9c47b755..fc6fa4822e 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,10 @@ List of options provided by CMake: | UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD | | UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` | | UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` | +| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` | +| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` | +| UR_HIP_HSA_INCLUDE_DIR | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include""` | +| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib""` | ### Additional make targets diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 90a235ddeb..01b3fc8d9b 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -10,14 +10,14 @@ set(UR_HIP_PLATFORM "AMD" CACHE STRING "UR HIP platform, AMD or NVIDIA") # Set default ROCm installation directory set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") - -set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include") - +# Allow custom location of HIP/HSA include and HIP library directories +set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH + "Custom ROCm HIP include dir") set(UR_HIP_HSA_INCLUDE_DIRS - "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include") - -# Set HIP lib dir -set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib") + "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH + "Custom ROCm HSA include dir") +set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE 
FILEPATH + "Custom ROCm HIP library dir") # Check if HIP library path exists (AMD platform only) if("${UR_HIP_PLATFORM}" STREQUAL "AMD") From 0dd9a24ab91562294b88dda80694dc4f02cb7d5c Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Thu, 14 Dec 2023 08:07:22 -0500 Subject: [PATCH 128/138] Force the setting of custom vars --- source/adapters/hip/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 01b3fc8d9b..21156ecf62 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -17,7 +17,7 @@ set(UR_HIP_HSA_INCLUDE_DIRS "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH "Custom ROCm HSA include dir") set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE FILEPATH - "Custom ROCm HIP library dir") + "Custom ROCm HIP library dir" FORCE) # Check if HIP library path exists (AMD platform only) if("${UR_HIP_PLATFORM}" STREQUAL "AMD") From 7cbc2eca2a995536b463696edb556debd57b806d Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Mon, 18 Dec 2023 10:10:27 +0100 Subject: [PATCH 129/138] Update source/adapters/hip/CMakeLists.txt Co-authored-by: Andrey Alekseenko --- source/adapters/hip/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 21156ecf62..01b3fc8d9b 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -17,7 +17,7 @@ set(UR_HIP_HSA_INCLUDE_DIRS "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH "Custom ROCm HSA include dir") set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE FILEPATH - "Custom ROCm HIP library dir" FORCE) + "Custom ROCm HIP library dir") # Check if HIP library path exists (AMD platform only) if("${UR_HIP_PLATFORM}" STREQUAL "AMD") From c71a83d290fa575dffe6bf346bf6338996b703be Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: 
Tue, 19 Dec 2023 06:38:44 -0500 Subject: [PATCH 130/138] Use PATH --- source/adapters/hip/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 01b3fc8d9b..56e849593f 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -11,12 +11,12 @@ set(UR_HIP_PLATFORM "AMD" CACHE STRING "UR HIP platform, AMD or NVIDIA") # Set default ROCm installation directory set(UR_HIP_ROCM_DIR "/opt/rocm" CACHE STRING "ROCm installation dir") # Allow custom location of HIP/HSA include and HIP library directories -set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH +set(UR_HIP_INCLUDE_DIR "${UR_HIP_ROCM_DIR}/include" CACHE PATH "Custom ROCm HIP include dir") set(UR_HIP_HSA_INCLUDE_DIRS "${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include" CACHE FILEPATH "Custom ROCm HSA include dir") -set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE FILEPATH +set(UR_HIP_LIB_DIR "${UR_HIP_ROCM_DIR}/lib" CACHE PATH "Custom ROCm HIP library dir") # Check if HIP library path exists (AMD platform only) From a1e8fae2d2eb5a4bd0d57ca56e6c579aa4dd842a Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Tue, 9 Jan 2024 08:55:11 -0500 Subject: [PATCH 131/138] Check for the new (6.0.0) HSA include location --- source/adapters/hip/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 56e849593f..90162eb2de 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -31,7 +31,8 @@ if("${UR_HIP_PLATFORM}" STREQUAL "AMD") " please check ROCm installation.") endif() - # Check if HSA include path exists + # Check if HSA include path exists. In rocm-6.0.0 the layout of HSA + # directory has changed, check for the new location as well. 
foreach(D IN LISTS UR_HIP_HSA_INCLUDE_DIRS) if(EXISTS "${D}") set(UR_HIP_HSA_INCLUDE_DIR "${D}") From ee09bc0221551c9a46f10ae000d0040d9354688e Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Wed, 10 Jan 2024 15:16:04 +0100 Subject: [PATCH 132/138] Add OpenCL to Coverity build --- .github/workflows/coverity.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index efde6f0605..9e46aae20a 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -53,6 +53,7 @@ jobs: -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so -DUR_BUILD_ADAPTER_NATIVE_CPU=ON -DUR_BUILD_ADAPTER_HIP=ON + -DUR_BUILD_ADAPTER_OPENCL=ON - name: Run Coverity run: | From 66d52ace2d8ca79c3274c805caa495964c123680 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Wed, 10 Jan 2024 14:16:12 +0000 Subject: [PATCH 133/138] Also update README with UR_HIP_HSA_INCLUDE_DIRS change --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fc6fa4822e..57536f237a 100644 --- a/README.md +++ b/README.md @@ -139,8 +139,8 @@ List of options provided by CMake: | UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` | | UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` | | UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` | -| UR_HIP_HSA_INCLUDE_DIR | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include""` | -| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib""` | +| UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` | +| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | 
`${UR_HIP_ROCM_DIR}/lib` | ### Additional make targets From bdf29183609b6d75b60666003eaae18b82c9726b Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Thu, 2 Nov 2023 16:25:03 +0100 Subject: [PATCH 134/138] [UR] Add adapter leak-checking tests --- test/layers/validation/leaks.cpp | 13 ++++++++++ test/layers/validation/leaks.out.match | 36 ++++++++++++++++++++------ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/test/layers/validation/leaks.cpp b/test/layers/validation/leaks.cpp index e32aeafc89..794e8a3ef0 100644 --- a/test/layers/validation/leaks.cpp +++ b/test/layers/validation/leaks.cpp @@ -11,6 +11,19 @@ TEST_F(urTest, testUrAdapterGetLeak) { ASSERT_NE(nullptr, adapter); } +TEST_F(urTest, testUrAdapterRetainLeak) { + ur_adapter_handle_t adapter = nullptr; + ASSERT_EQ(urAdapterGet(1, &adapter, nullptr), UR_RESULT_SUCCESS); + ASSERT_NE(nullptr, adapter); + ASSERT_EQ(urAdapterRetain(adapter), UR_RESULT_SUCCESS); +} + +TEST_F(urTest, testUrAdapterRetainNonexistent) { + ur_adapter_handle_t adapter = (ur_adapter_handle_t)0xBEEF; + ASSERT_EQ(urAdapterRetain(adapter), UR_RESULT_SUCCESS); + ASSERT_NE(nullptr, adapter); +} + TEST_F(valDeviceTest, testUrContextCreateLeak) { ur_context_handle_t context = nullptr; ASSERT_EQ(urContextCreate(1, &device, nullptr, &context), diff --git a/test/layers/validation/leaks.out.match b/test/layers/validation/leaks.out.match index 9fac722527..90f8713765 100644 --- a/test/layers/validation/leaks.out.match +++ b/test/layers/validation/leaks.out.match @@ -1,34 +1,53 @@ +.* +\[ RUN \] urTest.testUrAdapterGetLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] urTest.testUrAdapterRetainLeak +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 +\[ERROR\]: Retained 2 reference\(s\) 
to handle [0-9xa-fA-F]+ +\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: +.* +\[ RUN \] urTest.testUrAdapterRetainNonexistent +\[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ +.* +\[ RUN \] valDeviceTest.testUrContextCreateLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextRetainLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextRetainNonexistent +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ -(.*) +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 +.* +\[ RUN \] valDeviceTest.testUrContextCreateSuccess \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextRetainSuccess \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed 
to 0 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextReleaseLeak \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 @@ -37,11 +56,12 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* +\[ RUN \] valDeviceTest.testUrContextReleaseNonexistent \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +.* From d6930c70bd7406ef406d8c7d609be2efbda70b3f Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Mon, 6 Nov 2023 17:58:15 +0100 Subject: [PATCH 135/138] Add ignore tag to the match.py script --- cmake/match.py | 167 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 111 insertions(+), 56 deletions(-) diff --git a/cmake/match.py b/cmake/match.py index c07cfbc384..3f3f4faff3 100755 --- a/cmake/match.py +++ b/cmake/match.py @@ -5,77 +5,132 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# check if all lines match in a file -# lines in a match file can contain regex inside of double curly braces {{}} +# Check if all input file content matches match file content. +# Lines in a match file can contain regex inside of double curly braces {{}}. +# Regex patterns are limited to single line. 
+# +# List of available special tags: +# {{OPT}} - makes content in the same line as the tag optional +# {{IGNORE}} - ignores all content until the next successfully matched line or the end of the input +# Special tags are mutually exclusive and are expected to be located at the start of a line. +# +import os import sys import re +from enum import Enum ## @brief print the whole content of input and match files -def print_content(input_lines, match_lines): +def print_content(input_lines, match_lines, ignored_lines): print("--- Input Lines " + "-" * 64) print("".join(input_lines).strip()) print("--- Match Lines " + "-" * 64) print("".join(match_lines).strip()) + print("--- Ignored Lines " + "-" * 62) + print("".join(ignored_lines).strip()) print("-" * 80) -if len(sys.argv) != 3: - print("Usage: python match.py ") - sys.exit(1) - -input_file = sys.argv[1] -match_file = sys.argv[2] - -with open(input_file, 'r') as input, open(match_file, 'r') as match: - input_lines = input.readlines() - match_lines = match.readlines() - -if len(match_lines) < len(input_lines): - print(f"Match length < input length (input: {len(input_lines)}, match: {len(match_lines)})") - print_content(input_lines, match_lines) - sys.exit(1) - -input_idx = 0 -opt = "{{OPT}}" -for i, match_line in enumerate(match_lines): - if match_line.startswith(opt): - optional_line = True - match_line = match_line[len(opt):] - else: - optional_line = False - - # split into parts at {{ }} - match_parts = re.split(r'\{{(.*?)\}}', match_line.strip()) - pattern = "" - for j, part in enumerate(match_parts): - if j % 2 == 0: - pattern += re.escape(part) - else: - pattern += part +## @brief print the incorrect match line +def print_incorrect_match(match_line, present, expected): + print("Line " + str(match_line) + " does not match") + print("is: " + present) + print("expected: " + expected) - # empty input file or end of input file, from now on match file must be optional - if not input_lines: - if optional_line is True: 
- continue - else: - print("End of input file or empty file.") - print("expected: " + match_line.strip()) + +## @brief pattern matching script status values +class Status(Enum): + INPUT_END = 1 + MATCH_END = 2 + INPUT_AND_MATCH_END = 3 + PROCESSING = 4 + + +## @brief check matching script status +def check_status(input_lines, match_lines): + if not input_lines and not match_lines: + return Status.INPUT_AND_MATCH_END + elif not input_lines: + return Status.INPUT_END + elif not match_lines: + return Status.MATCH_END + return Status.PROCESSING + + +## @brief pattern matching tags. +## Tags are expected to be at the start of the line. +class Tag(Enum): + OPT = "{{OPT}}" # makes the line optional + IGNORE = "{{IGNORE}}" # ignores all input until next match or end of input file + + +## @brief main function for the match file processing script +def main(): + if len(sys.argv) != 3: + print("Usage: python match.py ") + sys.exit(1) + + input_file = sys.argv[1] + match_file = sys.argv[2] + + with open(input_file, 'r') as input, open(match_file, 'r') as match: + input_lines = input.readlines() + match_lines = match.readlines() + + ignored_lines = [] + + input_idx = 0 + match_idx = 0 + tags_in_effect = [] + while True: + # check file status + status = check_status(input_lines[input_idx:], match_lines[match_idx:]) + if (status == Status.INPUT_AND_MATCH_END) or (status == Status.MATCH_END and Tag.IGNORE in tags_in_effect): + # all lines matched or the last line in match file is an ignore tag + sys.exit(0) + elif status == Status.MATCH_END: + print_incorrect_match(match_idx + 1, input_lines[input_idx].strip(), ""); + print_content(input_lines, match_lines, ignored_lines) sys.exit(1) - input_line = input_lines[input_idx].strip() - if not re.fullmatch(pattern, input_line): - if optional_line is True: - continue + input_line = input_lines[input_idx].strip() if input_idx < len(input_lines) else "" + match_line = match_lines[match_idx] + + # check for tags + if 
match_line.startswith(Tag.OPT.value): + tags_in_effect.append(Tag.OPT) + match_line = match_line[len(Tag.OPT.value):] + elif match_line.startswith(Tag.IGNORE.value): + tags_in_effect.append(Tag.IGNORE) + match_idx += 1 + continue # line with ignore tag should be skipped + + # split into parts at {{ }} + match_parts = re.split(r'\{{(.*?)\}}', match_line.strip()) + pattern = "" + for j, part in enumerate(match_parts): + if j % 2 == 0: + pattern += re.escape(part) + else: + pattern += part + + # match or process tags + if re.fullmatch(pattern, input_line): + input_idx += 1 + match_idx += 1 + tags_in_effect = [] + elif Tag.OPT in tags_in_effect: + match_idx += 1 + tags_in_effect.remove(Tag.OPT) + elif Tag.IGNORE in tags_in_effect: + ignored_lines.append(input_line + os.linesep) + input_idx += 1 else: - print("Line " + str(i+1) + " does not match") - print("is: " + input_line) - print("expected: " + match_line.strip()) - print_content(input_lines, match_lines) + print_incorrect_match(match_idx + 1, input_line, match_line.strip()) + print_content(input_lines, match_lines, ignored_lines) sys.exit(1) - else: - if (input_idx == len(input_lines) - 1): - input_lines = [] - else: - input_idx += 1 + + +if __name__ == "__main__": + main() From 2e10197050f9e1963d2771aefd015738f81d50f8 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Tue, 7 Nov 2023 12:39:07 +0100 Subject: [PATCH 136/138] Refactor leak checking match tests --- test/layers/validation/CMakeLists.txt | 41 +++-- test/layers/validation/leaks.out.match | 134 ++++++++--------- test/layers/validation/leaks_mt.out.match | 175 +++++++++++----------- 3 files changed, 187 insertions(+), 163 deletions(-) diff --git a/test/layers/validation/CMakeLists.txt b/test/layers/validation/CMakeLists.txt index 85d639d196..944202e0d2 100644 --- a/test/layers/validation/CMakeLists.txt +++ b/test/layers/validation/CMakeLists.txt @@ -4,20 +4,20 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception set(UR_VALIDATION_TEST_DIR 
${CMAKE_CURRENT_SOURCE_DIR}) +set(VAL_TEST_PREFIX validation_test) -function(add_validation_test name) - set(TEST_TARGET_NAME validation_test-${name}) - add_ur_executable(${TEST_TARGET_NAME} +function(add_validation_test_executable name) + add_ur_executable(${VAL_TEST_PREFIX}-${name} ${ARGN}) - target_link_libraries(${TEST_TARGET_NAME} + target_link_libraries(${VAL_TEST_PREFIX}-${name} PRIVATE ${PROJECT_NAME}::loader ${PROJECT_NAME}::headers ${PROJECT_NAME}::testing GTest::gtest_main) - add_test(NAME ${name} - COMMAND ${TEST_TARGET_NAME} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +endfunction() + +function(set_validation_test_properties name) set_tests_properties(${name} PROPERTIES LABELS "validation") set_property(TEST ${name} PROPERTY ENVIRONMENT "UR_ENABLE_LAYERS=UR_LAYER_FULL_VALIDATION" @@ -25,11 +25,28 @@ function(add_validation_test name) "UR_LOG_VALIDATION=level:debug\;flush:debug\;output:stdout") endfunction() -function(add_validation_match_test name match_file) - add_validation_test(${name} ${ARGN}) - file(READ ${match_file} MATCH_STRING) - set_tests_properties(${name} PROPERTIES - PASS_REGULAR_EXPRESSION "${MATCH_STRING}") +function(add_validation_test name) + add_validation_test_executable(${name} ${ARGN}) + + add_test(NAME ${name} + COMMAND ${VAL_TEST_PREFIX}-${name} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_validation_test_properties(${name}) +endfunction() + +function(add_validation_match_test name) + add_validation_test_executable(${name} ${ARGN}) + + add_test(NAME ${name} + COMMAND ${CMAKE_COMMAND} + -D MODE=stdout + -D TEST_FILE=$ + -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}.out.match + -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set_validation_test_properties(${name}) endfunction() add_validation_test(parameters parameters.cpp) diff --git a/test/layers/validation/leaks.out.match b/test/layers/validation/leaks.out.match index 90f8713765..2a36a22263 100644 --- 
a/test/layers/validation/leaks.out.match +++ b/test/layers/validation/leaks.out.match @@ -1,67 +1,67 @@ -.* -\[ RUN \] urTest.testUrAdapterGetLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] urTest.testUrAdapterRetainLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] urTest.testUrAdapterRetainNonexistent -\[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ -.* -\[ RUN \] valDeviceTest.testUrContextCreateLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] valDeviceTest.testUrContextRetainLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] valDeviceTest.testUrContextRetainNonexistent -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -.* -\[ RUN \] valDeviceTest.testUrContextCreateSuccess -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference 
count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -.* -\[ RUN \] valDeviceTest.testUrContextRetainSuccess -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -.* -\[ RUN \] valDeviceTest.testUrContextReleaseLeak -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* -\[ RUN \] valDeviceTest.testUrContextReleaseNonexistent -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -.* +{{IGNORE}} +[ RUN ] urTest.testUrAdapterGetLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] urTest.testUrAdapterRetainLeak +[DEBUG]: Reference count 
for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[ERROR]: Retained 2 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] urTest.testUrAdapterRetainNonexistent +[ERROR]: Attempting to retain nonexistent handle {{[0-9xa-fA-F]+}} +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextCreateLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextRetainLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 2 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextRetainNonexistent +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[ERROR]: Attempting to retain nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextCreateSuccess +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextRetainSuccess +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle 
{{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextReleaseLeak +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] valDeviceTest.testUrContextReleaseNonexistent +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} diff --git a/test/layers/validation/leaks_mt.out.match b/test/layers/validation/leaks_mt.out.match index 86de1e1d76..f1bd32f8b5 100644 --- a/test/layers/validation/leaks_mt.out.match +++ b/test/layers/validation/leaks_mt.out.match @@ -1,84 +1,91 @@ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 3 
reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 4 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 5 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 6 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 7 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 8 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 9 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 9 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -2 -\[ERROR\]: Attempting to 
release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -3 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -4 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -5 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -6 -\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -7 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained -7 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ 
changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 -\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 -\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ -\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: -(.*) +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainLeakMt/0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 3 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 3 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainLeakMt/1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 2 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 3 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 4 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} 
changed to 5 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 6 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 7 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 8 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 9 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 9 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextReleaseLeakMt/0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextReleaseLeakMt/1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -1 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -2 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -3 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -4 
+[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -5 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -6 +[ERROR]: Attempting to release nonexistent handle {{[0-9xa-fA-F]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to -7 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained -7 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainReleaseLeakMt/0 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} +[ RUN ] threadCountForValDeviceTest/valDeviceTestMultithreaded.testUrContextRetainReleaseLeakMt/1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} 
+[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to {{[1-9]+}} +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 1 +[DEBUG]: Reference count for handle {{[0-9xa-fA-F]+}} changed to 0 +[ERROR]: Retained 1 reference(s) to handle {{[0-9xa-fA-F]+}} +[ERROR]: Handle {{[0-9xa-fA-F]+}} was recorded for first time here: +{{IGNORE}} From 2317492f56717edf84ffd432816c0b2e5cf06c80 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Wed, 13 Dec 2023 12:10:21 +0100 Subject: [PATCH 137/138] [L0][HIP][CUDA] Suppress aborted adapter match tests When tests are aborted or failed at the assertion, the match script receives "Aborted" or "Segmentation fault" as an input and compares it with the match file. Previous match script allowed those tests to pass despite not matching this input. 
--- test/conformance/enqueue/enqueue_adapter_cuda.match | 1 + test/conformance/enqueue/enqueue_adapter_hip.match | 2 +- test/conformance/event/event_adapter_level_zero.match | 2 +- test/conformance/kernel/kernel_adapter_hip.match | 2 +- test/conformance/program/program_adapter_hip.match | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/conformance/enqueue/enqueue_adapter_cuda.match b/test/conformance/enqueue/enqueue_adapter_cuda.match index f6f0d3e591..2392247314 100644 --- a/test/conformance/enqueue/enqueue_adapter_cuda.match +++ b/test/conformance/enqueue/enqueue_adapter_cuda.match @@ -56,3 +56,4 @@ {{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/NVIDIA_CUDA_BACKEND___{{.*}}___pitch__1__width__1__height__1 {{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT {{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/enqueue/enqueue_adapter_hip.match b/test/conformance/enqueue/enqueue_adapter_hip.match index 9d48681c1a..fe890b62b5 100644 --- a/test/conformance/enqueue/enqueue_adapter_hip.match +++ b/test/conformance/enqueue/enqueue_adapter_hip.match @@ -1,4 +1,4 @@ -{{OPT}}Segmentation Fault +{{OPT}}{{Segmentation fault|Aborted}} {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/event/event_adapter_level_zero.match b/test/conformance/event/event_adapter_level_zero.match index a316044ab1..c29f67cbc6 100644 --- a/test/conformance/event/event_adapter_level_zero.match +++ b/test/conformance/event/event_adapter_level_zero.match @@ -1,4 +1,4 @@ 
{{OPT}}urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE {{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED {{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT -{{OPT}} Segmentation fault +{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/kernel/kernel_adapter_hip.match b/test/conformance/kernel/kernel_adapter_hip.match index 96d579f088..97864c4e70 100644 --- a/test/conformance/kernel/kernel_adapter_hip.match +++ b/test/conformance/kernel/kernel_adapter_hip.match @@ -1,4 +1,4 @@ -{{OPT}}Segmentation Fault +{{OPT}}{{Segmentation fault|Aborted}} {{OPT}}urKernelGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS {{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_FUNCTION_NAME {{OPT}}urKernelGetInfoTest.InvalidSizeSmall/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_ARGS diff --git a/test/conformance/program/program_adapter_hip.match b/test/conformance/program/program_adapter_hip.match index 67f98ec2f7..1f95931e09 100644 --- a/test/conformance/program/program_adapter_hip.match +++ b/test/conformance/program/program_adapter_hip.match @@ -1,4 +1,4 @@ -{{OPT}}Segmentation Fault +{{OPT}}{{Segmentation fault|Aborted}} {{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/AMD_HIP_BACKEND___{{.*}}_ {{OPT}}urProgramGetBuildInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_BUILD_INFO_BINARY_TYPE From 8c632085a6bf6b37823734c1c2de31221755d34b Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Mon, 11 Dec 2023 11:11:17 +0100 Subject: [PATCH 138/138] [OpenCL] Fix suffixes in adapter match tests --- .../enqueue/enqueue_adapter_opencl.match | 70 +++++++++---------- 
.../kernel/kernel_adapter_opencl.match | 10 +-- .../memory/memory_adapter_opencl.match | 2 +- test/conformance/usm/usm_adapter_opencl.match | 32 ++++----- 4 files changed, 57 insertions(+), 57 deletions(-) diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match index 54a5ee3762..a034083c87 100644 --- a/test/conformance/enqueue/enqueue_adapter_opencl.match +++ b/test/conformance/enqueue/enqueue_adapter_opencl.match @@ -1,35 +1,35 @@ -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}}_ -{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}_ +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} 
+{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DTestWithParam.Success/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/Intel_R__OpenCL___{{.*}} 
+{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}} +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/kernel/kernel_adapter_opencl.match b/test/conformance/kernel/kernel_adapter_opencl.match index 799225be19..9a71945c45 100644 --- a/test/conformance/kernel/kernel_adapter_opencl.match +++ b/test/conformance/kernel/kernel_adapter_opencl.match @@ -1,5 +1,5 @@ -urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.Success/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__OpenCL___{{.*}}_ -urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__OpenCL___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.Success/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__OpenCL___{{.*}} +urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/memory/memory_adapter_opencl.match b/test/conformance/memory/memory_adapter_opencl.match index c01e55d804..23dfbbae8c 100644 --- a/test/conformance/memory/memory_adapter_opencl.match +++ b/test/conformance/memory/memory_adapter_opencl.match @@ -1 +1 @@ -urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}}_ +urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match index 16211ba8e7..3e729141b9 100644 --- a/test/conformance/usm/usm_adapter_opencl.match +++ 
b/test/conformance/usm/usm_adapter_opencl.match @@ -12,24 +12,24 @@ urUSMHostAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEn urUSMHostAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UsePoolEnabled -urUSMPoolCreateTest.Success/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.SuccessWithFlag/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidNullPointerPoolDesc/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidNullPointerPool/Intel_R__OpenCL___{{.*}}_ -urUSMPoolCreateTest.InvalidEnumerationFlags/Intel_R__OpenCL___{{.*}}_ +urUSMPoolCreateTest.Success/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.SuccessWithFlag/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidNullPointerPoolDesc/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidNullPointerPool/Intel_R__OpenCL___{{.*}} +urUSMPoolCreateTest.InvalidEnumerationFlags/Intel_R__OpenCL___{{.*}} urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_CONTEXT urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__OpenCL___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidEnumerationProperty/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeZero/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}}_ -urUSMPoolDestroyTest.Success/Intel_R__OpenCL___{{.*}}_ -urUSMPoolDestroyTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}_ 
-urUSMPoolRetainTest.Success/Intel_R__OpenCL___{{.*}}_ -urUSMPoolRetainTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}}_ +urUSMPoolGetInfoTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidEnumerationProperty/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidSizeZero/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidNullPointerPropValue/Intel_R__OpenCL___{{.*}} +urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__OpenCL___{{.*}} +urUSMPoolDestroyTest.Success/Intel_R__OpenCL___{{.*}} +urUSMPoolDestroyTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}} +urUSMPoolRetainTest.Success/Intel_R__OpenCL___{{.*}} +urUSMPoolRetainTest.InvalidNullHandlePool/Intel_R__OpenCL___{{.*}} urUSMSharedAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__OpenCL___{{.*}}___UsePoolEnabled