diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 953c9fb024..e697dd6aaf 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -110,7 +110,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "python|umf|loader|validation|tracing|unit|urtrace" + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" fuzztest-build: name: Build and run quick fuzztest scenarios @@ -346,7 +346,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "python|umf|loader|validation|tracing|unit|urtrace" + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" macos-build: diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index ad1ac23e7a..731f7ea320 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -65,7 +65,7 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "python|umf|loader|validation|tracing|unit|urtrace" + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" - name: Quick Coverage Info working-directory: ${{github.workspace}}/build diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index de327f92c7..ab065ee77e 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -9,9 +9,10 @@ name: coverity-unified-runtime # It runs static analysis build - Coverity. It requires special token (set in CI's secret). 
on: - push: - branches: ["main"] workflow_dispatch: + schedule: + # Run every day at 22:00 UTC + - cron: '0 22 * * *' env: WORKDIR: ${{ github.workspace }} @@ -22,10 +23,11 @@ env: COVERITY_SCAN_BRANCH_PATTERN: "main" TRAVIS_BRANCH: ${{ github.ref_name }} + jobs: linux: name: Coverity - runs-on: ubuntu-latest + runs-on: coverity steps: - name: Clone the git repo @@ -35,7 +37,20 @@ jobs: run: pip install -r third_party/requirements.txt - name: Configure CMake - run: cmake -B $WORKDIR/build -DUR_ENABLE_TRACING=ON -DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON -DUMF_ENABLE_POOL_TRACKING=ON + run: > + cmake + -B $WORKDIR/build + -DUR_ENABLE_TRACING=ON + -DUR_DEVELOPER_MODE=ON + -DUR_BUILD_TESTS=ON + -DUMF_ENABLE_POOL_TRACKING=ON + -DUR_FORMAT_CPP_STYLE=ON + -DCMAKE_BUILD_TYPE=Debug + -DUR_BUILD_ADAPTER_L0=ON + -DUR_BUILD_ADAPTER_CUDA=ON + -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so + -DUR_BUILD_ADAPTER_NATIVE_CPU=ON + -DUR_BUILD_ADAPTER_HIP=ON - name: Run Coverity run: | diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml index b4cff2d1d4..eebb1f7bfa 100644 --- a/.github/workflows/e2e_nightly.yml +++ b/.github/workflows/e2e_nightly.yml @@ -30,7 +30,6 @@ jobs: - name: Checkout UR uses: actions/checkout@v4 with: - ref: adapters path: ur-repo - name: Checkout SYCL diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 527f641a51..4a81c94e8f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -17,9 +17,6 @@ jobs: steps: - uses: actions/checkout@v3 - # with-ref part to be removed after merging 'adapters' branch with 'main' - with: - ref: adapters - name: Install pip packages run: pip install -r third_party/requirements.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bad7a6ca5..80a9f64ea7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UR_USE_TSAN "enable ThreadSanitizer" OFF) 
option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) -option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" OFF) +option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON) option(UR_BUILD_ADAPTER_L0 "Build the Level-Zero adapter" OFF) option(UR_BUILD_ADAPTER_OPENCL "Build the OpenCL adapter" OFF) option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF) diff --git a/include/ur.py b/include/ur.py deleted file mode 100644 index 09b7955e07..0000000000 --- a/include/ur.py +++ /dev/null @@ -1,4305 +0,0 @@ -""" - Copyright (C) 2022 Intel Corporation - - Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. - See LICENSE.TXT - SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - @file ur.py - @version v0.9-r0 - - """ -import platform -from ctypes import * -from enum import * - -# ctypes does not define c_intptr_t, so let's define it here manually -c_intptr_t = c_ssize_t - -############################################################################### -__version__ = "1.0" - -############################################################################### -## @brief Defines unique stable identifiers for all functions -class ur_function_v(IntEnum): - CONTEXT_CREATE = 1 ## Enumerator for ::urContextCreate - CONTEXT_RETAIN = 2 ## Enumerator for ::urContextRetain - CONTEXT_RELEASE = 3 ## Enumerator for ::urContextRelease - CONTEXT_GET_INFO = 4 ## Enumerator for ::urContextGetInfo - CONTEXT_GET_NATIVE_HANDLE = 5 ## Enumerator for ::urContextGetNativeHandle - CONTEXT_CREATE_WITH_NATIVE_HANDLE = 6 ## Enumerator for ::urContextCreateWithNativeHandle - CONTEXT_SET_EXTENDED_DELETER = 7 ## Enumerator for ::urContextSetExtendedDeleter - DEVICE_GET = 8 ## Enumerator for ::urDeviceGet - DEVICE_GET_INFO = 9 ## Enumerator for ::urDeviceGetInfo - DEVICE_RETAIN = 10 ## Enumerator for ::urDeviceRetain - DEVICE_RELEASE = 11 ## Enumerator for 
::urDeviceRelease - DEVICE_PARTITION = 12 ## Enumerator for ::urDevicePartition - DEVICE_SELECT_BINARY = 13 ## Enumerator for ::urDeviceSelectBinary - DEVICE_GET_NATIVE_HANDLE = 14 ## Enumerator for ::urDeviceGetNativeHandle - DEVICE_CREATE_WITH_NATIVE_HANDLE = 15 ## Enumerator for ::urDeviceCreateWithNativeHandle - DEVICE_GET_GLOBAL_TIMESTAMPS = 16 ## Enumerator for ::urDeviceGetGlobalTimestamps - ENQUEUE_KERNEL_LAUNCH = 17 ## Enumerator for ::urEnqueueKernelLaunch - ENQUEUE_EVENTS_WAIT = 18 ## Enumerator for ::urEnqueueEventsWait - ENQUEUE_EVENTS_WAIT_WITH_BARRIER = 19 ## Enumerator for ::urEnqueueEventsWaitWithBarrier - ENQUEUE_MEM_BUFFER_READ = 20 ## Enumerator for ::urEnqueueMemBufferRead - ENQUEUE_MEM_BUFFER_WRITE = 21 ## Enumerator for ::urEnqueueMemBufferWrite - ENQUEUE_MEM_BUFFER_READ_RECT = 22 ## Enumerator for ::urEnqueueMemBufferReadRect - ENQUEUE_MEM_BUFFER_WRITE_RECT = 23 ## Enumerator for ::urEnqueueMemBufferWriteRect - ENQUEUE_MEM_BUFFER_COPY = 24 ## Enumerator for ::urEnqueueMemBufferCopy - ENQUEUE_MEM_BUFFER_COPY_RECT = 25 ## Enumerator for ::urEnqueueMemBufferCopyRect - ENQUEUE_MEM_BUFFER_FILL = 26 ## Enumerator for ::urEnqueueMemBufferFill - ENQUEUE_MEM_IMAGE_READ = 27 ## Enumerator for ::urEnqueueMemImageRead - ENQUEUE_MEM_IMAGE_WRITE = 28 ## Enumerator for ::urEnqueueMemImageWrite - ENQUEUE_MEM_IMAGE_COPY = 29 ## Enumerator for ::urEnqueueMemImageCopy - ENQUEUE_MEM_BUFFER_MAP = 30 ## Enumerator for ::urEnqueueMemBufferMap - ENQUEUE_MEM_UNMAP = 31 ## Enumerator for ::urEnqueueMemUnmap - ENQUEUE_USM_FILL = 32 ## Enumerator for ::urEnqueueUSMFill - ENQUEUE_USM_MEMCPY = 33 ## Enumerator for ::urEnqueueUSMMemcpy - ENQUEUE_USM_PREFETCH = 34 ## Enumerator for ::urEnqueueUSMPrefetch - ENQUEUE_USM_ADVISE = 35 ## Enumerator for ::urEnqueueUSMAdvise - ENQUEUE_DEVICE_GLOBAL_VARIABLE_WRITE = 38 ## Enumerator for ::urEnqueueDeviceGlobalVariableWrite - ENQUEUE_DEVICE_GLOBAL_VARIABLE_READ = 39 ## Enumerator for ::urEnqueueDeviceGlobalVariableRead - 
EVENT_GET_INFO = 40 ## Enumerator for ::urEventGetInfo - EVENT_GET_PROFILING_INFO = 41 ## Enumerator for ::urEventGetProfilingInfo - EVENT_WAIT = 42 ## Enumerator for ::urEventWait - EVENT_RETAIN = 43 ## Enumerator for ::urEventRetain - EVENT_RELEASE = 44 ## Enumerator for ::urEventRelease - EVENT_GET_NATIVE_HANDLE = 45 ## Enumerator for ::urEventGetNativeHandle - EVENT_CREATE_WITH_NATIVE_HANDLE = 46 ## Enumerator for ::urEventCreateWithNativeHandle - EVENT_SET_CALLBACK = 47 ## Enumerator for ::urEventSetCallback - KERNEL_CREATE = 48 ## Enumerator for ::urKernelCreate - KERNEL_SET_ARG_VALUE = 49 ## Enumerator for ::urKernelSetArgValue - KERNEL_SET_ARG_LOCAL = 50 ## Enumerator for ::urKernelSetArgLocal - KERNEL_GET_INFO = 51 ## Enumerator for ::urKernelGetInfo - KERNEL_GET_GROUP_INFO = 52 ## Enumerator for ::urKernelGetGroupInfo - KERNEL_GET_SUB_GROUP_INFO = 53 ## Enumerator for ::urKernelGetSubGroupInfo - KERNEL_RETAIN = 54 ## Enumerator for ::urKernelRetain - KERNEL_RELEASE = 55 ## Enumerator for ::urKernelRelease - KERNEL_SET_ARG_POINTER = 56 ## Enumerator for ::urKernelSetArgPointer - KERNEL_SET_EXEC_INFO = 57 ## Enumerator for ::urKernelSetExecInfo - KERNEL_SET_ARG_SAMPLER = 58 ## Enumerator for ::urKernelSetArgSampler - KERNEL_SET_ARG_MEM_OBJ = 59 ## Enumerator for ::urKernelSetArgMemObj - KERNEL_SET_SPECIALIZATION_CONSTANTS = 60 ## Enumerator for ::urKernelSetSpecializationConstants - KERNEL_GET_NATIVE_HANDLE = 61 ## Enumerator for ::urKernelGetNativeHandle - KERNEL_CREATE_WITH_NATIVE_HANDLE = 62 ## Enumerator for ::urKernelCreateWithNativeHandle - MEM_IMAGE_CREATE = 63 ## Enumerator for ::urMemImageCreate - MEM_BUFFER_CREATE = 64 ## Enumerator for ::urMemBufferCreate - MEM_RETAIN = 65 ## Enumerator for ::urMemRetain - MEM_RELEASE = 66 ## Enumerator for ::urMemRelease - MEM_BUFFER_PARTITION = 67 ## Enumerator for ::urMemBufferPartition - MEM_GET_NATIVE_HANDLE = 68 ## Enumerator for ::urMemGetNativeHandle - ENQUEUE_READ_HOST_PIPE = 69 ## Enumerator for 
::urEnqueueReadHostPipe - MEM_GET_INFO = 70 ## Enumerator for ::urMemGetInfo - MEM_IMAGE_GET_INFO = 71 ## Enumerator for ::urMemImageGetInfo - PLATFORM_GET = 72 ## Enumerator for ::urPlatformGet - PLATFORM_GET_INFO = 73 ## Enumerator for ::urPlatformGetInfo - PLATFORM_GET_API_VERSION = 74 ## Enumerator for ::urPlatformGetApiVersion - PLATFORM_GET_NATIVE_HANDLE = 75 ## Enumerator for ::urPlatformGetNativeHandle - PLATFORM_CREATE_WITH_NATIVE_HANDLE = 76 ## Enumerator for ::urPlatformCreateWithNativeHandle - PROGRAM_CREATE_WITH_IL = 78 ## Enumerator for ::urProgramCreateWithIL - PROGRAM_CREATE_WITH_BINARY = 79 ## Enumerator for ::urProgramCreateWithBinary - PROGRAM_BUILD = 80 ## Enumerator for ::urProgramBuild - PROGRAM_COMPILE = 81 ## Enumerator for ::urProgramCompile - PROGRAM_LINK = 82 ## Enumerator for ::urProgramLink - PROGRAM_RETAIN = 83 ## Enumerator for ::urProgramRetain - PROGRAM_RELEASE = 84 ## Enumerator for ::urProgramRelease - PROGRAM_GET_FUNCTION_POINTER = 85 ## Enumerator for ::urProgramGetFunctionPointer - PROGRAM_GET_INFO = 86 ## Enumerator for ::urProgramGetInfo - PROGRAM_GET_BUILD_INFO = 87 ## Enumerator for ::urProgramGetBuildInfo - PROGRAM_SET_SPECIALIZATION_CONSTANTS = 88 ## Enumerator for ::urProgramSetSpecializationConstants - PROGRAM_GET_NATIVE_HANDLE = 89 ## Enumerator for ::urProgramGetNativeHandle - PROGRAM_CREATE_WITH_NATIVE_HANDLE = 90 ## Enumerator for ::urProgramCreateWithNativeHandle - QUEUE_GET_INFO = 91 ## Enumerator for ::urQueueGetInfo - QUEUE_CREATE = 92 ## Enumerator for ::urQueueCreate - QUEUE_RETAIN = 93 ## Enumerator for ::urQueueRetain - QUEUE_RELEASE = 94 ## Enumerator for ::urQueueRelease - QUEUE_GET_NATIVE_HANDLE = 95 ## Enumerator for ::urQueueGetNativeHandle - QUEUE_CREATE_WITH_NATIVE_HANDLE = 96 ## Enumerator for ::urQueueCreateWithNativeHandle - QUEUE_FINISH = 97 ## Enumerator for ::urQueueFinish - QUEUE_FLUSH = 98 ## Enumerator for ::urQueueFlush - SAMPLER_CREATE = 101 ## Enumerator for ::urSamplerCreate - 
SAMPLER_RETAIN = 102 ## Enumerator for ::urSamplerRetain - SAMPLER_RELEASE = 103 ## Enumerator for ::urSamplerRelease - SAMPLER_GET_INFO = 104 ## Enumerator for ::urSamplerGetInfo - SAMPLER_GET_NATIVE_HANDLE = 105 ## Enumerator for ::urSamplerGetNativeHandle - SAMPLER_CREATE_WITH_NATIVE_HANDLE = 106 ## Enumerator for ::urSamplerCreateWithNativeHandle - USM_HOST_ALLOC = 107 ## Enumerator for ::urUSMHostAlloc - USM_DEVICE_ALLOC = 108 ## Enumerator for ::urUSMDeviceAlloc - USM_SHARED_ALLOC = 109 ## Enumerator for ::urUSMSharedAlloc - USM_FREE = 110 ## Enumerator for ::urUSMFree - USM_GET_MEM_ALLOC_INFO = 111 ## Enumerator for ::urUSMGetMemAllocInfo - USM_POOL_CREATE = 112 ## Enumerator for ::urUSMPoolCreate - COMMAND_BUFFER_CREATE_EXP = 113 ## Enumerator for ::urCommandBufferCreateExp - PLATFORM_GET_BACKEND_OPTION = 114 ## Enumerator for ::urPlatformGetBackendOption - MEM_BUFFER_CREATE_WITH_NATIVE_HANDLE = 115 ## Enumerator for ::urMemBufferCreateWithNativeHandle - MEM_IMAGE_CREATE_WITH_NATIVE_HANDLE = 116 ## Enumerator for ::urMemImageCreateWithNativeHandle - ENQUEUE_WRITE_HOST_PIPE = 117 ## Enumerator for ::urEnqueueWriteHostPipe - USM_POOL_RETAIN = 118 ## Enumerator for ::urUSMPoolRetain - USM_POOL_RELEASE = 119 ## Enumerator for ::urUSMPoolRelease - USM_POOL_GET_INFO = 120 ## Enumerator for ::urUSMPoolGetInfo - COMMAND_BUFFER_RETAIN_EXP = 121 ## Enumerator for ::urCommandBufferRetainExp - COMMAND_BUFFER_RELEASE_EXP = 122 ## Enumerator for ::urCommandBufferReleaseExp - COMMAND_BUFFER_FINALIZE_EXP = 123 ## Enumerator for ::urCommandBufferFinalizeExp - COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125 ## Enumerator for ::urCommandBufferAppendKernelLaunchExp - COMMAND_BUFFER_ENQUEUE_EXP = 128 ## Enumerator for ::urCommandBufferEnqueueExp - USM_PITCHED_ALLOC_EXP = 132 ## Enumerator for ::urUSMPitchedAllocExp - BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133## Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp - 
BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134 ## Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp - BINDLESS_IMAGES_IMAGE_ALLOCATE_EXP = 135 ## Enumerator for ::urBindlessImagesImageAllocateExp - BINDLESS_IMAGES_IMAGE_FREE_EXP = 136 ## Enumerator for ::urBindlessImagesImageFreeExp - BINDLESS_IMAGES_UNSAMPLED_IMAGE_CREATE_EXP = 137## Enumerator for ::urBindlessImagesUnsampledImageCreateExp - BINDLESS_IMAGES_SAMPLED_IMAGE_CREATE_EXP = 138 ## Enumerator for ::urBindlessImagesSampledImageCreateExp - BINDLESS_IMAGES_IMAGE_COPY_EXP = 139 ## Enumerator for ::urBindlessImagesImageCopyExp - BINDLESS_IMAGES_IMAGE_GET_INFO_EXP = 140 ## Enumerator for ::urBindlessImagesImageGetInfoExp - BINDLESS_IMAGES_MIPMAP_GET_LEVEL_EXP = 141 ## Enumerator for ::urBindlessImagesMipmapGetLevelExp - BINDLESS_IMAGES_MIPMAP_FREE_EXP = 142 ## Enumerator for ::urBindlessImagesMipmapFreeExp - BINDLESS_IMAGES_IMPORT_OPAQUE_FD_EXP = 143 ## Enumerator for ::urBindlessImagesImportOpaqueFDExp - BINDLESS_IMAGES_MAP_EXTERNAL_ARRAY_EXP = 144 ## Enumerator for ::urBindlessImagesMapExternalArrayExp - BINDLESS_IMAGES_RELEASE_INTEROP_EXP = 145 ## Enumerator for ::urBindlessImagesReleaseInteropExp - BINDLESS_IMAGES_IMPORT_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXP = 146 ## Enumerator for ::urBindlessImagesImportExternalSemaphoreOpaqueFDExp - BINDLESS_IMAGES_DESTROY_EXTERNAL_SEMAPHORE_EXP = 147## Enumerator for ::urBindlessImagesDestroyExternalSemaphoreExp - BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP = 148 ## Enumerator for ::urBindlessImagesWaitExternalSemaphoreExp - BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP = 149 ## Enumerator for ::urBindlessImagesSignalExternalSemaphoreExp - ENQUEUE_USM_FILL_2D = 151 ## Enumerator for ::urEnqueueUSMFill2D - ENQUEUE_USM_MEMCPY_2D = 152 ## Enumerator for ::urEnqueueUSMMemcpy2D - VIRTUAL_MEM_GRANULARITY_GET_INFO = 153 ## Enumerator for ::urVirtualMemGranularityGetInfo - VIRTUAL_MEM_RESERVE = 154 ## Enumerator for ::urVirtualMemReserve - VIRTUAL_MEM_FREE = 155 
## Enumerator for ::urVirtualMemFree - VIRTUAL_MEM_MAP = 156 ## Enumerator for ::urVirtualMemMap - VIRTUAL_MEM_UNMAP = 157 ## Enumerator for ::urVirtualMemUnmap - VIRTUAL_MEM_SET_ACCESS = 158 ## Enumerator for ::urVirtualMemSetAccess - VIRTUAL_MEM_GET_INFO = 159 ## Enumerator for ::urVirtualMemGetInfo - PHYSICAL_MEM_CREATE = 160 ## Enumerator for ::urPhysicalMemCreate - PHYSICAL_MEM_RETAIN = 161 ## Enumerator for ::urPhysicalMemRetain - PHYSICAL_MEM_RELEASE = 162 ## Enumerator for ::urPhysicalMemRelease - USM_IMPORT_EXP = 163 ## Enumerator for ::urUSMImportExp - USM_RELEASE_EXP = 164 ## Enumerator for ::urUSMReleaseExp - USM_P2P_ENABLE_PEER_ACCESS_EXP = 165 ## Enumerator for ::urUsmP2PEnablePeerAccessExp - USM_P2P_DISABLE_PEER_ACCESS_EXP = 166 ## Enumerator for ::urUsmP2PDisablePeerAccessExp - USM_P2P_PEER_ACCESS_GET_INFO_EXP = 167 ## Enumerator for ::urUsmP2PPeerAccessGetInfoExp - LOADER_CONFIG_CREATE = 172 ## Enumerator for ::urLoaderConfigCreate - LOADER_CONFIG_RELEASE = 173 ## Enumerator for ::urLoaderConfigRelease - LOADER_CONFIG_RETAIN = 174 ## Enumerator for ::urLoaderConfigRetain - LOADER_CONFIG_GET_INFO = 175 ## Enumerator for ::urLoaderConfigGetInfo - LOADER_CONFIG_ENABLE_LAYER = 176 ## Enumerator for ::urLoaderConfigEnableLayer - ADAPTER_RELEASE = 177 ## Enumerator for ::urAdapterRelease - ADAPTER_GET = 178 ## Enumerator for ::urAdapterGet - ADAPTER_RETAIN = 179 ## Enumerator for ::urAdapterRetain - ADAPTER_GET_LAST_ERROR = 180 ## Enumerator for ::urAdapterGetLastError - ADAPTER_GET_INFO = 181 ## Enumerator for ::urAdapterGetInfo - PROGRAM_BUILD_EXP = 197 ## Enumerator for ::urProgramBuildExp - PROGRAM_COMPILE_EXP = 198 ## Enumerator for ::urProgramCompileExp - PROGRAM_LINK_EXP = 199 ## Enumerator for ::urProgramLinkExp - LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 200 ## Enumerator for ::urLoaderConfigSetCodeLocationCallback - LOADER_INIT = 201 ## Enumerator for ::urLoaderInit - LOADER_TEAR_DOWN = 202 ## Enumerator for ::urLoaderTearDown - 
COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 203 ## Enumerator for ::urCommandBufferAppendUSMMemcpyExp - COMMAND_BUFFER_APPEND_USM_FILL_EXP = 204 ## Enumerator for ::urCommandBufferAppendUSMFillExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 205 ## Enumerator for ::urCommandBufferAppendMemBufferCopyExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 206## Enumerator for ::urCommandBufferAppendMemBufferWriteExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 207 ## Enumerator for ::urCommandBufferAppendMemBufferReadExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 208## Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 209 ## Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 210## Enumerator for ::urCommandBufferAppendMemBufferReadRectExp - COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 211 ## Enumerator for ::urCommandBufferAppendMemBufferFillExp - COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 212 ## Enumerator for ::urCommandBufferAppendUSMPrefetchExp - COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213 ## Enumerator for ::urCommandBufferAppendUSMAdviseExp - ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214 ## Enumerator for ::urEnqueueCooperativeKernelLaunchExp - KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215## Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp - -class ur_function_t(c_int): - def __str__(self): - return str(ur_function_v(self.value)) - - -############################################################################### -## @brief Defines structure types -class ur_structure_type_v(IntEnum): - CONTEXT_PROPERTIES = 0 ## ::ur_context_properties_t - IMAGE_DESC = 1 ## ::ur_image_desc_t - BUFFER_PROPERTIES = 2 ## ::ur_buffer_properties_t - BUFFER_REGION = 3 ## ::ur_buffer_region_t - BUFFER_CHANNEL_PROPERTIES = 4 ## ::ur_buffer_channel_properties_t - BUFFER_ALLOC_LOCATION_PROPERTIES = 5 ## ::ur_buffer_alloc_location_properties_t - 
PROGRAM_PROPERTIES = 6 ## ::ur_program_properties_t - USM_DESC = 7 ## ::ur_usm_desc_t - USM_HOST_DESC = 8 ## ::ur_usm_host_desc_t - USM_DEVICE_DESC = 9 ## ::ur_usm_device_desc_t - USM_POOL_DESC = 10 ## ::ur_usm_pool_desc_t - USM_POOL_LIMITS_DESC = 11 ## ::ur_usm_pool_limits_desc_t - DEVICE_BINARY = 12 ## ::ur_device_binary_t - SAMPLER_DESC = 13 ## ::ur_sampler_desc_t - QUEUE_PROPERTIES = 14 ## ::ur_queue_properties_t - QUEUE_INDEX_PROPERTIES = 15 ## ::ur_queue_index_properties_t - CONTEXT_NATIVE_PROPERTIES = 16 ## ::ur_context_native_properties_t - KERNEL_NATIVE_PROPERTIES = 17 ## ::ur_kernel_native_properties_t - QUEUE_NATIVE_PROPERTIES = 18 ## ::ur_queue_native_properties_t - MEM_NATIVE_PROPERTIES = 19 ## ::ur_mem_native_properties_t - EVENT_NATIVE_PROPERTIES = 20 ## ::ur_event_native_properties_t - PLATFORM_NATIVE_PROPERTIES = 21 ## ::ur_platform_native_properties_t - DEVICE_NATIVE_PROPERTIES = 22 ## ::ur_device_native_properties_t - PROGRAM_NATIVE_PROPERTIES = 23 ## ::ur_program_native_properties_t - SAMPLER_NATIVE_PROPERTIES = 24 ## ::ur_sampler_native_properties_t - QUEUE_NATIVE_DESC = 25 ## ::ur_queue_native_desc_t - DEVICE_PARTITION_PROPERTIES = 26 ## ::ur_device_partition_properties_t - KERNEL_ARG_MEM_OBJ_PROPERTIES = 27 ## ::ur_kernel_arg_mem_obj_properties_t - PHYSICAL_MEM_PROPERTIES = 28 ## ::ur_physical_mem_properties_t - KERNEL_ARG_POINTER_PROPERTIES = 29 ## ::ur_kernel_arg_pointer_properties_t - KERNEL_ARG_SAMPLER_PROPERTIES = 30 ## ::ur_kernel_arg_sampler_properties_t - KERNEL_EXEC_INFO_PROPERTIES = 31 ## ::ur_kernel_exec_info_properties_t - KERNEL_ARG_VALUE_PROPERTIES = 32 ## ::ur_kernel_arg_value_properties_t - KERNEL_ARG_LOCAL_PROPERTIES = 33 ## ::ur_kernel_arg_local_properties_t - EXP_COMMAND_BUFFER_DESC = 0x1000 ## ::ur_exp_command_buffer_desc_t - EXP_SAMPLER_MIP_PROPERTIES = 0x2000 ## ::ur_exp_sampler_mip_properties_t - EXP_INTEROP_MEM_DESC = 0x2001 ## ::ur_exp_interop_mem_desc_t - EXP_INTEROP_SEMAPHORE_DESC = 0x2002 ## 
::ur_exp_interop_semaphore_desc_t - EXP_FILE_DESCRIPTOR = 0x2003 ## ::ur_exp_file_descriptor_t - EXP_WIN32_HANDLE = 0x2004 ## ::ur_exp_win32_handle_t - EXP_LAYERED_IMAGE_PROPERTIES = 0x2005 ## ::ur_exp_layered_image_properties_t - EXP_SAMPLER_ADDR_MODES = 0x2006 ## ::ur_exp_sampler_addr_modes_t - -class ur_structure_type_t(c_int): - def __str__(self): - return str(ur_structure_type_v(self.value)) - - -############################################################################### -## @brief Generates generic 'oneAPI' API versions -def UR_MAKE_VERSION( _major, _minor ): - return (( _major << 16 )|( _minor & 0x0000ffff)) - -############################################################################### -## @brief Extracts 'oneAPI' API major version -def UR_MAJOR_VERSION( _ver ): - return ( _ver >> 16 ) - -############################################################################### -## @brief Extracts 'oneAPI' API minor version -def UR_MINOR_VERSION( _ver ): - return ( _ver & 0x0000ffff ) - -############################################################################### -## @brief Calling convention for all API functions -# UR_APICALL not required for python - -############################################################################### -## @brief Microsoft-specific dllexport storage-class attribute -# UR_APIEXPORT not required for python - -############################################################################### -## @brief Microsoft-specific dllexport storage-class attribute -# UR_DLLEXPORT not required for python - -############################################################################### -## @brief GCC-specific dllexport storage-class attribute -# UR_DLLEXPORT not required for python - -############################################################################### -## @brief compiler-independent type -class ur_bool_t(c_ubyte): - pass - -############################################################################### -## @brief Handle of a loader 
config object -class ur_loader_config_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of an adapter instance -class ur_adapter_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a platform instance -class ur_platform_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of platform's device object -class ur_device_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of context object -class ur_context_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of event object -class ur_event_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of Program object -class ur_program_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of program's Kernel object -class ur_kernel_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a queue object -class ur_queue_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a native object -class ur_native_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of a Sampler object -class ur_sampler_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of memory object which can either be buffer or image -class ur_mem_handle_t(c_void_p): - pass - 
-############################################################################### -## @brief Handle of physical memory object -class ur_physical_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Generic macro for enumerator bit masks -def UR_BIT( _i ): - return ( 1 << _i ) - -############################################################################### -## @brief Defines Return/Error codes -class ur_result_v(IntEnum): - SUCCESS = 0 ## Success - ERROR_INVALID_OPERATION = 1 ## Invalid operation - ERROR_INVALID_QUEUE_PROPERTIES = 2 ## Invalid queue properties - ERROR_INVALID_QUEUE = 3 ## Invalid queue - ERROR_INVALID_VALUE = 4 ## Invalid Value - ERROR_INVALID_CONTEXT = 5 ## Invalid context - ERROR_INVALID_PLATFORM = 6 ## Invalid platform - ERROR_INVALID_BINARY = 7 ## Invalid binary - ERROR_INVALID_PROGRAM = 8 ## Invalid program - ERROR_INVALID_SAMPLER = 9 ## Invalid sampler - ERROR_INVALID_BUFFER_SIZE = 10 ## Invalid buffer size - ERROR_INVALID_MEM_OBJECT = 11 ## Invalid memory object - ERROR_INVALID_EVENT = 12 ## Invalid event - ERROR_INVALID_EVENT_WAIT_LIST = 13 ## Returned when the event wait list or the events in the wait list are - ## invalid. 
- ERROR_MISALIGNED_SUB_BUFFER_OFFSET = 14 ## Misaligned sub buffer offset - ERROR_INVALID_WORK_GROUP_SIZE = 15 ## Invalid work group size - ERROR_COMPILER_NOT_AVAILABLE = 16 ## Compiler not available - ERROR_PROFILING_INFO_NOT_AVAILABLE = 17 ## Profiling info not available - ERROR_DEVICE_NOT_FOUND = 18 ## Device not found - ERROR_INVALID_DEVICE = 19 ## Invalid device - ERROR_DEVICE_LOST = 20 ## Device hung, reset, was removed, or adapter update occurred - ERROR_DEVICE_REQUIRES_RESET = 21 ## Device requires a reset - ERROR_DEVICE_IN_LOW_POWER_STATE = 22 ## Device currently in low power state - ERROR_DEVICE_PARTITION_FAILED = 23 ## Device partitioning failed - ERROR_INVALID_DEVICE_PARTITION_COUNT = 24 ## Invalid counts provided with ::UR_DEVICE_PARTITION_BY_COUNTS - ERROR_INVALID_WORK_ITEM_SIZE = 25 ## Invalid work item size - ERROR_INVALID_WORK_DIMENSION = 26 ## Invalid work dimension - ERROR_INVALID_KERNEL_ARGS = 27 ## Invalid kernel args - ERROR_INVALID_KERNEL = 28 ## Invalid kernel - ERROR_INVALID_KERNEL_NAME = 29 ## [Validation] kernel name is not found in the program - ERROR_INVALID_KERNEL_ARGUMENT_INDEX = 30 ## [Validation] kernel argument index is not valid for kernel - ERROR_INVALID_KERNEL_ARGUMENT_SIZE = 31 ## [Validation] kernel argument size does not match kernel - ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE = 32 ## [Validation] value of kernel attribute is not valid for the kernel or - ## device - ERROR_INVALID_IMAGE_SIZE = 33 ## Invalid image size - ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR = 34 ## Invalid image format descriptor - ERROR_IMAGE_FORMAT_NOT_SUPPORTED = 35 ## Image format not supported - ERROR_MEM_OBJECT_ALLOCATION_FAILURE = 36 ## Memory object allocation failure - ERROR_INVALID_PROGRAM_EXECUTABLE = 37 ## Program object parameter is invalid. 
- ERROR_UNINITIALIZED = 38 ## [Validation] adapter is not initialized or specific entry-point is not - ## implemented - ERROR_OUT_OF_HOST_MEMORY = 39 ## Insufficient host memory to satisfy call - ERROR_OUT_OF_DEVICE_MEMORY = 40 ## Insufficient device memory to satisfy call - ERROR_OUT_OF_RESOURCES = 41 ## Out of resources - ERROR_PROGRAM_BUILD_FAILURE = 42 ## Error occurred when building program, see build log for details - ERROR_PROGRAM_LINK_FAILURE = 43 ## Error occurred when linking programs, see build log for details - ERROR_UNSUPPORTED_VERSION = 44 ## [Validation] generic error code for unsupported versions - ERROR_UNSUPPORTED_FEATURE = 45 ## [Validation] generic error code for unsupported features - ERROR_INVALID_ARGUMENT = 46 ## [Validation] generic error code for invalid arguments - ERROR_INVALID_NULL_HANDLE = 47 ## [Validation] handle argument is not valid - ERROR_HANDLE_OBJECT_IN_USE = 48 ## [Validation] object pointed to by handle still in-use by device - ERROR_INVALID_NULL_POINTER = 49 ## [Validation] pointer argument may not be nullptr - ERROR_INVALID_SIZE = 50 ## [Validation] invalid size or dimensions (e.g., must not be zero, or is - ## out of bounds) - ERROR_UNSUPPORTED_SIZE = 51 ## [Validation] size argument is not supported by the device (e.g., too - ## large) - ERROR_UNSUPPORTED_ALIGNMENT = 52 ## [Validation] alignment argument is not supported by the device (e.g., - ## too small) - ERROR_INVALID_SYNCHRONIZATION_OBJECT = 53 ## [Validation] synchronization object in invalid state - ERROR_INVALID_ENUMERATION = 54 ## [Validation] enumerator argument is not valid - ERROR_UNSUPPORTED_ENUMERATION = 55 ## [Validation] enumerator argument is not supported by the device - ERROR_UNSUPPORTED_IMAGE_FORMAT = 56 ## [Validation] image format is not supported by the device - ERROR_INVALID_NATIVE_BINARY = 57 ## [Validation] native binary is not supported by the device - ERROR_INVALID_GLOBAL_NAME = 58 ## [Validation] global variable is not found in the program - 
ERROR_INVALID_FUNCTION_NAME = 59 ## [Validation] function name is not found in the program - ERROR_INVALID_GROUP_SIZE_DIMENSION = 60 ## [Validation] group size dimension is not valid for the kernel or - ## device - ERROR_INVALID_GLOBAL_WIDTH_DIMENSION = 61 ## [Validation] global width dimension is not valid for the kernel or - ## device - ERROR_PROGRAM_UNLINKED = 62 ## [Validation] compiled program or program with imports needs to be - ## linked before kernels can be created from it. - ERROR_OVERLAPPING_REGIONS = 63 ## [Validation] copy operations do not support overlapping regions of - ## memory - ERROR_INVALID_HOST_PTR = 64 ## Invalid host pointer - ERROR_INVALID_USM_SIZE = 65 ## Invalid USM size - ERROR_OBJECT_ALLOCATION_FAILURE = 66 ## Objection allocation failure - ERROR_ADAPTER_SPECIFIC = 67 ## An adapter specific warning/error has been reported and can be - ## retrieved via the urPlatformGetLastError entry point. - ERROR_LAYER_NOT_PRESENT = 68 ## A requested layer was not found by the loader. 
- ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000 ## Invalid Command-Buffer - ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001## Sync point is not valid for the command-buffer - ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002 ## Sync point wait list is invalid - ERROR_UNKNOWN = 0x7ffffffe ## Unknown or internal error - -class ur_result_t(c_int): - def __str__(self): - return str(ur_result_v(self.value)) - - -############################################################################### -## @brief Base for all properties types -class ur_base_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Base for all descriptor types -class ur_base_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief 3D offset argument passed to buffer rect operations -class ur_rect_offset_t(Structure): - _fields_ = [ - ("x", c_ulonglong), ## [in] x offset (bytes) - ("y", c_ulonglong), ## [in] y offset (scalar) - ("z", c_ulonglong) ## [in] z offset (scalar) - ] - -############################################################################### -## @brief 3D region argument passed to buffer rect operations -class ur_rect_region_t(Structure): - _fields_ = [ - ("width", c_ulonglong), ## [in] width (bytes) - ("height", c_ulonglong), ## [in] height (scalar) - ("depth", c_ulonglong) ## [in] scalar (scalar) - ] - -############################################################################### -## @brief Supported device initialization flags -class ur_device_init_flags_v(IntEnum): - GPU = UR_BIT(0) ## initialize GPU device adapters. 
- CPU = UR_BIT(1) ## initialize CPU device adapters. - FPGA = UR_BIT(2) ## initialize FPGA device adapters. - MCA = UR_BIT(3) ## initialize MCA device adapters. - VPU = UR_BIT(4) ## initialize VPU device adapters. - -class ur_device_init_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Supported loader info -class ur_loader_config_info_v(IntEnum): - AVAILABLE_LAYERS = 0 ## [char[]] Null-terminated, semi-colon separated list of available - ## layers. - REFERENCE_COUNT = 1 ## [uint32_t] Reference count of the loader config object. - -class ur_loader_config_info_t(c_int): - def __str__(self): - return str(ur_loader_config_info_v(self.value)) - - -############################################################################### -## @brief Code location data -class ur_code_location_t(Structure): - _fields_ = [ - ("functionName", c_char_p), ## [in][out] Function name. - ("sourceFile", c_char_p), ## [in][out] Source code file. - ("lineNumber", c_ulong), ## [in][out] Source code line number. - ("columnNumber", c_ulong) ## [in][out] Source code column number. - ] - -############################################################################### -## @brief Code location callback with user data. -def ur_code_location_callback_t(user_defined_callback): - @CFUNCTYPE(ur_code_location_t, c_void_p) - def ur_code_location_callback_t_wrapper(pUserData): - return user_defined_callback(pUserData) - return ur_code_location_callback_t_wrapper - -############################################################################### -## @brief Supported adapter info -class ur_adapter_info_v(IntEnum): - BACKEND = 0 ## [::ur_adapter_backend_t] Identifies the native backend supported by - ## the adapter. - REFERENCE_COUNT = 1 ## [uint32_t] Reference count of the adapter. - ## The reference count returned should be considered immediately stale. 
- ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - -class ur_adapter_info_t(c_int): - def __str__(self): - return str(ur_adapter_info_v(self.value)) - - -############################################################################### -## @brief Identifies backend of the adapter -class ur_adapter_backend_v(IntEnum): - UNKNOWN = 0 ## The backend is not a recognized one - LEVEL_ZERO = 1 ## The backend is Level Zero - OPENCL = 2 ## The backend is OpenCL - CUDA = 3 ## The backend is CUDA - HIP = 4 ## The backend is HIP - NATIVE_CPU = 5 ## The backend is Native CPU - -class ur_adapter_backend_t(c_int): - def __str__(self): - return str(ur_adapter_backend_v(self.value)) - - -############################################################################### -## @brief Supported platform info -class ur_platform_info_v(IntEnum): - NAME = 1 ## [char[]] The string denoting name of the platform. The size of the - ## info needs to be dynamically queried. - VENDOR_NAME = 2 ## [char[]] The string denoting name of the vendor of the platform. The - ## size of the info needs to be dynamically queried. - VERSION = 3 ## [char[]] The string denoting the version of the platform. The size of - ## the info needs to be dynamically queried. - EXTENSIONS = 4 ## [char[]] The string denoting extensions supported by the platform. The - ## size of the info needs to be dynamically queried. - PROFILE = 5 ## [char[]] The string denoting profile of the platform. The size of the - ## info needs to be dynamically queried. - BACKEND = 6 ## [::ur_platform_backend_t] The backend of the platform. Identifies the - ## native backend adapter implementing this platform. 
- -class ur_platform_info_t(c_int): - def __str__(self): - return str(ur_platform_info_v(self.value)) - - -############################################################################### -## @brief Supported API versions -## -## @details -## - API versions contain major and minor attributes, use -## ::UR_MAJOR_VERSION and ::UR_MINOR_VERSION -class ur_api_version_v(IntEnum): - _0_6 = UR_MAKE_VERSION( 0, 6 ) ## version 0.6 - _0_7 = UR_MAKE_VERSION( 0, 7 ) ## version 0.7 - _0_8 = UR_MAKE_VERSION( 0, 8 ) ## version 0.8 - _0_9 = UR_MAKE_VERSION( 0, 9 ) ## version 0.9 - CURRENT = UR_MAKE_VERSION( 0, 9 ) ## latest known version - -class ur_api_version_t(c_int): - def __str__(self): - return str(ur_api_version_v(self.value)) - - -############################################################################### -## @brief Native platform creation properties -class ur_platform_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. 
- ] - -############################################################################### -## @brief Identifies native backend adapters -class ur_platform_backend_v(IntEnum): - UNKNOWN = 0 ## The backend is not a recognized one - LEVEL_ZERO = 1 ## The backend is Level Zero - OPENCL = 2 ## The backend is OpenCL - CUDA = 3 ## The backend is CUDA - HIP = 4 ## The backend is HIP - NATIVE_CPU = 5 ## The backend is Native CPU - -class ur_platform_backend_t(c_int): - def __str__(self): - return str(ur_platform_backend_v(self.value)) - - -############################################################################### -## @brief Target identification strings for -## ::ur_device_binary_t.pDeviceTargetSpec -## A device type represented by a particular target triple requires -## specific -## binary images. We need to map the image type onto the device target triple -UR_DEVICE_BINARY_TARGET_UNKNOWN = "" - -############################################################################### -## @brief SPIR-V 32-bit image <-> "spir", 32-bit OpenCL device -UR_DEVICE_BINARY_TARGET_SPIRV32 = "spir" - -############################################################################### -## @brief SPIR-V 64-bit image <-> "spir64", 64-bit OpenCL device -UR_DEVICE_BINARY_TARGET_SPIRV64 = "spir64" - -############################################################################### -## @brief Device-specific binary images produced from SPIR-V 64-bit <-> various -## "spir64_*" triples for specific 64-bit OpenCL CPU devices -UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64 = "spir64_x86_64" - -############################################################################### -## @brief Generic GPU device (64-bit OpenCL) -UR_DEVICE_BINARY_TARGET_SPIRV64_GEN = "spir64_gen" - -############################################################################### -## @brief 64-bit OpenCL FPGA device -UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA = "spir64_fpga" - 
-############################################################################### -## @brief PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device -UR_DEVICE_BINARY_TARGET_NVPTX64 = "nvptx64" - -############################################################################### -## @brief AMD GCN -UR_DEVICE_BINARY_TARGET_AMDGCN = "amdgcn" - -############################################################################### -## @brief Native CPU -UR_DEVICE_BINARY_TARGET_NATIVE_CPU = "native_cpu" - -############################################################################### -## @brief Device Binary Type -class ur_device_binary_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_DEVICE_BINARY - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("pDeviceTargetSpec", c_char_p) ## [in] null-terminated string representation of the device's target architecture. - ## For example: - ## + ::UR_DEVICE_BINARY_TARGET_UNKNOWN - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV32 - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64 - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64 - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64_GEN - ## + ::UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA - ## + ::UR_DEVICE_BINARY_TARGET_NVPTX64 - ## + ::UR_DEVICE_BINARY_TARGET_AMDGCN - ] - -############################################################################### -## @brief Supported device types -class ur_device_type_v(IntEnum): - DEFAULT = 1 ## The default device type as preferred by the runtime - ALL = 2 ## Devices of all types - GPU = 3 ## Graphics Processing Unit - CPU = 4 ## Central Processing Unit - FPGA = 5 ## Field Programmable Gate Array - MCA = 6 ## Memory Copy Accelerator - VPU = 7 ## Vision Processing Unit - -class ur_device_type_t(c_int): - def __str__(self): - return str(ur_device_type_v(self.value)) - - -############################################################################### -## @brief Supported device info 
-class ur_device_info_v(IntEnum): - TYPE = 0 ## [::ur_device_type_t] type of the device - VENDOR_ID = 1 ## [uint32_t] vendor Id of the device - DEVICE_ID = 2 ## [uint32_t] Id of the device - MAX_COMPUTE_UNITS = 3 ## [uint32_t] the number of compute units - MAX_WORK_ITEM_DIMENSIONS = 4 ## [uint32_t] max work item dimensions - MAX_WORK_ITEM_SIZES = 5 ## [size_t[]] return an array of max work item sizes - MAX_WORK_GROUP_SIZE = 6 ## [size_t] max work group size - SINGLE_FP_CONFIG = 7 ## [::ur_device_fp_capability_flags_t] single precision floating point - ## capability - HALF_FP_CONFIG = 8 ## [::ur_device_fp_capability_flags_t] half precision floating point - ## capability - DOUBLE_FP_CONFIG = 9 ## [::ur_device_fp_capability_flags_t] double precision floating point - ## capability - QUEUE_PROPERTIES = 10 ## [::ur_queue_flags_t] command queue properties supported by the device - PREFERRED_VECTOR_WIDTH_CHAR = 11 ## [uint32_t] preferred vector width for char - PREFERRED_VECTOR_WIDTH_SHORT = 12 ## [uint32_t] preferred vector width for short - PREFERRED_VECTOR_WIDTH_INT = 13 ## [uint32_t] preferred vector width for int - PREFERRED_VECTOR_WIDTH_LONG = 14 ## [uint32_t] preferred vector width for long - PREFERRED_VECTOR_WIDTH_FLOAT = 15 ## [uint32_t] preferred vector width for float - PREFERRED_VECTOR_WIDTH_DOUBLE = 16 ## [uint32_t] preferred vector width for double - PREFERRED_VECTOR_WIDTH_HALF = 17 ## [uint32_t] preferred vector width for half float - NATIVE_VECTOR_WIDTH_CHAR = 18 ## [uint32_t] native vector width for char - NATIVE_VECTOR_WIDTH_SHORT = 19 ## [uint32_t] native vector width for short - NATIVE_VECTOR_WIDTH_INT = 20 ## [uint32_t] native vector width for int - NATIVE_VECTOR_WIDTH_LONG = 21 ## [uint32_t] native vector width for long - NATIVE_VECTOR_WIDTH_FLOAT = 22 ## [uint32_t] native vector width for float - NATIVE_VECTOR_WIDTH_DOUBLE = 23 ## [uint32_t] native vector width for double - NATIVE_VECTOR_WIDTH_HALF = 24 ## [uint32_t] native vector width for half 
float - MAX_CLOCK_FREQUENCY = 25 ## [uint32_t] max clock frequency in MHz - MEMORY_CLOCK_RATE = 26 ## [uint32_t] memory clock frequency in MHz - ADDRESS_BITS = 27 ## [uint32_t] address bits - MAX_MEM_ALLOC_SIZE = 28 ## [uint64_t] max memory allocation size - IMAGE_SUPPORTED = 29 ## [::ur_bool_t] images are supported - MAX_READ_IMAGE_ARGS = 30 ## [uint32_t] max number of image objects arguments of a kernel declared - ## with the read_only qualifier - MAX_WRITE_IMAGE_ARGS = 31 ## [uint32_t] max number of image objects arguments of a kernel declared - ## with the write_only qualifier - MAX_READ_WRITE_IMAGE_ARGS = 32 ## [uint32_t] max number of image objects arguments of a kernel declared - ## with the read_write qualifier - IMAGE2D_MAX_WIDTH = 33 ## [size_t] max width of Image2D object - IMAGE2D_MAX_HEIGHT = 34 ## [size_t] max height of Image2D object - IMAGE3D_MAX_WIDTH = 35 ## [size_t] max width of Image3D object - IMAGE3D_MAX_HEIGHT = 36 ## [size_t] max height of Image3D object - IMAGE3D_MAX_DEPTH = 37 ## [size_t] max depth of Image3D object - IMAGE_MAX_BUFFER_SIZE = 38 ## [size_t] max image buffer size - IMAGE_MAX_ARRAY_SIZE = 39 ## [size_t] max image array size - MAX_SAMPLERS = 40 ## [uint32_t] max number of samplers that can be used in a kernel - MAX_PARAMETER_SIZE = 41 ## [size_t] max size in bytes of all arguments passed to a kernel - MEM_BASE_ADDR_ALIGN = 42 ## [uint32_t] memory base address alignment - GLOBAL_MEM_CACHE_TYPE = 43 ## [::ur_device_mem_cache_type_t] global memory cache type - GLOBAL_MEM_CACHELINE_SIZE = 44 ## [uint32_t] global memory cache line size in bytes - GLOBAL_MEM_CACHE_SIZE = 45 ## [uint64_t] size of global memory cache in bytes - GLOBAL_MEM_SIZE = 46 ## [uint64_t] size of global memory in bytes - GLOBAL_MEM_FREE = 47 ## [uint64_t] size of global memory which is free in bytes - MAX_CONSTANT_BUFFER_SIZE = 48 ## [uint64_t] max constant buffer size in bytes - MAX_CONSTANT_ARGS = 49 ## [uint32_t] max number of __const declared arguments in a 
kernel - LOCAL_MEM_TYPE = 50 ## [::ur_device_local_mem_type_t] local memory type - LOCAL_MEM_SIZE = 51 ## [uint64_t] local memory size in bytes - ERROR_CORRECTION_SUPPORT = 52 ## [::ur_bool_t] support error correction to global and local memory - HOST_UNIFIED_MEMORY = 53 ## [::ur_bool_t] unified host device memory - PROFILING_TIMER_RESOLUTION = 54 ## [size_t] profiling timer resolution in nanoseconds - ENDIAN_LITTLE = 55 ## [::ur_bool_t] little endian byte order - AVAILABLE = 56 ## [::ur_bool_t] device is available - COMPILER_AVAILABLE = 57 ## [::ur_bool_t] device compiler is available - LINKER_AVAILABLE = 58 ## [::ur_bool_t] device linker is available - EXECUTION_CAPABILITIES = 59 ## [::ur_device_exec_capability_flags_t] device kernel execution - ## capability bit-field - QUEUE_ON_DEVICE_PROPERTIES = 60 ## [::ur_queue_flags_t] device command queue property bit-field - QUEUE_ON_HOST_PROPERTIES = 61 ## [::ur_queue_flags_t] host queue property bit-field - BUILT_IN_KERNELS = 62 ## [char[]] a semi-colon separated list of built-in kernels - PLATFORM = 63 ## [::ur_platform_handle_t] the platform associated with the device - REFERENCE_COUNT = 64 ## [uint32_t] Reference count of the device object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. 
- IL_VERSION = 65 ## [char[]] IL version - NAME = 66 ## [char[]] Device name - VENDOR = 67 ## [char[]] Device vendor - DRIVER_VERSION = 68 ## [char[]] Driver version - PROFILE = 69 ## [char[]] Device profile - VERSION = 70 ## [char[]] Device version - BACKEND_RUNTIME_VERSION = 71 ## [char[]] Version of backend runtime - EXTENSIONS = 72 ## [char[]] Return a space separated list of extension names - PRINTF_BUFFER_SIZE = 73 ## [size_t] Maximum size in bytes of internal printf buffer - PREFERRED_INTEROP_USER_SYNC = 74 ## [::ur_bool_t] prefer user synchronization when sharing object with - ## other API - PARENT_DEVICE = 75 ## [::ur_device_handle_t] return parent device handle - SUPPORTED_PARTITIONS = 76 ## [::ur_device_partition_t[]] Returns an array of partition types - ## supported by the device - PARTITION_MAX_SUB_DEVICES = 77 ## [uint32_t] maximum number of sub-devices when the device is - ## partitioned - PARTITION_AFFINITY_DOMAIN = 78 ## [::ur_device_affinity_domain_flags_t] Returns a bit-field of the - ## supported affinity domains for partitioning. - ## If the device does not support any affinity domains, then 0 will be returned. 
- PARTITION_TYPE = 79 ## [::ur_device_partition_property_t[]] return an array of - ## ::ur_device_partition_property_t for properties specified in - ## ::urDevicePartition - MAX_NUM_SUB_GROUPS = 80 ## [uint32_t] max number of sub groups - SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81 ## [::ur_bool_t] support sub group independent forward progress - SUB_GROUP_SIZES_INTEL = 82 ## [uint32_t[]] return an array of sub group sizes supported on Intel - ## device - USM_HOST_SUPPORT = 83 ## [::ur_device_usm_access_capability_flags_t] support USM host memory - ## access - USM_DEVICE_SUPPORT = 84 ## [::ur_device_usm_access_capability_flags_t] support USM device memory - ## access - USM_SINGLE_SHARED_SUPPORT = 85 ## [::ur_device_usm_access_capability_flags_t] support USM single device - ## shared memory access - USM_CROSS_SHARED_SUPPORT = 86 ## [::ur_device_usm_access_capability_flags_t] support USM cross device - ## shared memory access - USM_SYSTEM_SHARED_SUPPORT = 87 ## [::ur_device_usm_access_capability_flags_t] support USM system wide - ## shared memory access - UUID = 88 ## [char[]] return device UUID - PCI_ADDRESS = 89 ## [char[]] return device PCI address - GPU_EU_COUNT = 90 ## [uint32_t] return Intel GPU EU count - GPU_EU_SIMD_WIDTH = 91 ## [uint32_t] return Intel GPU EU SIMD width - GPU_EU_SLICES = 92 ## [uint32_t] return Intel GPU number of slices - GPU_EU_COUNT_PER_SUBSLICE = 93 ## [uint32_t] return Intel GPU EU count per subslice - GPU_SUBSLICES_PER_SLICE = 94 ## [uint32_t] return Intel GPU number of subslices per slice - GPU_HW_THREADS_PER_EU = 95 ## [uint32_t] return Intel GPU number of threads per EU - MAX_MEMORY_BANDWIDTH = 96 ## [uint32_t] return max memory bandwidth in Mb/s - IMAGE_SRGB = 97 ## [::ur_bool_t] device supports sRGB images - BUILD_ON_SUBDEVICE = 98 ## [::ur_bool_t] Return true if sub-device should do its own program - ## build - ATOMIC_64 = 99 ## [::ur_bool_t] support 64 bit atomics - ATOMIC_MEMORY_ORDER_CAPABILITIES = 100 ## 
[::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory order capabilities - ATOMIC_MEMORY_SCOPE_CAPABILITIES = 101 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory scope capabilities - ATOMIC_FENCE_ORDER_CAPABILITIES = 102 ## [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory fence order capabilities - ATOMIC_FENCE_SCOPE_CAPABILITIES = 103 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory fence scope capabilities - BFLOAT16 = 104 ## [::ur_bool_t] support for bfloat16 - MAX_COMPUTE_QUEUE_INDICES = 105 ## [uint32_t] Returns 1 if the device doesn't have a notion of a - ## queue index. Otherwise, returns the number of queue indices that are - ## available for this device. - KERNEL_SET_SPECIALIZATION_CONSTANTS = 106 ## [::ur_bool_t] support the ::urKernelSetSpecializationConstants entry - ## point - MEMORY_BUS_WIDTH = 107 ## [uint32_t] return the width in bits of the memory bus interface of the - ## device. - MAX_WORK_GROUPS_3D = 108 ## [size_t[3]] return max 3D work groups - ASYNC_BARRIER = 109 ## [::ur_bool_t] return true if Async Barrier is supported - MEM_CHANNEL_SUPPORT = 110 ## [::ur_bool_t] return true if specifying memory channels is supported - HOST_PIPE_READ_WRITE_SUPPORTED = 111 ## [::ur_bool_t] Return true if the device supports enqueueing commands - ## to read and write pipes from the host. - MAX_REGISTERS_PER_WORK_GROUP = 112 ## [uint32_t] The maximum number of registers available per block. - IP_VERSION = 113 ## [uint32_t] The device IP version. The meaning of the device IP version - ## is implementation-defined, but newer devices should have a higher - ## version than older devices. - VIRTUAL_MEMORY_SUPPORT = 114 ## [::ur_bool_t] return true if the device supports virtual memory. - ESIMD_SUPPORT = 115 ## [::ur_bool_t] return true if the device supports ESIMD. 
- BINDLESS_IMAGES_SUPPORT_EXP = 0x2000 ## [::ur_bool_t] returns true if the device supports the creation of - ## bindless images - BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001 ## [::ur_bool_t] returns true if the device supports the creation of - ## bindless images backed by shared USM - BINDLESS_IMAGES_1D_USM_SUPPORT_EXP = 0x2002 ## [::ur_bool_t] returns true if the device supports the creation of 1D - ## bindless images backed by USM - BINDLESS_IMAGES_2D_USM_SUPPORT_EXP = 0x2003 ## [::ur_bool_t] returns true if the device supports the creation of 2D - ## bindless images backed by USM - IMAGE_PITCH_ALIGN_EXP = 0x2004 ## [uint32_t] returns the required alignment of the pitch between two - ## rows of an image in bytes - MAX_IMAGE_LINEAR_WIDTH_EXP = 0x2005 ## [size_t] returns the maximum linear width allowed for images allocated - ## using USM - MAX_IMAGE_LINEAR_HEIGHT_EXP = 0x2006 ## [size_t] returns the maximum linear height allowed for images - ## allocated using USM - MAX_IMAGE_LINEAR_PITCH_EXP = 0x2007 ## [size_t] returns the maximum linear pitch allowed for images allocated - ## using USM - MIPMAP_SUPPORT_EXP = 0x2008 ## [::ur_bool_t] returns true if the device supports allocating mipmap - ## resources - MIPMAP_ANISOTROPY_SUPPORT_EXP = 0x2009 ## [::ur_bool_t] returns true if the device supports sampling mipmap - ## images with anisotropic filtering - MIPMAP_MAX_ANISOTROPY_EXP = 0x200A ## [uint32_t] returns the maximum anisotropic ratio supported by the - ## device - MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP = 0x200B ## [::ur_bool_t] returns true if the device supports using images created - ## from individual mipmap levels - INTEROP_MEMORY_IMPORT_SUPPORT_EXP = 0x200C ## [::ur_bool_t] returns true if the device supports importing external - ## memory resources - INTEROP_MEMORY_EXPORT_SUPPORT_EXP = 0x200D ## [::ur_bool_t] returns true if the device supports exporting internal - ## memory resources - INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP = 0x200E ## [::ur_bool_t] 
returns true if the device supports importing external - ## semaphore resources - INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP = 0x200F ## [::ur_bool_t] returns true if the device supports exporting internal - ## event resources - -class ur_device_info_t(c_int): - def __str__(self): - return str(ur_device_info_v(self.value)) - - -############################################################################### -## @brief Device affinity domain -class ur_device_affinity_domain_flags_v(IntEnum): - NUMA = UR_BIT(0) ## Split the device into sub devices comprised of compute units that - ## share a NUMA node. - L4_CACHE = UR_BIT(1) ## Split the device into sub devices comprised of compute units that - ## share a level 4 data cache. - L3_CACHE = UR_BIT(2) ## Split the device into sub devices comprised of compute units that - ## share a level 3 data cache. - L2_CACHE = UR_BIT(3) ## Split the device into sub devices comprised of compute units that - ## share a level 2 data cache. - L1_CACHE = UR_BIT(4) ## Split the device into sub devices comprised of compute units that - ## share a level 1 data cache. - NEXT_PARTITIONABLE = UR_BIT(5) ## Split the device along the next partitionable affinity domain. - ## The implementation shall find the first level along which the device - ## or sub device may be further subdivided in the order: - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, - ## and partition the device into sub devices comprised of compute units - ## that share memory subsystems at this level. 
- -class ur_device_affinity_domain_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Partition Properties -class ur_device_partition_v(IntEnum): - EQUALLY = 0x1086 ## Partition Equally - BY_COUNTS = 0x1087 ## Partition by counts - BY_AFFINITY_DOMAIN = 0x1088 ## Partition by affinity domain - BY_CSLICE = 0x1089 ## Partition by c-slice - -class ur_device_partition_t(c_int): - def __str__(self): - return str(ur_device_partition_v(self.value)) - - -############################################################################### -## @brief Device partition value. -class ur_device_partition_value_t(Structure): - _fields_ = [ - ("equally", c_ulong), ## [in] Number of compute units per sub-device when partitioning with - ## ::UR_DEVICE_PARTITION_EQUALLY. - ("count", c_ulong), ## [in] Number of compute units in a sub-device when partitioning with - ## ::UR_DEVICE_PARTITION_BY_COUNTS. - ("affinity_domain", ur_device_affinity_domain_flags_t) ## [in] The affinity domain to partition for when partitioning with - ## ::UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN. - ] - -############################################################################### -## @brief Device partition property -class ur_device_partition_property_t(Structure): - _fields_ = [ - ("type", ur_device_partition_t), ## [in] The partitioning type to be used. - ("value", ur_device_partition_value_t) ## [in][tagged_by(type)] The partitioning value. 
- ] - -############################################################################### -## @brief Device Partition Properties -class ur_device_partition_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("pProperties", POINTER(ur_device_partition_property_t)), ## [in] Pointer to the beginning of the properties array. - ("PropCount", c_size_t) ## [in] The length of properties pointed to by `pProperties`. - ] - -############################################################################### -## @brief FP capabilities -class ur_device_fp_capability_flags_v(IntEnum): - CORRECTLY_ROUNDED_DIVIDE_SQRT = UR_BIT(0) ## Support correctly rounded divide and sqrt - ROUND_TO_NEAREST = UR_BIT(1) ## Support round to nearest - ROUND_TO_ZERO = UR_BIT(2) ## Support round to zero - ROUND_TO_INF = UR_BIT(3) ## Support round to infinity - INF_NAN = UR_BIT(4) ## Support INF to NAN - DENORM = UR_BIT(5) ## Support denorm - FMA = UR_BIT(6) ## Support FMA - SOFT_FLOAT = UR_BIT(7) ## Basic floating point operations implemented in software. 
- -class ur_device_fp_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Device memory cache type -class ur_device_mem_cache_type_v(IntEnum): - NONE = 0 ## Has none cache - READ_ONLY_CACHE = 1 ## Has read only cache - READ_WRITE_CACHE = 2 ## Has read write cache - -class ur_device_mem_cache_type_t(c_int): - def __str__(self): - return str(ur_device_mem_cache_type_v(self.value)) - - -############################################################################### -## @brief Device local memory type -class ur_device_local_mem_type_v(IntEnum): - NONE = 0 ## No local memory support - LOCAL = 1 ## Dedicated local memory - GLOBAL = 2 ## Global memory - -class ur_device_local_mem_type_t(c_int): - def __str__(self): - return str(ur_device_local_mem_type_v(self.value)) - - -############################################################################### -## @brief Device kernel execution capability -class ur_device_exec_capability_flags_v(IntEnum): - KERNEL = UR_BIT(0) ## Support kernel execution - NATIVE_KERNEL = UR_BIT(1) ## Support native kernel execution - -class ur_device_exec_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Native device creation properties -class ur_device_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. 
- ] - -############################################################################### -## @brief Memory order capabilities -class ur_memory_order_capability_flags_v(IntEnum): - RELAXED = UR_BIT(0) ## Relaxed memory ordering - ACQUIRE = UR_BIT(1) ## Acquire memory ordering - RELEASE = UR_BIT(2) ## Release memory ordering - ACQ_REL = UR_BIT(3) ## Acquire/release memory ordering - SEQ_CST = UR_BIT(4) ## Sequentially consistent memory ordering - -class ur_memory_order_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Memory scope capabilities -class ur_memory_scope_capability_flags_v(IntEnum): - WORK_ITEM = UR_BIT(0) ## Work item scope - SUB_GROUP = UR_BIT(1) ## Sub group scope - WORK_GROUP = UR_BIT(2) ## Work group scope - DEVICE = UR_BIT(3) ## Device scope - SYSTEM = UR_BIT(4) ## System scope - -class ur_memory_scope_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM access capabilities -class ur_device_usm_access_capability_flags_v(IntEnum): - ACCESS = UR_BIT(0) ## Memory can be accessed - ATOMIC_ACCESS = UR_BIT(1) ## Memory can be accessed atomically - CONCURRENT_ACCESS = UR_BIT(2) ## Memory can be accessed concurrently - ATOMIC_CONCURRENT_ACCESS = UR_BIT(3) ## Memory can be accessed atomically and concurrently - -class ur_device_usm_access_capability_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Context property type -class ur_context_flags_v(IntEnum): - TBD = UR_BIT(0) ## reserved for future use - -class ur_context_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Context creation properties -class 
ur_context_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("flags", ur_context_flags_t) ## [in] context creation flags. - ] - -############################################################################### -## @brief Supported context info -class ur_context_info_v(IntEnum): - NUM_DEVICES = 0 ## [uint32_t] The number of the devices in the context - DEVICES = 1 ## [::ur_device_handle_t[]] The array of the device handles in the - ## context - REFERENCE_COUNT = 2 ## [uint32_t] Reference count of the context object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - USM_MEMCPY2D_SUPPORT = 3 ## [::ur_bool_t] to indicate if the ::urEnqueueUSMMemcpy2D entrypoint is - ## supported. - USM_FILL2D_SUPPORT = 4 ## [::ur_bool_t] to indicate if the ::urEnqueueUSMFill2D entrypoint is - ## supported. - ATOMIC_MEMORY_ORDER_CAPABILITIES = 5 ## [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory order capabilities. - ATOMIC_MEMORY_SCOPE_CAPABILITIES = 6 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory scope capabilities. - ATOMIC_FENCE_ORDER_CAPABILITIES = 7 ## [::ur_memory_order_capability_flags_t] return a bit-field of atomic - ## memory fence order capabilities. - ## Zero is returned if the backend does not support context-level fences. - ATOMIC_FENCE_SCOPE_CAPABILITIES = 8 ## [::ur_memory_scope_capability_flags_t] return a bit-field of atomic - ## memory fence scope capabilities. - ## Zero is returned if the backend does not support context-level fences. 
- -class ur_context_info_t(c_int): - def __str__(self): - return str(ur_context_info_v(self.value)) - - -############################################################################### -## @brief Properties for for ::urContextCreateWithNativeHandle. -class ur_context_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Context's extended deleter callback function with user data. -def ur_context_extended_deleter_t(user_defined_callback): - @CFUNCTYPE(None, c_void_p) - def ur_context_extended_deleter_t_wrapper(pUserData): - return user_defined_callback(pUserData) - return ur_context_extended_deleter_t_wrapper - -############################################################################### -## @brief Memory flags -class ur_mem_flags_v(IntEnum): - READ_WRITE = UR_BIT(0) ## The memory object will be read and written by a kernel. 
This is the - ## default - WRITE_ONLY = UR_BIT(1) ## The memory object will be written but not read by a kernel - READ_ONLY = UR_BIT(2) ## The memory object is a read-only inside a kernel - USE_HOST_POINTER = UR_BIT(3) ## Use memory pointed by a host pointer parameter as the storage bits for - ## the memory object - ALLOC_HOST_POINTER = UR_BIT(4) ## Allocate memory object from host accessible memory - ALLOC_COPY_HOST_POINTER = UR_BIT(5) ## Allocate memory and copy the data from host pointer pointed memory - -class ur_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Memory types -class ur_mem_type_v(IntEnum): - BUFFER = 0 ## Buffer object - IMAGE2D = 1 ## 2D image object - IMAGE3D = 2 ## 3D image object - IMAGE2D_ARRAY = 3 ## 2D image array object - IMAGE1D = 4 ## 1D image object - IMAGE1D_ARRAY = 5 ## 1D image array object - IMAGE1D_BUFFER = 6 ## 1D image buffer object - -class ur_mem_type_t(c_int): - def __str__(self): - return str(ur_mem_type_v(self.value)) - - -############################################################################### -## @brief Memory Information type -class ur_mem_info_v(IntEnum): - SIZE = 0 ## [size_t] actual size of of memory object in bytes - CONTEXT = 1 ## [::ur_context_handle_t] context in which the memory object was created - -class ur_mem_info_t(c_int): - def __str__(self): - return str(ur_mem_info_v(self.value)) - - -############################################################################### -## @brief Image channel order info: number of channels and the channel layout -class ur_image_channel_order_v(IntEnum): - A = 0 ## channel order A - R = 1 ## channel order R - RG = 2 ## channel order RG - RA = 3 ## channel order RA - RGB = 4 ## channel order RGB - RGBA = 5 ## channel order RGBA - BGRA = 6 ## channel order BGRA - ARGB = 7 ## channel order ARGB - ABGR = 8 ## channel order ABGR - INTENSITY = 9 ## channel order 
intensity - LUMINANCE = 10 ## channel order luminance - RX = 11 ## channel order Rx - RGX = 12 ## channel order RGx - RGBX = 13 ## channel order RGBx - SRGBA = 14 ## channel order sRGBA - -class ur_image_channel_order_t(c_int): - def __str__(self): - return str(ur_image_channel_order_v(self.value)) - - -############################################################################### -## @brief Image channel type info: describe the size of the channel data type -class ur_image_channel_type_v(IntEnum): - SNORM_INT8 = 0 ## channel type snorm int8 - SNORM_INT16 = 1 ## channel type snorm int16 - UNORM_INT8 = 2 ## channel type unorm int8 - UNORM_INT16 = 3 ## channel type unorm int16 - UNORM_SHORT_565 = 4 ## channel type unorm short 565 - UNORM_SHORT_555 = 5 ## channel type unorm short 555 - INT_101010 = 6 ## channel type int 101010 - SIGNED_INT8 = 7 ## channel type signed int8 - SIGNED_INT16 = 8 ## channel type signed int16 - SIGNED_INT32 = 9 ## channel type signed int32 - UNSIGNED_INT8 = 10 ## channel type unsigned int8 - UNSIGNED_INT16 = 11 ## channel type unsigned int16 - UNSIGNED_INT32 = 12 ## channel type unsigned int32 - HALF_FLOAT = 13 ## channel type half float - FLOAT = 14 ## channel type float - -class ur_image_channel_type_t(c_int): - def __str__(self): - return str(ur_image_channel_type_v(self.value)) - - -############################################################################### -## @brief Image information types -class ur_image_info_v(IntEnum): - FORMAT = 0 ## [::ur_image_format_t] image format - ELEMENT_SIZE = 1 ## [size_t] element size - ROW_PITCH = 2 ## [size_t] row pitch - SLICE_PITCH = 3 ## [size_t] slice pitch - WIDTH = 4 ## [size_t] image width - HEIGHT = 5 ## [size_t] image height - DEPTH = 6 ## [size_t] image depth - -class ur_image_info_t(c_int): - def __str__(self): - return str(ur_image_info_v(self.value)) - - -############################################################################### -## @brief Image format including channel layout and 
data type -class ur_image_format_t(Structure): - _fields_ = [ - ("channelOrder", ur_image_channel_order_t), ## [in] image channel order - ("channelType", ur_image_channel_type_t) ## [in] image channel type - ] - -############################################################################### -## @brief Image descriptor type. -class ur_image_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_IMAGE_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("type", ur_mem_type_t), ## [in][nocheck] memory object type - ("width", c_size_t), ## [in] image width - ("height", c_size_t), ## [in] image height - ("depth", c_size_t), ## [in] image depth - ("arraySize", c_size_t), ## [in] image array size - ("rowPitch", c_size_t), ## [in] image row pitch - ("slicePitch", c_size_t), ## [in] image slice pitch - ("numMipLevel", c_ulong), ## [in] number of MIP levels - ("numSamples", c_ulong) ## [in] number of samples - ] - -############################################################################### -## @brief Buffer creation properties -class ur_buffer_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_BUFFER_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("pHost", c_void_p) ## [in][optional] pointer to the buffer data - ] - -############################################################################### -## @brief Buffer memory channel creation properties -## -## @details -## - Specify these properties in ::urMemBufferCreate via -## ::ur_buffer_properties_t as part of a `pNext` chain. 
-## -## @remarks -## _Analogues_ -## - cl_intel_mem_channel_property -class ur_buffer_channel_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("channel", c_ulong) ## [in] Identifies the channel/region to which the buffer should be mapped. - ] - -############################################################################### -## @brief Buffer allocation location creation properties -## -## @details -## - Specify these properties in ::urMemBufferCreate via -## ::ur_buffer_properties_t as part of a `pNext` chain. -## -## @remarks -## _Analogues_ -## - cl_intel_mem_alloc_buffer_location -class ur_buffer_alloc_location_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("location", c_ulong) ## [in] Identifies the ID of global memory partition to which the memory - ## should be allocated. 
- ] - -############################################################################### -## @brief Buffer region type, used to describe a sub buffer -class ur_buffer_region_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_BUFFER_REGION - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("origin", c_size_t), ## [in] buffer origin offset - ("size", c_size_t) ## [in] size of the buffer region - ] - -############################################################################### -## @brief Buffer creation type -class ur_buffer_create_type_v(IntEnum): - REGION = 0 ## buffer create type is region - -class ur_buffer_create_type_t(c_int): - def __str__(self): - return str(ur_buffer_create_type_v(self.value)) - - -############################################################################### -## @brief Native memory object creation properties -class ur_mem_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. - ] - -############################################################################### -## @brief Sampler Filter Mode -class ur_sampler_filter_mode_v(IntEnum): - NEAREST = 0 ## Filter mode nearest. - LINEAR = 1 ## Filter mode linear. 
- -class ur_sampler_filter_mode_t(c_int): - def __str__(self): - return str(ur_sampler_filter_mode_v(self.value)) - - -############################################################################### -## @brief Sampler addressing mode -class ur_sampler_addressing_mode_v(IntEnum): - NONE = 0 ## None - CLAMP_TO_EDGE = 1 ## Clamp to edge - CLAMP = 2 ## Clamp - REPEAT = 3 ## Repeat - MIRRORED_REPEAT = 4 ## Mirrored Repeat - -class ur_sampler_addressing_mode_t(c_int): - def __str__(self): - return str(ur_sampler_addressing_mode_v(self.value)) - - -############################################################################### -## @brief Get sample object information -class ur_sampler_info_v(IntEnum): - REFERENCE_COUNT = 0 ## [uint32_t] Reference count of the sampler object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 1 ## [::ur_context_handle_t] Sampler context info - NORMALIZED_COORDS = 2 ## [::ur_bool_t] Sampler normalized coordinate setting - ADDRESSING_MODE = 3 ## [::ur_sampler_addressing_mode_t] Sampler addressing mode setting - FILTER_MODE = 4 ## [::ur_sampler_filter_mode_t] Sampler filter mode setting - -class ur_sampler_info_t(c_int): - def __str__(self): - return str(ur_sampler_info_v(self.value)) - - -############################################################################### -## @brief Sampler description. 
-class ur_sampler_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_SAMPLER_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("normalizedCoords", c_bool), ## [in] Specify if image coordinates are normalized (true) or not (false) - ("addressingMode", ur_sampler_addressing_mode_t), ## [in] Specify the address mode of the sampler - ("filterMode", ur_sampler_filter_mode_t) ## [in] Specify the filter mode of the sampler - ] - -############################################################################### -## @brief Native sampler creation properties -class ur_sampler_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. 
- ] - -############################################################################### -## @brief USM host memory property flags -class ur_usm_host_mem_flags_v(IntEnum): - INITIAL_PLACEMENT = UR_BIT(0) ## Optimize shared allocation for first access on the host - -class ur_usm_host_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM device memory property flags -class ur_usm_device_mem_flags_v(IntEnum): - WRITE_COMBINED = UR_BIT(0) ## Memory should be allocated write-combined (WC) - INITIAL_PLACEMENT = UR_BIT(1) ## Optimize shared allocation for first access on the device - DEVICE_READ_ONLY = UR_BIT(2) ## Memory is only possibly modified from the host, but read-only in all - ## device code - -class ur_usm_device_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM memory property flags -class ur_usm_pool_flags_v(IntEnum): - ZERO_INITIALIZE_BLOCK = UR_BIT(0) ## All coarse-grain allocations (allocations from the driver) will be - ## zero-initialized. 
- -class ur_usm_pool_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief USM allocation type -class ur_usm_type_v(IntEnum): - UNKNOWN = 0 ## Unknown USM type - HOST = 1 ## Host USM type - DEVICE = 2 ## Device USM type - SHARED = 3 ## Shared USM type - -class ur_usm_type_t(c_int): - def __str__(self): - return str(ur_usm_type_v(self.value)) - - -############################################################################### -## @brief USM memory allocation information type -class ur_usm_alloc_info_v(IntEnum): - TYPE = 0 ## [::ur_usm_type_t] Memory allocation type info - BASE_PTR = 1 ## [void *] Memory allocation base pointer info - SIZE = 2 ## [size_t] Memory allocation size info - DEVICE = 3 ## [::ur_device_handle_t] Memory allocation device info - POOL = 4 ## [::ur_usm_pool_handle_t] Memory allocation pool info - -class ur_usm_alloc_info_t(c_int): - def __str__(self): - return str(ur_usm_alloc_info_v(self.value)) - - -############################################################################### -## @brief USM memory advice -class ur_usm_advice_flags_v(IntEnum): - DEFAULT = UR_BIT(0) ## The USM memory advice is default - SET_READ_MOSTLY = UR_BIT(1) ## Hint that memory will be read from frequently and written to rarely - CLEAR_READ_MOSTLY = UR_BIT(2) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_READ_MOSTLY - SET_PREFERRED_LOCATION = UR_BIT(3) ## Hint that the preferred memory location is the specified device - CLEAR_PREFERRED_LOCATION = UR_BIT(4) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION - SET_NON_ATOMIC_MOSTLY = UR_BIT(5) ## Hint that memory will mostly be accessed non-atomically - CLEAR_NON_ATOMIC_MOSTLY = UR_BIT(6) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY - BIAS_CACHED = UR_BIT(7) ## Hint that memory should be cached - BIAS_UNCACHED = UR_BIT(8) ## Hint that memory should be not be cached - 
SET_ACCESSED_BY_DEVICE = UR_BIT(9) ## Hint that memory will be mostly accessed by the specified device - CLEAR_ACCESSED_BY_DEVICE = UR_BIT(10) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE - SET_ACCESSED_BY_HOST = UR_BIT(11) ## Hint that memory will be mostly accessed by the host - CLEAR_ACCESSED_BY_HOST = UR_BIT(12) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST - SET_PREFERRED_LOCATION_HOST = UR_BIT(13) ## Hint that the preferred memory location is the host - CLEAR_PREFERRED_LOCATION_HOST = UR_BIT(14) ## Removes the affect of ::UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST - -class ur_usm_advice_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Handle of USM pool -class ur_usm_pool_handle_t(c_void_p): - pass - -############################################################################### -## @brief USM allocation descriptor type. -class ur_usm_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_USM_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("hints", ur_usm_advice_flags_t), ## [in] Memory advice hints - ("align", c_ulong) ## [in] alignment of the USM memory object - ## Must be zero or a power of 2. - ## Must be equal to or smaller than the size of the largest data type - ## supported by `hDevice`. - ] - -############################################################################### -## @brief USM host allocation descriptor type. -## -## @details -## - Specify these properties in ::urUSMHostAlloc and ::urUSMSharedAlloc -## via ::ur_usm_desc_t as part of a `pNext` chain. 
-class ur_usm_host_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_USM_HOST_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("flags", ur_usm_host_mem_flags_t) ## [in] host memory allocation flags - ] - -############################################################################### -## @brief USM device allocation descriptor type. -## -## @details -## - Specify these properties in ::urUSMDeviceAlloc and ::urUSMSharedAlloc -## via ::ur_usm_desc_t as part of a `pNext` chain. -class ur_usm_device_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_USM_DEVICE_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("flags", ur_usm_device_mem_flags_t) ## [in] device memory allocation flags. - ] - -############################################################################### -## @brief USM pool descriptor type -class ur_usm_pool_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be ::UR_STRUCTURE_TYPE_USM_POOL_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("flags", ur_usm_pool_flags_t) ## [in] memory allocation flags - ] - -############################################################################### -## @brief USM pool limits descriptor type -## -## @details -## - Specify these properties in ::urUSMPoolCreate via ::ur_usm_pool_desc_t -## as part of a `pNext` chain. 
-class ur_usm_pool_limits_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("maxPoolableSize", c_size_t), ## [in] Allocations up to this limit will be subject to pooling - ("minDriverAllocSize", c_size_t) ## [in] Minimum allocation size that will be requested from the driver - ] - -############################################################################### -## @brief Get USM memory pool information -class ur_usm_pool_info_v(IntEnum): - REFERENCE_COUNT = 0 ## [uint32_t] Reference count of the pool object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 1 ## [::ur_context_handle_t] USM memory pool context info - -class ur_usm_pool_info_t(c_int): - def __str__(self): - return str(ur_usm_pool_info_v(self.value)) - - -############################################################################### -## @brief Virtual memory granularity info -class ur_virtual_mem_granularity_info_v(IntEnum): - MINIMUM = 0x30100 ## [size_t] size in bytes of the minimum virtual memory granularity. - RECOMMENDED = 0x30101 ## [size_t] size in bytes of the recommended virtual memory granularity. - -class ur_virtual_mem_granularity_info_t(c_int): - def __str__(self): - return str(ur_virtual_mem_granularity_info_v(self.value)) - - -############################################################################### -## @brief Virtual memory access mode flags. -class ur_virtual_mem_access_flags_v(IntEnum): - NONE = UR_BIT(0) ## Virtual memory has no access. - READ_WRITE = UR_BIT(1) ## Virtual memory has both read and write access. - READ_ONLY = UR_BIT(2) ## Virtual memory has read only access. 
- -class ur_virtual_mem_access_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Virtual memory range info queries. -class ur_virtual_mem_info_v(IntEnum): - ACCESS_MODE = 0 ## [::ur_virtual_mem_access_flags_t] access flags of a mapped virtual - ## memory range. - -class ur_virtual_mem_info_t(c_int): - def __str__(self): - return str(ur_virtual_mem_info_v(self.value)) - - -############################################################################### -## @brief Physical memory creation properties. -class ur_physical_mem_flags_v(IntEnum): - TBD = UR_BIT(0) ## reserved for future use. - -class ur_physical_mem_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Physical memory creation properties. -class ur_physical_mem_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("flags", ur_physical_mem_flags_t) ## [in] physical memory creation flags - ] - -############################################################################### -## @brief Program metadata property type. -class ur_program_metadata_type_v(IntEnum): - UINT32 = 0 ## type is a 32-bit integer. - UINT64 = 1 ## type is a 64-bit integer. - BYTE_ARRAY = 2 ## type is a byte array. - STRING = 3 ## type is a null-terminated string. - -class ur_program_metadata_type_t(c_int): - def __str__(self): - return str(ur_program_metadata_type_v(self.value)) - - -############################################################################### -## @brief Program metadata value union. 
-class ur_program_metadata_value_t(Structure): - _fields_ = [ - ("data32", c_ulong), ## [in] inline storage for the 32-bit data, type - ## ::UR_PROGRAM_METADATA_TYPE_UINT32. - ("data64", c_ulonglong), ## [in] inline storage for the 64-bit data, type - ## ::UR_PROGRAM_METADATA_TYPE_UINT64. - ("pString", c_char_p), ## [in] pointer to null-terminated string data, type - ## ::UR_PROGRAM_METADATA_TYPE_STRING. - ("pData", c_void_p) ## [in] pointer to binary data, type - ## ::UR_PROGRAM_METADATA_TYPE_BYTE_ARRAY. - ] - -############################################################################### -## @brief Program metadata property. -class ur_program_metadata_t(Structure): - _fields_ = [ - ("pName", c_char_p), ## [in] null-terminated metadata name. - ("type", ur_program_metadata_type_t), ## [in] the type of metadata value. - ("size", c_size_t), ## [in] size in bytes of the data pointed to by value.pData, or 0 when - ## value size is less than 64-bits and is stored directly in value.data. - ("value", ur_program_metadata_value_t) ## [in][tagged_by(type)] the metadata value storage. - ] - -############################################################################### -## @brief Program creation properties. -class ur_program_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("count", c_ulong), ## [in] the number of entries in pMetadatas, if count is greater than - ## zero then pMetadatas must not be null. - ("pMetadatas", POINTER(ur_program_metadata_t)) ## [in][optional][range(0,count)] pointer to array of metadata entries. - ] - -############################################################################### -## @brief Get Program object information -class ur_program_info_v(IntEnum): - REFERENCE_COUNT = 0 ## [uint32_t] Reference count of the program object. 
- ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 1 ## [::ur_context_handle_t] Program context info. - NUM_DEVICES = 2 ## [uint32_t] Return number of devices associated with Program. - DEVICES = 3 ## [::ur_device_handle_t[]] Return list of devices associated with - ## Program. - SOURCE = 4 ## [char[]] Return program source associated with Program. - BINARY_SIZES = 5 ## [size_t[]] Return program binary sizes for each device. - BINARIES = 6 ## [unsigned char[]] Return program binaries for all devices for this - ## Program. - NUM_KERNELS = 7 ## [size_t] Number of kernels in Program, return type size_t. - KERNEL_NAMES = 8 ## [char[]] Return a null-terminated, semi-colon separated list of kernel - ## names in Program. - -class ur_program_info_t(c_int): - def __str__(self): - return str(ur_program_info_v(self.value)) - - -############################################################################### -## @brief Program object build status -class ur_program_build_status_v(IntEnum): - NONE = 0 ## Program build status none - ERROR = 1 ## Program build error - SUCCESS = 2 ## Program build success - IN_PROGRESS = 3 ## Program build in progress - -class ur_program_build_status_t(c_int): - def __str__(self): - return str(ur_program_build_status_v(self.value)) - - -############################################################################### -## @brief Program object binary type -class ur_program_binary_type_v(IntEnum): - NONE = 0 ## No program binary is associated with device - COMPILED_OBJECT = 1 ## Program binary is compiled object - LIBRARY = 2 ## Program binary is library object - EXECUTABLE = 3 ## Program binary is executable - -class ur_program_binary_type_t(c_int): - def __str__(self): - return str(ur_program_binary_type_v(self.value)) - - -############################################################################### 
-## @brief Get Program object build information -class ur_program_build_info_v(IntEnum): - STATUS = 0 ## [::ur_program_build_status_t] Program build status. - OPTIONS = 1 ## [char[]] Null-terminated options string specified by last build, - ## compile or link operation performed on the program. - LOG = 2 ## [char[]] Null-terminated program build log. - BINARY_TYPE = 3 ## [::ur_program_binary_type_t] Program binary type. - -class ur_program_build_info_t(c_int): - def __str__(self): - return str(ur_program_build_info_v(self.value)) - - -############################################################################### -## @brief Specialization constant information -class ur_specialization_constant_info_t(Structure): - _fields_ = [ - ("id", c_ulong), ## [in] specialization constant Id - ("size", c_size_t), ## [in] size of the specialization constant value - ("pValue", c_void_p) ## [in] pointer to the specialization constant value bytes - ] - -############################################################################### -## @brief Native program creation properties -class ur_program_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an - ## interoperability operation in the application that asked to not - ## transfer the ownership to the unified-runtime. - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgValue. 
-class ur_kernel_arg_value_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgLocal. -class ur_kernel_arg_local_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Get Kernel object information -class ur_kernel_info_v(IntEnum): - FUNCTION_NAME = 0 ## [char[]] Return null-terminated kernel function name. - NUM_ARGS = 1 ## [size_t] Return Kernel number of arguments. - REFERENCE_COUNT = 2 ## [uint32_t] Reference count of the kernel object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - CONTEXT = 3 ## [::ur_context_handle_t] Return Context object associated with Kernel. - PROGRAM = 4 ## [::ur_program_handle_t] Return Program object associated with Kernel. - ATTRIBUTES = 5 ## [char[]] Return null-terminated kernel attributes string. - NUM_REGS = 6 ## [uint32_t] Return the number of registers used by the compiled kernel - ## (device specific). 
- -class ur_kernel_info_t(c_int): - def __str__(self): - return str(ur_kernel_info_v(self.value)) - - -############################################################################### -## @brief Get Kernel Work Group information -class ur_kernel_group_info_v(IntEnum): - GLOBAL_WORK_SIZE = 0 ## [size_t[3]] Return Work Group maximum global size - WORK_GROUP_SIZE = 1 ## [size_t] Return maximum Work Group size - COMPILE_WORK_GROUP_SIZE = 2 ## [size_t[3]] Return Work Group size required by the source code, such - ## as __attribute__((required_work_group_size(X,Y,Z)) - LOCAL_MEM_SIZE = 3 ## [size_t] Return local memory required by the Kernel - PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4 ## [size_t] Return preferred multiple of Work Group size for launch - PRIVATE_MEM_SIZE = 5 ## [size_t] Return minimum amount of private memory in bytes used by each - ## work item in the Kernel - -class ur_kernel_group_info_t(c_int): - def __str__(self): - return str(ur_kernel_group_info_v(self.value)) - - -############################################################################### -## @brief Get Kernel SubGroup information -class ur_kernel_sub_group_info_v(IntEnum): - MAX_SUB_GROUP_SIZE = 0 ## [uint32_t] Return maximum SubGroup size - MAX_NUM_SUB_GROUPS = 1 ## [uint32_t] Return maximum number of SubGroup - COMPILE_NUM_SUB_GROUPS = 2 ## [uint32_t] Return number of SubGroup required by the source code - SUB_GROUP_SIZE_INTEL = 3 ## [uint32_t] Return SubGroup size required by Intel - -class ur_kernel_sub_group_info_t(c_int): - def __str__(self): - return str(ur_kernel_sub_group_info_v(self.value)) - - -############################################################################### -## @brief Kernel Cache Configuration. -class ur_kernel_cache_config_v(IntEnum): - DEFAULT = 0 ## No preference for SLM or data cache. - LARGE_SLM = 1 ## Large Shared Local Memory (SLM) size. - LARGE_DATA = 2 ## Large General Data size. 
- -class ur_kernel_cache_config_t(c_int): - def __str__(self): - return str(ur_kernel_cache_config_v(self.value)) - - -############################################################################### -## @brief Set additional Kernel execution information -class ur_kernel_exec_info_v(IntEnum): - USM_INDIRECT_ACCESS = 0 ## [::ur_bool_t] Kernel might access data through USM pointer. - USM_PTRS = 1 ## [void *[]] Provide an explicit array of USM pointers that the kernel - ## will access. - CACHE_CONFIG = 2 ## [::ur_kernel_cache_config_t] Provide the preferred cache configuration - -class ur_kernel_exec_info_t(c_int): - def __str__(self): - return str(ur_kernel_exec_info_v(self.value)) - - -############################################################################### -## @brief Properties for for ::urKernelSetArgPointer. -class ur_kernel_arg_pointer_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetExecInfo. -class ur_kernel_exec_info_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgSampler. 
-class ur_kernel_arg_sampler_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES - ("pNext", c_void_p) ## [in,out][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Properties for for ::urKernelSetArgMemObj. -class ur_kernel_arg_mem_obj_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("memoryAccess", ur_mem_flags_t) ## [in] Memory access flag. Allowed values are: ::UR_MEM_FLAG_READ_WRITE, - ## ::UR_MEM_FLAG_WRITE_ONLY, ::UR_MEM_FLAG_READ_ONLY. - ] - -############################################################################### -## @brief Properties for for ::urKernelCreateWithNativeHandle. -class ur_kernel_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Query queue info -class ur_queue_info_v(IntEnum): - CONTEXT = 0 ## [::ur_queue_handle_t] context associated with this queue. - DEVICE = 1 ## [::ur_device_handle_t] device associated with this queue. - DEVICE_DEFAULT = 2 ## [::ur_queue_handle_t] the current default queue of the underlying - ## device. 
- FLAGS = 3 ## [::ur_queue_flags_t] the properties associated with - ## ::ur_queue_properties_t::flags. - REFERENCE_COUNT = 4 ## [uint32_t] Reference count of the queue object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. This feature is - ## provided for identifying memory leaks. - SIZE = 5 ## [uint32_t] The size of the queue - EMPTY = 6 ## [::ur_bool_t] return true if the queue was empty at the time of the - ## query - -class ur_queue_info_t(c_int): - def __str__(self): - return str(ur_queue_info_v(self.value)) - - -############################################################################### -## @brief Queue property flags -class ur_queue_flags_v(IntEnum): - OUT_OF_ORDER_EXEC_MODE_ENABLE = UR_BIT(0) ## Enable/disable out of order execution - PROFILING_ENABLE = UR_BIT(1) ## Enable/disable profiling - ON_DEVICE = UR_BIT(2) ## Is a device queue - ON_DEVICE_DEFAULT = UR_BIT(3) ## Is the default queue for a device - DISCARD_EVENTS = UR_BIT(4) ## Events will be discarded - PRIORITY_LOW = UR_BIT(5) ## Low priority queue - PRIORITY_HIGH = UR_BIT(6) ## High priority queue - SUBMISSION_BATCHED = UR_BIT(7) ## Hint: enqueue and submit in a batch later. No change in queue - ## semantics. Implementation chooses submission mode. - SUBMISSION_IMMEDIATE = UR_BIT(8) ## Hint: enqueue and submit immediately. No change in queue semantics. - ## Implementation chooses submission mode. - USE_DEFAULT_STREAM = UR_BIT(9) ## Use the default stream. Only meaningful for CUDA. Other platforms may - ## ignore this flag. - SYNC_WITH_DEFAULT_STREAM = UR_BIT(10) ## Synchronize with the default stream. Only meaningful for CUDA. Other - ## platforms may ignore this flag. 
- -class ur_queue_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Queue creation properties -class ur_queue_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("flags", ur_queue_flags_t) ## [in] Bitfield of queue creation flags - ] - -############################################################################### -## @brief Queue index creation properties -## -## @details -## - Specify these properties in ::urQueueCreate via -## ::ur_queue_properties_t as part of a `pNext` chain. -class ur_queue_index_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("computeIndex", c_ulong) ## [in] Specifies the compute index as described in the - ## sycl_ext_intel_queue_index extension. - ] - -############################################################################### -## @brief Descriptor for ::urQueueGetNativeHandle and -## ::urQueueCreateWithNativeHandle. -## -## @details -## - Specify this descriptor in ::urQueueGetNativeHandle directly or -## ::urQueueCreateWithNativeHandle via ::ur_queue_native_properties_t as -## part of a `pNext` chain. -class ur_queue_native_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("pNativeData", c_void_p) ## [in][optional] Adapter-specific metadata needed to create the handle. 
- ] - -############################################################################### -## @brief Properties for for ::urQueueCreateWithNativeHandle. -class ur_queue_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Command type -class ur_command_v(IntEnum): - KERNEL_LAUNCH = 0 ## Event created by ::urEnqueueKernelLaunch - EVENTS_WAIT = 1 ## Event created by ::urEnqueueEventsWait - EVENTS_WAIT_WITH_BARRIER = 2 ## Event created by ::urEnqueueEventsWaitWithBarrier - MEM_BUFFER_READ = 3 ## Event created by ::urEnqueueMemBufferRead - MEM_BUFFER_WRITE = 4 ## Event created by ::urEnqueueMemBufferWrite - MEM_BUFFER_READ_RECT = 5 ## Event created by ::urEnqueueMemBufferReadRect - MEM_BUFFER_WRITE_RECT = 6 ## Event created by ::urEnqueueMemBufferWriteRect - MEM_BUFFER_COPY = 7 ## Event created by ::urEnqueueMemBufferCopy - MEM_BUFFER_COPY_RECT = 8 ## Event created by ::urEnqueueMemBufferCopyRect - MEM_BUFFER_FILL = 9 ## Event created by ::urEnqueueMemBufferFill - MEM_IMAGE_READ = 10 ## Event created by ::urEnqueueMemImageRead - MEM_IMAGE_WRITE = 11 ## Event created by ::urEnqueueMemImageWrite - MEM_IMAGE_COPY = 12 ## Event created by ::urEnqueueMemImageCopy - MEM_BUFFER_MAP = 14 ## Event created by ::urEnqueueMemBufferMap - MEM_UNMAP = 16 ## Event created by ::urEnqueueMemUnmap - USM_FILL = 17 ## Event created by ::urEnqueueUSMFill - USM_MEMCPY = 18 ## Event created by ::urEnqueueUSMMemcpy - USM_PREFETCH = 19 ## Event created by ::urEnqueueUSMPrefetch - 
USM_ADVISE = 20 ## Event created by ::urEnqueueUSMAdvise - USM_FILL_2D = 21 ## Event created by ::urEnqueueUSMFill2D - USM_MEMCPY_2D = 22 ## Event created by ::urEnqueueUSMMemcpy2D - DEVICE_GLOBAL_VARIABLE_WRITE = 23 ## Event created by ::urEnqueueDeviceGlobalVariableWrite - DEVICE_GLOBAL_VARIABLE_READ = 24 ## Event created by ::urEnqueueDeviceGlobalVariableRead - READ_HOST_PIPE = 25 ## Event created by ::urEnqueueReadHostPipe - WRITE_HOST_PIPE = 26 ## Event created by ::urEnqueueWriteHostPipe - COMMAND_BUFFER_ENQUEUE_EXP = 0x1000 ## Event created by ::urCommandBufferEnqueueExp - INTEROP_SEMAPHORE_WAIT_EXP = 0x2000 ## Event created by ::urBindlessImagesWaitExternalSemaphoreExp - INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001 ## Event created by ::urBindlessImagesSignalExternalSemaphoreExp - -class ur_command_t(c_int): - def __str__(self): - return str(ur_command_v(self.value)) - - -############################################################################### -## @brief Event Status -class ur_event_status_v(IntEnum): - COMPLETE = 0 ## Command is complete - RUNNING = 1 ## Command is running - SUBMITTED = 2 ## Command is submitted - QUEUED = 3 ## Command is queued - -class ur_event_status_t(c_int): - def __str__(self): - return str(ur_event_status_v(self.value)) - - -############################################################################### -## @brief Event query information type -class ur_event_info_v(IntEnum): - COMMAND_QUEUE = 0 ## [::ur_queue_handle_t] Command queue information of an event object - CONTEXT = 1 ## [::ur_context_handle_t] Context information of an event object - COMMAND_TYPE = 2 ## [::ur_command_t] Command type information of an event object - COMMAND_EXECUTION_STATUS = 3 ## [::ur_event_status_t] Command execution status of an event object - REFERENCE_COUNT = 4 ## [uint32_t] Reference count of the event object. - ## The reference count returned should be considered immediately stale. - ## It is unsuitable for general use in applications. 
This feature is - ## provided for identifying memory leaks. - -class ur_event_info_t(c_int): - def __str__(self): - return str(ur_event_info_v(self.value)) - - -############################################################################### -## @brief Profiling query information type -class ur_profiling_info_v(IntEnum): - COMMAND_QUEUED = 0 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event is enqueued - COMMAND_SUBMIT = 1 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event is submitted - COMMAND_START = 2 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event starts execution - COMMAND_END = 3 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event has finished execution - COMMAND_COMPLETE = 4 ## [uint64_t] A 64-bit value of current device counter in nanoseconds - ## when the event and any child events enqueued by this event on the - ## device have finished execution - -class ur_profiling_info_t(c_int): - def __str__(self): - return str(ur_profiling_info_v(self.value)) - - -############################################################################### -## @brief Properties for for ::urEventCreateWithNativeHandle. -class ur_event_native_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("isNativeHandleOwned", c_bool) ## [in] Indicates UR owns the native handle or if it came from an interoperability - ## operation in the application that asked to not transfer the ownership to - ## the unified-runtime. - ] - -############################################################################### -## @brief Event states for all events. -class ur_execution_info_v(IntEnum): - COMPLETE = 0 ## Indicates that the event has completed. 
- RUNNING = 1 ## Indicates that the device has started processing this event. - SUBMITTED = 2 ## Indicates that the event has been submitted by the host to the device. - QUEUED = 3 ## Indicates that the event has been queued, this is the initial state of - ## events. - -class ur_execution_info_t(c_int): - def __str__(self): - return str(ur_execution_info_v(self.value)) - - -############################################################################### -## @brief Event callback function that can be registered by the application. -def ur_event_callback_t(user_defined_callback): - @CFUNCTYPE(None, ur_event_handle_t, ur_execution_info_t, c_void_p) - def ur_event_callback_t_wrapper(hEvent, execStatus, pUserData): - return user_defined_callback(hEvent, execStatus, pUserData) - return ur_event_callback_t_wrapper - -############################################################################### -## @brief Map flags -class ur_map_flags_v(IntEnum): - READ = UR_BIT(0) ## Map for read access - WRITE = UR_BIT(1) ## Map for write access - WRITE_INVALIDATE_REGION = UR_BIT(2) ## Map for discard_write access - -class ur_map_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Map flags -class ur_usm_migration_flags_v(IntEnum): - DEFAULT = UR_BIT(0) ## Default migration TODO: Add more enums! 
- -class ur_usm_migration_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief Handle of bindless image -class ur_exp_image_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of bindless image memory -class ur_exp_image_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of interop memory -class ur_exp_interop_mem_handle_t(c_void_p): - pass - -############################################################################### -## @brief Handle of interop semaphore -class ur_exp_interop_semaphore_handle_t(c_void_p): - pass - -############################################################################### -## @brief Dictates the type of memory copy. -class ur_exp_image_copy_flags_v(IntEnum): - HOST_TO_DEVICE = UR_BIT(0) ## Host to device - DEVICE_TO_HOST = UR_BIT(1) ## Device to host - DEVICE_TO_DEVICE = UR_BIT(2) ## Device to device - -class ur_exp_image_copy_flags_t(c_int): - def __str__(self): - return hex(self.value) - - -############################################################################### -## @brief File descriptor -class ur_exp_file_descriptor_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("fd", c_int) ## [in] A file descriptor used for Linux and & MacOS operating systems. 
- ] - -############################################################################### -## @brief Windows specific file handle -class ur_exp_win32_handle_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure - ("handle", c_void_p) ## [in] A win32 file handle. - ] - -############################################################################### -## @brief Describes mipmap sampler properties -## -## @details -## - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t -## as part of a `pNext` chain. -class ur_exp_sampler_mip_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("minMipmapLevelClamp", c_float), ## [in] minimum mipmap level from which we can sample, minimum value - ## being 0 - ("maxMipmapLevelClamp", c_float), ## [in] maximum mipmap level from which we can sample, maximum value - ## being the number of levels - ("maxAnisotropy", c_float), ## [in] anisotropic ratio used when samplling the mipmap with anisotropic - ## filtering - ("mipFilterMode", ur_sampler_filter_mode_t) ## [in] mipmap filter mode used for filtering between mipmap levels - ] - -############################################################################### -## @brief Describes unique sampler addressing mode per dimension -## -## @details -## - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t -## as part of a `pNext` chain. 
-class ur_exp_sampler_addr_modes_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("addrModes", ur_sampler_addressing_mode_t * 3) ## [in] Specify the address mode of the sampler per dimension - ] - -############################################################################### -## @brief Describes an interop memory resource descriptor -class ur_exp_interop_mem_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Describes an interop semaphore resource descriptor -class ur_exp_interop_semaphore_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief Describes layered image properties -## -## @details -## - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp -## or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as -## part of a `pNext` chain. 
-class ur_exp_layered_image_properties_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES - ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("numLayers", c_ulong) ## [in] number of layers the image should have - ] - -############################################################################### -## @brief The extension string which defines support for command-buffers which -## is returned when querying device extensions. -UR_COMMAND_BUFFER_EXTENSION_STRING_EXP = "ur_exp_command_buffer" - -############################################################################### -## @brief Command-Buffer Descriptor Type -class ur_exp_command_buffer_desc_t(Structure): - _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure - ] - -############################################################################### -## @brief A value that identifies a command inside of a command-buffer, used for -## defining dependencies between commands in the same command-buffer. -class ur_exp_command_buffer_sync_point_t(c_ulong): - pass - -############################################################################### -## @brief Handle of Command-Buffer object -class ur_exp_command_buffer_handle_t(c_void_p): - pass - -############################################################################### -## @brief The extension string which defines support for cooperative-kernels -## which is returned when querying device extensions. -UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP = "ur_exp_cooperative_kernels" - -############################################################################### -## @brief The extension string which defines support for test -## which is returned when querying device extensions. 
-UR_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP = "ur_exp_multi_device_compile" - -############################################################################### -## @brief Supported peer info -class ur_exp_peer_info_v(IntEnum): - UR_PEER_ACCESS_SUPPORTED = 0 ## [uint32_t] 1 if P2P access is supported otherwise P2P access is not - ## supported. - UR_PEER_ATOMICS_SUPPORTED = 1 ## [uint32_t] 1 if atomic operations are supported over the P2P link, - ## otherwise such operations are not supported. - -class ur_exp_peer_info_t(c_int): - def __str__(self): - return str(ur_exp_peer_info_v(self.value)) - - -############################################################################### -__use_win_types = "Windows" == platform.uname()[0] - -############################################################################### -## @brief Function-pointer for urPlatformGet -if __use_win_types: - _urPlatformGet_t = WINFUNCTYPE( ur_result_t, POINTER(ur_adapter_handle_t), c_ulong, c_ulong, POINTER(ur_platform_handle_t), POINTER(c_ulong) ) -else: - _urPlatformGet_t = CFUNCTYPE( ur_result_t, POINTER(ur_adapter_handle_t), c_ulong, c_ulong, POINTER(ur_platform_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetInfo -if __use_win_types: - _urPlatformGetInfo_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_platform_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urPlatformGetInfo_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_platform_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetNativeHandle -if __use_win_types: - _urPlatformGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_native_handle_t) ) -else: - _urPlatformGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_native_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urPlatformCreateWithNativeHandle -if __use_win_types: - _urPlatformCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, POINTER(ur_platform_native_properties_t), POINTER(ur_platform_handle_t) ) -else: - _urPlatformCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, POINTER(ur_platform_native_properties_t), POINTER(ur_platform_handle_t) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetApiVersion -if __use_win_types: - _urPlatformGetApiVersion_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_api_version_t) ) -else: - _urPlatformGetApiVersion_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, POINTER(ur_api_version_t) ) - -############################################################################### -## @brief Function-pointer for urPlatformGetBackendOption -if __use_win_types: - _urPlatformGetBackendOption_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, c_char_p, POINTER(c_char_p) ) -else: - _urPlatformGetBackendOption_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, c_char_p, POINTER(c_char_p) ) - - -############################################################################### -## @brief Table of Platform functions pointers -class ur_platform_dditable_t(Structure): - _fields_ = [ - ("pfnGet", c_void_p), ## _urPlatformGet_t - ("pfnGetInfo", c_void_p), ## _urPlatformGetInfo_t - ("pfnGetNativeHandle", c_void_p), ## _urPlatformGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urPlatformCreateWithNativeHandle_t - ("pfnGetApiVersion", c_void_p), ## _urPlatformGetApiVersion_t - ("pfnGetBackendOption", c_void_p) ## _urPlatformGetBackendOption_t - ] - -############################################################################### -## @brief Function-pointer for urContextCreate -if __use_win_types: - 
_urContextCreate_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_properties_t), POINTER(ur_context_handle_t) ) -else: - _urContextCreate_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_properties_t), POINTER(ur_context_handle_t) ) - -############################################################################### -## @brief Function-pointer for urContextRetain -if __use_win_types: - _urContextRetain_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t ) -else: - _urContextRetain_t = CFUNCTYPE( ur_result_t, ur_context_handle_t ) - -############################################################################### -## @brief Function-pointer for urContextRelease -if __use_win_types: - _urContextRelease_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t ) -else: - _urContextRelease_t = CFUNCTYPE( ur_result_t, ur_context_handle_t ) - -############################################################################### -## @brief Function-pointer for urContextGetInfo -if __use_win_types: - _urContextGetInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_context_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urContextGetInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_context_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urContextGetNativeHandle -if __use_win_types: - _urContextGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_native_handle_t) ) -else: - _urContextGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urContextCreateWithNativeHandle -if __use_win_types: - _urContextCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, c_ulong, 
POINTER(ur_device_handle_t), POINTER(ur_context_native_properties_t), POINTER(ur_context_handle_t) ) -else: - _urContextCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, c_ulong, POINTER(ur_device_handle_t), POINTER(ur_context_native_properties_t), POINTER(ur_context_handle_t) ) - -############################################################################### -## @brief Function-pointer for urContextSetExtendedDeleter -if __use_win_types: - _urContextSetExtendedDeleter_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_void_p ) -else: - _urContextSetExtendedDeleter_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_void_p ) - - -############################################################################### -## @brief Table of Context functions pointers -class ur_context_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urContextCreate_t - ("pfnRetain", c_void_p), ## _urContextRetain_t - ("pfnRelease", c_void_p), ## _urContextRelease_t - ("pfnGetInfo", c_void_p), ## _urContextGetInfo_t - ("pfnGetNativeHandle", c_void_p), ## _urContextGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urContextCreateWithNativeHandle_t - ("pfnSetExtendedDeleter", c_void_p) ## _urContextSetExtendedDeleter_t - ] - -############################################################################### -## @brief Function-pointer for urEventGetInfo -if __use_win_types: - _urEventGetInfo_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, ur_event_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urEventGetInfo_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, ur_event_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urEventGetProfilingInfo -if __use_win_types: - _urEventGetProfilingInfo_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, ur_profiling_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - 
_urEventGetProfilingInfo_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, ur_profiling_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urEventWait -if __use_win_types: - _urEventWait_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_event_handle_t) ) -else: - _urEventWait_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEventRetain -if __use_win_types: - _urEventRetain_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t ) -else: - _urEventRetain_t = CFUNCTYPE( ur_result_t, ur_event_handle_t ) - -############################################################################### -## @brief Function-pointer for urEventRelease -if __use_win_types: - _urEventRelease_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t ) -else: - _urEventRelease_t = CFUNCTYPE( ur_result_t, ur_event_handle_t ) - -############################################################################### -## @brief Function-pointer for urEventGetNativeHandle -if __use_win_types: - _urEventGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, POINTER(ur_native_handle_t) ) -else: - _urEventGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEventCreateWithNativeHandle -if __use_win_types: - _urEventCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_event_native_properties_t), POINTER(ur_event_handle_t) ) -else: - _urEventCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_event_native_properties_t), POINTER(ur_event_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urEventSetCallback -if __use_win_types: - _urEventSetCallback_t = WINFUNCTYPE( ur_result_t, ur_event_handle_t, ur_execution_info_t, c_void_p, c_void_p ) -else: - _urEventSetCallback_t = CFUNCTYPE( ur_result_t, ur_event_handle_t, ur_execution_info_t, c_void_p, c_void_p ) - - -############################################################################### -## @brief Table of Event functions pointers -class ur_event_dditable_t(Structure): - _fields_ = [ - ("pfnGetInfo", c_void_p), ## _urEventGetInfo_t - ("pfnGetProfilingInfo", c_void_p), ## _urEventGetProfilingInfo_t - ("pfnWait", c_void_p), ## _urEventWait_t - ("pfnRetain", c_void_p), ## _urEventRetain_t - ("pfnRelease", c_void_p), ## _urEventRelease_t - ("pfnGetNativeHandle", c_void_p), ## _urEventGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urEventCreateWithNativeHandle_t - ("pfnSetCallback", c_void_p) ## _urEventSetCallback_t - ] - -############################################################################### -## @brief Function-pointer for urProgramCreateWithIL -if __use_win_types: - _urProgramCreateWithIL_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) -else: - _urProgramCreateWithIL_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramCreateWithBinary -if __use_win_types: - _urProgramCreateWithBinary_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(c_ubyte), POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) -else: - _urProgramCreateWithBinary_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(c_ubyte), 
POINTER(ur_program_properties_t), POINTER(ur_program_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramBuild -if __use_win_types: - _urProgramBuild_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) -else: - _urProgramBuild_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) - -############################################################################### -## @brief Function-pointer for urProgramCompile -if __use_win_types: - _urProgramCompile_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) -else: - _urProgramCompile_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_char_p ) - -############################################################################### -## @brief Function-pointer for urProgramLink -if __use_win_types: - _urProgramLink_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) -else: - _urProgramLink_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramRetain -if __use_win_types: - _urProgramRetain_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t ) -else: - _urProgramRetain_t = CFUNCTYPE( ur_result_t, ur_program_handle_t ) - -############################################################################### -## @brief Function-pointer for urProgramRelease -if __use_win_types: - _urProgramRelease_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t ) -else: - _urProgramRelease_t = CFUNCTYPE( ur_result_t, ur_program_handle_t ) - -############################################################################### -## @brief Function-pointer for urProgramGetFunctionPointer -if 
__use_win_types: - _urProgramGetFunctionPointer_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_program_handle_t, c_char_p, POINTER(c_void_p) ) -else: - _urProgramGetFunctionPointer_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_program_handle_t, c_char_p, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urProgramGetInfo -if __use_win_types: - _urProgramGetInfo_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, ur_program_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urProgramGetInfo_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, ur_program_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urProgramGetBuildInfo -if __use_win_types: - _urProgramGetBuildInfo_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, ur_device_handle_t, ur_program_build_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urProgramGetBuildInfo_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, ur_device_handle_t, ur_program_build_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urProgramSetSpecializationConstants -if __use_win_types: - _urProgramSetSpecializationConstants_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) -else: - _urProgramSetSpecializationConstants_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) - -############################################################################### -## @brief Function-pointer for urProgramGetNativeHandle -if __use_win_types: - _urProgramGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, POINTER(ur_native_handle_t) ) -else: - _urProgramGetNativeHandle_t = CFUNCTYPE( ur_result_t, 
ur_program_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urProgramCreateWithNativeHandle -if __use_win_types: - _urProgramCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_program_native_properties_t), POINTER(ur_program_handle_t) ) -else: - _urProgramCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_program_native_properties_t), POINTER(ur_program_handle_t) ) - - -############################################################################### -## @brief Table of Program functions pointers -class ur_program_dditable_t(Structure): - _fields_ = [ - ("pfnCreateWithIL", c_void_p), ## _urProgramCreateWithIL_t - ("pfnCreateWithBinary", c_void_p), ## _urProgramCreateWithBinary_t - ("pfnBuild", c_void_p), ## _urProgramBuild_t - ("pfnCompile", c_void_p), ## _urProgramCompile_t - ("pfnLink", c_void_p), ## _urProgramLink_t - ("pfnRetain", c_void_p), ## _urProgramRetain_t - ("pfnRelease", c_void_p), ## _urProgramRelease_t - ("pfnGetFunctionPointer", c_void_p), ## _urProgramGetFunctionPointer_t - ("pfnGetInfo", c_void_p), ## _urProgramGetInfo_t - ("pfnGetBuildInfo", c_void_p), ## _urProgramGetBuildInfo_t - ("pfnSetSpecializationConstants", c_void_p), ## _urProgramSetSpecializationConstants_t - ("pfnGetNativeHandle", c_void_p), ## _urProgramGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p) ## _urProgramCreateWithNativeHandle_t - ] - -############################################################################### -## @brief Function-pointer for urProgramBuildExp -if __use_win_types: - _urProgramBuildExp_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) -else: - _urProgramBuildExp_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) - 
-############################################################################### -## @brief Function-pointer for urProgramCompileExp -if __use_win_types: - _urProgramCompileExp_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) -else: - _urProgramCompileExp_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) - -############################################################################### -## @brief Function-pointer for urProgramLinkExp -if __use_win_types: - _urProgramLinkExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_device_handle_t), c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) -else: - _urProgramLinkExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_device_handle_t), c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) - - -############################################################################### -## @brief Table of ProgramExp functions pointers -class ur_program_exp_dditable_t(Structure): - _fields_ = [ - ("pfnBuildExp", c_void_p), ## _urProgramBuildExp_t - ("pfnCompileExp", c_void_p), ## _urProgramCompileExp_t - ("pfnLinkExp", c_void_p) ## _urProgramLinkExp_t - ] - -############################################################################### -## @brief Function-pointer for urKernelCreate -if __use_win_types: - _urKernelCreate_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_char_p, POINTER(ur_kernel_handle_t) ) -else: - _urKernelCreate_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_char_p, POINTER(ur_kernel_handle_t) ) - -############################################################################### -## @brief Function-pointer for urKernelGetInfo -if __use_win_types: - _urKernelGetInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urKernelGetInfo_t = CFUNCTYPE( 
ur_result_t, ur_kernel_handle_t, ur_kernel_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urKernelGetGroupInfo -if __use_win_types: - _urKernelGetGroupInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urKernelGetGroupInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urKernelGetSubGroupInfo -if __use_win_types: - _urKernelGetSubGroupInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_sub_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urKernelGetSubGroupInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_device_handle_t, ur_kernel_sub_group_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urKernelRetain -if __use_win_types: - _urKernelRetain_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t ) -else: - _urKernelRetain_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelRelease -if __use_win_types: - _urKernelRelease_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t ) -else: - _urKernelRelease_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelGetNativeHandle -if __use_win_types: - _urKernelGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(ur_native_handle_t) ) -else: - _urKernelGetNativeHandle_t = CFUNCTYPE( ur_result_t, 
ur_kernel_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urKernelCreateWithNativeHandle -if __use_win_types: - _urKernelCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_program_handle_t, POINTER(ur_kernel_native_properties_t), POINTER(ur_kernel_handle_t) ) -else: - _urKernelCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_program_handle_t, POINTER(ur_kernel_native_properties_t), POINTER(ur_kernel_handle_t) ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgValue -if __use_win_types: - _urKernelSetArgValue_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_value_properties_t), c_void_p ) -else: - _urKernelSetArgValue_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_value_properties_t), c_void_p ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgLocal -if __use_win_types: - _urKernelSetArgLocal_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_local_properties_t) ) -else: - _urKernelSetArgLocal_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, c_size_t, POINTER(ur_kernel_arg_local_properties_t) ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgPointer -if __use_win_types: - _urKernelSetArgPointer_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_pointer_properties_t), c_void_p ) -else: - _urKernelSetArgPointer_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_pointer_properties_t), c_void_p ) - 
-############################################################################### -## @brief Function-pointer for urKernelSetExecInfo -if __use_win_types: - _urKernelSetExecInfo_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_exec_info_t, c_size_t, POINTER(ur_kernel_exec_info_properties_t), c_void_p ) -else: - _urKernelSetExecInfo_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, ur_kernel_exec_info_t, c_size_t, POINTER(ur_kernel_exec_info_properties_t), c_void_p ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgSampler -if __use_win_types: - _urKernelSetArgSampler_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_sampler_properties_t), ur_sampler_handle_t ) -else: - _urKernelSetArgSampler_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_sampler_properties_t), ur_sampler_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelSetArgMemObj -if __use_win_types: - _urKernelSetArgMemObj_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_mem_obj_properties_t), ur_mem_handle_t ) -else: - _urKernelSetArgMemObj_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_kernel_arg_mem_obj_properties_t), ur_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urKernelSetSpecializationConstants -if __use_win_types: - _urKernelSetSpecializationConstants_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) -else: - _urKernelSetSpecializationConstants_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, c_ulong, POINTER(ur_specialization_constant_info_t) ) - - -############################################################################### -## @brief Table of Kernel functions pointers -class 
ur_kernel_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urKernelCreate_t - ("pfnGetInfo", c_void_p), ## _urKernelGetInfo_t - ("pfnGetGroupInfo", c_void_p), ## _urKernelGetGroupInfo_t - ("pfnGetSubGroupInfo", c_void_p), ## _urKernelGetSubGroupInfo_t - ("pfnRetain", c_void_p), ## _urKernelRetain_t - ("pfnRelease", c_void_p), ## _urKernelRelease_t - ("pfnGetNativeHandle", c_void_p), ## _urKernelGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urKernelCreateWithNativeHandle_t - ("pfnSetArgValue", c_void_p), ## _urKernelSetArgValue_t - ("pfnSetArgLocal", c_void_p), ## _urKernelSetArgLocal_t - ("pfnSetArgPointer", c_void_p), ## _urKernelSetArgPointer_t - ("pfnSetExecInfo", c_void_p), ## _urKernelSetExecInfo_t - ("pfnSetArgSampler", c_void_p), ## _urKernelSetArgSampler_t - ("pfnSetArgMemObj", c_void_p), ## _urKernelSetArgMemObj_t - ("pfnSetSpecializationConstants", c_void_p) ## _urKernelSetSpecializationConstants_t - ] - -############################################################################### -## @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp -if __use_win_types: - _urKernelSuggestMaxCooperativeGroupCountExp_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) -else: - _urKernelSuggestMaxCooperativeGroupCountExp_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) - - -############################################################################### -## @brief Table of KernelExp functions pointers -class ur_kernel_exp_dditable_t(Structure): - _fields_ = [ - ("pfnSuggestMaxCooperativeGroupCountExp", c_void_p) ## _urKernelSuggestMaxCooperativeGroupCountExp_t - ] - -############################################################################### -## @brief Function-pointer for urSamplerCreate -if __use_win_types: - _urSamplerCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_sampler_desc_t), POINTER(ur_sampler_handle_t) ) -else: - _urSamplerCreate_t = 
CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_sampler_desc_t), POINTER(ur_sampler_handle_t) ) - -############################################################################### -## @brief Function-pointer for urSamplerRetain -if __use_win_types: - _urSamplerRetain_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t ) -else: - _urSamplerRetain_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t ) - -############################################################################### -## @brief Function-pointer for urSamplerRelease -if __use_win_types: - _urSamplerRelease_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t ) -else: - _urSamplerRelease_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t ) - -############################################################################### -## @brief Function-pointer for urSamplerGetInfo -if __use_win_types: - _urSamplerGetInfo_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t, ur_sampler_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urSamplerGetInfo_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t, ur_sampler_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urSamplerGetNativeHandle -if __use_win_types: - _urSamplerGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_sampler_handle_t, POINTER(ur_native_handle_t) ) -else: - _urSamplerGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_sampler_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urSamplerCreateWithNativeHandle -if __use_win_types: - _urSamplerCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_sampler_native_properties_t), POINTER(ur_sampler_handle_t) ) -else: - _urSamplerCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, 
POINTER(ur_sampler_native_properties_t), POINTER(ur_sampler_handle_t) ) - - -############################################################################### -## @brief Table of Sampler functions pointers -class ur_sampler_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urSamplerCreate_t - ("pfnRetain", c_void_p), ## _urSamplerRetain_t - ("pfnRelease", c_void_p), ## _urSamplerRelease_t - ("pfnGetInfo", c_void_p), ## _urSamplerGetInfo_t - ("pfnGetNativeHandle", c_void_p), ## _urSamplerGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p) ## _urSamplerCreateWithNativeHandle_t - ] - -############################################################################### -## @brief Function-pointer for urMemImageCreate -if __use_win_types: - _urMemImageCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), c_void_p, POINTER(ur_mem_handle_t) ) -else: - _urMemImageCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), c_void_p, POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemBufferCreate -if __use_win_types: - _urMemBufferCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, c_size_t, POINTER(ur_buffer_properties_t), POINTER(ur_mem_handle_t) ) -else: - _urMemBufferCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_mem_flags_t, c_size_t, POINTER(ur_buffer_properties_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemRetain -if __use_win_types: - _urMemRetain_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t ) -else: - _urMemRetain_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for 
urMemRelease -if __use_win_types: - _urMemRelease_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t ) -else: - _urMemRelease_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urMemBufferPartition -if __use_win_types: - _urMemBufferPartition_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_flags_t, ur_buffer_create_type_t, POINTER(ur_buffer_region_t), POINTER(ur_mem_handle_t) ) -else: - _urMemBufferPartition_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_flags_t, ur_buffer_create_type_t, POINTER(ur_buffer_region_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemGetNativeHandle -if __use_win_types: - _urMemGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, POINTER(ur_native_handle_t) ) -else: - _urMemGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemBufferCreateWithNativeHandle -if __use_win_types: - _urMemBufferCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) -else: - _urMemBufferCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemImageCreateWithNativeHandle -if __use_win_types: - _urMemImageCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) -else: - _urMemImageCreateWithNativeHandle_t = CFUNCTYPE( 
ur_result_t, ur_native_handle_t, ur_context_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_native_properties_t), POINTER(ur_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urMemGetInfo -if __use_win_types: - _urMemGetInfo_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urMemGetInfo_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urMemImageGetInfo -if __use_win_types: - _urMemImageGetInfo_t = WINFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_image_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urMemImageGetInfo_t = CFUNCTYPE( ur_result_t, ur_mem_handle_t, ur_image_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of Mem functions pointers -class ur_mem_dditable_t(Structure): - _fields_ = [ - ("pfnImageCreate", c_void_p), ## _urMemImageCreate_t - ("pfnBufferCreate", c_void_p), ## _urMemBufferCreate_t - ("pfnRetain", c_void_p), ## _urMemRetain_t - ("pfnRelease", c_void_p), ## _urMemRelease_t - ("pfnBufferPartition", c_void_p), ## _urMemBufferPartition_t - ("pfnGetNativeHandle", c_void_p), ## _urMemGetNativeHandle_t - ("pfnBufferCreateWithNativeHandle", c_void_p), ## _urMemBufferCreateWithNativeHandle_t - ("pfnImageCreateWithNativeHandle", c_void_p), ## _urMemImageCreateWithNativeHandle_t - ("pfnGetInfo", c_void_p), ## _urMemGetInfo_t - ("pfnImageGetInfo", c_void_p) ## _urMemImageGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urPhysicalMemCreate -if __use_win_types: - _urPhysicalMemCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, 
ur_device_handle_t, c_size_t, POINTER(ur_physical_mem_properties_t), POINTER(ur_physical_mem_handle_t) ) -else: - _urPhysicalMemCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_physical_mem_properties_t), POINTER(ur_physical_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urPhysicalMemRetain -if __use_win_types: - _urPhysicalMemRetain_t = WINFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) -else: - _urPhysicalMemRetain_t = CFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urPhysicalMemRelease -if __use_win_types: - _urPhysicalMemRelease_t = WINFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) -else: - _urPhysicalMemRelease_t = CFUNCTYPE( ur_result_t, ur_physical_mem_handle_t ) - - -############################################################################### -## @brief Table of PhysicalMem functions pointers -class ur_physical_mem_dditable_t(Structure): - _fields_ = [ - ("pfnCreate", c_void_p), ## _urPhysicalMemCreate_t - ("pfnRetain", c_void_p), ## _urPhysicalMemRetain_t - ("pfnRelease", c_void_p) ## _urPhysicalMemRelease_t - ] - -############################################################################### -## @brief Function-pointer for urAdapterGet -if __use_win_types: - _urAdapterGet_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) -else: - _urAdapterGet_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urAdapterRelease -if __use_win_types: - _urAdapterRelease_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRelease_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - 
-############################################################################### -## @brief Function-pointer for urAdapterRetain -if __use_win_types: - _urAdapterRetain_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRetain_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterGetLastError -if __use_win_types: - _urAdapterGetLastError_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) -else: - _urAdapterGetLastError_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) - -############################################################################### -## @brief Function-pointer for urAdapterGetInfo -if __use_win_types: - _urAdapterGetInfo_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urAdapterGetInfo_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of Global functions pointers -class ur_global_dditable_t(Structure): - _fields_ = [ - ("pfnAdapterGet", c_void_p), ## _urAdapterGet_t - ("pfnAdapterRelease", c_void_p), ## _urAdapterRelease_t - ("pfnAdapterRetain", c_void_p), ## _urAdapterRetain_t - ("pfnAdapterGetLastError", c_void_p), ## _urAdapterGetLastError_t - ("pfnAdapterGetInfo", c_void_p) ## _urAdapterGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urEnqueueKernelLaunch -if __use_win_types: - _urEnqueueKernelLaunch_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - 
_urEnqueueKernelLaunch_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueEventsWait -if __use_win_types: - _urEnqueueEventsWait_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueEventsWait_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueEventsWaitWithBarrier -if __use_win_types: - _urEnqueueEventsWaitWithBarrier_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueEventsWaitWithBarrier_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferRead -if __use_win_types: - _urEnqueueMemBufferRead_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferRead_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferWrite -if __use_win_types: - _urEnqueueMemBufferWrite_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, 
POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferWrite_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferReadRect -if __use_win_types: - _urEnqueueMemBufferReadRect_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferReadRect_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferWriteRect -if __use_win_types: - _urEnqueueMemBufferWriteRect_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferWriteRect_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferCopy -if __use_win_types: - _urEnqueueMemBufferCopy_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, 
c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferCopy_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferCopyRect -if __use_win_types: - _urEnqueueMemBufferCopyRect_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferCopyRect_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferFill -if __use_win_types: - _urEnqueueMemBufferFill_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemBufferFill_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemImageRead -if __use_win_types: - _urEnqueueMemImageRead_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemImageRead_t = CFUNCTYPE( ur_result_t, 
ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemImageWrite -if __use_win_types: - _urEnqueueMemImageWrite_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemImageWrite_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemImageCopy -if __use_win_types: - _urEnqueueMemImageCopy_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemImageCopy_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemBufferMap -if __use_win_types: - _urEnqueueMemBufferMap_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_map_flags_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t), POINTER(c_void_p) ) -else: - _urEnqueueMemBufferMap_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_bool, ur_map_flags_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), 
POINTER(ur_event_handle_t), POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urEnqueueMemUnmap -if __use_win_types: - _urEnqueueMemUnmap_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueMemUnmap_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_mem_handle_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMFill -if __use_win_types: - _urEnqueueUSMFill_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMFill_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMMemcpy -if __use_win_types: - _urEnqueueUSMMemcpy_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMMemcpy_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMPrefetch -if __use_win_types: - _urEnqueueUSMPrefetch_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMPrefetch_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, 
ur_usm_migration_flags_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMAdvise -if __use_win_types: - _urEnqueueUSMAdvise_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMAdvise_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMFill2D -if __use_win_types: - _urEnqueueUSMFill2D_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_size_t, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMFill2D_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_size_t, c_size_t, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueUSMMemcpy2D -if __use_win_types: - _urEnqueueUSMMemcpy2D_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_size_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueUSMMemcpy2D_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_bool, c_void_p, c_size_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueDeviceGlobalVariableWrite -if __use_win_types: - _urEnqueueDeviceGlobalVariableWrite_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, 
c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueDeviceGlobalVariableWrite_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueDeviceGlobalVariableRead -if __use_win_types: - _urEnqueueDeviceGlobalVariableRead_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueDeviceGlobalVariableRead_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueReadHostPipe -if __use_win_types: - _urEnqueueReadHostPipe_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueReadHostPipe_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urEnqueueWriteHostPipe -if __use_win_types: - _urEnqueueWriteHostPipe_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueWriteHostPipe_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_program_handle_t, c_char_p, c_bool, c_void_p, c_size_t, c_ulong, 
POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of Enqueue functions pointers -class ur_enqueue_dditable_t(Structure): - _fields_ = [ - ("pfnKernelLaunch", c_void_p), ## _urEnqueueKernelLaunch_t - ("pfnEventsWait", c_void_p), ## _urEnqueueEventsWait_t - ("pfnEventsWaitWithBarrier", c_void_p), ## _urEnqueueEventsWaitWithBarrier_t - ("pfnMemBufferRead", c_void_p), ## _urEnqueueMemBufferRead_t - ("pfnMemBufferWrite", c_void_p), ## _urEnqueueMemBufferWrite_t - ("pfnMemBufferReadRect", c_void_p), ## _urEnqueueMemBufferReadRect_t - ("pfnMemBufferWriteRect", c_void_p), ## _urEnqueueMemBufferWriteRect_t - ("pfnMemBufferCopy", c_void_p), ## _urEnqueueMemBufferCopy_t - ("pfnMemBufferCopyRect", c_void_p), ## _urEnqueueMemBufferCopyRect_t - ("pfnMemBufferFill", c_void_p), ## _urEnqueueMemBufferFill_t - ("pfnMemImageRead", c_void_p), ## _urEnqueueMemImageRead_t - ("pfnMemImageWrite", c_void_p), ## _urEnqueueMemImageWrite_t - ("pfnMemImageCopy", c_void_p), ## _urEnqueueMemImageCopy_t - ("pfnMemBufferMap", c_void_p), ## _urEnqueueMemBufferMap_t - ("pfnMemUnmap", c_void_p), ## _urEnqueueMemUnmap_t - ("pfnUSMFill", c_void_p), ## _urEnqueueUSMFill_t - ("pfnUSMMemcpy", c_void_p), ## _urEnqueueUSMMemcpy_t - ("pfnUSMPrefetch", c_void_p), ## _urEnqueueUSMPrefetch_t - ("pfnUSMAdvise", c_void_p), ## _urEnqueueUSMAdvise_t - ("pfnUSMFill2D", c_void_p), ## _urEnqueueUSMFill2D_t - ("pfnUSMMemcpy2D", c_void_p), ## _urEnqueueUSMMemcpy2D_t - ("pfnDeviceGlobalVariableWrite", c_void_p), ## _urEnqueueDeviceGlobalVariableWrite_t - ("pfnDeviceGlobalVariableRead", c_void_p), ## _urEnqueueDeviceGlobalVariableRead_t - ("pfnReadHostPipe", c_void_p), ## _urEnqueueReadHostPipe_t - ("pfnWriteHostPipe", c_void_p) ## _urEnqueueWriteHostPipe_t - ] - -############################################################################### -## @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp -if 
__use_win_types: - _urEnqueueCooperativeKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urEnqueueCooperativeKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of EnqueueExp functions pointers -class ur_enqueue_exp_dditable_t(Structure): - _fields_ = [ - ("pfnCooperativeKernelLaunchExp", c_void_p) ## _urEnqueueCooperativeKernelLaunchExp_t - ] - -############################################################################### -## @brief Function-pointer for urQueueGetInfo -if __use_win_types: - _urQueueGetInfo_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_queue_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urQueueGetInfo_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_queue_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urQueueCreate -if __use_win_types: - _urQueueCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_properties_t), POINTER(ur_queue_handle_t) ) -else: - _urQueueCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_properties_t), POINTER(ur_queue_handle_t) ) - -############################################################################### -## @brief Function-pointer for urQueueRetain -if __use_win_types: - _urQueueRetain_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueRetain_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - 
-############################################################################### -## @brief Function-pointer for urQueueRelease -if __use_win_types: - _urQueueRelease_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueRelease_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - -############################################################################### -## @brief Function-pointer for urQueueGetNativeHandle -if __use_win_types: - _urQueueGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, POINTER(ur_queue_native_desc_t), POINTER(ur_native_handle_t) ) -else: - _urQueueGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, POINTER(ur_queue_native_desc_t), POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urQueueCreateWithNativeHandle -if __use_win_types: - _urQueueCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_native_properties_t), POINTER(ur_queue_handle_t) ) -else: - _urQueueCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_queue_native_properties_t), POINTER(ur_queue_handle_t) ) - -############################################################################### -## @brief Function-pointer for urQueueFinish -if __use_win_types: - _urQueueFinish_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueFinish_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - -############################################################################### -## @brief Function-pointer for urQueueFlush -if __use_win_types: - _urQueueFlush_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t ) -else: - _urQueueFlush_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t ) - - -############################################################################### -## @brief Table of Queue functions pointers -class 
ur_queue_dditable_t(Structure): - _fields_ = [ - ("pfnGetInfo", c_void_p), ## _urQueueGetInfo_t - ("pfnCreate", c_void_p), ## _urQueueCreate_t - ("pfnRetain", c_void_p), ## _urQueueRetain_t - ("pfnRelease", c_void_p), ## _urQueueRelease_t - ("pfnGetNativeHandle", c_void_p), ## _urQueueGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urQueueCreateWithNativeHandle_t - ("pfnFinish", c_void_p), ## _urQueueFinish_t - ("pfnFlush", c_void_p) ## _urQueueFlush_t - ] - -############################################################################### -## @brief Function-pointer for urBindlessImagesUnsampledImageHandleDestroyExp -if __use_win_types: - _urBindlessImagesUnsampledImageHandleDestroyExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) -else: - _urBindlessImagesUnsampledImageHandleDestroyExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesSampledImageHandleDestroyExp -if __use_win_types: - _urBindlessImagesSampledImageHandleDestroyExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) -else: - _urBindlessImagesSampledImageHandleDestroyExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageAllocateExp -if __use_win_types: - _urBindlessImagesImageAllocateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_exp_image_mem_handle_t) ) -else: - _urBindlessImagesImageAllocateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_exp_image_mem_handle_t) ) - 
-############################################################################### -## @brief Function-pointer for urBindlessImagesImageFreeExp -if __use_win_types: - _urBindlessImagesImageFreeExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) -else: - _urBindlessImagesImageFreeExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesUnsampledImageCreateExp -if __use_win_types: - _urBindlessImagesUnsampledImageCreateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) -else: - _urBindlessImagesUnsampledImageCreateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesSampledImageCreateExp -if __use_win_types: - _urBindlessImagesSampledImageCreateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_sampler_handle_t, POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) -else: - _urBindlessImagesSampledImageCreateExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_sampler_handle_t, POINTER(ur_mem_handle_t), POINTER(ur_exp_image_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageCopyExp -if 
__use_win_types: - _urBindlessImagesImageCopyExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_void_p, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_image_copy_flags_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urBindlessImagesImageCopyExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, c_void_p, c_void_p, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_image_copy_flags_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, ur_rect_region_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImageGetInfoExp -if __use_win_types: - _urBindlessImagesImageGetInfoExp_t = WINFUNCTYPE( ur_result_t, ur_exp_image_mem_handle_t, ur_image_info_t, c_void_p, POINTER(c_size_t) ) -else: - _urBindlessImagesImageGetInfoExp_t = CFUNCTYPE( ur_result_t, ur_exp_image_mem_handle_t, ur_image_info_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesMipmapGetLevelExp -if __use_win_types: - _urBindlessImagesMipmapGetLevelExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, c_ulong, POINTER(ur_exp_image_mem_handle_t) ) -else: - _urBindlessImagesMipmapGetLevelExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t, c_ulong, POINTER(ur_exp_image_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesMipmapFreeExp -if __use_win_types: - _urBindlessImagesMipmapFreeExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) -else: - _urBindlessImagesMipmapFreeExp_t = 
CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_image_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImportOpaqueFDExp -if __use_win_types: - _urBindlessImagesImportOpaqueFDExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_exp_interop_mem_desc_t), POINTER(ur_exp_interop_mem_handle_t) ) -else: - _urBindlessImagesImportOpaqueFDExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, c_size_t, POINTER(ur_exp_interop_mem_desc_t), POINTER(ur_exp_interop_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesMapExternalArrayExp -if __use_win_types: - _urBindlessImagesMapExternalArrayExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_interop_mem_handle_t, POINTER(ur_exp_image_mem_handle_t) ) -else: - _urBindlessImagesMapExternalArrayExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_image_format_t), POINTER(ur_image_desc_t), ur_exp_interop_mem_handle_t, POINTER(ur_exp_image_mem_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesReleaseInteropExp -if __use_win_types: - _urBindlessImagesReleaseInteropExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_mem_handle_t ) -else: - _urBindlessImagesReleaseInteropExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_mem_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesImportExternalSemaphoreOpaqueFDExp -if __use_win_types: - _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t = WINFUNCTYPE( 
ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_interop_semaphore_desc_t), POINTER(ur_exp_interop_semaphore_handle_t) ) -else: - _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_interop_semaphore_desc_t), POINTER(ur_exp_interop_semaphore_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesDestroyExternalSemaphoreExp -if __use_win_types: - _urBindlessImagesDestroyExternalSemaphoreExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_semaphore_handle_t ) -else: - _urBindlessImagesDestroyExternalSemaphoreExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_exp_interop_semaphore_handle_t ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesWaitExternalSemaphoreExp -if __use_win_types: - _urBindlessImagesWaitExternalSemaphoreExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urBindlessImagesWaitExternalSemaphoreExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - -############################################################################### -## @brief Function-pointer for urBindlessImagesSignalExternalSemaphoreExp -if __use_win_types: - _urBindlessImagesSignalExternalSemaphoreExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urBindlessImagesSignalExternalSemaphoreExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_exp_interop_semaphore_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - 
- -############################################################################### -## @brief Table of BindlessImagesExp functions pointers -class ur_bindless_images_exp_dditable_t(Structure): - _fields_ = [ - ("pfnUnsampledImageHandleDestroyExp", c_void_p), ## _urBindlessImagesUnsampledImageHandleDestroyExp_t - ("pfnSampledImageHandleDestroyExp", c_void_p), ## _urBindlessImagesSampledImageHandleDestroyExp_t - ("pfnImageAllocateExp", c_void_p), ## _urBindlessImagesImageAllocateExp_t - ("pfnImageFreeExp", c_void_p), ## _urBindlessImagesImageFreeExp_t - ("pfnUnsampledImageCreateExp", c_void_p), ## _urBindlessImagesUnsampledImageCreateExp_t - ("pfnSampledImageCreateExp", c_void_p), ## _urBindlessImagesSampledImageCreateExp_t - ("pfnImageCopyExp", c_void_p), ## _urBindlessImagesImageCopyExp_t - ("pfnImageGetInfoExp", c_void_p), ## _urBindlessImagesImageGetInfoExp_t - ("pfnMipmapGetLevelExp", c_void_p), ## _urBindlessImagesMipmapGetLevelExp_t - ("pfnMipmapFreeExp", c_void_p), ## _urBindlessImagesMipmapFreeExp_t - ("pfnImportOpaqueFDExp", c_void_p), ## _urBindlessImagesImportOpaqueFDExp_t - ("pfnMapExternalArrayExp", c_void_p), ## _urBindlessImagesMapExternalArrayExp_t - ("pfnReleaseInteropExp", c_void_p), ## _urBindlessImagesReleaseInteropExp_t - ("pfnImportExternalSemaphoreOpaqueFDExp", c_void_p), ## _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t - ("pfnDestroyExternalSemaphoreExp", c_void_p), ## _urBindlessImagesDestroyExternalSemaphoreExp_t - ("pfnWaitExternalSemaphoreExp", c_void_p), ## _urBindlessImagesWaitExternalSemaphoreExp_t - ("pfnSignalExternalSemaphoreExp", c_void_p) ## _urBindlessImagesSignalExternalSemaphoreExp_t - ] - -############################################################################### -## @brief Function-pointer for urUSMHostAlloc -if __use_win_types: - _urUSMHostAlloc_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) -else: - _urUSMHostAlloc_t = CFUNCTYPE( 
ur_result_t, ur_context_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urUSMDeviceAlloc -if __use_win_types: - _urUSMDeviceAlloc_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) -else: - _urUSMDeviceAlloc_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urUSMSharedAlloc -if __use_win_types: - _urUSMSharedAlloc_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) -else: - _urUSMSharedAlloc_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urUSMFree -if __use_win_types: - _urUSMFree_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) -else: - _urUSMFree_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) - -############################################################################### -## @brief Function-pointer for urUSMGetMemAllocInfo -if __use_win_types: - _urUSMGetMemAllocInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, ur_usm_alloc_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urUSMGetMemAllocInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, ur_usm_alloc_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urUSMPoolCreate -if 
__use_win_types: - _urUSMPoolCreate_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_pool_desc_t), POINTER(ur_usm_pool_handle_t) ) -else: - _urUSMPoolCreate_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, POINTER(ur_usm_pool_desc_t), POINTER(ur_usm_pool_handle_t) ) - -############################################################################### -## @brief Function-pointer for urUSMPoolRetain -if __use_win_types: - _urUSMPoolRetain_t = WINFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) -else: - _urUSMPoolRetain_t = CFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) - -############################################################################### -## @brief Function-pointer for urUSMPoolRelease -if __use_win_types: - _urUSMPoolRelease_t = WINFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) -else: - _urUSMPoolRelease_t = CFUNCTYPE( ur_result_t, ur_usm_pool_handle_t ) - -############################################################################### -## @brief Function-pointer for urUSMPoolGetInfo -if __use_win_types: - _urUSMPoolGetInfo_t = WINFUNCTYPE( ur_result_t, ur_usm_pool_handle_t, ur_usm_pool_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urUSMPoolGetInfo_t = CFUNCTYPE( ur_result_t, ur_usm_pool_handle_t, ur_usm_pool_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of USM functions pointers -class ur_usm_dditable_t(Structure): - _fields_ = [ - ("pfnHostAlloc", c_void_p), ## _urUSMHostAlloc_t - ("pfnDeviceAlloc", c_void_p), ## _urUSMDeviceAlloc_t - ("pfnSharedAlloc", c_void_p), ## _urUSMSharedAlloc_t - ("pfnFree", c_void_p), ## _urUSMFree_t - ("pfnGetMemAllocInfo", c_void_p), ## _urUSMGetMemAllocInfo_t - ("pfnPoolCreate", c_void_p), ## _urUSMPoolCreate_t - ("pfnPoolRetain", c_void_p), ## _urUSMPoolRetain_t - ("pfnPoolRelease", c_void_p), ## _urUSMPoolRelease_t - ("pfnPoolGetInfo", c_void_p) ## _urUSMPoolGetInfo_t - ] - 
-############################################################################### -## @brief Function-pointer for urUSMPitchedAllocExp -if __use_win_types: - _urUSMPitchedAllocExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, c_size_t, c_size_t, POINTER(c_void_p), POINTER(c_size_t) ) -else: - _urUSMPitchedAllocExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_usm_desc_t), ur_usm_pool_handle_t, c_size_t, c_size_t, c_size_t, POINTER(c_void_p), POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urUSMImportExp -if __use_win_types: - _urUSMImportExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) -else: - _urUSMImportExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) - -############################################################################### -## @brief Function-pointer for urUSMReleaseExp -if __use_win_types: - _urUSMReleaseExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) -else: - _urUSMReleaseExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p ) - - -############################################################################### -## @brief Table of USMExp functions pointers -class ur_usm_exp_dditable_t(Structure): - _fields_ = [ - ("pfnPitchedAllocExp", c_void_p), ## _urUSMPitchedAllocExp_t - ("pfnImportExp", c_void_p), ## _urUSMImportExp_t - ("pfnReleaseExp", c_void_p) ## _urUSMReleaseExp_t - ] - -############################################################################### -## @brief Function-pointer for urCommandBufferCreateExp -if __use_win_types: - _urCommandBufferCreateExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_command_buffer_desc_t), POINTER(ur_exp_command_buffer_handle_t) ) -else: - _urCommandBufferCreateExp_t = CFUNCTYPE( ur_result_t, 
ur_context_handle_t, ur_device_handle_t, POINTER(ur_exp_command_buffer_desc_t), POINTER(ur_exp_command_buffer_handle_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferRetainExp -if __use_win_types: - _urCommandBufferRetainExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) -else: - _urCommandBufferRetainExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) - -############################################################################### -## @brief Function-pointer for urCommandBufferReleaseExp -if __use_win_types: - _urCommandBufferReleaseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) -else: - _urCommandBufferReleaseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) - -############################################################################### -## @brief Function-pointer for urCommandBufferFinalizeExp -if __use_win_types: - _urCommandBufferFinalizeExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) -else: - _urCommandBufferFinalizeExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendKernelLaunchExp -if __use_win_types: - _urCommandBufferAppendKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - 
-############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp -if __use_win_types: - _urCommandBufferAppendUSMMemcpyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMMemcpyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMFillExp -if __use_win_types: - _urCommandBufferAppendUSMFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferCopyExp -if __use_win_types: - _urCommandBufferAppendMemBufferCopyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferCopyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - 
-############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp -if __use_win_types: - _urCommandBufferAppendMemBufferWriteExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferWriteExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferReadExp -if __use_win_types: - _urCommandBufferAppendMemBufferReadExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferReadExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferCopyRectExp -if __use_win_types: - _urCommandBufferAppendMemBufferCopyRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferCopyRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, 
ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp -if __use_win_types: - _urCommandBufferAppendMemBufferWriteRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferWriteRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp -if __use_win_types: - _urCommandBufferAppendMemBufferReadRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferReadRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## 
@brief Function-pointer for urCommandBufferAppendMemBufferFillExp -if __use_win_types: - _urCommandBufferAppendMemBufferFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendMemBufferFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp -if __use_win_types: - _urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - -############################################################################### -## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp -if __use_win_types: - _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) -else: - _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) - 
-############################################################################### -## @brief Function-pointer for urCommandBufferEnqueueExp -if __use_win_types: - _urCommandBufferEnqueueExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) -else: - _urCommandBufferEnqueueExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) - - -############################################################################### -## @brief Table of CommandBufferExp functions pointers -class ur_command_buffer_exp_dditable_t(Structure): - _fields_ = [ - ("pfnCreateExp", c_void_p), ## _urCommandBufferCreateExp_t - ("pfnRetainExp", c_void_p), ## _urCommandBufferRetainExp_t - ("pfnReleaseExp", c_void_p), ## _urCommandBufferReleaseExp_t - ("pfnFinalizeExp", c_void_p), ## _urCommandBufferFinalizeExp_t - ("pfnAppendKernelLaunchExp", c_void_p), ## _urCommandBufferAppendKernelLaunchExp_t - ("pfnAppendUSMMemcpyExp", c_void_p), ## _urCommandBufferAppendUSMMemcpyExp_t - ("pfnAppendUSMFillExp", c_void_p), ## _urCommandBufferAppendUSMFillExp_t - ("pfnAppendMemBufferCopyExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyExp_t - ("pfnAppendMemBufferWriteExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteExp_t - ("pfnAppendMemBufferReadExp", c_void_p), ## _urCommandBufferAppendMemBufferReadExp_t - ("pfnAppendMemBufferCopyRectExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyRectExp_t - ("pfnAppendMemBufferWriteRectExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteRectExp_t - ("pfnAppendMemBufferReadRectExp", c_void_p), ## _urCommandBufferAppendMemBufferReadRectExp_t - ("pfnAppendMemBufferFillExp", c_void_p), ## _urCommandBufferAppendMemBufferFillExp_t - ("pfnAppendUSMPrefetchExp", c_void_p), ## _urCommandBufferAppendUSMPrefetchExp_t - ("pfnAppendUSMAdviseExp", c_void_p), ## 
_urCommandBufferAppendUSMAdviseExp_t - ("pfnEnqueueExp", c_void_p) ## _urCommandBufferEnqueueExp_t - ] - -############################################################################### -## @brief Function-pointer for urUsmP2PEnablePeerAccessExp -if __use_win_types: - _urUsmP2PEnablePeerAccessExp_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) -else: - _urUsmP2PEnablePeerAccessExp_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urUsmP2PDisablePeerAccessExp -if __use_win_types: - _urUsmP2PDisablePeerAccessExp_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) -else: - _urUsmP2PDisablePeerAccessExp_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urUsmP2PPeerAccessGetInfoExp -if __use_win_types: - _urUsmP2PPeerAccessGetInfoExp_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urUsmP2PPeerAccessGetInfoExp_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_handle_t, ur_exp_peer_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of UsmP2PExp functions pointers -class ur_usm_p2p_exp_dditable_t(Structure): - _fields_ = [ - ("pfnEnablePeerAccessExp", c_void_p), ## _urUsmP2PEnablePeerAccessExp_t - ("pfnDisablePeerAccessExp", c_void_p), ## _urUsmP2PDisablePeerAccessExp_t - ("pfnPeerAccessGetInfoExp", c_void_p) ## _urUsmP2PPeerAccessGetInfoExp_t - ] - -############################################################################### -## @brief Function-pointer for urVirtualMemGranularityGetInfo -if __use_win_types: - _urVirtualMemGranularityGetInfo_t = WINFUNCTYPE( ur_result_t, 
ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urVirtualMemGranularityGetInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urVirtualMemReserve -if __use_win_types: - _urVirtualMemReserve_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(c_void_p) ) -else: - _urVirtualMemReserve_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, POINTER(c_void_p) ) - -############################################################################### -## @brief Function-pointer for urVirtualMemFree -if __use_win_types: - _urVirtualMemFree_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) -else: - _urVirtualMemFree_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemMap -if __use_win_types: - _urVirtualMemMap_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_physical_mem_handle_t, c_size_t, ur_virtual_mem_access_flags_t ) -else: - _urVirtualMemMap_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_physical_mem_handle_t, c_size_t, ur_virtual_mem_access_flags_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemUnmap -if __use_win_types: - _urVirtualMemUnmap_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) -else: - _urVirtualMemUnmap_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemSetAccess -if 
__use_win_types: - _urVirtualMemSetAccess_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_access_flags_t ) -else: - _urVirtualMemSetAccess_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_access_flags_t ) - -############################################################################### -## @brief Function-pointer for urVirtualMemGetInfo -if __use_win_types: - _urVirtualMemGetInfo_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urVirtualMemGetInfo_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_void_p, c_size_t, ur_virtual_mem_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of VirtualMem functions pointers -class ur_virtual_mem_dditable_t(Structure): - _fields_ = [ - ("pfnGranularityGetInfo", c_void_p), ## _urVirtualMemGranularityGetInfo_t - ("pfnReserve", c_void_p), ## _urVirtualMemReserve_t - ("pfnFree", c_void_p), ## _urVirtualMemFree_t - ("pfnMap", c_void_p), ## _urVirtualMemMap_t - ("pfnUnmap", c_void_p), ## _urVirtualMemUnmap_t - ("pfnSetAccess", c_void_p), ## _urVirtualMemSetAccess_t - ("pfnGetInfo", c_void_p) ## _urVirtualMemGetInfo_t - ] - -############################################################################### -## @brief Function-pointer for urDeviceGet -if __use_win_types: - _urDeviceGet_t = WINFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_device_type_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) -else: - _urDeviceGet_t = CFUNCTYPE( ur_result_t, ur_platform_handle_t, ur_device_type_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urDeviceGetInfo -if __use_win_types: - _urDeviceGetInfo_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, 
ur_device_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urDeviceGetInfo_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, ur_device_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - -############################################################################### -## @brief Function-pointer for urDeviceRetain -if __use_win_types: - _urDeviceRetain_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t ) -else: - _urDeviceRetain_t = CFUNCTYPE( ur_result_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urDeviceRelease -if __use_win_types: - _urDeviceRelease_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t ) -else: - _urDeviceRelease_t = CFUNCTYPE( ur_result_t, ur_device_handle_t ) - -############################################################################### -## @brief Function-pointer for urDevicePartition -if __use_win_types: - _urDevicePartition_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_properties_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) -else: - _urDevicePartition_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_properties_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urDeviceSelectBinary -if __use_win_types: - _urDeviceSelectBinary_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_binary_t), c_ulong, POINTER(c_ulong) ) -else: - _urDeviceSelectBinary_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_binary_t), c_ulong, POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urDeviceGetNativeHandle -if __use_win_types: - _urDeviceGetNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_native_handle_t) ) -else: - 
_urDeviceGetNativeHandle_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_native_handle_t) ) - -############################################################################### -## @brief Function-pointer for urDeviceCreateWithNativeHandle -if __use_win_types: - _urDeviceCreateWithNativeHandle_t = WINFUNCTYPE( ur_result_t, ur_native_handle_t, ur_platform_handle_t, POINTER(ur_device_native_properties_t), POINTER(ur_device_handle_t) ) -else: - _urDeviceCreateWithNativeHandle_t = CFUNCTYPE( ur_result_t, ur_native_handle_t, ur_platform_handle_t, POINTER(ur_device_native_properties_t), POINTER(ur_device_handle_t) ) - -############################################################################### -## @brief Function-pointer for urDeviceGetGlobalTimestamps -if __use_win_types: - _urDeviceGetGlobalTimestamps_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(c_ulonglong), POINTER(c_ulonglong) ) -else: - _urDeviceGetGlobalTimestamps_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(c_ulonglong), POINTER(c_ulonglong) ) - - -############################################################################### -## @brief Table of Device functions pointers -class ur_device_dditable_t(Structure): - _fields_ = [ - ("pfnGet", c_void_p), ## _urDeviceGet_t - ("pfnGetInfo", c_void_p), ## _urDeviceGetInfo_t - ("pfnRetain", c_void_p), ## _urDeviceRetain_t - ("pfnRelease", c_void_p), ## _urDeviceRelease_t - ("pfnPartition", c_void_p), ## _urDevicePartition_t - ("pfnSelectBinary", c_void_p), ## _urDeviceSelectBinary_t - ("pfnGetNativeHandle", c_void_p), ## _urDeviceGetNativeHandle_t - ("pfnCreateWithNativeHandle", c_void_p), ## _urDeviceCreateWithNativeHandle_t - ("pfnGetGlobalTimestamps", c_void_p) ## _urDeviceGetGlobalTimestamps_t - ] - -############################################################################### -class ur_dditable_t(Structure): - _fields_ = [ - ("Platform", ur_platform_dditable_t), - ("Context", ur_context_dditable_t), - ("Event", 
ur_event_dditable_t), - ("Program", ur_program_dditable_t), - ("ProgramExp", ur_program_exp_dditable_t), - ("Kernel", ur_kernel_dditable_t), - ("KernelExp", ur_kernel_exp_dditable_t), - ("Sampler", ur_sampler_dditable_t), - ("Mem", ur_mem_dditable_t), - ("PhysicalMem", ur_physical_mem_dditable_t), - ("Global", ur_global_dditable_t), - ("Enqueue", ur_enqueue_dditable_t), - ("EnqueueExp", ur_enqueue_exp_dditable_t), - ("Queue", ur_queue_dditable_t), - ("BindlessImagesExp", ur_bindless_images_exp_dditable_t), - ("USM", ur_usm_dditable_t), - ("USMExp", ur_usm_exp_dditable_t), - ("CommandBufferExp", ur_command_buffer_exp_dditable_t), - ("UsmP2PExp", ur_usm_p2p_exp_dditable_t), - ("VirtualMem", ur_virtual_mem_dditable_t), - ("Device", ur_device_dditable_t) - ] - -############################################################################### -## @brief ur device-driver interfaces -class UR_DDI: - def __init__(self, version : ur_api_version_t): - # load the ur_loader library - if "Windows" == platform.uname()[0]: - self.__dll = WinDLL("ur_loader.dll", winmode=0) - else: - self.__dll = CDLL("libur_loader.so") - - # fill the ddi tables - self.__dditable = ur_dditable_t() - - # initialize the UR - self.__dll.urLoaderInit(0, 0) - - # call driver to get function pointers - Platform = ur_platform_dditable_t() - r = ur_result_v(self.__dll.urGetPlatformProcAddrTable(version, byref(Platform))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Platform = Platform - - # attach function interface to function address - self.urPlatformGet = _urPlatformGet_t(self.__dditable.Platform.pfnGet) - self.urPlatformGetInfo = _urPlatformGetInfo_t(self.__dditable.Platform.pfnGetInfo) - self.urPlatformGetNativeHandle = _urPlatformGetNativeHandle_t(self.__dditable.Platform.pfnGetNativeHandle) - self.urPlatformCreateWithNativeHandle = _urPlatformCreateWithNativeHandle_t(self.__dditable.Platform.pfnCreateWithNativeHandle) - self.urPlatformGetApiVersion = 
_urPlatformGetApiVersion_t(self.__dditable.Platform.pfnGetApiVersion) - self.urPlatformGetBackendOption = _urPlatformGetBackendOption_t(self.__dditable.Platform.pfnGetBackendOption) - - # call driver to get function pointers - Context = ur_context_dditable_t() - r = ur_result_v(self.__dll.urGetContextProcAddrTable(version, byref(Context))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Context = Context - - # attach function interface to function address - self.urContextCreate = _urContextCreate_t(self.__dditable.Context.pfnCreate) - self.urContextRetain = _urContextRetain_t(self.__dditable.Context.pfnRetain) - self.urContextRelease = _urContextRelease_t(self.__dditable.Context.pfnRelease) - self.urContextGetInfo = _urContextGetInfo_t(self.__dditable.Context.pfnGetInfo) - self.urContextGetNativeHandle = _urContextGetNativeHandle_t(self.__dditable.Context.pfnGetNativeHandle) - self.urContextCreateWithNativeHandle = _urContextCreateWithNativeHandle_t(self.__dditable.Context.pfnCreateWithNativeHandle) - self.urContextSetExtendedDeleter = _urContextSetExtendedDeleter_t(self.__dditable.Context.pfnSetExtendedDeleter) - - # call driver to get function pointers - Event = ur_event_dditable_t() - r = ur_result_v(self.__dll.urGetEventProcAddrTable(version, byref(Event))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Event = Event - - # attach function interface to function address - self.urEventGetInfo = _urEventGetInfo_t(self.__dditable.Event.pfnGetInfo) - self.urEventGetProfilingInfo = _urEventGetProfilingInfo_t(self.__dditable.Event.pfnGetProfilingInfo) - self.urEventWait = _urEventWait_t(self.__dditable.Event.pfnWait) - self.urEventRetain = _urEventRetain_t(self.__dditable.Event.pfnRetain) - self.urEventRelease = _urEventRelease_t(self.__dditable.Event.pfnRelease) - self.urEventGetNativeHandle = _urEventGetNativeHandle_t(self.__dditable.Event.pfnGetNativeHandle) - self.urEventCreateWithNativeHandle = 
_urEventCreateWithNativeHandle_t(self.__dditable.Event.pfnCreateWithNativeHandle) - self.urEventSetCallback = _urEventSetCallback_t(self.__dditable.Event.pfnSetCallback) - - # call driver to get function pointers - Program = ur_program_dditable_t() - r = ur_result_v(self.__dll.urGetProgramProcAddrTable(version, byref(Program))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Program = Program - - # attach function interface to function address - self.urProgramCreateWithIL = _urProgramCreateWithIL_t(self.__dditable.Program.pfnCreateWithIL) - self.urProgramCreateWithBinary = _urProgramCreateWithBinary_t(self.__dditable.Program.pfnCreateWithBinary) - self.urProgramBuild = _urProgramBuild_t(self.__dditable.Program.pfnBuild) - self.urProgramCompile = _urProgramCompile_t(self.__dditable.Program.pfnCompile) - self.urProgramLink = _urProgramLink_t(self.__dditable.Program.pfnLink) - self.urProgramRetain = _urProgramRetain_t(self.__dditable.Program.pfnRetain) - self.urProgramRelease = _urProgramRelease_t(self.__dditable.Program.pfnRelease) - self.urProgramGetFunctionPointer = _urProgramGetFunctionPointer_t(self.__dditable.Program.pfnGetFunctionPointer) - self.urProgramGetInfo = _urProgramGetInfo_t(self.__dditable.Program.pfnGetInfo) - self.urProgramGetBuildInfo = _urProgramGetBuildInfo_t(self.__dditable.Program.pfnGetBuildInfo) - self.urProgramSetSpecializationConstants = _urProgramSetSpecializationConstants_t(self.__dditable.Program.pfnSetSpecializationConstants) - self.urProgramGetNativeHandle = _urProgramGetNativeHandle_t(self.__dditable.Program.pfnGetNativeHandle) - self.urProgramCreateWithNativeHandle = _urProgramCreateWithNativeHandle_t(self.__dditable.Program.pfnCreateWithNativeHandle) - - # call driver to get function pointers - ProgramExp = ur_program_exp_dditable_t() - r = ur_result_v(self.__dll.urGetProgramExpProcAddrTable(version, byref(ProgramExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.ProgramExp = 
ProgramExp - - # attach function interface to function address - self.urProgramBuildExp = _urProgramBuildExp_t(self.__dditable.ProgramExp.pfnBuildExp) - self.urProgramCompileExp = _urProgramCompileExp_t(self.__dditable.ProgramExp.pfnCompileExp) - self.urProgramLinkExp = _urProgramLinkExp_t(self.__dditable.ProgramExp.pfnLinkExp) - - # call driver to get function pointers - Kernel = ur_kernel_dditable_t() - r = ur_result_v(self.__dll.urGetKernelProcAddrTable(version, byref(Kernel))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Kernel = Kernel - - # attach function interface to function address - self.urKernelCreate = _urKernelCreate_t(self.__dditable.Kernel.pfnCreate) - self.urKernelGetInfo = _urKernelGetInfo_t(self.__dditable.Kernel.pfnGetInfo) - self.urKernelGetGroupInfo = _urKernelGetGroupInfo_t(self.__dditable.Kernel.pfnGetGroupInfo) - self.urKernelGetSubGroupInfo = _urKernelGetSubGroupInfo_t(self.__dditable.Kernel.pfnGetSubGroupInfo) - self.urKernelRetain = _urKernelRetain_t(self.__dditable.Kernel.pfnRetain) - self.urKernelRelease = _urKernelRelease_t(self.__dditable.Kernel.pfnRelease) - self.urKernelGetNativeHandle = _urKernelGetNativeHandle_t(self.__dditable.Kernel.pfnGetNativeHandle) - self.urKernelCreateWithNativeHandle = _urKernelCreateWithNativeHandle_t(self.__dditable.Kernel.pfnCreateWithNativeHandle) - self.urKernelSetArgValue = _urKernelSetArgValue_t(self.__dditable.Kernel.pfnSetArgValue) - self.urKernelSetArgLocal = _urKernelSetArgLocal_t(self.__dditable.Kernel.pfnSetArgLocal) - self.urKernelSetArgPointer = _urKernelSetArgPointer_t(self.__dditable.Kernel.pfnSetArgPointer) - self.urKernelSetExecInfo = _urKernelSetExecInfo_t(self.__dditable.Kernel.pfnSetExecInfo) - self.urKernelSetArgSampler = _urKernelSetArgSampler_t(self.__dditable.Kernel.pfnSetArgSampler) - self.urKernelSetArgMemObj = _urKernelSetArgMemObj_t(self.__dditable.Kernel.pfnSetArgMemObj) - self.urKernelSetSpecializationConstants = 
_urKernelSetSpecializationConstants_t(self.__dditable.Kernel.pfnSetSpecializationConstants) - - # call driver to get function pointers - KernelExp = ur_kernel_exp_dditable_t() - r = ur_result_v(self.__dll.urGetKernelExpProcAddrTable(version, byref(KernelExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.KernelExp = KernelExp - - # attach function interface to function address - self.urKernelSuggestMaxCooperativeGroupCountExp = _urKernelSuggestMaxCooperativeGroupCountExp_t(self.__dditable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp) - - # call driver to get function pointers - Sampler = ur_sampler_dditable_t() - r = ur_result_v(self.__dll.urGetSamplerProcAddrTable(version, byref(Sampler))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Sampler = Sampler - - # attach function interface to function address - self.urSamplerCreate = _urSamplerCreate_t(self.__dditable.Sampler.pfnCreate) - self.urSamplerRetain = _urSamplerRetain_t(self.__dditable.Sampler.pfnRetain) - self.urSamplerRelease = _urSamplerRelease_t(self.__dditable.Sampler.pfnRelease) - self.urSamplerGetInfo = _urSamplerGetInfo_t(self.__dditable.Sampler.pfnGetInfo) - self.urSamplerGetNativeHandle = _urSamplerGetNativeHandle_t(self.__dditable.Sampler.pfnGetNativeHandle) - self.urSamplerCreateWithNativeHandle = _urSamplerCreateWithNativeHandle_t(self.__dditable.Sampler.pfnCreateWithNativeHandle) - - # call driver to get function pointers - Mem = ur_mem_dditable_t() - r = ur_result_v(self.__dll.urGetMemProcAddrTable(version, byref(Mem))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Mem = Mem - - # attach function interface to function address - self.urMemImageCreate = _urMemImageCreate_t(self.__dditable.Mem.pfnImageCreate) - self.urMemBufferCreate = _urMemBufferCreate_t(self.__dditable.Mem.pfnBufferCreate) - self.urMemRetain = _urMemRetain_t(self.__dditable.Mem.pfnRetain) - self.urMemRelease = 
_urMemRelease_t(self.__dditable.Mem.pfnRelease) - self.urMemBufferPartition = _urMemBufferPartition_t(self.__dditable.Mem.pfnBufferPartition) - self.urMemGetNativeHandle = _urMemGetNativeHandle_t(self.__dditable.Mem.pfnGetNativeHandle) - self.urMemBufferCreateWithNativeHandle = _urMemBufferCreateWithNativeHandle_t(self.__dditable.Mem.pfnBufferCreateWithNativeHandle) - self.urMemImageCreateWithNativeHandle = _urMemImageCreateWithNativeHandle_t(self.__dditable.Mem.pfnImageCreateWithNativeHandle) - self.urMemGetInfo = _urMemGetInfo_t(self.__dditable.Mem.pfnGetInfo) - self.urMemImageGetInfo = _urMemImageGetInfo_t(self.__dditable.Mem.pfnImageGetInfo) - - # call driver to get function pointers - PhysicalMem = ur_physical_mem_dditable_t() - r = ur_result_v(self.__dll.urGetPhysicalMemProcAddrTable(version, byref(PhysicalMem))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.PhysicalMem = PhysicalMem - - # attach function interface to function address - self.urPhysicalMemCreate = _urPhysicalMemCreate_t(self.__dditable.PhysicalMem.pfnCreate) - self.urPhysicalMemRetain = _urPhysicalMemRetain_t(self.__dditable.PhysicalMem.pfnRetain) - self.urPhysicalMemRelease = _urPhysicalMemRelease_t(self.__dditable.PhysicalMem.pfnRelease) - - # call driver to get function pointers - Global = ur_global_dditable_t() - r = ur_result_v(self.__dll.urGetGlobalProcAddrTable(version, byref(Global))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Global = Global - - # attach function interface to function address - self.urAdapterGet = _urAdapterGet_t(self.__dditable.Global.pfnAdapterGet) - self.urAdapterRelease = _urAdapterRelease_t(self.__dditable.Global.pfnAdapterRelease) - self.urAdapterRetain = _urAdapterRetain_t(self.__dditable.Global.pfnAdapterRetain) - self.urAdapterGetLastError = _urAdapterGetLastError_t(self.__dditable.Global.pfnAdapterGetLastError) - self.urAdapterGetInfo = _urAdapterGetInfo_t(self.__dditable.Global.pfnAdapterGetInfo) - - # call 
driver to get function pointers - Enqueue = ur_enqueue_dditable_t() - r = ur_result_v(self.__dll.urGetEnqueueProcAddrTable(version, byref(Enqueue))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Enqueue = Enqueue - - # attach function interface to function address - self.urEnqueueKernelLaunch = _urEnqueueKernelLaunch_t(self.__dditable.Enqueue.pfnKernelLaunch) - self.urEnqueueEventsWait = _urEnqueueEventsWait_t(self.__dditable.Enqueue.pfnEventsWait) - self.urEnqueueEventsWaitWithBarrier = _urEnqueueEventsWaitWithBarrier_t(self.__dditable.Enqueue.pfnEventsWaitWithBarrier) - self.urEnqueueMemBufferRead = _urEnqueueMemBufferRead_t(self.__dditable.Enqueue.pfnMemBufferRead) - self.urEnqueueMemBufferWrite = _urEnqueueMemBufferWrite_t(self.__dditable.Enqueue.pfnMemBufferWrite) - self.urEnqueueMemBufferReadRect = _urEnqueueMemBufferReadRect_t(self.__dditable.Enqueue.pfnMemBufferReadRect) - self.urEnqueueMemBufferWriteRect = _urEnqueueMemBufferWriteRect_t(self.__dditable.Enqueue.pfnMemBufferWriteRect) - self.urEnqueueMemBufferCopy = _urEnqueueMemBufferCopy_t(self.__dditable.Enqueue.pfnMemBufferCopy) - self.urEnqueueMemBufferCopyRect = _urEnqueueMemBufferCopyRect_t(self.__dditable.Enqueue.pfnMemBufferCopyRect) - self.urEnqueueMemBufferFill = _urEnqueueMemBufferFill_t(self.__dditable.Enqueue.pfnMemBufferFill) - self.urEnqueueMemImageRead = _urEnqueueMemImageRead_t(self.__dditable.Enqueue.pfnMemImageRead) - self.urEnqueueMemImageWrite = _urEnqueueMemImageWrite_t(self.__dditable.Enqueue.pfnMemImageWrite) - self.urEnqueueMemImageCopy = _urEnqueueMemImageCopy_t(self.__dditable.Enqueue.pfnMemImageCopy) - self.urEnqueueMemBufferMap = _urEnqueueMemBufferMap_t(self.__dditable.Enqueue.pfnMemBufferMap) - self.urEnqueueMemUnmap = _urEnqueueMemUnmap_t(self.__dditable.Enqueue.pfnMemUnmap) - self.urEnqueueUSMFill = _urEnqueueUSMFill_t(self.__dditable.Enqueue.pfnUSMFill) - self.urEnqueueUSMMemcpy = _urEnqueueUSMMemcpy_t(self.__dditable.Enqueue.pfnUSMMemcpy) - 
self.urEnqueueUSMPrefetch = _urEnqueueUSMPrefetch_t(self.__dditable.Enqueue.pfnUSMPrefetch) - self.urEnqueueUSMAdvise = _urEnqueueUSMAdvise_t(self.__dditable.Enqueue.pfnUSMAdvise) - self.urEnqueueUSMFill2D = _urEnqueueUSMFill2D_t(self.__dditable.Enqueue.pfnUSMFill2D) - self.urEnqueueUSMMemcpy2D = _urEnqueueUSMMemcpy2D_t(self.__dditable.Enqueue.pfnUSMMemcpy2D) - self.urEnqueueDeviceGlobalVariableWrite = _urEnqueueDeviceGlobalVariableWrite_t(self.__dditable.Enqueue.pfnDeviceGlobalVariableWrite) - self.urEnqueueDeviceGlobalVariableRead = _urEnqueueDeviceGlobalVariableRead_t(self.__dditable.Enqueue.pfnDeviceGlobalVariableRead) - self.urEnqueueReadHostPipe = _urEnqueueReadHostPipe_t(self.__dditable.Enqueue.pfnReadHostPipe) - self.urEnqueueWriteHostPipe = _urEnqueueWriteHostPipe_t(self.__dditable.Enqueue.pfnWriteHostPipe) - - # call driver to get function pointers - EnqueueExp = ur_enqueue_exp_dditable_t() - r = ur_result_v(self.__dll.urGetEnqueueExpProcAddrTable(version, byref(EnqueueExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.EnqueueExp = EnqueueExp - - # attach function interface to function address - self.urEnqueueCooperativeKernelLaunchExp = _urEnqueueCooperativeKernelLaunchExp_t(self.__dditable.EnqueueExp.pfnCooperativeKernelLaunchExp) - - # call driver to get function pointers - Queue = ur_queue_dditable_t() - r = ur_result_v(self.__dll.urGetQueueProcAddrTable(version, byref(Queue))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Queue = Queue - - # attach function interface to function address - self.urQueueGetInfo = _urQueueGetInfo_t(self.__dditable.Queue.pfnGetInfo) - self.urQueueCreate = _urQueueCreate_t(self.__dditable.Queue.pfnCreate) - self.urQueueRetain = _urQueueRetain_t(self.__dditable.Queue.pfnRetain) - self.urQueueRelease = _urQueueRelease_t(self.__dditable.Queue.pfnRelease) - self.urQueueGetNativeHandle = _urQueueGetNativeHandle_t(self.__dditable.Queue.pfnGetNativeHandle) - 
self.urQueueCreateWithNativeHandle = _urQueueCreateWithNativeHandle_t(self.__dditable.Queue.pfnCreateWithNativeHandle) - self.urQueueFinish = _urQueueFinish_t(self.__dditable.Queue.pfnFinish) - self.urQueueFlush = _urQueueFlush_t(self.__dditable.Queue.pfnFlush) - - # call driver to get function pointers - BindlessImagesExp = ur_bindless_images_exp_dditable_t() - r = ur_result_v(self.__dll.urGetBindlessImagesExpProcAddrTable(version, byref(BindlessImagesExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.BindlessImagesExp = BindlessImagesExp - - # attach function interface to function address - self.urBindlessImagesUnsampledImageHandleDestroyExp = _urBindlessImagesUnsampledImageHandleDestroyExp_t(self.__dditable.BindlessImagesExp.pfnUnsampledImageHandleDestroyExp) - self.urBindlessImagesSampledImageHandleDestroyExp = _urBindlessImagesSampledImageHandleDestroyExp_t(self.__dditable.BindlessImagesExp.pfnSampledImageHandleDestroyExp) - self.urBindlessImagesImageAllocateExp = _urBindlessImagesImageAllocateExp_t(self.__dditable.BindlessImagesExp.pfnImageAllocateExp) - self.urBindlessImagesImageFreeExp = _urBindlessImagesImageFreeExp_t(self.__dditable.BindlessImagesExp.pfnImageFreeExp) - self.urBindlessImagesUnsampledImageCreateExp = _urBindlessImagesUnsampledImageCreateExp_t(self.__dditable.BindlessImagesExp.pfnUnsampledImageCreateExp) - self.urBindlessImagesSampledImageCreateExp = _urBindlessImagesSampledImageCreateExp_t(self.__dditable.BindlessImagesExp.pfnSampledImageCreateExp) - self.urBindlessImagesImageCopyExp = _urBindlessImagesImageCopyExp_t(self.__dditable.BindlessImagesExp.pfnImageCopyExp) - self.urBindlessImagesImageGetInfoExp = _urBindlessImagesImageGetInfoExp_t(self.__dditable.BindlessImagesExp.pfnImageGetInfoExp) - self.urBindlessImagesMipmapGetLevelExp = _urBindlessImagesMipmapGetLevelExp_t(self.__dditable.BindlessImagesExp.pfnMipmapGetLevelExp) - self.urBindlessImagesMipmapFreeExp = 
_urBindlessImagesMipmapFreeExp_t(self.__dditable.BindlessImagesExp.pfnMipmapFreeExp) - self.urBindlessImagesImportOpaqueFDExp = _urBindlessImagesImportOpaqueFDExp_t(self.__dditable.BindlessImagesExp.pfnImportOpaqueFDExp) - self.urBindlessImagesMapExternalArrayExp = _urBindlessImagesMapExternalArrayExp_t(self.__dditable.BindlessImagesExp.pfnMapExternalArrayExp) - self.urBindlessImagesReleaseInteropExp = _urBindlessImagesReleaseInteropExp_t(self.__dditable.BindlessImagesExp.pfnReleaseInteropExp) - self.urBindlessImagesImportExternalSemaphoreOpaqueFDExp = _urBindlessImagesImportExternalSemaphoreOpaqueFDExp_t(self.__dditable.BindlessImagesExp.pfnImportExternalSemaphoreOpaqueFDExp) - self.urBindlessImagesDestroyExternalSemaphoreExp = _urBindlessImagesDestroyExternalSemaphoreExp_t(self.__dditable.BindlessImagesExp.pfnDestroyExternalSemaphoreExp) - self.urBindlessImagesWaitExternalSemaphoreExp = _urBindlessImagesWaitExternalSemaphoreExp_t(self.__dditable.BindlessImagesExp.pfnWaitExternalSemaphoreExp) - self.urBindlessImagesSignalExternalSemaphoreExp = _urBindlessImagesSignalExternalSemaphoreExp_t(self.__dditable.BindlessImagesExp.pfnSignalExternalSemaphoreExp) - - # call driver to get function pointers - USM = ur_usm_dditable_t() - r = ur_result_v(self.__dll.urGetUSMProcAddrTable(version, byref(USM))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.USM = USM - - # attach function interface to function address - self.urUSMHostAlloc = _urUSMHostAlloc_t(self.__dditable.USM.pfnHostAlloc) - self.urUSMDeviceAlloc = _urUSMDeviceAlloc_t(self.__dditable.USM.pfnDeviceAlloc) - self.urUSMSharedAlloc = _urUSMSharedAlloc_t(self.__dditable.USM.pfnSharedAlloc) - self.urUSMFree = _urUSMFree_t(self.__dditable.USM.pfnFree) - self.urUSMGetMemAllocInfo = _urUSMGetMemAllocInfo_t(self.__dditable.USM.pfnGetMemAllocInfo) - self.urUSMPoolCreate = _urUSMPoolCreate_t(self.__dditable.USM.pfnPoolCreate) - self.urUSMPoolRetain = 
_urUSMPoolRetain_t(self.__dditable.USM.pfnPoolRetain) - self.urUSMPoolRelease = _urUSMPoolRelease_t(self.__dditable.USM.pfnPoolRelease) - self.urUSMPoolGetInfo = _urUSMPoolGetInfo_t(self.__dditable.USM.pfnPoolGetInfo) - - # call driver to get function pointers - USMExp = ur_usm_exp_dditable_t() - r = ur_result_v(self.__dll.urGetUSMExpProcAddrTable(version, byref(USMExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.USMExp = USMExp - - # attach function interface to function address - self.urUSMPitchedAllocExp = _urUSMPitchedAllocExp_t(self.__dditable.USMExp.pfnPitchedAllocExp) - self.urUSMImportExp = _urUSMImportExp_t(self.__dditable.USMExp.pfnImportExp) - self.urUSMReleaseExp = _urUSMReleaseExp_t(self.__dditable.USMExp.pfnReleaseExp) - - # call driver to get function pointers - CommandBufferExp = ur_command_buffer_exp_dditable_t() - r = ur_result_v(self.__dll.urGetCommandBufferExpProcAddrTable(version, byref(CommandBufferExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.CommandBufferExp = CommandBufferExp - - # attach function interface to function address - self.urCommandBufferCreateExp = _urCommandBufferCreateExp_t(self.__dditable.CommandBufferExp.pfnCreateExp) - self.urCommandBufferRetainExp = _urCommandBufferRetainExp_t(self.__dditable.CommandBufferExp.pfnRetainExp) - self.urCommandBufferReleaseExp = _urCommandBufferReleaseExp_t(self.__dditable.CommandBufferExp.pfnReleaseExp) - self.urCommandBufferFinalizeExp = _urCommandBufferFinalizeExp_t(self.__dditable.CommandBufferExp.pfnFinalizeExp) - self.urCommandBufferAppendKernelLaunchExp = _urCommandBufferAppendKernelLaunchExp_t(self.__dditable.CommandBufferExp.pfnAppendKernelLaunchExp) - self.urCommandBufferAppendUSMMemcpyExp = _urCommandBufferAppendUSMMemcpyExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMMemcpyExp) - self.urCommandBufferAppendUSMFillExp = _urCommandBufferAppendUSMFillExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMFillExp) - 
self.urCommandBufferAppendMemBufferCopyExp = _urCommandBufferAppendMemBufferCopyExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyExp) - self.urCommandBufferAppendMemBufferWriteExp = _urCommandBufferAppendMemBufferWriteExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteExp) - self.urCommandBufferAppendMemBufferReadExp = _urCommandBufferAppendMemBufferReadExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadExp) - self.urCommandBufferAppendMemBufferCopyRectExp = _urCommandBufferAppendMemBufferCopyRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyRectExp) - self.urCommandBufferAppendMemBufferWriteRectExp = _urCommandBufferAppendMemBufferWriteRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteRectExp) - self.urCommandBufferAppendMemBufferReadRectExp = _urCommandBufferAppendMemBufferReadRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadRectExp) - self.urCommandBufferAppendMemBufferFillExp = _urCommandBufferAppendMemBufferFillExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferFillExp) - self.urCommandBufferAppendUSMPrefetchExp = _urCommandBufferAppendUSMPrefetchExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMPrefetchExp) - self.urCommandBufferAppendUSMAdviseExp = _urCommandBufferAppendUSMAdviseExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMAdviseExp) - self.urCommandBufferEnqueueExp = _urCommandBufferEnqueueExp_t(self.__dditable.CommandBufferExp.pfnEnqueueExp) - - # call driver to get function pointers - UsmP2PExp = ur_usm_p2p_exp_dditable_t() - r = ur_result_v(self.__dll.urGetUsmP2PExpProcAddrTable(version, byref(UsmP2PExp))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.UsmP2PExp = UsmP2PExp - - # attach function interface to function address - self.urUsmP2PEnablePeerAccessExp = _urUsmP2PEnablePeerAccessExp_t(self.__dditable.UsmP2PExp.pfnEnablePeerAccessExp) - self.urUsmP2PDisablePeerAccessExp = 
_urUsmP2PDisablePeerAccessExp_t(self.__dditable.UsmP2PExp.pfnDisablePeerAccessExp) - self.urUsmP2PPeerAccessGetInfoExp = _urUsmP2PPeerAccessGetInfoExp_t(self.__dditable.UsmP2PExp.pfnPeerAccessGetInfoExp) - - # call driver to get function pointers - VirtualMem = ur_virtual_mem_dditable_t() - r = ur_result_v(self.__dll.urGetVirtualMemProcAddrTable(version, byref(VirtualMem))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.VirtualMem = VirtualMem - - # attach function interface to function address - self.urVirtualMemGranularityGetInfo = _urVirtualMemGranularityGetInfo_t(self.__dditable.VirtualMem.pfnGranularityGetInfo) - self.urVirtualMemReserve = _urVirtualMemReserve_t(self.__dditable.VirtualMem.pfnReserve) - self.urVirtualMemFree = _urVirtualMemFree_t(self.__dditable.VirtualMem.pfnFree) - self.urVirtualMemMap = _urVirtualMemMap_t(self.__dditable.VirtualMem.pfnMap) - self.urVirtualMemUnmap = _urVirtualMemUnmap_t(self.__dditable.VirtualMem.pfnUnmap) - self.urVirtualMemSetAccess = _urVirtualMemSetAccess_t(self.__dditable.VirtualMem.pfnSetAccess) - self.urVirtualMemGetInfo = _urVirtualMemGetInfo_t(self.__dditable.VirtualMem.pfnGetInfo) - - # call driver to get function pointers - Device = ur_device_dditable_t() - r = ur_result_v(self.__dll.urGetDeviceProcAddrTable(version, byref(Device))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Device = Device - - # attach function interface to function address - self.urDeviceGet = _urDeviceGet_t(self.__dditable.Device.pfnGet) - self.urDeviceGetInfo = _urDeviceGetInfo_t(self.__dditable.Device.pfnGetInfo) - self.urDeviceRetain = _urDeviceRetain_t(self.__dditable.Device.pfnRetain) - self.urDeviceRelease = _urDeviceRelease_t(self.__dditable.Device.pfnRelease) - self.urDevicePartition = _urDevicePartition_t(self.__dditable.Device.pfnPartition) - self.urDeviceSelectBinary = _urDeviceSelectBinary_t(self.__dditable.Device.pfnSelectBinary) - self.urDeviceGetNativeHandle = 
_urDeviceGetNativeHandle_t(self.__dditable.Device.pfnGetNativeHandle) - self.urDeviceCreateWithNativeHandle = _urDeviceCreateWithNativeHandle_t(self.__dditable.Device.pfnCreateWithNativeHandle) - self.urDeviceGetGlobalTimestamps = _urDeviceGetGlobalTimestamps_t(self.__dditable.Device.pfnGetGlobalTimestamps) - - # success! diff --git a/include/ur_api.h b/include/ur_api.h index 09f6d77a6b..0403e2b306 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -224,48 +224,48 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - 
UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - 
UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES = 0x2005, ///< ::ur_exp_layered_image_properties_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2006, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + 
UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -3287,6 +3287,25 @@ typedef struct ur_usm_device_desc_t { } ur_usm_device_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief USM allocation location desc +/// +/// @details 
+/// - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and +/// ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain. +/// +/// @remarks +/// _Analogues_ +/// - cl_intel_mem_alloc_buffer_location +typedef struct ur_usm_alloc_location_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t location; ///< [in] Identifies the ID of global memory partition to which the memory + ///< should be allocated. + +} ur_usm_alloc_location_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief USM pool descriptor type typedef struct ur_usm_pool_desc_t { @@ -3324,6 +3343,7 @@ typedef struct ur_usm_pool_limits_desc_t { /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -3369,6 +3389,7 @@ urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -3417,6 +3438,7 @@ urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -5972,7 +5994,7 @@ urEnqueueEventsWaitWithBarrier( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -6021,7 +6043,7 @@ urEnqueueMemBufferRead( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being written @@ -6080,7 +6102,7 @@ urEnqueueMemBufferWrite( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -6146,7 +6168,7 @@ urEnqueueMemBufferReadRect( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] 
indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -6199,8 +6221,8 @@ urEnqueueMemBufferWriteRect( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_mem_handle_t hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -6252,8 +6274,8 @@ urEnqueueMemBufferCopy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_mem_handle_t hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t region, ///< [in] source 3D rectangular region descriptor: width, height, depth @@ -6307,7 +6329,7 @@ urEnqueueMemBufferCopyRect( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object const void 
*pPattern, ///< [in] pointer to the fill pattern size_t patternSize, ///< [in] size in bytes of the pattern size_t offset, ///< [in] offset into the buffer @@ -6357,7 +6379,7 @@ urEnqueueMemBufferFill( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image ur_rect_region_t region, ///< [in] defines the (width, height, depth) in pixels of the 1D, 2D, or 3D @@ -6410,7 +6432,7 @@ urEnqueueMemImageRead( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image ur_rect_region_t region, ///< [in] defines the (width, height, depth) in pixels of the 1D, 2D, or 3D @@ -6457,8 +6479,8 @@ urEnqueueMemImageWrite( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_mem_handle_t hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image ur_rect_offset_t dstOrigin, ///< 
[in] defines the (x,y,z) offset in pixels in the destination 1D, 2D, @@ -6543,7 +6565,7 @@ typedef enum ur_usm_migration_flag_t { UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -6611,7 +6633,7 @@ urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hQueue` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == ptr` +/// + `NULL == pMem` /// + `NULL == pPattern` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -6631,7 +6653,7 @@ urEnqueueMemUnmap( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. const void *pPattern, ///< [in] pointer with the bytes of the pattern to set. 
@@ -6674,8 +6696,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object + void *pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void *pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -6720,7 +6742,7 @@ urEnqueueUSMMemcpy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object + const void *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list @@ -6762,7 +6784,7 @@ urEnqueueUSMPrefetch( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object + const void *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular @@ -6803,7 +6825,7 @@ urEnqueueUSMAdvise( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. 
+ void *pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -6853,9 +6875,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void *pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void *pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. size_t height, ///< [in] the height of columns to be copied. @@ -7128,21 +7151,6 @@ typedef struct ur_exp_interop_semaphore_desc_t { } ur_exp_interop_semaphore_desc_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Describes layered image properties -/// -/// @details -/// - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp -/// or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as -/// part of a `pNext` chain. 
-typedef struct ur_exp_layered_image_properties_t { - ur_structure_type_t stype; ///< [in] type of this structure, must be - ///< ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES - void *pNext; ///< [in,out][optional] pointer to extension-specific structure - uint32_t numLayers; ///< [in] number of layers the image should have - -} ur_exp_layered_image_properties_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// @@ -9856,7 +9864,7 @@ typedef struct ur_enqueue_mem_unmap_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_enqueue_usm_fill_params_t { ur_queue_handle_t *phQueue; - void **pptr; + void **ppMem; size_t *ppatternSize; const void **ppPattern; size_t *psize; diff --git a/include/ur_print.hpp b/include/ur_print.hpp index dc7442068c..70e5b9886d 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -267,6 +267,7 @@ inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_host_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_device_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_alloc_location_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_limits_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_usm_pool_info_t value); @@ -317,7 +318,6 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t 
params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_layered_image_properties_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); @@ -993,6 +993,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES: os << "UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: + os << "UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC"; + break; case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; @@ -1011,9 +1014,6 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE: os << "UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE"; break; - case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: - os << "UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES"; - break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES"; break; @@ -1204,6 +1204,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + const ur_usm_alloc_location_desc_t *pstruct = (const ur_usm_alloc_location_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: { const ur_exp_command_buffer_desc_t *pstruct = (const ur_exp_command_buffer_desc_t *)ptr; printPtr(os, pstruct); @@ -1234,11 +1239,6 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } 
break; - case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: { - const ur_exp_layered_image_properties_t *pstruct = (const ur_exp_layered_image_properties_t *)ptr; - printPtr(os, pstruct); - } break; - case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: { const ur_exp_sampler_addr_modes_t *pstruct = (const ur_exp_sampler_addr_modes_t *)ptr; printPtr(os, pstruct); @@ -6537,6 +6537,31 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_device_des return os; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_alloc_location_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_usm_alloc_location_desc_t params) { + os << "(struct ur_usm_alloc_location_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".location = "; + + os << (params.location); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_pool_desc_t type /// @returns /// std::ostream & @@ -9096,31 +9121,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_se return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_layered_image_properties_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_layered_image_properties_t params) { - os << "(struct ur_exp_layered_image_properties_t){"; - - os << ".stype = "; - - os << (params.stype); - - os << ", "; - os << ".pNext = "; - - ur::details::printStruct(os, - (params.pNext)); - - os << ", "; - os << ".numLayers = "; - - os << (params.numLayers); - - os << "}"; - return os; -} 
-/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_desc_t type /// @returns /// std::ostream & @@ -12512,10 +12512,10 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phQueue)); os << ", "; - os << ".ptr = "; + os << ".pMem = "; ur::details::printPtr(os, - *(params->pptr)); + *(params->ppMem)); os << ", "; os << ".patternSize = "; diff --git a/scripts/YaML.md b/scripts/YaML.md index 291e4263c7..ee22cd39d6 100644 --- a/scripts/YaML.md +++ b/scripts/YaML.md @@ -616,13 +616,18 @@ class ur_name_t(Structure): - `out` is used for params that are write-only; if the param is a pointer, then the memory being pointed to is also write-only - `in,out` is used for params that are both read and write; typically this is used for pointers to other data structures that contain both read and write params - `nocheck` is used to specify that no additional validation checks will be generated. - + `desc` may include one the following annotations: {`"[optional]"`, `"[range(start,end)]"`, `"[release]"`, `"[typename(typeVarName)]"`} + + `desc` may include one of the following annotations: {`"[optional]"`, `"[range(start,end)]"`, `"[release]"`, `"[typename(typeVarName)]"`, `"[bounds(offset,size)]"`} - `optional` is used for params that are handles or pointers where it is legal for the value to be `nullptr` - `range` is used for params that are array pointers to specify the valid range that the is valid to read + `start` and `end` must be an ISO-C standard identifier or literal + `start` is inclusive and `end` is exclusive - `release` is used for params that are handles or pointers to handles where the function will destroy any backing memory associated with the handle(s) - `typename` is used to denote the type enum for params that are opaque pointers to values of tagged data types. + - `bounds` is used for params that are memory objects or USM allocations. 
It specifies the range within the memory allocation represented by the param that will be accessed by the operation. + + `offset` and `size` must be an ISO-C standard identifier or literal + + The sum of `offset` and `size` will be compared against the size of the memory allocation represented by the param. + + If `offset` and `size` are not both integers they must be of the types `$x_rect_offset_t` and `$x_rect_region_t` respectively. + + If `bounds` is used the operation must also take a parameter of type `$x_queue_handle_t` named `hQueue` + `type` must be an ISO-C standard identifier + `name` must be a unique ISO-C standard identifier - A param may take the following optional scalar field: {`init`, `version`} diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index b9d4130d5a..cf2c8e870b 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -53,8 +53,8 @@ Adapter Change Process ====================== 1. Create a pull request containing the adapter changes in the - `oneapi-src/unified-runtime`_ project targeting the `adapters - `_ branch. + `oneapi-src/unified-runtime`_ project targeting the `main + `_ branch. 2. Create a draft pull request in the `intel/llvm`_ project to take advantage of the pre-merge testing. 
Add any required implementation changes in diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index c794c199d9..fe6a1ac32b 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -68,7 +68,6 @@ Enums ${X}_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC ${X}_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR ${X}_STRUCTURE_TYPE_EXP_WIN32_HANDLE - ${X}_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES ${X}_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES * ${x}_device_info_t @@ -129,7 +128,6 @@ Types * ${x}_exp_interop_semaphore_desc_t * ${x}_exp_file_descriptor_t * ${x}_exp_win32_handle_t -* ${x}_exp_layered_image_properties_t * ${x}_exp_sampler_addr_modes_t Functions @@ -184,6 +182,8 @@ Changelog +----------+-------------------------------------------------------------+ | 8.0 | Added structure for sampler addressing modes per dimension. | +------------------------------------------------------------------------+ +| 9.0 | Remove layered image properties struct. 
| ++------------------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/enqueue.yml b/scripts/core/enqueue.yml index 7da1c8f680..7af03074c9 100644 --- a/scripts/core/enqueue.yml +++ b/scripts/core/enqueue.yml @@ -158,7 +158,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: bool name: blockingRead desc: "[in] indicates blocking (true), non-blocking (false)" @@ -211,7 +211,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: bool name: blockingWrite desc: "[in] indicates blocking (true), non-blocking (false)" @@ -265,7 +265,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(bufferOrigin, region)] handle of the buffer object" - type: bool name: blockingRead desc: "[in] indicates blocking (true), non-blocking (false)" @@ -341,7 +341,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(bufferOrigin, region)] handle of the buffer object" - type: bool name: blockingWrite desc: "[in] indicates blocking (true), non-blocking (false)" @@ -414,10 +414,10 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBufferSrc - desc: "[in] handle of the src buffer object" + desc: "[in][bounds(srcOffset, size)] handle of the src buffer object" - type: $x_mem_handle_t name: hBufferDst - desc: "[in] handle of the dest buffer object" + desc: "[in][bounds(dstOffset, size)] handle of the dest buffer object" - type: size_t name: 
srcOffset desc: "[in] offset into hBufferSrc to begin copying from" @@ -466,10 +466,10 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBufferSrc - desc: "[in] handle of the source buffer object" + desc: "[in][bounds(srcOrigin, region)] handle of the source buffer object" - type: $x_mem_handle_t name: hBufferDst - desc: "[in] handle of the dest buffer object" + desc: "[in][bounds(dstOrigin, region)] handle of the dest buffer object" - type: $x_rect_offset_t name: srcOrigin desc: "[in] 3D offset in the source buffer" @@ -537,7 +537,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: "const void*" name: pPattern desc: "[in] pointer to the fill pattern" @@ -595,7 +595,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hImage - desc: "[in] handle of the image object" + desc: "[in][bounds(origin, region)] handle of the image object" - type: bool name: blockingRead desc: "[in] indicates blocking (true), non-blocking (false)" @@ -654,7 +654,7 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hImage - desc: "[in] handle of the image object" + desc: "[in][bounds(origin, region)] handle of the image object" - type: bool name: blockingWrite desc: "[in] indicates blocking (true), non-blocking (false)" @@ -711,10 +711,10 @@ params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hImageSrc - desc: "[in] handle of the src image object" + desc: "[in][bounds(srcOrigin, region)] handle of the src image object" - type: $x_mem_handle_t name: hImageDst - desc: "[in] handle of the dest image object" + desc: "[in][bounds(dstOrigin, region)] handle of the dest image object" - type: $x_rect_offset_t name: srcOrigin desc: "[in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D image" @@ -842,7 +842,7 @@ 
params: desc: "[in] handle of the queue object" - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object" + desc: "[in][bounds(offset, size)] handle of the buffer object" - type: bool name: blockingMap desc: "[in] indicates blocking (true), non-blocking (false)" @@ -996,8 +996,8 @@ params: name: hQueue desc: "[in] handle of the queue object" - type: void* - name: ptr - desc: "[in] pointer to USM memory object" + name: pMem + desc: "[in][bounds(0, size)] pointer to USM memory object" - type: size_t name: patternSize desc: "[in] the size in bytes of the pattern. Must be a power of 2 and less than or equal to width." @@ -1050,10 +1050,10 @@ params: desc: "[in] blocking or non-blocking copy" - type: void* name: pDst - desc: "[in] pointer to the destination USM memory object" + desc: "[in][bounds(0, size)] pointer to the destination USM memory object" - type: "const void*" name: pSrc - desc: "[in] pointer to the source USM memory object" + desc: "[in][bounds(0, size)] pointer to the source USM memory object" - type: size_t name: size desc: "[in] size in bytes to be copied" @@ -1097,7 +1097,7 @@ params: desc: "[in] handle of the queue object" - type: "const void*" name: pMem - desc: "[in] pointer to the USM memory object" + desc: "[in][bounds(0, size)] pointer to the USM memory object" - type: size_t name: size desc: "[in] size in bytes to be fetched" @@ -1144,7 +1144,7 @@ params: desc: "[in] handle of the queue object" - type: "const void*" name: pMem - desc: "[in] pointer to the USM memory object" + desc: "[in][bounds(0, size)] pointer to the USM memory object" - type: size_t name: size desc: "[in] size in bytes to be advised" @@ -1176,7 +1176,7 @@ params: desc: "[in] handle of the queue to submit to." - type: void* name: pMem - desc: "[in] pointer to memory to be filled." + desc: "[in][bounds(0, pitch * height)] pointer to memory to be filled." - type: size_t name: pitch desc: "[in] the total width of the destination memory including padding." 
@@ -1238,13 +1238,13 @@ params: desc: "[in] indicates if this operation should block the host." - type: void* name: pDst - desc: "[in] pointer to memory where data will be copied." + desc: "[in][bounds(0, dstPitch * height)] pointer to memory where data will be copied." - type: size_t name: dstPitch desc: "[in] the total width of the source memory including padding." - type: "const void*" name: pSrc - desc: "[in] pointer to memory to be copied." + desc: "[in][bounds(0, srcPitch * height)] pointer to memory to be copied." - type: size_t name: srcPitch desc: "[in] the total width of the source memory including padding." diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index b5f87a6633..d2e508c4a7 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -107,12 +107,9 @@ etors: - name: EXP_WIN32_HANDLE desc: $x_exp_win32_handle_t value: "0x2004" - - name: EXP_LAYERED_IMAGE_PROPERTIES - desc: $x_exp_layered_image_properties_t - value: "0x2005" - name: EXP_SAMPLER_ADDR_MODES desc: $x_exp_sampler_addr_modes_t - value: "0x2006" + value: "0x2005" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -205,20 +202,6 @@ name: $x_exp_interop_semaphore_desc_t base: $x_base_desc_t members: [] --- #-------------------------------------------------------------------------- -type: struct -desc: "Describes layered image properties" -details: - - Specify these properties in $xBindlessImagesUnsampledImageCreateExp or - $xBindlessImagesSampledImageCreateExp via $x_image_desc_t as part of a - `pNext` chain. 
-class: $xBindlessImages -name: $x_exp_layered_image_properties_t -base: $x_base_properties_t -members: - - type: uint32_t - name: numLayers - desc: "[in] number of layers the image should have" ---- #-------------------------------------------------------------------------- type: function desc: "USM allocate pitched memory" class: $xUSM diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index deb5ee9604..6195cd4980 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -666,3 +666,6 @@ etors: - name: KERNEL_ARG_LOCAL_PROPERTIES desc: $x_kernel_arg_local_properties_t value: '33' +- name: USM_ALLOC_LOCATION_DESC + desc: $x_usm_alloc_location_desc_t + value: '35' diff --git a/scripts/core/usm.yml b/scripts/core/usm.yml index 0b793d7226..1476eec34a 100644 --- a/scripts/core/usm.yml +++ b/scripts/core/usm.yml @@ -175,6 +175,23 @@ members: desc: "[in] device memory allocation flags." --- #-------------------------------------------------------------------------- type: struct +desc: "USM allocation location desc" +details: + - Specify these properties in $xUSMHostAlloc, $xUSMDeviceAlloc and + $xUSMSharedAlloc via $x_usm_desc_t as part of a `pNext` chain. +analogue: + - "cl_intel_mem_alloc_buffer_location" +class: $xUSM +name: $x_usm_alloc_location_desc_t +base: $x_base_desc_t +members: + - type: uint32_t + name: location + desc: > + [in] Identifies the ID of global memory partition to which the memory + should be allocated. +--- #-------------------------------------------------------------------------- +type: struct desc: "USM pool descriptor type" class: $xUSM name: $x_usm_pool_desc_t @@ -212,6 +229,7 @@ details: - "Allocations served from different memory pools must be isolated and must not reside on the same page." - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_host_desc_t." + - "See also $x_usm_alloc_location_desc_t." 
params: - type: $x_context_handle_t name: hContext @@ -253,6 +271,7 @@ details: - "Allocations served from different memory pools must be isolated and must not reside on the same page." - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_device_desc_t." + - "See also $x_usm_alloc_location_desc_t." params: - type: $x_context_handle_t name: hContext @@ -298,6 +317,7 @@ details: - "Any flags/hints passed through pUSMDesc only affect the single allocation." - "See also $x_usm_host_desc_t." - "See also $x_usm_device_desc_t." + - "See also $x_usm_alloc_location_desc_t." params: - type: $x_context_handle_t name: hContext diff --git a/scripts/ctest_parser.py b/scripts/ctest_parser.py index f41ba5ea60..1f9c4f6cfe 100644 --- a/scripts/ctest_parser.py +++ b/scripts/ctest_parser.py @@ -15,11 +15,13 @@ TMP_RESULTS_FILE = "tmp-results-file.json" def get_cts_test_suite_names(working_directory): - process = Popen(["ctest", "--show-only=json-v1"], cwd=working_directory, + process = Popen(["ctest", "--show-only=json-v1"], cwd=working_directory, stdout=PIPE, env=os.environ.copy()) out,_ = process.communicate() testsuites = json.loads(out) - return [test['name']for test in testsuites['tests']] + return [ + test['name'][:test['name'].rfind('-')] for test in testsuites['tests'] + ] def percent(amount, total): return round((amount / total) * 100, 2) @@ -39,7 +41,7 @@ def summarize_results(results): crash_rate = percent(total_crashed, total) ljust_param = len(str(total)) - + print( f"""[CTest Parser] Results: Total [{str(total).ljust(ljust_param)}] @@ -85,7 +87,7 @@ def run(args): results[suite] = {} test_executable = f"{args.ctest_path}/bin/test-{suite}" process = Popen([test_executable, "--gtest_list_tests"], env=env, - stdout=DEVNULL if args.quiet else None, + stdout=DEVNULL if args.quiet else None, stderr=DEVNULL if args.quiet else None) process.wait() try: @@ -98,8 +100,8 @@ def run(args): for suite in test_suite_names: ctest_path = 
f"{args.ctest_path}/test/conformance/{suite}" - process = Popen(['ctest',ctest_path], env=env, cwd=ctest_path, - stdout=DEVNULL if args.quiet else None, + process = Popen(['ctest',ctest_path], env=env, cwd=ctest_path, + stdout=DEVNULL if args.quiet else None, stderr=DEVNULL if args.quiet else None) process.wait() @@ -111,7 +113,7 @@ def run(args): except FileNotFoundError: results[suite]['actual'] = None print('\033[91m' + f"Conformance test suite '{suite}' : likely crashed!" + '\033[0m') - + return results def dir_path(string): diff --git a/scripts/generate_code.py b/scripts/generate_code.py index 492ff88df8..eebb954487 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -70,26 +70,6 @@ def _mako_ddi_h(path, namespace, tags, version, revision, specs, meta): specs=specs, meta=meta) -""" - generates python files from the specification documents -""" -def _mako_api_py(path, namespace, tags, version, revision, specs, meta): - template = "api.py.mako" - fin = os.path.join("templates", template) - - filename = "%s.py"%(namespace) - fout = os.path.join(path, filename) - - print("Generating %s..."%fout) - return util.makoWrite( - fin, fout, - ver=version, - rev=revision, - namespace=namespace, - tags=tags, - specs=specs, - meta=meta) - """ generates c/c++ files from the specification documents """ @@ -101,13 +81,6 @@ def _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, spec return loc -""" - generates python files from the specification documents -""" -def _generate_api_py(incpath, namespace, tags, version, revision, specs, meta): - loc = _mako_api_py(incpath, namespace, tags, version, revision, specs, meta) - return loc - """ Entry-point: generates api code @@ -118,7 +91,6 @@ def generate_api(incpath, srcpath, namespace, tags, version, revision, specs, me loc = 0 loc += _generate_api_cpp(incpath, srcpath, namespace, tags, version, revision, specs, meta) - loc += _generate_api_py(incpath, namespace, tags, version, revision, 
specs, meta) print("Generated %s lines of code.\n"%loc) templates_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates") diff --git a/scripts/parse_specs.py b/scripts/parse_specs.py index a1477ce534..332af88cc7 100644 --- a/scripts/parse_specs.py +++ b/scripts/parse_specs.py @@ -338,6 +338,15 @@ def __validate_params(d, tags): if not param_traits.is_range(item): raise Exception(prefix+"handle type must include a range(start, end) as part of 'desc'") + if param_traits.is_bounds(item): + has_queue = False + for p in d['params']: + if re.match(r"hQueue$", p['name']): + has_queue = True + + if not has_queue: + raise Exception(prefix+"bounds must only be used on entry points which take a `hQueue` parameter") + ver = __validate_version(item, prefix=prefix, base_version=d_ver) if ver < max_ver: raise Exception(prefix+"'version' must be increasing: %s"%item['version']) diff --git a/scripts/templates/api.py.mako b/scripts/templates/api.py.mako deleted file mode 100644 index 7815f2cf53..0000000000 --- a/scripts/templates/api.py.mako +++ /dev/null @@ -1,196 +0,0 @@ -<% -import re -from templates import helper as th -%><% - n=namespace - N=n.upper() - - x=tags['$x'] - X=x.upper() -%>""" - Copyright (C) 2022 Intel Corporation - - Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
- See LICENSE.TXT - SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - @file ${n}.py - @version v${ver}-r${rev} - - """ -import platform -from ctypes import * -from enum import * - -# ctypes does not define c_intptr_t, so let's define it here manually -c_intptr_t = c_ssize_t - -${"###############################################################################"} -__version__ = "1.0" - -%for s in specs: -%for obj in s['objects']: -%if not re.match(r"class", obj['type']) and not re.match(r"function", obj['type']): -${"###############################################################################"} -%for line in th.make_desc_lines(n, tags, obj): -${"##"} ${line} -%endfor -%for line in th.make_details_lines(n, tags, obj): -${"##"} ${line} -%endfor -## MACRO ###################################################################### -%if re.match(r"macro", obj['type']): -%if re.match(r".*\(.*\)", obj['name']): -def ${th.make_macro_name(n, tags, obj)}: - return ${th.subt(n, tags, obj['value'])} -%elif 'altvalue' not in obj and not obj['value'].startswith("__"): -${th.make_macro_name(n, tags, obj)} = ${th.subt(n, tags, obj['value'])} -%else: -# ${th.make_macro_name(n, tags, obj)} not required for python -%endif -## TYPEDEF #################################################################### -%elif re.match(r"typedef", obj['type']): -class ${th.make_type_name(n, tags, obj)}(${th.get_ctype_name(n, tags, {'type': obj['value']})}): - pass -## FPTR TYPEDEF ############################################################### -%elif re.match(r"fptr_typedef", obj['type']): -def ${th.make_type_name(n, tags, obj)}(user_defined_callback): - @CFUNCTYPE(${th.get_ctype_name(n, tags, {'type': obj['return']})}\ -%if 'params' in obj: -%for param in obj['params']: -, ${th.get_ctype_name(n, tags, {'type': param['type']})}\ -%endfor -%endif -) - def ${th.make_type_name(n, tags, obj)}_wrapper(\ -%if 'params' in obj: -%for index, item in enumerate(obj['params']): -${item['name']}\ -%if index < 
(len(obj['params']) - 1): -, \ -%endif -%endfor -%endif -): - return user_defined_callback(\ -%if 'params' in obj: -%for index, item in enumerate(obj['params']): -${item['name']}\ -%if index < (len(obj['params']) - 1): -, \ -%endif -%endfor -%endif -) - return ${th.make_type_name(n, tags, obj)}_wrapper -## ENUM ####################################################################### -%elif re.match(r"enum", obj['type']): -class ${re.sub(r"(\w+)_t", r"\1_v", th.make_type_name(n, tags, obj))}(IntEnum): - %for line in th.make_etor_lines(n, tags, obj, py=True, meta=meta): - ${line} - %endfor - -class ${th.make_type_name(n, tags, obj)}(c_int): - def __str__(self): - %if th.type_traits.is_flags(obj['name']): - return hex(self.value) - %else: - return str(${re.sub(r"(\w+)_t", r"\1_v", th.make_type_name(n, tags, obj))}(self.value)) - %endif - -## STRUCT/UNION ############################################################### -%elif re.match(r"struct|union", obj['type']): -class ${th.make_type_name(n, tags, obj)}(Structure): - _fields_ = [ - %for line in th.make_member_lines(n, tags, obj, py=True, meta=meta): - ${line} - %endfor - ] -## HANDLE ##################################################################### -%elif re.match(r"handle", obj['type']): -class ${th.make_type_name(n, tags, obj)}(c_void_p): - pass -%endif - -%endif # !class && !function -%endfor # objects -%endfor # specs -${"###############################################################################"} -__use_win_types = "Windows" == platform.uname()[0] -<% - tables = th.get_pfntables(specs, meta, n, tags) -%> -%for tbl in tables: -%for obj in tbl['functions']: -${"###############################################################################"} -${"##"} @brief Function-pointer for ${th.make_func_name(n, tags, obj)} -%if 'condition' not in obj: -if __use_win_types: - _${th.make_func_name(n, tags, obj)}_t = WINFUNCTYPE( ${x}_result_t, ${", ".join(th.make_param_lines(n, tags, obj, py=True, meta=meta, 
format=["type"]))} ) -else: - _${th.make_func_name(n, tags, obj)}_t = CFUNCTYPE( ${x}_result_t, ${", ".join(th.make_param_lines(n, tags, obj, py=True, meta=meta, format=["type"]))} ) -%endif # condition - -%endfor # functions - -${"###############################################################################"} -${"##"} @brief Table of ${tbl['name']} functions pointers -class ${tbl['type']}(Structure): - _fields_ = [ - %for obj in tbl['functions']: - %if 'condition' not in obj: - %if loop.index < len(tbl['functions'])-1: - ${th.append_ws("(\""+th.make_pfn_name(n, tags, obj)+"\", c_void_p),", 63)} ## _${th.make_func_name(n, tags, obj)}_t - %else: - ${th.append_ws("(\""+th.make_pfn_name(n, tags, obj)+"\", c_void_p)", 63)} ## _${th.make_func_name(n, tags, obj)}_t - %endif - %endif # condition - %endfor - ] - -%endfor # tables -${"###############################################################################"} -class ${n}_dditable_t(Structure): - _fields_ = [ - %for tbl in tables: - %if loop.index < len(tables)-1: - ("${tbl['name']}", ${tbl['type']}), - %else: - ("${tbl['name']}", ${tbl['type']}) - %endif - %endfor - ] - -${"###############################################################################"} -${"##"} @brief ${n} device-driver interfaces -class ${N}_DDI: - def __init__(self, version : ${x}_api_version_t): - # load the ${x}_loader library - if "Windows" == platform.uname()[0]: - self.__dll = WinDLL("${x}_loader.dll", winmode=0) - else: - self.__dll = CDLL("lib${x}_loader.so") - - # fill the ddi tables - self.__dditable = ${n}_dditable_t() - - # initialize the UR - self.__dll.${x}LoaderInit(0, 0) - - %for tbl in tables: - # call driver to get function pointers - ${tbl['name']} = ${tbl['type']}() - r = ${x}_result_v(self.__dll.${tbl['export']['name']}(version, byref(${tbl['name']}))) - if r != ${x}_result_v.SUCCESS: - raise Exception(r) - self.__dditable.${tbl['name']} = ${tbl['name']} - - # attach function interface to function address - %for obj in 
tbl['functions']: - %if 'condition' not in obj: - self.${th.make_func_name(n, tags, obj)} = _${th.make_func_name(n, tags, obj)}_t(self.__dditable.${tbl['name']}.${th.make_pfn_name(n, tags, obj)}) - %endif - %endfor # functions - - %endfor # tables - # success! diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index 4fbb2ca47b..d7d29dc0a8 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -356,6 +356,7 @@ class param_traits: RE_RELEASE = r".*\[release\].*" RE_TYPENAME = r".*\[typename\((.+),\s(.+)\)\].*" RE_TAGGED = r".*\[tagged_by\((.+)\)].*" + RE_BOUNDS = r".*\[bounds\((.+),\s*(.+)\)].*" @classmethod def is_mbz(cls, item): @@ -412,6 +413,13 @@ def is_tagged(cls, item): return True if re.match(cls.RE_TAGGED, item['desc']) else False except: return False + + @classmethod + def is_bounds(cls, item): + try: + return True if re.match(cls.RE_BOUNDS, item['desc']) else False + except: + return False @classmethod def tagged_member(cls, item): @@ -457,6 +465,22 @@ def typename_size(cls, item): else: return None + @classmethod + def bounds_offset(cls, item): + match = re.match(cls.RE_BOUNDS, item['desc']) + if match: + return match.group(1) + else: + return None + + @classmethod + def bounds_size(cls, item): + match = re.match(cls.RE_BOUNDS, item['desc']) + if match: + return match.group(2) + else: + return None + """ Extracts traits from a function object """ @@ -681,18 +705,8 @@ def make_flags_bitmask(namespace, tags, obj, meta): Public: returns c/c++ name of etor """ -def make_etor_name(namespace, tags, enum, etor, py=False, meta=None): - if py: - # e.g., "ENUM_NAME_ETOR_NAME" -> "ETOR_NAME" - if type_traits.is_flags(enum): - prefix = re.sub(r"(\w+)_flags_t", r"\1_flag", subt(namespace, tags, enum)).upper() - else: - prefix = re.sub(r"(\w+)_t", r"\1", subt(namespace, tags, enum)).upper() - name = re.sub(r"%s_(\w+)"%prefix, r"\1", subt(namespace, tags, etor)) - name = re.sub(r"^(\d+\w*)", r"_\1", name) - else: - name = 
subt(namespace, tags, etor) - return name +def make_etor_name(namespace, tags, enum, etor, meta=None): + return subt(namespace, tags, etor) """ Private: @@ -718,33 +732,28 @@ def _get_value_name(namespace, tags, value): Public: returns a list of strings for declaring each enumerator in an enumeration c++ format: "ETOR_NAME = VALUE, ///< DESCRIPTION" - python format: "ETOR_NAME = VALUE, ## DESCRIPTION" """ -def make_etor_lines(namespace, tags, obj, py=False, meta=None): +def make_etor_lines(namespace, tags, obj, meta=None): lines = [] for item in obj['etors']: - name = make_etor_name(namespace, tags, obj['name'], item['name'], py, meta) + name = make_etor_name(namespace, tags, obj['name'], item['name'], meta) if 'value' in item: - delim = "," if not py else "" + delim = "," value = _get_value_name(namespace, tags, item['value']) prologue = "%s = %s%s"%(name, value, delim) - elif py: - prologue = "%s = auto()"%(name) else: prologue = "%s,"%(name) - comment_style = "##" if py else "///<" for line in split_line(subt(namespace, tags, item['desc'], True), 70): - lines.append("%s%s %s"%(append_ws(prologue, 48), comment_style, line)) + lines.append("%s%s %s"%(append_ws(prologue, 48), "///<", line)) prologue = "" - if not py: - lines += [ - "/// @cond", - "%sFORCE_UINT32 = 0x7fffffff"%make_enum_name(namespace, tags, obj)[:-1].upper(), - "/// @endcond", - ] + lines += [ + "/// @cond", + "%sFORCE_UINT32 = 0x7fffffff"%make_enum_name(namespace, tags, obj)[:-1].upper(), + "/// @endcond", + ] return lines @@ -759,43 +768,6 @@ def _get_type_name(namespace, tags, obj, item): name = subt(namespace, tags, type,) return name -""" -Private: - returns python c_type name of any type -""" -def get_ctype_name(namespace, tags, item): - name = subt(namespace, tags, item['type']) - name = _remove_const(name) - name = re.sub(r"void\*", "c_void_p", name) - name = re.sub(r"char\*", "c_char_p", name) - name = re.sub(r"bool", "c_bool", name) - name = re.sub(r"uint8_t", "c_ubyte", name) - name = 
re.sub(r"uint16_t", "c_ushort", name) - name = re.sub(r"uint32_t", "c_ulong", name) - name = re.sub(r"uint64_t", "c_ulonglong", name) - name = re.sub(r"int8_t", "c_byte", name) - name = re.sub(r"int16_t", "c_short", name) - name = re.sub(r"int32_t", "c_long", name) - name = re.sub(r"int64_t", "c_longlong", name) - name = re.sub(r"size_t", "c_size_t", name) - name = re.sub(r"float", "c_float", name) - name = re.sub(r"double", "c_double", name) - name = re.sub(r"\bchar", "c_char", name) - name = re.sub(r"\bint", "c_int", name) - # Handle void - if re.match(r"void", name): - if not re.match(r"_void_", name): # its not c_void_p - name = re.sub(r"void", "None", name) - - while type_traits.is_pointer(name): - name = "POINTER(%s)"%_remove_ptr(name) - - if 'name' in item and type_traits.is_array(item['type']): - length = subt(namespace, tags, type_traits.get_array_length(item['type'])) - name = "%s * %s"%(type_traits.get_array_element_type(name), length) - - return name - """ Public: returns c/c++ name of member of struct/class @@ -812,32 +784,21 @@ def make_member_name(namespace, tags, item, prefix="", remove_array=False): Public: returns a list of strings for each member of a structure or class c++ format: "TYPE NAME = INIT, ///< DESCRIPTION" - python format: "("NAME", TYPE)" ## DESCRIPTION" """ -def make_member_lines(namespace, tags, obj, prefix="", py=False, meta=None): +def make_member_lines(namespace, tags, obj, prefix="", meta=None): lines = [] if 'members' not in obj: return lines for i, item in enumerate(obj['members']): - name = make_member_name(namespace, tags, item, prefix, remove_array=py) + name = make_member_name(namespace, tags, item, prefix) + tname = _get_type_name(namespace, tags, obj, item) - if py: - tname = get_ctype_name(namespace, tags, item) - else: - tname = _get_type_name(namespace, tags, obj, item) - - if py: - delim = "," if i < (len(obj['members'])-1) else "" - prologue = "(\"%s\", %s)%s"%(name, tname, delim) - else: - array_suffix = 
f"[{type_traits.get_array_length(item['type'])}]" if type_traits.is_array(item['type']) else "" - prologue = "%s %s %s;"%(tname, name, array_suffix) + array_suffix = f"[{type_traits.get_array_length(item['type'])}]" if type_traits.is_array(item['type']) else "" + prologue = "%s %s %s;"%(tname, name, array_suffix) - comment_style = "##" if py else "///<" - ws_count = 64 if py else 48 for line in split_line(subt(namespace, tags, item['desc'], True), 70): - lines.append("%s%s %s"%(append_ws(prologue, ws_count), comment_style, line)) + lines.append("%s%s %s"%(append_ws(prologue, 48), "///<", line)) prologue = "" return lines @@ -854,7 +815,7 @@ def _get_param_name(namespace, tags, item): returns a list of c++ strings for each parameter of a function format: "TYPE NAME = INIT, ///< DESCRIPTION" """ -def make_param_lines(namespace, tags, obj, py=False, decl=False, meta=None, format=["type", "name", "delim", "desc"], delim=",", replacements={}): +def make_param_lines(namespace, tags, obj, decl=False, meta=None, format=["type", "name", "delim", "desc"], delim=",", replacements={}): lines = [] params = obj['params'] @@ -866,19 +827,8 @@ def make_param_lines(namespace, tags, obj, py=False, decl=False, meta=None, form name = _get_param_name(namespace, tags, item) if replacements.get(name): name = replacements[name] - if py: - tname = get_ctype_name(namespace, tags, item) - # Handle fptr_typedef - # On Python side, passing a function pointer to a CFUNCTYPE is a bit awkward - # So solve this, if we encounter a function pointer type, we relpace it with - # c_void_p - a generic void pointer - if len(fptr_types) > 0: - for fptr_type in fptr_types: - if tname == subt(namespace, tags, fptr_type): - tname = 'c_void_p' # Substitute function pointers to c_void_p - break - else: - tname = _get_type_name(namespace, tags, obj, item) + + tname = _get_type_name(namespace, tags, obj, item) words = [] if "type*" in format: @@ -902,7 +852,7 @@ def make_param_lines(namespace, tags, obj, 
py=False, decl=False, meta=None, form else: lines.append(prologue) - if "type" in format and len(lines) == 0 and not py: + if "type" in format and len(lines) == 0: lines = ["void"] return lines @@ -1041,7 +991,35 @@ def make_pfncb_param_type(namespace, tags, obj): """ Public: - returns a dict of auto-generated c++ parameter validation checks + returns an appropriate bounds helper function call for an entry point + parameter with the [bounds] tag +""" +def get_bounds_check(param, bounds_error): + # Images need their own helper, since function signature wise they would be + # identical to buffer rect + bounds_function = 'boundsImage' if 'image' in param['name'].lower() else 'bounds' + bounds_check = "auto {0} = {1}({2}, {3}, {4})".format( + bounds_error, + bounds_function, + param["name"], + param_traits.bounds_offset(param), + param_traits.bounds_size(param), + ) + bounds_check += '; {0} != UR_RESULT_SUCCESS'.format(bounds_error) + + # USM bounds checks need the queue handle parameter to be able to use the + # GetMemAllocInfo entry point + if type_traits.is_pointer(param['type']): + # If no `hQueue` parameter exists that should have been caught at spec + # generation. 
+ return re.sub(r'bounds\(', 'bounds(hQueue, ', bounds_check) + + return bounds_check + +""" +Public: + returns a dict of auto-generated c++ parameter validation checks for the + given function (specified by `obj`) """ def make_param_checks(namespace, tags, obj, cpp=False, meta=None): checks = {} @@ -1054,6 +1032,13 @@ def make_param_checks(namespace, tags, obj, cpp=False, meta=None): if key not in checks: checks[key] = [] checks[key].append(subt(namespace, tags, code.group(1), False, cpp)) + + for p in obj.get('params', []): + if param_traits.is_bounds(p): + if 'boundsError' not in checks: + checks['boundsError'] = [] + checks['boundsError'].append(get_bounds_check(p, 'boundsError')) + return checks """ diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 0c9a3ed8b0..eaca102ea9 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -71,6 +71,9 @@ namespace ur_loader break; } adapterIndex++; + if (adapterIndex == NumEntries) { + break; + } } } @@ -127,14 +130,17 @@ namespace ur_loader %else: <%param_replacements={}%> %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): - %if 0 == i: + %if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': // extract platform's function pointer table auto dditable = reinterpret_cast<${item['obj']}*>( ${item['pointer']}${item['name']} )->dditable; auto ${th.make_pfn_name(n, tags, obj)} = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) return ${X}_RESULT_ERROR_UNINITIALIZED; + <%break%> %endif + %endfor + %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): %if 'range' in item: <% add_local = True @@ -143,6 +149,7 @@ namespace ur_loader for( size_t i = ${item['range'][0]}; i < ${item['range'][1]}; ++i ) ${item['name']}Local[ i ] = reinterpret_cast<${item['obj']}*>( ${item['name']}[ i 
] )->handle; %else: + %if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': // convert loader handle to platform handle %if item['optional']: ${item['name']} = ( ${item['name']} ) ? reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle : nullptr; @@ -150,6 +157,7 @@ namespace ur_loader ${item['name']} = reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle; %endif %endif + %endif %endfor // forward to device-platform @@ -170,7 +178,7 @@ namespace ur_loader %if item['release']: // release loader handle ${item['factory']}.release( ${item['name']} ); - %else: + %elif not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': try { %if 'range' in item: diff --git a/scripts/templates/valddi.cpp.mako b/scripts/templates/valddi.cpp.mako index f3ec24bfb9..2e9bac3200 100644 --- a/scripts/templates/valddi.cpp.mako +++ b/scripts/templates/valddi.cpp.mako @@ -82,7 +82,7 @@ namespace ur_validation_layer %elif func_name == n + "AdapterRetain": if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) { - refCountContext.decrementRefCount(${object_param}, true); + refCountContext.incrementRefCount(${object_param}, true); } %elif func_name == n + "AdapterGet": if( context.enableLeakChecking && phAdapters && result == UR_RESULT_SUCCESS ) diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 8298d513d8..73ac777edb 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -40,15 +40,13 @@ ur_context_handle_t_::getOwningURPool(umf_memory_pool_t *UMFPool) { UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { - std::ignore = DeviceCount; - assert(DeviceCount == 1); ur_result_t RetErr = UR_RESULT_SUCCESS; std::unique_ptr ContextPtr{nullptr}; try { // Create a scoped context. 
ContextPtr = std::unique_ptr( - new ur_context_handle_t_{*phDevices}); + new ur_context_handle_t_{phDevices, DeviceCount}); static std::once_flag InitFlag; std::call_once( @@ -78,9 +76,9 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, switch (uint32_t{propName}) { case UR_CONTEXT_INFO_NUM_DEVICES: - return ReturnValue(1); + return ReturnValue(static_cast(hContext->Devices.size())); case UR_CONTEXT_INFO_DEVICES: - return ReturnValue(hContext->getDevice()); + return ReturnValue(hContext->getDevices()); case UR_CONTEXT_INFO_REFERENCE_COUNT: return ReturnValue(hContext->getReferenceCount()); case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: @@ -124,8 +122,10 @@ urContextRetain(ur_context_handle_t hContext) { UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { + // FIXME: this entry point has been deprecated in the SYCL RT and should be + // changed to unsupported once the deprecation period has elapsed *phNativeContext = reinterpret_cast( - hContext->getDevice()->getNativeContext()); + hContext->getDevices()[0]->getNativeContext()); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/hip/context.hpp b/source/adapters/hip/context.hpp index d5eb2e1df8..69d4df9b6d 100644 --- a/source/adapters/hip/context.hpp +++ b/source/adapters/hip/context.hpp @@ -28,26 +28,26 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); /// /// One of the main differences between the UR API and the HIP driver API is /// that the second modifies the state of the threads by assigning -/// `hipCtx_t` objects to threads. `hipCtx_t` objects store data associated +/// \c hipCtx_t objects to threads. \c hipCtx_t objects store data associated /// with a given device and control access to said device from the user side. /// UR API context are objects that are passed to functions, and not bound /// to threads. 
-/// The ur_context_handle_t_ object doesn't implement this behavior. It only -/// holds the HIP context data. The RAII object \ref ScopedContext implements -/// the active context behavior. /// -/// Primary vs UserDefined context +/// Since the \c ur_context_handle_t can contain multiple devices, and a \c +/// hipCtx_t refers to only a single device, the \c hipCtx_t is more tightly +/// coupled to a \c ur_device_handle_t than a \c ur_context_handle_t. In order +/// to remove some ambiguities about the different semantics of \c +/// ur_context_handle_t and native \c hipCtx_t, we access the native \c +/// hipCtx_t solely through the \c ur_device_handle_t class, by using the object +/// \ref ScopedContext, which sets the active device (by setting the active +/// native \c hipCtx_t). /// -/// HIP has two different types of context, the Primary context, -/// which is usable by all threads on a given process for a given device, and -/// the aforementioned custom contexts. -/// The HIP documentation, and performance analysis, suggest using the Primary -/// context whenever possible. The Primary context is also used by the HIP -/// Runtime API. For UR applications to interop with HIP Runtime API, they have -/// to use the primary context - and make that active in the thread. The -/// `ur_context_handle_t_` object can be constructed with a `kind` parameter -/// that allows to construct a Primary or `UserDefined` context, so that -/// the UR object interface is always the same. +/// Primary vs User-defined \c hipCtx_t +/// +/// HIP has two different types of \c hipCtx_t, the Primary context, which is +/// usable by all threads on a given process for a given device, and the +/// aforementioned custom \c hipCtx_t s. The HIP documentation, confirmed with +/// performance analysis, suggests using the Primary context whenever possible. /// /// Destructor callback /// @@ -57,6 +57,16 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); /// See proposal for details.
/// https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md /// +/// Memory Management for Devices in a Context +/// +/// A \c ur_mem_handle_t is associated with a \c ur_context_handle_t_, which +/// may refer to multiple devices. Therefore the \c ur_mem_handle_t must +/// handle a native allocation for each device in the context. UR is +/// responsible for automatically handling event dependencies for kernels +/// writing to or reading from the same \c ur_mem_handle_t and migrating memory +/// between native allocations for devices in the same \c ur_context_handle_t_ +/// if necessary. +/// struct ur_context_handle_t_ { struct deleter_data { @@ -68,15 +78,22 @@ struct ur_context_handle_t_ { using native_type = hipCtx_t; - ur_device_handle_t DeviceId; + std::vector Devices; + std::atomic_uint32_t RefCount; - ur_context_handle_t_(ur_device_handle_t DevId) - : DeviceId{DevId}, RefCount{1} { - urDeviceRetain(DeviceId); + ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) + : Devices{Devs, Devs + NumDevices}, RefCount{1} { + for (auto &Dev : Devices) { + urDeviceRetain(Dev); + } }; - ~ur_context_handle_t_() { urDeviceRelease(DeviceId); } + ~ur_context_handle_t_() { + for (auto &Dev : Devices) { + urDeviceRelease(Dev); + } + } void invokeExtendedDeleters() { std::lock_guard Guard(Mutex); @@ -91,7 +108,9 @@ struct ur_context_handle_t_ { ExtendedDeleters.emplace_back(deleter_data{Function, UserData}); } - ur_device_handle_t getDevice() const noexcept { return DeviceId; } + const std::vector &getDevices() const noexcept { + return Devices; + } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 278894c436..e40470f9aa 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -549,6 +549,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
SupportedExtensions += "cl_khr_fp64 "; } + SupportedExtensions += "cl_khr_fp16 "; + return ReturnValue(SupportedExtensions.c_str()); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index 83cc2ee954..bea2c46fb5 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -25,12 +25,13 @@ struct ur_device_handle_t_ { std::atomic_uint32_t RefCount; ur_platform_handle_t Platform; hipCtx_t HIPContext; + uint32_t DeviceIndex; public: ur_device_handle_t_(native_type HipDevice, hipCtx_t Context, - ur_platform_handle_t Platform) + ur_platform_handle_t Platform, uint32_t DeviceIndex) : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), - HIPContext(Context) {} + HIPContext(Context), DeviceIndex(DeviceIndex) {} ~ur_device_handle_t_() { UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice)); @@ -42,7 +43,11 @@ struct ur_device_handle_t_ { ur_platform_handle_t getPlatform() const noexcept { return Platform; }; - hipCtx_t getNativeContext() { return HIPContext; }; + hipCtx_t getNativeContext() const noexcept { return HIPContext; }; + + // Returns the index of the device relative to the other devices in the same + // platform + uint32_t getIndex() const noexcept { return DeviceIndex; }; }; int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute); diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 1a73618c77..078d3ae399 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -36,19 +36,18 @@ static size_t imageElementByteSize(hipArray_Format ArrayFormat) { return 0; } -ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, - hipStream_t Stream, uint32_t NumEventsInWaitList, +ur_result_t enqueueEventsWait(ur_queue_handle_t, hipStream_t Stream, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { if (!EventWaitList) { return UR_RESULT_SUCCESS; } try { - ScopedContext 
Active(CommandQueue->getDevice()); - auto Result = forLatestEvents( EventWaitList, NumEventsInWaitList, [Stream](ur_event_handle_t Event) -> ur_result_t { - if (Event->getStream() == Stream) { + ScopedContext Active(Event->getDevice()); + if (Event->isCompleted() || Event->getStream() == Stream) { return UR_RESULT_SUCCESS; } else { UR_CHECK_ERROR(hipStreamWaitEvent(Stream, Event->get(), 0)); @@ -95,6 +94,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr RetImplEvent{nullptr}; @@ -102,8 +102,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( try { ScopedContext Active(hQueue->getDevice()); hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); if (phEvent) { RetImplEvent = @@ -112,9 +112,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( UR_CHECK_ERROR(RetImplEvent->start()); } - UR_CHECK_ERROR(hipMemcpyHtoDAsync( - std::get(hBuffer->Mem).getWithOffset(offset), - const_cast(pSrc), size, HIPStream)); + UR_CHECK_ERROR( + hipMemcpyHtoDAsync(std::get(hBuffer->Mem) + .getPtrWithOffset(hQueue->getDevice(), offset), + const_cast(pSrc), size, HIPStream)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -141,15 +142,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); UR_ASSERT(!(phEventWaitList != NULL && numEventsInWaitList == 0), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr
RetImplEvent{nullptr}; + ur_lock MemoryMigrationLock{hBuffer->MemoryMigrationMutex}; + auto Device = hQueue->getDevice(); + hipStream_t HIPStream = hQueue->getNextTransferStream(); + try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + // Note that this entry point may be called on a queue that may not be the + // last queue to write to the MemBuffer, meaning we must perform the copy + // from a different device + if (hBuffer->LastEventWritingToMemObj && + hBuffer->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { + Device = hBuffer->LastEventWritingToMemObj->getDevice(); + ScopedContext Active(Device); + HIPStream = hipStream_t{0}; // Default stream for different device + // We may have to wait for an event on another queue if it is the last + // event writing to mem obj + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, 1, + &hBuffer->LastEventWritingToMemObj)); + } + + ScopedContext Active(Device); + + // Use the default stream if copying from another device + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); if (phEvent) { RetImplEvent = @@ -158,9 +178,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( UR_CHECK_ERROR(RetImplEvent->start()); } + // Copying from the device with latest version of memory, not necessarily + // the device associated with the Queue UR_CHECK_ERROR(hipMemcpyDtoHAsync( - pDst, std::get(hBuffer->Mem).getWithOffset(offset), size, - HIPStream)); + pDst, + std::get(hBuffer->Mem).getPtrWithOffset(Device, offset), + size, HIPStream)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -175,9 +198,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( } } catch (ur_result_t err) { - Result = err; + return err; } - return Result; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL 
urEnqueueKernelLaunch( @@ -190,9 +213,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + std::vector DepEvents( + phEventWaitList, phEventWaitList + numEventsInWaitList); + std::vector> MemMigrationLocks; + + // phEventWaitList only contains events that are handed to UR by the SYCL + // runtime. However since UR handles memory dependencies within a context + // we may need to add more events to our dependent events list if the UR + // context contains multiple devices + if (hQueue->getContext()->Devices.size() > 1) { + MemMigrationLocks.reserve(hKernel->Args.MemObjArgs.size()); + for (auto &MemArg : hKernel->Args.MemObjArgs) { + bool PushBack = false; + if (auto MemDepEvent = MemArg.Mem->LastEventWritingToMemObj; + MemDepEvent && std::find(DepEvents.begin(), DepEvents.end(), + MemDepEvent) == DepEvents.end()) { + DepEvents.push_back(MemDepEvent); + PushBack = true; + } + if ((MemArg.AccessFlags & + (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY)) || + PushBack) { + if (std::find_if(MemMigrationLocks.begin(), MemMigrationLocks.end(), + [MemArg](auto &Lock) { + return Lock.first == MemArg.Mem; + }) == MemMigrationLocks.end()) + MemMigrationLocks.emplace_back( + std::pair{MemArg.Mem, ur_lock{MemArg.Mem->MemoryMigrationMutex}}); + } + } + } + + // Early exit for zero size range kernel if (*pGlobalWorkSize == 0) { - return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitList, phEvent); + if (DepEvents.size()) { + return urEnqueueEventsWaitWithBarrier(hQueue, DepEvents.size(), + DepEvents.data(), phEvent); + } + return UR_RESULT_SUCCESS; } // Set the number of threads per block to the number of threads per warp @@ -265,8 +323,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( numEventsInWaitList, phEventWaitList, Guard, &StreamToken); hipFunction_t HIPFunc = hKernel->get(); - Result =
enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + if (DepEvents.size()) { + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, DepEvents.size(), + DepEvents.data())); + } + + // For memory migration across devices in the same context + if (hQueue->getContext()->Devices.size() > 1) { + for (auto &MemArg : hKernel->Args.MemObjArgs) { + migrateMemoryToDeviceIfNeeded(MemArg.Mem, hQueue->getDevice()); + } + } // Set the implicit global offset parameter if kernel has offset variant if (hKernel->getWithOffsetParameter()) { @@ -293,6 +360,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( UR_CHECK_ERROR(RetImplEvent->start()); } + // Once event has been started we can unlock MemoryMigrationMutex + if (hQueue->getContext()->Devices.size() > 1) { + for (auto &MemArg : hKernel->Args.MemObjArgs) { + // Telling the ur_mem_handle_t that it will need to wait on this kernel + // if it has been written to + if (phEvent && (MemArg.AccessFlags & + (UR_MEM_FLAG_READ_WRITE | UR_MEM_FLAG_WRITE_ONLY))) { + MemArg.Mem->setLastEventWritingToMemObj(RetImplEvent.get()); + } + } + // We can release the MemoryMigrationMutexes now + MemMigrationLocks.clear(); + } + // Set local mem max size if env var is present static const char *LocalMemSzPtrUR = std::getenv("UR_HIP_MAX_LOCAL_MEM_SIZE"); @@ -509,16 +590,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( UR_ASSERT(!(hostSlicePitch != 0 && hostSlicePitch % hostRowPitch != 0), UR_RESULT_ERROR_INVALID_SIZE); - ur_result_t Result = UR_RESULT_SUCCESS; - void *DevPtr = std::get(hBuffer->Mem).getVoid(); std::unique_ptr RetImplEvent{nullptr}; + ur_result_t Result = UR_RESULT_SUCCESS; + ur_lock MemoryMigrationLock(hBuffer->MemoryMigrationMutex); + auto Device = hQueue->getDevice(); + hipStream_t HIPStream = hQueue->getNextTransferStream(); + try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); + // Note that this entry point may be 
called on a queue that may not be the + // last queue to write to the MemBuffer, meaning we must perform the copy + // from a different device + if (hBuffer->LastEventWritingToMemObj && + hBuffer->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { + Device = hBuffer->LastEventWritingToMemObj->getDevice(); + ScopedContext Active(Device); + HIPStream = hipStream_t{0}; // Default stream for different device + // We may have to wait for an event on another queue if it is the last + // event writing to mem obj + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, 1, + &hBuffer->LastEventWritingToMemObj)); + } - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + ScopedContext Active(Device); + + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); if (phEvent) { RetImplEvent = @@ -527,10 +624,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( UR_CHECK_ERROR(RetImplEvent->start()); } - Result = commonEnqueueMemBufferCopyRect( + void *DevPtr = std::get(hBuffer->Mem).getVoid(Device); + UR_CHECK_ERROR(commonEnqueueMemBufferCopyRect( HIPStream, region, &DevPtr, hipMemoryTypeDevice, bufferOrigin, bufferRowPitch, bufferSlicePitch, pDst, hipMemoryTypeHost, hostOrigin, - hostRowPitch, hostSlicePitch); + hostRowPitch, hostSlicePitch)); if (phEvent) { UR_CHECK_ERROR(RetImplEvent->record()); @@ -558,7 +656,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { ur_result_t Result = UR_RESULT_SUCCESS; - void *DevPtr = std::get(hBuffer->Mem).getVoid(); + void *DevPtr = std::get(hBuffer->Mem).getVoid(hQueue->getDevice()); std::unique_ptr RetImplEvent{nullptr}; try { @@ -626,8 +724,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( UR_CHECK_ERROR(RetImplEvent->start()); } - auto Src = std::get(hBufferSrc->Mem).getWithOffset(srcOffset); - auto Dst = 
std::get(hBufferDst->Mem).getWithOffset(dstOffset); + auto Src = std::get(hBufferSrc->Mem) + .getPtrWithOffset(hQueue->getDevice(), srcOffset); + auto Dst = std::get(hBufferDst->Mem) + .getPtrWithOffset(hQueue->getDevice(), dstOffset); UR_CHECK_ERROR(hipMemcpyDtoDAsync(Dst, Src, size, Stream)); @@ -652,8 +752,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { ur_result_t Result = UR_RESULT_SUCCESS; - void *SrcPtr = std::get(hBufferSrc->Mem).getVoid(); - void *DstPtr = std::get(hBufferDst->Mem).getVoid(); + void *SrcPtr = + std::get(hBufferSrc->Mem).getVoid(hQueue->getDevice()); + void *DstPtr = + std::get(hBufferDst->Mem).getVoid(hQueue->getDevice()); std::unique_ptr RetImplEvent{nullptr}; try { @@ -762,7 +864,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( UR_CHECK_ERROR(RetImplEvent->start()); } - auto DstDevice = std::get(hBuffer->Mem).getWithOffset(offset); + auto DstDevice = std::get(hBuffer->Mem) + .getPtrWithOffset(hQueue->getDevice(), offset); auto N = size / patternSize; // pattern size in bytes @@ -882,21 +985,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); ur_result_t Result = UR_RESULT_SUCCESS; + ur_lock MemoryMigrationLock{hImage->MemoryMigrationMutex}; + auto Device = hQueue->getDevice(); + hipStream_t HIPStream = hQueue->getNextTransferStream(); + try { - ScopedContext Active(hQueue->getDevice()); - hipStream_t HIPStream = hQueue->getNextTransferStream(); + // Note that this entry point may be called on a queue that may not be the + // last queue to 
write to the MemBuffer, meaning we must perform the copy + // from a different device + if (hImage->LastEventWritingToMemObj && + hImage->LastEventWritingToMemObj->getDevice() != hQueue->getDevice()) { + Device = hImage->LastEventWritingToMemObj->getDevice(); + ScopedContext Active(Device); + HIPStream = hipStream_t{0}; // Default stream for different device + // We may have to wait for an event on another queue if it is the last + // event writing to mem obj + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, 1, + &hImage->LastEventWritingToMemObj)); + } + + ScopedContext Active(Device); if (phEventWaitList) { - Result = enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, - phEventWaitList); + UR_CHECK_ERROR(enqueueEventsWait(hQueue, HIPStream, numEventsInWaitList, + phEventWaitList)); } - hipArray *Array = std::get(hImage->Mem).getArray(); + hipArray *Array = std::get(hImage->Mem).getArray(Device); hipArray_Format Format; size_t NumChannels; @@ -950,8 +1069,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_rect_offset_t origin, ur_rect_region_t region, size_t, size_t, void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImage->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImage->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); ur_result_t Result = UR_RESULT_SUCCESS; @@ -964,7 +1082,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( phEventWaitList); } - hipArray *Array = std::get(hImage->Mem).getArray(); + hipArray *Array = + std::get(hImage->Mem).getArray(hQueue->getDevice()); hipArray_Format Format; size_t NumChannels; @@ -1017,10 +1136,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( ur_rect_offset_t dstOrigin, ur_rect_region_t region, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - UR_ASSERT(hImageSrc->MemType == 
ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - UR_ASSERT(hImageDst->MemType == ur_mem_handle_t_::Type::Surface, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImageSrc->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hImageDst->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(std::get(hImageSrc->Mem).getImageType() == std::get(hImageDst->Mem).getImageType(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -1035,12 +1152,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( phEventWaitList); } - hipArray *SrcArray = std::get(hImageSrc->Mem).getArray(); + hipArray *SrcArray = + std::get(hImageSrc->Mem).getArray(hQueue->getDevice()); hipArray_Format SrcFormat; size_t SrcNumChannels; getArrayDesc(SrcArray, SrcFormat, SrcNumChannels); - hipArray *DstArray = std::get(hImageDst->Mem).getArray(); + hipArray *DstArray = + std::get(hImageDst->Mem).getArray(hQueue->getDevice()); hipArray_Format DstFormat; size_t DstNumChannels; getArrayDesc(DstArray, DstFormat, DstNumChannels); @@ -1101,8 +1220,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - UR_ASSERT(hBuffer->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hBuffer->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); auto &BufferImpl = std::get(hBuffer->Mem); UR_ASSERT(offset + size <= BufferImpl.getSize(), UR_RESULT_ERROR_INVALID_SIZE); @@ -1161,8 +1279,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { ur_result_t Result = UR_RESULT_SUCCESS; - UR_ASSERT(hMem->MemType == ur_mem_handle_t_::Type::Buffer, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); + UR_ASSERT(hMem->isBuffer(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); 
UR_ASSERT(std::get(hMem->Mem).getMapPtr() != nullptr, UR_RESULT_ERROR_INVALID_MEM_OBJECT); UR_ASSERT(std::get(hMem->Mem).getMapPtr() == pMappedPtr, @@ -1302,7 +1419,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { void *HIPDevicePtr = const_cast(pMem); - ur_device_handle_t Device = hQueue->getContext()->getDevice(); + ur_device_handle_t Device = hQueue->getDevice(); // If the device does not support managed memory access, we can't set // mem_advise. diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index 4871335c9f..2af6c5e910 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -193,7 +193,7 @@ urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { try { auto Context = phEventWaitList[0]->getContext(); - ScopedContext Active(Context->getDevice()); + ScopedContext Active(phEventWaitList[0]->getDevice()); auto WaitFunc = [Context](ur_event_handle_t Event) -> ur_result_t { UR_ASSERT(Event, UR_RESULT_ERROR_INVALID_EVENT); @@ -292,7 +292,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { std::unique_ptr event_ptr{hEvent}; ur_result_t Result = UR_RESULT_ERROR_INVALID_EVENT; try { - ScopedContext Active(hEvent->getContext()->getDevice()); Result = hEvent->release(); } catch (...) 
{ Result = UR_RESULT_ERROR_OUT_OF_RESOURCES; diff --git a/source/adapters/hip/event.hpp b/source/adapters/hip/event.hpp index bfa05b59d7..ecb995dfbe 100644 --- a/source/adapters/hip/event.hpp +++ b/source/adapters/hip/event.hpp @@ -28,6 +28,8 @@ struct ur_event_handle_t_ { ur_queue_handle_t getQueue() const noexcept { return Queue; } + ur_device_handle_t getDevice() const noexcept { return Queue->getDevice(); } + hipStream_t getStream() const noexcept { return Stream; } uint32_t getComputeStreamToken() const noexcept { return StreamToken; } diff --git a/source/adapters/hip/kernel.cpp b/source/adapters/hip/kernel.cpp index cc6f4384bc..ec58bafcc6 100644 --- a/source/adapters/hip/kernel.cpp +++ b/source/adapters/hip/kernel.cpp @@ -19,7 +19,7 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, std::unique_ptr RetKernel{nullptr}; try { - ScopedContext Active(hProgram->getContext()->getDevice()); + ScopedContext Active(hProgram->getDevice()); hipFunction_t HIPFunc; hipError_t KernelError = @@ -263,9 +263,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( - ur_kernel_handle_t hKernel, uint32_t argIndex, - const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *Properties, + ur_mem_handle_t hArgValue) { // Below sets kernel arg when zero-sized buffers are handled. // In such case the corresponding memory is null. 
if (hArgValue == nullptr) { @@ -275,8 +276,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_result_t Result = UR_RESULT_SUCCESS; try { - if (hArgValue->MemType == ur_mem_handle_t_::Type::Surface) { - auto array = std::get(hArgValue->Mem).getArray(); + auto Device = hKernel->getProgram()->getDevice(); + hKernel->Args.addMemObjArg(argIndex, hArgValue, Properties->memoryAccess); + if (hArgValue->isImage()) { + auto array = std::get(hArgValue->Mem).getArray(Device); hipArray_Format Format; size_t NumChannels; getArrayDesc(array, Format, NumChannels); @@ -288,10 +291,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( "uint32, float, and half."); } hipSurfaceObject_t hipSurf = - std::get(hArgValue->Mem).getSurface(); + std::get(hArgValue->Mem).getSurface(Device); hKernel->setKernelArg(argIndex, sizeof(hipSurf), (void *)&hipSurf); } else { - void *HIPPtr = std::get(hArgValue->Mem).getVoid(); + void *HIPPtr = std::get(hArgValue->Mem).getVoid(Device); hKernel->setKernelArg(argIndex, sizeof(void *), (void *)&HIPPtr); } } catch (ur_result_t Err) { diff --git a/source/adapters/hip/kernel.hpp b/source/adapters/hip/kernel.hpp index f13478a69c..83693a3d41 100644 --- a/source/adapters/hip/kernel.hpp +++ b/source/adapters/hip/kernel.hpp @@ -57,6 +57,14 @@ struct ur_kernel_handle_t_ { args_size_t ParamSizes; args_index_t Indices; args_size_t OffsetPerIndex; + // A struct to keep track of memargs so that we can do dependency analysis + // at urEnqueueKernelLaunch + struct mem_obj_arg { + ur_mem_handle_t_ *Mem; + int Index; + ur_mem_flags_t AccessFlags; + }; + std::vector MemObjArgs; std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0}; @@ -110,6 +118,20 @@ struct ur_kernel_handle_t_ { Size + AlignedLocalOffset - LocalOffset); } + void addMemObjArg(int Index, ur_mem_handle_t hMem, ur_mem_flags_t Flags) { + assert(hMem && "Invalid mem handle"); + // To avoid redundancy we are not storing mem obj with index i at index + // i in the vec of MemObjArgs. 
+ for (auto &Arg : MemObjArgs) { + if (Arg.Index == Index) { + // Overwrite the mem obj with the same index + Arg = arguments::mem_obj_arg{hMem, Index, Flags}; + return; + } + } + MemObjArgs.push_back(arguments::mem_obj_arg{hMem, Index, Flags}); + } + void setImplicitOffset(size_t Size, std::uint32_t *ImplicitOffset) { assert(Size == sizeof(std::uint32_t) * 3); std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size); @@ -167,10 +189,10 @@ struct ur_kernel_handle_t_ { const char *getName() const noexcept { return Name.c_str(); } - /// Get the number of kernel arguments, excluding the implicit global offset. - /// Note this only returns the current known number of arguments, not the - /// real one required by the kernel, since this cannot be queried from - /// the HIP Driver API + /// Get the number of kernel arguments, excluding the implicit global + /// offset. Note this only returns the current known number of arguments, + /// not the real one required by the kernel, since this cannot be queried + /// from the HIP Driver API uint32_t getNumArgs() const noexcept { return Args.Indices.size() - 1; } void setKernelArg(int Index, size_t Size, const void *Arg) { diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index 899dad5674..68ded26263 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -55,28 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { return UR_RESULT_SUCCESS; } - ScopedContext Active(uniqueMemObj->getContext()->getDevice()); - - if (hMem->MemType == ur_mem_handle_t_::Type::Buffer) { - auto &hBuffer = std::get(uniqueMemObj->Mem); - switch (hBuffer.MemAllocMode) { - case BufferMem::AllocMode::CopyIn: - case BufferMem::AllocMode::Classic: - UR_CHECK_ERROR(hipFree((void *)hBuffer.Ptr)); - break; - case BufferMem::AllocMode::UseHostPtr: - UR_CHECK_ERROR(hipHostUnregister(hBuffer.HostPtr)); - break; - case BufferMem::AllocMode::AllocHostPtr: - 
UR_CHECK_ERROR(hipFreeHost(hBuffer.HostPtr)); - }; - } - - else if (hMem->MemType == ur_mem_handle_t_::Type::Surface) { - auto &hImage = std::get(uniqueMemObj->Mem); - UR_CHECK_ERROR(hipDestroySurfaceObject(hImage.getSurface())); - UR_CHECK_ERROR(hipFreeArray(hImage.getArray())); - } + UR_CHECK_ERROR(hMem->clear()); } catch (ur_result_t Err) { Result = Err; @@ -123,49 +102,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_mem_handle_t RetMemObj = nullptr; try { - ScopedContext Active(hContext->getDevice()); - void *Ptr; - auto pHost = pProperties ? pProperties->pHost : nullptr; + auto HostPtr = pProperties ? pProperties->pHost : nullptr; BufferMem::AllocMode AllocMode = BufferMem::AllocMode::Classic; - if ((flags & UR_MEM_FLAG_USE_HOST_POINTER) && EnableUseHostPtr) { - UR_CHECK_ERROR(hipHostRegister(pHost, size, hipHostRegisterMapped)); - UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0)); AllocMode = BufferMem::AllocMode::UseHostPtr; } else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { - UR_CHECK_ERROR(hipHostMalloc(&pHost, size)); - UR_CHECK_ERROR(hipHostGetDevicePointer(&Ptr, pHost, 0)); + UR_CHECK_ERROR(hipHostMalloc(&HostPtr, size)); AllocMode = BufferMem::AllocMode::AllocHostPtr; - } else { - UR_CHECK_ERROR(hipMalloc(&Ptr, size)); - if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { - AllocMode = BufferMem::AllocMode::CopyIn; - } + } else if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + AllocMode = BufferMem::AllocMode::CopyIn; } - if (Result == UR_RESULT_SUCCESS) { - ur_mem_handle_t parentBuffer = nullptr; - - auto DevPtr = reinterpret_cast(Ptr); - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, parentBuffer, flags, AllocMode, DevPtr, pHost, size}); - if (URMemObj != nullptr) { - RetMemObj = URMemObj.release(); - if (PerformInitialCopy) { - // Operates on the default stream of the current HIP context. 
- UR_CHECK_ERROR(hipMemcpyHtoD(DevPtr, pHost, size)); - // Synchronize with default stream implicitly used by hipMemcpyHtoD - // to make buffer data available on device before any other UR call - // uses it. - if (Result == UR_RESULT_SUCCESS) { - hipStream_t defaultStream = 0; - UR_CHECK_ERROR(hipStreamSynchronize(defaultStream)); - } - } - } else { - Result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + auto URMemObj = std::unique_ptr( + new ur_mem_handle_t_{hContext, flags, AllocMode, HostPtr, size}); + if (URMemObj == nullptr) { + throw UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // First allocation will be made at urMemBufferCreate if context only + // has one device + if (PerformInitialCopy && HostPtr) { + // Perform initial copy to every device in context + for (auto &Device : hContext->getDevices()) { + ScopedContext Active(Device); + // getPtr may allocate mem if not already allocated + const auto &Ptr = std::get(URMemObj->Mem).getPtr(Device); + UR_CHECK_ERROR(hipMemcpyHtoD(Ptr, HostPtr, size)); + // TODO check if we can remove this + // Synchronize with default stream implicitly used by cuMemcpyHtoD + // to make buffer data available on device before any other UR + // call uses it. + // hipStream_t defaultStream = 0; + // UR_CHECK_ERROR(hipStreamSynchronize(defaultStream)); } } + RetMemObj = URMemObj.release(); } catch (ur_result_t Err) { Result = Err; } catch (...) { @@ -215,27 +186,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( auto &BufferImpl = std::get(hBuffer->Mem); UR_ASSERT(((pRegion->origin + pRegion->size) <= BufferImpl.getSize()), UR_RESULT_ERROR_INVALID_BUFFER_SIZE); - // Retained indirectly due to retaining parent buffer below. 
- ur_context_handle_t Context = hBuffer->Context; - BufferMem::AllocMode AllocMode = BufferMem::AllocMode::Classic; - - UR_ASSERT(BufferImpl.Ptr != BufferMem::native_type{0}, - UR_RESULT_ERROR_INVALID_MEM_OBJECT); - BufferMem::native_type Ptr = BufferImpl.getWithOffset(pRegion->origin); - - void *HostPtr = nullptr; - if (BufferImpl.HostPtr) { - HostPtr = static_cast(BufferImpl.HostPtr) + pRegion->origin; + for (auto Device : hBuffer->Context->getDevices()) { + BufferImpl.getPtr(Device); // This is allocating a dev ptr behind the scenes + // which is necessary before SubBuffer partition } ReleaseGuard ReleaseGuard(hBuffer); std::unique_ptr RetMemObj{nullptr}; try { - ScopedContext Active(Context->getDevice()); - - RetMemObj = std::unique_ptr{new ur_mem_handle_t_{ - Context, hBuffer, flags, AllocMode, Ptr, HostPtr, pRegion->size}}; + RetMemObj = std::unique_ptr{ + new ur_mem_handle_t_{hBuffer, pRegion->origin}}; } catch (ur_result_t Err) { *phMem = nullptr; return Err; @@ -258,19 +219,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT, UR_RESULT_ERROR_INVALID_ENUMERATION); - UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); + // FIXME: Only getting info for the first device in the context. 
This + // should be fine in general + auto Device = hMemory->getContext()->getDevices()[0]; + ScopedContext Active(Device); - ScopedContext Active(hMemory->getContext()->getDevice()); + UrReturnHelper ReturnValue(propSize, pMemInfo, pPropSizeRet); switch (MemInfoType) { case UR_MEM_INFO_SIZE: { try { - const auto MemVisitor = [](auto &&Mem) -> size_t { + const auto MemVisitor = [Device](auto &&Mem) -> size_t { using T = std::decay_t; if constexpr (std::is_same_v) { size_t AllocSize = 0; hipDeviceptr_t BasePtr = nullptr; - UR_CHECK_ERROR(hipMemGetAddressRange(&BasePtr, &AllocSize, Mem.Ptr)); + UR_CHECK_ERROR( + hipMemGetAddressRange(&BasePtr, &AllocSize, Mem.getPtr(Device))); return AllocSize; } else if constexpr (std::is_same_v) { #if HIP_VERSION < 50600000 @@ -278,7 +243,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, #else HIP_ARRAY3D_DESCRIPTOR ArrayDescriptor; UR_CHECK_ERROR( - hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray())); + hipArray3DGetDescriptor(&ArrayDescriptor, Mem.getArray(Device))); const auto PixelSizeBytes = GetHipFormatPixelSize(ArrayDescriptor.Format) * ArrayDescriptor.NumChannels; @@ -317,30 +282,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, /// \param[out] phNativeMem Set to the native handle of the UR mem object. /// /// \return UR_RESULT_SUCCESS -UR_APIEXPORT ur_result_t UR_APICALL -urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { -#if defined(__HIP_PLATFORM_NVIDIA__) - if (sizeof(BufferMem::native_type) > sizeof(ur_native_handle_t)) { - // Check that all the upper bits that cannot be represented by - // ur_native_handle_t are empty. - // NOTE: The following shift might trigger a warning, but the check in the - // if above makes sure that this does not underflow. 
- BufferMem::native_type UpperBits = std::get(hMem->Mem).get() >> - (sizeof(ur_native_handle_t) * CHAR_BIT); - if (UpperBits) { - // Return an error if any of the remaining bits is non-zero. - return UR_RESULT_ERROR_INVALID_MEM_OBJECT; - } - } - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).get()); -#elif defined(__HIP_PLATFORM_AMD__) - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).get()); -#else -#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); -#endif - return UR_RESULT_SUCCESS; +UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t, + ur_native_handle_t *) { + // FIXME: there is no good way of doing this with a multi device context. + // If we return a single pointer, how would we know which device's allocation + // it should be? + // If we return a vector of pointers, this is OK for read only access but if + // we write to a buffer, how would we know which one had been written to? + // Should unused allocations be updated afterwards? We have no way of knowing + // any of these things in the current API design. + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( @@ -356,7 +307,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -/// \TODO Not implemented UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -389,145 +339,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); } - ur_result_t Result = UR_RESULT_SUCCESS; - // We only support RBGA channel order // TODO: check SYCL CTS and spec. 
May also have to support BGRA UR_ASSERT(pImageFormat->channelOrder == UR_IMAGE_CHANNEL_ORDER_RGBA, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); - // We have to use hipArray3DCreate, which has some caveats. The height and - // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc gives - // a minimum value of 1, so we need to convert the answer. - HIP_ARRAY3D_DESCRIPTOR ArrayDesc; - ArrayDesc.NumChannels = 4; // Only support 4 channel image - ArrayDesc.Flags = 0; // No flags required - ArrayDesc.Width = pImageDesc->width; - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - ArrayDesc.Height = 0; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = 0; - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - ArrayDesc.Height = pImageDesc->height; - ArrayDesc.Depth = pImageDesc->depth; - } + auto URMemObj = std::unique_ptr( + new ur_mem_handle_t_{hContext, flags, *pImageFormat, *pImageDesc, pHost}); - // We need to get this now in bytes for calculating the total image size later - size_t PixelTypeSizeBytes; - - switch (pImageFormat->channelType) { - - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT8; - PixelTypeSizeBytes = 1; - break; - case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT16; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: - ArrayDesc.Format = HIP_AD_FORMAT_HALF; - PixelTypeSizeBytes = 2; - break; - case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: - ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT32; - PixelTypeSizeBytes = 4; - 
break; - case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: - ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT32; - PixelTypeSizeBytes = 4; - break; - case UR_IMAGE_CHANNEL_TYPE_FLOAT: - ArrayDesc.Format = HIP_AD_FORMAT_FLOAT; - PixelTypeSizeBytes = 4; - break; - default: - // urMemImageCreate given unsupported image_channel_data_type - return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + if (URMemObj == nullptr) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - // When a dimension isn't used image_desc has the size set to 1 - size_t PixelSizeBytes = - PixelTypeSizeBytes * 4; // 4 is the only number of channels we support - size_t ImageSizeBytes = PixelSizeBytes * pImageDesc->width * - pImageDesc->height * pImageDesc->depth; - - ScopedContext Active(hContext->getDevice()); - hipArray *ImageArray; - UR_CHECK_ERROR(hipArray3DCreate(reinterpret_cast(&ImageArray), - &ArrayDesc)); - - try { - if (PerformInitialCopy) { - // We have to use a different copy function for each image dimensionality - if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) { - UR_CHECK_ERROR(hipMemcpyHtoA(ImageArray, 0, pHost, ImageSizeBytes)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) { - hip_Memcpy2D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray; - CpyDesc.dstArray = reinterpret_cast(ImageArray); - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc)); - } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) { - HIP_MEMCPY3D CpyDesc; - memset(&CpyDesc, 0, sizeof(CpyDesc)); - CpyDesc.srcMemoryType = hipMemoryType::hipMemoryTypeHost; - CpyDesc.srcHost = pHost; - CpyDesc.dstMemoryType = hipMemoryType::hipMemoryTypeArray; - CpyDesc.dstArray = reinterpret_cast(ImageArray); - CpyDesc.WidthInBytes = PixelSizeBytes * pImageDesc->width; - CpyDesc.Height = pImageDesc->height; - 
CpyDesc.Depth = pImageDesc->depth; - UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc)); - } + if (PerformInitialCopy) { + for (const auto &Dev : hContext->getDevices()) { + UR_CHECK_ERROR(migrateMemoryToDeviceIfNeeded(URMemObj.get(), Dev)); } - - // HIP_RESOURCE_DESC is a union of different structs, shown here - // We need to fill it as described here to use it for a surface or texture - // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and - // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array - // handle. - // HIP_RESOURCE_DESC::flags must be set to zero - - hipResourceDesc ImageResDesc; - ImageResDesc.res.array.array = ImageArray; - ImageResDesc.resType = hipResourceTypeArray; - - hipSurfaceObject_t Surface; - UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc)); - - auto URMemObj = std::unique_ptr(new ur_mem_handle_t_{ - hContext, ImageArray, Surface, flags, pImageDesc->type, pHost}); - - if (URMemObj == nullptr) { - return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - *phMem = URMemObj.release(); - } catch (ur_result_t Err) { - UR_CHECK_ERROR(hipFreeArray(ImageArray)); - return Err; - } catch (...) 
{ - UR_CHECK_ERROR(hipFreeArray(ImageArray)); - return UR_RESULT_ERROR_UNKNOWN; } - return Result; + *phMem = URMemObj.release(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, @@ -536,14 +366,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hMemory->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); - ScopedContext Active(hMemory->getContext()->getDevice()); + // FIXME: only getting infor for first image in ctx + auto Device = hMemory->getContext()->getDevices()[0]; + ScopedContext Active(Device); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); try { HIP_ARRAY3D_DESCRIPTOR ArrayInfo; #if HIP_VERSION >= 50600000 UR_CHECK_ERROR(hipArray3DGetDescriptor( - &ArrayInfo, std::get(hMemory->Mem).getArray())); + &ArrayInfo, std::get(hMemory->Mem).getArray(Device))); #else return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; #endif @@ -625,3 +457,174 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { hMem->incrementReferenceCount(); return UR_RESULT_SUCCESS; } + +inline ur_result_t +allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem, + const ur_device_handle_t hDevice) { + ScopedContext Active(hDevice); + ur_lock LockGuard(Mem->MemoryAllocationMutex); + + if (Mem->isBuffer()) { + auto &Buffer = std::get(Mem->Mem); + hipDeviceptr_t &DevPtr = Buffer.Ptrs[hDevice->getIndex()]; + + // Allocation has already been made + if (DevPtr != BufferMem::native_type{0}) { + return UR_RESULT_SUCCESS; + } + + if (Buffer.MemAllocMode == BufferMem::AllocMode::AllocHostPtr) { + // Host allocation has already been made + UR_CHECK_ERROR(hipHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0)); + } else if (Buffer.MemAllocMode == BufferMem::AllocMode::UseHostPtr) { + UR_CHECK_ERROR( + hipHostRegister(Buffer.HostPtr, Buffer.Size, hipHostRegisterMapped)); + UR_CHECK_ERROR(hipHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0)); + } 
else { + UR_CHECK_ERROR(hipMalloc(&DevPtr, Buffer.Size)); + } + } else { + hipArray *ImageArray; + hipSurfaceObject_t Surface; + try { + auto &Image = std::get(Mem->Mem); + // Allocation has already been made + if (Image.Arrays[hDevice->getIndex()]) { + return UR_RESULT_SUCCESS; + } + UR_CHECK_ERROR(hipArray3DCreate( + reinterpret_cast(&ImageArray), &Image.ArrayDesc)); + Image.Arrays[hDevice->getIndex()] = ImageArray; + // HIP_RESOURCE_DESC is a union of different structs, shown here + // We need to fill it as described here to use it for a surface or texture + // HIP_RESOURCE_DESC::resType must be HIP_RESOURCE_TYPE_ARRAY and + // HIP_RESOURCE_DESC::res::array::hArray must be set to a valid HIP array + // handle. + // HIP_RESOURCE_DESC::flags must be set to zero + hipResourceDesc ImageResDesc; + ImageResDesc.res.array.array = ImageArray; + ImageResDesc.resType = hipResourceTypeArray; + + UR_CHECK_ERROR(hipCreateSurfaceObject(&Surface, &ImageResDesc)); + Image.SurfObjs[hDevice->getIndex()] = Surface; + } catch (ur_result_t Err) { + if (ImageArray) { + UR_CHECK_ERROR(hipFreeArray(ImageArray)); + } + return Err; + } catch (...) 
{ + if (ImageArray) { + UR_CHECK_ERROR(hipFreeArray(ImageArray)); + } + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; +} + +namespace { +inline ur_result_t migrateBufferToDevice(ur_mem_handle_t Mem, + ur_device_handle_t hDevice) { + auto &Buffer = std::get(Mem->Mem); + if (Mem->LastEventWritingToMemObj == nullptr) { + // Device allocation being initialized from host for the first time + if (Buffer.HostPtr) { + UR_CHECK_ERROR( + hipMemcpyHtoD(Buffer.getPtr(hDevice), Buffer.HostPtr, Buffer.Size)); + } + } else if (Mem->LastEventWritingToMemObj->getDevice() != hDevice) { + UR_CHECK_ERROR( + hipMemcpyDtoD(Buffer.getPtr(hDevice), + Buffer.getPtr(Mem->LastEventWritingToMemObj->getDevice()), + Buffer.Size)); + } + return UR_RESULT_SUCCESS; +} + +inline ur_result_t migrateImageToDevice(ur_mem_handle_t Mem, + ur_device_handle_t hDevice) { + auto &Image = std::get(Mem->Mem); + // When a dimension isn't used image_desc has the size set to 1 + size_t PixelSizeBytes = Image.PixelTypeSizeBytes * + 4; // 4 is the only number of channels we support + size_t ImageSizeBytes = PixelSizeBytes * Image.ImageDesc.width * + Image.ImageDesc.height * Image.ImageDesc.depth; + + hipArray *ImageArray = Image.getArray(hDevice); + + hip_Memcpy2D CpyDesc2D; + HIP_MEMCPY3D CpyDesc3D; + // We have to use a different copy function for each image + // dimensionality + if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + memset(&CpyDesc2D, 0, sizeof(CpyDesc2D)); + CpyDesc2D.srcMemoryType = hipMemoryType::hipMemoryTypeHost; + CpyDesc2D.dstMemoryType = hipMemoryType::hipMemoryTypeArray; + CpyDesc2D.dstArray = reinterpret_cast(ImageArray); + CpyDesc2D.WidthInBytes = PixelSizeBytes * Image.ImageDesc.width; + CpyDesc2D.Height = Image.ImageDesc.height; + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + memset(&CpyDesc3D, 0, sizeof(CpyDesc3D)); + CpyDesc3D.srcMemoryType = hipMemoryType::hipMemoryTypeHost; + CpyDesc3D.dstMemoryType = hipMemoryType::hipMemoryTypeArray; + 
CpyDesc3D.dstArray = reinterpret_cast(ImageArray); + CpyDesc3D.WidthInBytes = PixelSizeBytes * Image.ImageDesc.width; + CpyDesc3D.Height = Image.ImageDesc.height; + CpyDesc3D.Depth = Image.ImageDesc.depth; + } + + if (Mem->LastEventWritingToMemObj == nullptr) { + if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { + UR_CHECK_ERROR( + hipMemcpyHtoA(ImageArray, 0, Image.HostPtr, ImageSizeBytes)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + CpyDesc2D.srcHost = Image.HostPtr; + UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc2D)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + CpyDesc3D.srcHost = Image.HostPtr; + UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); + } + } else if (Mem->LastEventWritingToMemObj->getDevice() != hDevice) { + if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { + // FIXME: 1D memcpy from DtoD going through the host. + UR_CHECK_ERROR(hipMemcpyAtoH( + Image.HostPtr, + Image.getArray(Mem->LastEventWritingToMemObj->getDevice()), + 0 /*srcOffset*/, ImageSizeBytes)); + UR_CHECK_ERROR( + hipMemcpyHtoA(ImageArray, 0, Image.HostPtr, ImageSizeBytes)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + CpyDesc2D.srcArray = + Image.getArray(Mem->LastEventWritingToMemObj->getDevice()); + UR_CHECK_ERROR(hipMemcpyParam2D(&CpyDesc2D)); + } else if (Image.ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + CpyDesc3D.srcArray = + Image.getArray(Mem->LastEventWritingToMemObj->getDevice()); + UR_CHECK_ERROR(hipDrvMemcpy3D(&CpyDesc3D)); + } + } + return UR_RESULT_SUCCESS; +} +} // namespace + +// If calling this entry point it is necessary to lock the memoryMigrationMutex +// beforehand +ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t Mem, + const ur_device_handle_t hDevice) { + UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + // Device allocation has already been initialized with most up to date + // data in buffer + if (Mem->HaveMigratedToDeviceSinceLastWrite[hDevice->getIndex()]) { + return UR_RESULT_SUCCESS; + } + + 
ScopedContext Active(hDevice); + if (Mem->isBuffer()) { + UR_CHECK_ERROR(migrateBufferToDevice(Mem, hDevice)); + } else { + UR_CHECK_ERROR(migrateImageToDevice(Mem, hDevice)); + } + + Mem->HaveMigratedToDeviceSinceLastWrite[hDevice->getIndex()] = true; + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 2732b22a6e..d36b9ee001 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -10,18 +10,25 @@ #pragma once #include "common.hpp" +#include "context.hpp" +#include "event.hpp" #include #include +ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); +ur_result_t migrateMemoryToDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); + // Handler for plain, pointer-based HIP allocations struct BufferMem { using native_type = hipDeviceptr_t; // If this allocation is a sub-buffer (i.e., a view on an existing // allocation), this is the pointer to the parent handler structure - ur_mem_handle_t Parent; - // HIP handler for the pointer - native_type Ptr; + ur_mem_handle_t Parent = nullptr; + // Outer mem holding this struct in variant + ur_mem_handle_t OuterMemStruct; /// Pointer associated with this device on the host void *HostPtr; @@ -50,20 +57,44 @@ struct BufferMem { AllocHostPtr } MemAllocMode; - BufferMem(ur_mem_handle_t Parent, AllocMode Mode, hipDeviceptr_t Ptr, - void *HostPtr, size_t Size) - : Parent{Parent}, Ptr{Ptr}, HostPtr{HostPtr}, Size{Size}, MapSize{0}, - MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE}, - MemAllocMode{Mode} {}; +private: + // Vector of HIP pointers + std::vector Ptrs; + +public: + BufferMem(ur_context_handle_t Context, ur_mem_handle_t OuterMemStruct, + AllocMode Mode, void *HostPtr, size_t Size) + : OuterMemStruct{OuterMemStruct}, HostPtr{HostPtr}, Size{Size}, + MapSize{0}, MapOffset{0}, MapPtr{nullptr}, MapFlags{UR_MAP_FLAG_WRITE}, + MemAllocMode{Mode}, Ptrs(Context->Devices.size(), 
native_type{0}){}; + + BufferMem(const BufferMem &Buffer) = default; - native_type get() const noexcept { return Ptr; } + // This will allocate memory on device if there isn't already an active + // allocation on the device + native_type getPtr(const ur_device_handle_t Device) { + return getPtrWithOffset(Device, 0); + } + + // This will allocate memory on device with index Index if there isn't already + // an active allocation on the device + native_type getPtrWithOffset(const ur_device_handle_t Device, size_t Offset) { + if (ur_result_t Err = + allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device); + Err != UR_RESULT_SUCCESS) { + throw Err; + } + return reinterpret_cast( + reinterpret_cast(Ptrs[Device->getIndex()]) + Offset); + } - native_type getWithOffset(size_t Offset) const noexcept { - return reinterpret_cast(reinterpret_cast(Ptr) + - Offset); + // This will allocate memory on device if there isn't already an active + // allocation on the device + void *getVoid(const ur_device_handle_t Device) { + return reinterpret_cast(getPtrWithOffset(Device, 0)); } - void *getVoid() const noexcept { return reinterpret_cast(Ptr); } + bool isSubBuffer() const noexcept { return Parent != nullptr; } size_t getSize() const noexcept { return Size; } @@ -107,28 +138,240 @@ struct BufferMem { assert(MapPtr != nullptr); return MapFlags; } + + ur_result_t clear() { + if (Parent != nullptr) { + return UR_RESULT_SUCCESS; + } + + switch (MemAllocMode) { + case AllocMode::CopyIn: + case AllocMode::Classic: + for (auto &DevPtr : Ptrs) { + if (DevPtr != native_type{0}) { + UR_CHECK_ERROR(hipFree(DevPtr)); + } + } + break; + case AllocMode::UseHostPtr: + UR_CHECK_ERROR(hipHostUnregister(HostPtr)); + break; + case AllocMode::AllocHostPtr: + UR_CHECK_ERROR(hipFreeHost(HostPtr)); + } + return UR_RESULT_SUCCESS; + } + + friend struct ur_mem_handle_t_; + friend ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); }; // Handler data for surface object (i.e. 
Images) struct SurfaceMem { - hipArray *Array; - hipSurfaceObject_t SurfObj; - ur_mem_type_t ImageType; +private: + std::vector Arrays; + std::vector SurfObjs; + +public: + ur_mem_handle_t OuterMemStruct; + + ur_image_format_t ImageFormat; + ur_image_desc_t ImageDesc; + HIP_ARRAY3D_DESCRIPTOR ArrayDesc; + size_t PixelTypeSizeBytes; + void *HostPtr; + + SurfaceMem(ur_context_handle_t Context, ur_mem_handle_t OuterMemStruct, + ur_image_format_t ImageFormat, ur_image_desc_t ImageDesc, + void *HostPtr) + : Arrays(Context->Devices.size(), nullptr), + SurfObjs(Context->Devices.size(), nullptr), + OuterMemStruct{OuterMemStruct}, + ImageFormat{ImageFormat}, ImageDesc{ImageDesc}, HostPtr{HostPtr} { + // We have to use hipArray3DCreate, which has some caveats. The height and + // depth parameters must be set to 0 produce 1D or 2D arrays. image_desc + // gives a minimum value of 1, so we need to convert the answer. + ArrayDesc.NumChannels = 4; // Only support 4 channel image + ArrayDesc.Flags = 0; // No flags required + ArrayDesc.Width = ImageDesc.width; + if (ImageDesc.type == UR_MEM_TYPE_IMAGE1D) { + ArrayDesc.Height = 0; + ArrayDesc.Depth = 0; + } else if (ImageDesc.type == UR_MEM_TYPE_IMAGE2D) { + ArrayDesc.Height = ImageDesc.height; + ArrayDesc.Depth = 0; + } else if (ImageDesc.type == UR_MEM_TYPE_IMAGE3D) { + ArrayDesc.Height = ImageDesc.height; + ArrayDesc.Depth = ImageDesc.depth; + } + + // We need to get PixelTypeSizeBytes for calculating the total image size + // later + switch (ImageFormat.channelType) { + + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: + ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT8; + PixelTypeSizeBytes = 1; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: + ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT8; + PixelTypeSizeBytes = 1; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: + ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT16; + PixelTypeSizeBytes = 2; + break; 
+ case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: + ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT16; + PixelTypeSizeBytes = 2; + break; + case UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: + ArrayDesc.Format = HIP_AD_FORMAT_HALF; + PixelTypeSizeBytes = 2; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: + ArrayDesc.Format = HIP_AD_FORMAT_UNSIGNED_INT32; + PixelTypeSizeBytes = 4; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: + ArrayDesc.Format = HIP_AD_FORMAT_SIGNED_INT32; + PixelTypeSizeBytes = 4; + break; + case UR_IMAGE_CHANNEL_TYPE_FLOAT: + ArrayDesc.Format = HIP_AD_FORMAT_FLOAT; + PixelTypeSizeBytes = 4; + break; + default: + // urMemImageCreate given unsupported image_channel_data_type + detail::ur::die("Bad image format given to ur_image_ constructor"); + } + } + + // Will allocate a new array on device if not already allocated + hipArray *getArray(const ur_device_handle_t Device) { + if (ur_result_t Err = + allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device); + Err != UR_RESULT_SUCCESS) { + throw Err; + } + return Arrays[Device->getIndex()]; + } - SurfaceMem(hipArray *Array, hipSurfaceObject_t Surf, ur_mem_type_t ImageType) - : Array{Array}, SurfObj{Surf}, ImageType{ImageType} {}; + // Will allocate a new surface on device if not already allocated + hipSurfaceObject_t getSurface(const ur_device_handle_t Device) { + if (ur_result_t Err = + allocateMemObjOnDeviceIfNeeded(OuterMemStruct, Device); + Err != UR_RESULT_SUCCESS) { + throw Err; + } + return SurfObjs[Device->getIndex()]; + } - hipArray *getArray() const noexcept { return Array; } + ur_mem_type_t getImageType() const noexcept { return ImageDesc.type; } - hipSurfaceObject_t getSurface() const noexcept { return SurfObj; } + ur_result_t clear() { + for (auto Array : Arrays) { + if (Array) { + UR_CHECK_ERROR(hipFreeArray(Array)); + } + } + for (auto Surf : SurfObjs) { + if (Surf != hipSurfaceObject_t{0}) { + UR_CHECK_ERROR(hipDestroySurfaceObject(Surf)); + } + } + return UR_RESULT_SUCCESS; + } - ur_mem_type_t 
getImageType() const noexcept { return ImageType; } + friend ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t, + const ur_device_handle_t); }; /// UR Mem mapping to HIP memory allocations, both data and texture/surface. /// \brief Represents non-SVM allocations on the HIP backend. /// Keeps tracks of all mapped regions used for Map/Unmap calls. /// Only one region can be active at the same time per allocation. +/// +/// The ur_mem_handle_t is responsible for memory allocation and migration +/// across devices in the same ur_context_handle_t. If a kernel writes to a +/// ur_mem_handle_t then it will write to LastEventWritingToMemObj. Then all +/// subsequent operations that want to read from the ur_mem_handle_t must wait +/// on the event referring to the last write. +/// +/// Since urMemBufferCreate/urMemImageCreate do not take a queue or device +/// object, only a ur_context_handle_t, at mem obj creation we don't know which +/// device we must make a native image/allocation on. Therefore no allocations +/// are made at urMemBufferCreate/urMemImageCreate. Instead device +/// images/allocations are made lazily. These allocations are made implicitly +/// with a call to getPtr/getArray which will allocate a new allocation/image on +/// device if need be. +/// +/// Memory migration between native allocations for devices in the same +/// ur_context_handle_t will occur at: +/// +/// 1. urEnqueueKernelLaunch +/// 2. urEnqueueMem(Buffer|Image)Read(Rect) +/// +/// Migrations will occur in both cases if the most recent version of data +/// is on a different device, marked by LastEventWritingToMemObj->getDevice(). +/// +/// Example trace: +/// ~~~~~~~~~~~~~~ +/// +/// =====> urContextCreate([device0, device1], ...) 
// associated with [q0, q1] +/// -> OUT: hContext +/// +/// =====> urMemBufferCreate(hContext,...); +/// -> No native allocations made +/// -> OUT: hBuffer +/// +/// =====> urEnqueueMemBufferWrite(q0, hBuffer,...); +/// -> Allocation made on q0 ie device0 +/// -> New allocation initialized with host data. +/// +/// =====> urKernelSetArgMemObj(hKernel0, hBuffer, ...); +/// -> ur_kernel_handle_t associated with a ur_program_handle_t, +/// which is in turn unique to a device. So we can set the kernel +/// arg with the ptr of the device specific allocation. +/// -> hKernel0->getProgram()->getDevice() == device0 +/// -> allocateMemObjOnDeviceIfNeeded(device0); +/// -> Native allocation already made on device0, continue. +/// +/// =====> urEnqueueKernelLaunch(q0, hKernel0, ...); +/// -> Suppose that hKernel0 writes to hBuffer. +/// -> Call hBuffer->setLastEventWritingToMemObj with return event +/// from this operation +/// -> Enqueue native kernel launch +/// +/// =====> urKernelSetArgMemObj(hKernel1, hBuffer, ...); +/// -> hKernel1->getProgram()->getDevice() == device1 +/// -> New allocation will be made on device1 when calling +/// getPtr(device1) +/// -> No native allocation on device1 +/// -> Make native allocation on device1 +/// +/// =====> urEnqueueKernelLaunch(q1, hKernel1, ...); +/// -> Suppose hKernel1 wants to read from hBuffer and not write. +/// -> migrateMemoryToDeviceIfNeeded(device1); +/// -> hBuffer->LastEventWritingToMemObj is not nullptr +/// -> Check if memory has been migrated to device1 since the +/// last write +/// -> Hasn't been migrated +/// -> Wait on LastEventWritingToMemObj. +/// -> Migrate memory from device0's native allocation to +/// device1's native allocation. 
+/// -> Enqueue native kernel launch +/// +/// =====> urEnqueueKernelLaunch(q0, hKernel0, ...); +/// -> migrateMemoryToDeviceIfNeeded(device0); +/// -> hBuffer->LastEventWritingToMemObj refers to an event +/// from q0 +/// -> Migration not necessary +/// -> Enqueue native kernel launch +/// struct ur_mem_handle_t_ { // TODO: Move as much shared data up as possible @@ -140,36 +383,76 @@ struct ur_mem_handle_t_ { /// Reference counting of the handler std::atomic_uint32_t RefCount; - enum class Type { Buffer, Surface } MemType; // Original mem flags passed ur_mem_flags_t MemFlags; + // If we make a ur_mem_handle_t_ from a native allocation, it can be useful to + // associate it with the device that holds the native allocation. + ur_device_handle_t DeviceWithNativeAllocation{nullptr}; + + // Has the memory been migrated to a device since the last write? + std::vector HaveMigratedToDeviceSinceLastWrite; + + // We should wait on this event prior to migrating memory across allocations + // in this ur_mem_handle_t_ + ur_event_handle_t LastEventWritingToMemObj{nullptr}; + + // Enumerates all possible types of accesses. + enum access_mode_t { unknown, read_write, read_only, write_only }; + + ur_mutex MemoryAllocationMutex; // A mutex for allocations + ur_mutex MemoryMigrationMutex; // A mutex for memory transfers + /// A UR Memory object represents either plain memory allocations ("Buffers" /// in OpenCL) or typed allocations ("Images" in OpenCL). /// In HIP their API handlers are different. Whereas "Buffers" are allocated /// as pointer-like structs, "Images" are stored in Textures or Surfaces. - /// This union allows implementation to use either from the same handler. + /// This variant allows implementation to use either from the same handler. 
std::variant Mem; - /// Constructs the UR MEM handler for a non-typed allocation ("buffer") - ur_mem_handle_t_(ur_context Ctxt, ur_mem Parent, ur_mem_flags_t MemFlags, - BufferMem::AllocMode Mode, hipDeviceptr_t Ptr, void *HostPtr, - size_t Size) - : Context{Ctxt}, RefCount{1}, MemType{Type::Buffer}, MemFlags{MemFlags}, - Mem{BufferMem{Parent, Mode, Ptr, HostPtr, Size}} { - if (isSubBuffer()) { - urMemRetain(std::get(Mem).Parent); - } else { - urContextRetain(Context); + /// Constructs the UR mem handler for a non-typed allocation ("buffer") + ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags, + BufferMem::AllocMode Mode, void *HostPtr, size_t Size) + : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), + Mem{std::in_place_type, Ctxt, this, Mode, HostPtr, Size} { + urContextRetain(Context); + }; + + // Subbuffer constructor + ur_mem_handle_t_(ur_mem Parent, size_t SubBufferOffset) + : Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags}, + HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(), + false), + Mem{BufferMem{std::get(Parent->Mem)}} { + auto &SubBuffer = std::get(Mem); + SubBuffer.Parent = Parent; + SubBuffer.OuterMemStruct = this; + if (SubBuffer.HostPtr) { + SubBuffer.HostPtr = + static_cast(SubBuffer.HostPtr) + SubBufferOffset; + } + for (auto &DevPtr : SubBuffer.Ptrs) { + if (DevPtr) { + DevPtr = static_cast(DevPtr) + SubBufferOffset; + } } + urMemRetain(Parent); }; - /// Constructs the UR allocation for an Image object - ur_mem_handle_t_(ur_context Ctxt, hipArray *Array, hipSurfaceObject_t Surf, - ur_mem_flags_t MemFlags, ur_mem_type_t ImageType, void *) - : Context{Ctxt}, RefCount{1}, MemType{Type::Surface}, MemFlags{MemFlags}, - Mem{SurfaceMem{Array, Surf, ImageType}} { + /// Constructs the UR mem handler for an Image object + ur_mem_handle_t_(ur_context Ctxt, ur_mem_flags_t MemFlags, + ur_image_format_t ImageFormat, ur_image_desc_t 
ImageDesc, + void *HostPtr) + : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), + Mem{std::in_place_type, + Ctxt, + this, + ImageFormat, + ImageDesc, + HostPtr} { urContextRetain(Context); } @@ -181,13 +464,24 @@ struct ur_mem_handle_t_ { urContextRelease(Context); } - bool isBuffer() const noexcept { return MemType == Type::Buffer; } + bool isBuffer() const noexcept { + return std::holds_alternative(Mem); + } bool isSubBuffer() const noexcept { return (isBuffer() && (std::get(Mem).Parent != nullptr)); } - bool isImage() const noexcept { return MemType == Type::Surface; } + bool isImage() const noexcept { + return std::holds_alternative(Mem); + } + + ur_result_t clear() { + if (isBuffer()) { + return std::get(Mem).clear(); + } + return std::get(Mem).clear(); + } ur_context getContext() const noexcept { return Context; } @@ -196,4 +490,19 @@ struct ur_mem_handle_t_ { uint32_t decrementReferenceCount() noexcept { return --RefCount; } uint32_t getReferenceCount() const noexcept { return RefCount; } + + void setLastEventWritingToMemObj(ur_event_handle_t NewEvent) { + assert(NewEvent && "Invalid event!"); + // This entry point should only ever be called when using multi device ctx + assert(Context->Devices.size() > 1); + if (LastEventWritingToMemObj != nullptr) { + urEventRelease(LastEventWritingToMemObj); + } + urEventRetain(NewEvent); + LastEventWritingToMemObj = NewEvent; + for (const auto &Device : Context->getDevices()) { + HaveMigratedToDeviceSinceLastWrite[Device->getIndex()] = + Device == NewEvent->getDevice(); + } + } }; diff --git a/source/adapters/hip/platform.cpp b/source/adapters/hip/platform.cpp index 5f35b55f1f..287f941c30 100644 --- a/source/adapters/hip/platform.cpp +++ b/source/adapters/hip/platform.cpp @@ -47,9 +47,6 @@ urPlatformGetInfo(ur_platform_handle_t, ur_platform_info_t propName, /// There is only one HIP platform, and contains all devices on the system. 
/// Triggers the HIP Driver initialization (hipInit) the first time, so this /// must be the first UR API called. -/// -/// However because multiple devices in a context is not currently supported, -/// place each device in a separate platform. UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { @@ -57,7 +54,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, try { static std::once_flag InitFlag; static uint32_t NumPlatforms = 1; - static std::vector PlatformIds; + static ur_platform_handle_t_ Platform; UR_ASSERT(phPlatforms || pNumPlatforms, UR_RESULT_ERROR_INVALID_VALUE); UR_ASSERT(!phPlatforms || NumEntries > 0, UR_RESULT_ERROR_INVALID_VALUE); @@ -79,22 +76,18 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, return; } try { - // make one platform per device - NumPlatforms = NumDevices; - PlatformIds.resize(NumDevices); - - for (int i = 0; i < NumDevices; ++i) { + for (auto i = 0u; i < static_cast(NumDevices); ++i) { hipDevice_t Device; UR_CHECK_ERROR(hipDeviceGet(&Device, i)); hipCtx_t Context; UR_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Context, Device)); - PlatformIds[i].Devices.emplace_back( - new ur_device_handle_t_{Device, Context, &PlatformIds[i]}); + Platform.Devices.emplace_back( + new ur_device_handle_t_{Device, Context, &Platform, i}); } // Setup EvBase { - ScopedContext Active(PlatformIds.front().Devices.front().get()); + ScopedContext Active(Platform.Devices.front().get()); hipEvent_t EvBase; UR_CHECK_ERROR(hipEventCreate(&EvBase)); UR_CHECK_ERROR(hipEventRecord(EvBase, 0)); @@ -103,17 +96,11 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } } catch (const std::bad_alloc &) { // Signal out-of-memory situation - for (int i = 0; i < NumDevices; ++i) { - PlatformIds[i].Devices.clear(); - } - PlatformIds.clear(); + Platform.Devices.clear(); Err = 
UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (ur_result_t CatchErr) { // Clear and rethrow to allow retry - for (int i = 0; i < NumDevices; ++i) { - PlatformIds[i].Devices.clear(); - } - PlatformIds.clear(); + Platform.Devices.clear(); Err = CatchErr; throw CatchErr; } catch (...) { @@ -128,9 +115,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } if (phPlatforms != nullptr) { - for (unsigned i = 0; i < std::min(NumEntries, NumPlatforms); ++i) { - phPlatforms[i] = &PlatformIds[i]; - } + *phPlatforms = &Platform; } return Result; diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 2c71c53208..0cf539602b 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -74,14 +74,6 @@ void getCoMgrBuildLog(const amd_comgr_data_set_t BuildDataSet, char *BuildLog, } // namespace #endif -ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Ctxt) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, Context{ - Ctxt} { - urContextRetain(Context); -} - -ur_program_handle_t_::~ur_program_handle_t_() { urContextRelease(Context); } - ur_result_t ur_program_handle_t_::setMetadata(const ur_program_metadata_t *Metadata, size_t Length) { @@ -135,8 +127,8 @@ ur_result_t ur_program_handle_t_::finalizeRelocatable() { std::string ISA = "amdgcn-amd-amdhsa--"; hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties( - &Props, Context->getDevice()->get()) == hipSuccess); + detail::ur::assertion(hipGetDeviceProperties(&Props, getDevice()->get()) == + hipSuccess); ISA += Props.gcnArchName; UR_CHECK_ERROR(amd_comgr_action_info_set_isa_name(Action, ISA.data())); @@ -222,18 +214,13 @@ ur_result_t getKernelNames(ur_program_handle_t) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -/// HIP will handle the PTX/HIPBIN binaries internally through hipModule_t -/// object. So, urProgramCreateWithIL and urProgramCreateWithBinary are -/// equivalent in terms of HIP adapter. 
See \ref urProgramCreateWithBinary. +/// A program must be specific to a device so this entry point is UNSUPPORTED UR_APIEXPORT ur_result_t UR_APICALL -urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, - size_t length, const ur_program_properties_t *pProperties, - ur_program_handle_t *phProgram) { - ur_device_handle_t hDevice = hContext->getDevice(); - const auto pBinary = reinterpret_cast(pIL); - - return urProgramCreateWithBinary(hContext, hDevice, length, pBinary, - pProperties, phProgram); +urProgramCreateWithIL(ur_context_handle_t, const void *, size_t, + const ur_program_properties_t *, ur_program_handle_t *) { + detail::ur::die("urProgramCreateWithIL not implemented for HIP adapter" + " please use urProgramCreateWithBinary instead"); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } /// HIP will handle the PTX/HIPBIN binaries internally through a call to @@ -268,7 +255,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hProgram->getContext()->getDevice()); + ScopedContext Active(hProgram->getDevice()); hProgram->buildProgram(pOptions); @@ -340,7 +327,7 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, case UR_PROGRAM_INFO_NUM_DEVICES: return ReturnValue(1u); case UR_PROGRAM_INFO_DEVICES: - return ReturnValue(&hProgram->Context->DeviceId, 1); + return ReturnValue(hProgram->getDevice(), 1); case UR_PROGRAM_INFO_SOURCE: return ReturnValue(hProgram->Binary); case UR_PROGRAM_INFO_BINARY_SIZES: @@ -380,7 +367,7 @@ urProgramRelease(ur_program_handle_t hProgram) { ur_result_t Result = UR_RESULT_ERROR_INVALID_PROGRAM; try { - ScopedContext Active(hProgram->getContext()->getDevice()); + ScopedContext Active(hProgram->getDevice()); auto HIPModule = hProgram->get(); if (HIPModule) { UR_CHECK_ERROR(hipModuleUnload(HIPModule)); @@ -422,13 +409,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const uint8_t *pBinary, 
const ur_program_properties_t *pProperties, ur_program_handle_t *phProgram) { UR_ASSERT(pBinary != nullptr && size != 0, UR_RESULT_ERROR_INVALID_BINARY); - UR_ASSERT(hContext->getDevice()->get() == hDevice->get(), + UR_ASSERT(std::find(hContext->getDevices().begin(), + hContext->getDevices().end(), + hDevice) != hContext->getDevices().end(), UR_RESULT_ERROR_INVALID_CONTEXT); ur_result_t Result = UR_RESULT_SUCCESS; std::unique_ptr RetProgram{ - new ur_program_handle_t_{hContext}}; + new ur_program_handle_t_{hContext, hDevice}}; // TODO: Set metadata here and use reqd_work_group_size information. // See urProgramCreateWithBinary in CUDA adapter. @@ -469,8 +458,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { // Check if device passed is the same the device bound to the context - UR_ASSERT(hDevice == hProgram->getContext()->getDevice(), - UR_RESULT_ERROR_INVALID_DEVICE); + UR_ASSERT(hDevice == hProgram->getDevice(), UR_RESULT_ERROR_INVALID_DEVICE); hipFunction_t Func; hipError_t Ret = hipModuleGetFunction(&Func, hProgram->get(), pFunctionName); diff --git a/source/adapters/hip/program.hpp b/source/adapters/hip/program.hpp index ff9b68fc92..4b4e5ec878 100644 --- a/source/adapters/hip/program.hpp +++ b/source/adapters/hip/program.hpp @@ -23,6 +23,7 @@ struct ur_program_handle_t_ { size_t BinarySizeInBytes; std::atomic_uint32_t RefCount; ur_context_handle_t Context; + ur_device_handle_t Device; std::string ExecutableCache; // Metadata @@ -34,8 +35,17 @@ struct ur_program_handle_t_ { std::string BuildOptions; ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; - ur_program_handle_t_(ur_context_handle_t Ctxt); - ~ur_program_handle_t_(); + ur_program_handle_t_(ur_context_handle_t Ctxt, ur_device_handle_t Device) + : Module{nullptr}, Binary{}, + BinarySizeInBytes{0}, RefCount{1}, Context{Ctxt}, Device{Device} { + 
urContextRetain(Context); + urDeviceRetain(Device); + } + + ~ur_program_handle_t_() { + urContextRelease(Context); + urDeviceRelease(Device); + } ur_result_t setMetadata(const ur_program_metadata_t *Metadata, size_t Length); @@ -44,6 +54,7 @@ struct ur_program_handle_t_ { ur_result_t buildProgram(const char *BuildOptions); ur_result_t finalizeRelocatable(); ur_context_handle_t getContext() const { return Context; }; + ur_device_handle_t getDevice() const { return Device; }; native_type get() const noexcept { return Module; }; diff --git a/source/adapters/hip/queue.cpp b/source/adapters/hip/queue.cpp index 910d7cf512..f01fc0e180 100644 --- a/source/adapters/hip/queue.cpp +++ b/source/adapters/hip/queue.cpp @@ -110,14 +110,13 @@ hipStream_t ur_queue_handle_t_::getNextTransferStream() { UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProps, ur_queue_handle_t *phQueue) { + UR_ASSERT(std::find(hContext->getDevices().begin(), + hContext->getDevices().end(), + hDevice) != hContext->getDevices().end(), + UR_RESULT_ERROR_INVALID_CONTEXT); try { std::unique_ptr QueueImpl{nullptr}; - if (hContext->getDevice() != hDevice) { - *phQueue = nullptr; - return UR_RESULT_ERROR_INVALID_DEVICE; - } - unsigned int Flags = 0; const bool IsOutOfOrder = @@ -198,7 +197,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { if (!hQueue->backendHasOwnership()) return UR_RESULT_SUCCESS; - ScopedContext Active(hQueue->getContext()->getDevice()); + ScopedContext Active(hQueue->getDevice()); hQueue->forEachStream([](hipStream_t S) { UR_CHECK_ERROR(hipStreamSynchronize(S)); @@ -219,7 +218,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { try { - ScopedContext Active(hQueue->getContext()->getDevice()); + ScopedContext Active(hQueue->getDevice()); hQueue->syncStreams([&Result](hipStream_t S) { UR_CHECK_ERROR(hipStreamSynchronize(S)); @@ -251,7 +250,7 
@@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - ScopedContext Active(hQueue->getContext()->getDevice()); + ScopedContext Active(hQueue->getDevice()); *phNativeQueue = reinterpret_cast(hQueue->getNextComputeStream()); return UR_RESULT_SUCCESS; @@ -291,7 +290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( new ur_queue_handle_t_{std::move(ComputeHIPStreams), std::move(TransferHIPStreams), hContext, - hContext->getDevice(), + hDevice, HIPFlags, Flags, /*backend_owns*/ pProperties->isNativeHandleOwned}; diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 7af7401f87..e63379d13b 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -66,11 +66,10 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, return umfPoolMallocHelper(hPool, ppMem, size, alignment); } -UR_APIEXPORT ur_result_t UR_APICALL USMFreeImpl(ur_context_handle_t hContext, - void *pMem) { +UR_APIEXPORT ur_result_t UR_APICALL +USMFreeImpl([[maybe_unused]] ur_context_handle_t hContext, void *pMem) { ur_result_t Result = UR_RESULT_SUCCESS; try { - ScopedContext Active(hContext->getDevice()); hipPointerAttribute_t hipPointerAttributeType; UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); unsigned int Type = hipPointerAttributeType.memoryType; @@ -98,12 +97,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, } } -ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_device_mem_flags_t *, - size_t Size, +ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t, + ur_device_handle_t Device, + ur_usm_device_mem_flags_t *, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Context->getDevice()); + 
ScopedContext Active(Device); UR_CHECK_ERROR(hipMalloc(ResultPtr, Size)); } catch (ur_result_t Err) { return Err; @@ -113,12 +112,13 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, - ur_device_handle_t, ur_usm_host_mem_flags_t *, +ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t, + ur_device_handle_t Device, + ur_usm_host_mem_flags_t *, ur_usm_device_mem_flags_t *, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Context->getDevice()); + ScopedContext Active(Device); UR_CHECK_ERROR(hipMallocManaged(ResultPtr, Size, hipMemAttachGlobal)); } catch (ur_result_t Err) { return Err; @@ -128,11 +128,11 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t Context, +ur_result_t USMHostAllocImpl(void **ResultPtr, + [[maybe_unused]] ur_context_handle_t Context, ur_usm_host_mem_flags_t *, size_t Size, [[maybe_unused]] uint32_t Alignment) { try { - ScopedContext Active(Context->getDevice()); UR_CHECK_ERROR(hipHostMalloc(ResultPtr, Size)); } catch (ur_result_t Err) { return Err; @@ -152,7 +152,6 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropValueSizeRet); try { - ScopedContext Active(hContext->getDevice()); switch (propName) { case UR_USM_ALLOC_INFO_TYPE: { unsigned int Value; @@ -190,9 +189,6 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, #endif return ReturnValue(UR_USM_TYPE_UNKNOWN); } - case UR_USM_ALLOC_INFO_BASE_PTR: - case UR_USM_ALLOC_INFO_SIZE: - return UR_RESULT_ERROR_INVALID_VALUE; case UR_USM_ALLOC_INFO_DEVICE: { // get device index associated with this pointer UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); @@ -222,6 +218,9 
@@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } return ReturnValue(Pool); } + case UR_USM_ALLOC_INFO_BASE_PTR: + case UR_USM_ALLOC_INFO_SIZE: + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -346,25 +345,26 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]) .second; - auto Device = Context->DeviceId; - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - DeviceMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]) - .second; - - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - SharedMemPool = - umf::poolMakeUnique( - {std::move(MemProvider)}, - this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]) - .second; - Context->addPool(this); + for (const auto &Device : Context->getDevices()) { + MemProvider = + umf::memoryProviderMakeUnique(Context, Device) + .second; + DeviceMemPool = + umf::poolMakeUnique( + {std::move(MemProvider)}, + this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]) + .second; + + MemProvider = + umf::memoryProviderMakeUnique(Context, Device) + .second; + SharedMemPool = + umf::poolMakeUnique( + {std::move(MemProvider)}, + this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]) + .second; + Context->addPool(this); + } } bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index b80c5aef5d..7203d5a238 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -22,8 +22,19 @@ endif() if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) message(STATUS "Download Level Zero loader and headers from github.com") + # Workaround 
warnings/errors for Level Zero build + set(CMAKE_CXX_FLAGS_BAK "${CMAKE_CXX_FLAGS}") + if (UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-truncation") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++98-compat-extra-semi") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-warning-option") + endif() + set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") - set(LEVEL_ZERO_LOADER_TAG v1.11.0) + set(LEVEL_ZERO_LOADER_TAG v1.15.1) # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 set(CMAKE_INCLUDE_CURRENT_DIR OFF) @@ -42,6 +53,9 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) FetchContent_MakeAvailable(level-zero-loader) FetchContent_GetProperties(level-zero-loader) + # Restore original flags + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_BAK}") + target_compile_options(ze_loader PRIVATE $<$,GNU;Clang;Intel;IntelLLVM>:-Wno-error> $<$:/WX- /UUNICODE> diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 1850083caa..d43ae07cdb 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -156,17 +156,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( - [[maybe_unused]] ur_adapter_handle_t - AdapterHandle, ///< [in] handle of the platform instance + ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. - [[maybe_unused]] int32_t - *Error ///< [out] pointer to an integer where the adapter specific - ///< error code will be stored. 
+ int32_t *Error ///< [out] pointer to an integer where the adapter specific + ///< error code will be stored. ) { - AdapterHandle = &Adapter; *Message = ErrorMessage; - Error = &ErrorAdapterNativeCode; + *Error = ErrorAdapterNativeCode; return ErrorMessageCode; } diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 94935ee59e..96935d470e 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -115,6 +115,9 @@ struct ur_context_handle_t_ : _ur_object { SharedReadOnlyMemProxyPools; umf::pool_unique_handle_t HostMemProxyPool; + // Map associating pools created with urUsmPoolCreate and internal pools + std::list UsmPoolHandles{}; + // We need to store all memory allocations in the context because there could // be kernels with indirect access. Kernels with indirect access start to // reference all existing memory allocations at the time when they are diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 0b0463cfb1..abdfd2e541 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -12,6 +12,7 @@ #include "ur_level_zero.hpp" #include #include +#include UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance @@ -88,6 +89,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( return UR_RESULT_SUCCESS; } +uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { + // Cache GlobalMemSize + Device->ZeGlobalMemSize.Compute = + [Device](struct ze_global_memsize &GlobalMemSize) { + for (const auto &ZeDeviceMemoryExtProperty : + Device->ZeDeviceMemoryProperties->second) { + GlobalMemSize.value += ZeDeviceMemoryExtProperty.physicalSize; + } + if (GlobalMemSize.value == 0) { + for (const auto &ZeDeviceMemoryProperty : + Device->ZeDeviceMemoryProperties->first) { + GlobalMemSize.value += ZeDeviceMemoryProperty.totalSize; + } + } + }; + 
return Device->ZeGlobalMemSize.operator->()->value; +} + UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve @@ -249,22 +268,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(uint32_t{64}); } case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: - return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + // if not optimized for 32-bit access, return total memory size. + // otherwise, return only maximum allocatable size. + if (Device->useOptimized32bitAccess() == 0) { + return ReturnValue(uint64_t{calculateGlobalMemSize(Device)}); + } else { + return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize}); + } case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: { - uint64_t GlobalMemSize = 0; // Support to read physicalSize depends on kernel, // so fallback into reading totalSize if physicalSize // is not available. - for (const auto &ZeDeviceMemoryExtProperty : - Device->ZeDeviceMemoryProperties->second) { - GlobalMemSize += ZeDeviceMemoryExtProperty.physicalSize; - } - if (GlobalMemSize == 0) { - for (const auto &ZeDeviceMemoryProperty : - Device->ZeDeviceMemoryProperties->first) { - GlobalMemSize += ZeDeviceMemoryProperty.totalSize; - } - } + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); return ReturnValue(uint64_t{GlobalMemSize}); } case UR_DEVICE_INFO_LOCAL_MEM_SIZE: @@ -339,8 +354,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE)); case UR_DEVICE_INFO_PARTITION_TYPE: { // For root-device there is no partitioning to report. 
- if (pSize && !Device->isSubDevice()) { - *pSize = 0; + if (Device->SubDeviceCreationProperty == std::nullopt || + !Device->isSubDevice()) { + if (pSize) + *pSize = 0; return UR_RESULT_SUCCESS; } @@ -351,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(cslice); } - return ReturnValue(Device->SubDeviceCreationProperty); + return ReturnValue(*Device->SubDeviceCreationProperty); } // Everything under here is not supported yet case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: @@ -637,6 +654,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( static_cast(ZE_RESULT_ERROR_UNINITIALIZED)); return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } + // Calculate the global memory size as the max limit that can be reported as + // "free" memory for the user to allocate. + uint64_t GlobalMemSize = calculateGlobalMemSize(Device); // Only report device memory which zeMemAllocDevice can allocate from. // Currently this is only the one enumerated with ordinal 0. uint64_t FreeMemory = 0; @@ -661,7 +681,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } } } - return ReturnValue(FreeMemory); + return ReturnValue(std::min(GlobalMemSize, FreeMemory)); } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { // If there are not any memory modules then return 0. @@ -903,6 +923,22 @@ ur_device_handle_t_::useImmediateCommandLists() { } } +int32_t ur_device_handle_t_::useOptimized32bitAccess() { + static const int32_t Optimize32bitAccessMode = [this] { + // If device is Intel(R) Data Center GPU Max, + // use default provided by L0 driver. + // TODO: Use IP versioning to select based on range of devices + if (this->isPVC()) + return -1; + const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS"); + if (!UrRet) + return 0; + return std::atoi(UrRet); + }(); + + return Optimize32bitAccessMode; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. 
@@ -1188,16 +1224,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); for (uint32_t I = 0; I < NumDevices; I++) { - Device->SubDevices[I]->SubDeviceCreationProperty = - Properties->pProperties[0]; - if (Properties->pProperties[0].type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { // In case the value is NEXT_PARTITIONABLE, we need to change it to the // chosen domain. This will always be NUMA since that's the only domain // supported by level zero. - Device->SubDevices[I]->SubDeviceCreationProperty.value.affinity_domain = - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; OutDevices[I] = Device->SubDevices[I]; // reusing the same pi_device needs to increment the reference count diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 35404c6525..3b91b70058 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,10 @@ enum EventsScope { LastCommandInBatchHostVisible }; +struct ze_global_memsize { + uint64_t value; +}; + struct ur_device_handle_t_ : _ur_object { ur_device_handle_t_(ze_device_handle_t Device, ur_platform_handle_t Plt, ur_device_handle_t ParentDevice = nullptr) @@ -112,7 +117,7 @@ struct ur_device_handle_t_ : _ur_object { // If this device is a subdevice, this variable contains the properties that // were used during its creation. - ur_device_partition_property_t SubDeviceCreationProperty; + std::optional SubDeviceCreationProperty; // PI platform to which this device belongs. 
// This field is only set at _ur_device_handle_t creation time, and cannot @@ -141,6 +146,22 @@ struct ur_device_handle_t_ : _ur_object { // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; + // Returns whether large allocations are being used + // or not to have a consistent behavior throughout + // the adapter between the creation of large allocations + // and the compilation of kernels into stateful and + // stateless modes. + // With stateful mode, kernels are compiled with + // pointer-arithmetic optimizations for optimized + // access of allocations smaller than 4GB. + // In stateless mode, such optimizations are not + // applied. + // Even if a GPU supports both modes, L0 driver may + // provide support for only one, like for Intel(R) + // Data Center GPU Max, for which L0 driver only + // supports stateless. + int32_t useOptimized32bitAccess(); + bool isSubDevice() { return RootDevice != nullptr; } // Is this a Data Center GPU Max series (aka PVC)? @@ -170,4 +191,5 @@ struct ur_device_handle_t_ : _ur_object { ZeDeviceMemoryAccessProperties; ZeCache> ZeDeviceCacheProperties; ZeCache> ZeDeviceIpVersionExt; + ZeCache ZeGlobalMemSize; }; diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index b979c8ab15..d8af1e674d 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -165,10 +165,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( // event signal because it is already guaranteed that previous commands // in this queue are completed when the signal is started. // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Length == 0. In those cases, we need + // to fall back directly to barrier to have correct timestamps.
See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // // TODO: this and other special handling of in-order queues to be // updated when/if Level Zero adds native support for in-order queues. // - if (Queue->isInOrderQueue() && InOrderBarrierBySignal) { + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { if (EventWaitList.Length) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, @@ -181,6 +187,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( (CmdList->first, Event->ZeEvent, EventWaitList.Length, EventWaitList.ZeEventList)); } + return UR_RESULT_SUCCESS; }; @@ -964,8 +971,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool HostVisible, ur_event_handle_t *RetEvent) { - bool ProfilingEnabled = - !Queue || (Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; + bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled(); if (auto CachedEvent = Context->getEventFromContextCache(HostVisible, ProfilingEnabled)) { diff --git a/source/adapters/level_zero/kernel.hpp b/source/adapters/level_zero/kernel.hpp index 64f6e4f939..4ef21ce18b 100644 --- a/source/adapters/level_zero/kernel.hpp +++ b/source/adapters/level_zero/kernel.hpp @@ -16,13 +16,15 @@ struct ur_kernel_handle_t_ : _ur_object { ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_program_handle_t Program) - : Program{Program}, ZeKernel{Kernel}, SubmissionsCount{0}, MemAllocs{} { + : Context{nullptr}, Program{Program}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } ur_kernel_handle_t_(ze_kernel_handle_t Kernel, bool OwnZeHandle, ur_context_handle_t Context) - : Context{Context}, ZeKernel{Kernel}, 
SubmissionsCount{0}, MemAllocs{} { + : Context{Context}, Program{nullptr}, ZeKernel{Kernel}, + SubmissionsCount{0}, MemAllocs{} { OwnNativeHandle = OwnZeHandle; } diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index aefa661dac..fa3ef18e47 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -2078,9 +2078,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, auto &Allocation = Allocations[Device]; // Sub-buffers don't maintain own allocations but rely on parent buffer. - if (isSubBuffer()) { - UR_CALL(SubBuffer.Parent->getZeHandle(ZeHandle, AccessMode, Device)); - ZeHandle += SubBuffer.Origin; + if (SubBuffer) { + UR_CALL(SubBuffer->Parent->getZeHandle(ZeHandle, AccessMode, Device)); + ZeHandle += SubBuffer->Origin; // Still store the allocation info in the PI sub-buffer for // getZeHandlePtr to work. At least zeKernelSetArgumentValue needs to // be given a pointer to the allocation handle rather than its value. @@ -2312,7 +2312,7 @@ ur_result_t _ur_buffer::free() { // Buffer constructor _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, char *HostPtr, bool ImportedHostPtr = false) - : ur_mem_handle_t_(Context), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context), Size(Size) { // We treat integrated devices (physical memory shared with the CPU) // differently from discrete devices (those with distinct memories). 
@@ -2347,7 +2347,7 @@ _ur_buffer::_ur_buffer(ur_context_handle_t Context, ur_device_handle_t Device, _ur_buffer::_ur_buffer(ur_context_handle_t Context, size_t Size, ur_device_handle_t Device, char *ZeMemHandle, bool OwnZeMemHandle) - : ur_mem_handle_t_(Context, Device), Size(Size), SubBuffer{nullptr, 0} { + : ur_mem_handle_t_(Context, Device), Size(Size) { // Device == nullptr means host allocation Allocations[Device].ZeHandle = ZeMemHandle; diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 54f9a84e6b..8efd5b136e 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,8 @@ struct ur_mem_handle_t_ : _ur_object { virtual ~ur_mem_handle_t_() = default; protected: - ur_mem_handle_t_(ur_context_handle_t Context) : UrContext{Context} {} + ur_mem_handle_t_(ur_context_handle_t Context) + : UrContext{Context}, UrDevice{nullptr} {} ur_mem_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device) : UrContext{Context}, UrDevice(Device) {} @@ -101,7 +103,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { // Sub-buffer constructor _ur_buffer(_ur_buffer *Parent, size_t Origin, size_t Size) : ur_mem_handle_t_(Parent->UrContext), - Size(Size), SubBuffer{Parent, Origin} {} + Size(Size), SubBuffer{{Parent, Origin}} {} // Interop-buffer constructor _ur_buffer(ur_context_handle_t Context, size_t Size, @@ -121,8 +123,7 @@ struct _ur_buffer final : ur_mem_handle_t_ { ur_device_handle_t Device = nullptr) override; bool isImage() const override { return false; } - - bool isSubBuffer() const { return SubBuffer.Parent != nullptr; } + bool isSubBuffer() const { return SubBuffer != std::nullopt; } // Frees all allocations made for the buffer. 
ur_result_t free(); @@ -174,10 +175,11 @@ struct _ur_buffer final : ur_mem_handle_t_ { size_t Size; size_t getAlignment() const; - struct { + struct SubBuffer_t { _ur_buffer *Parent; - size_t Origin; // only valid if Parent != nullptr - } SubBuffer; + size_t Origin; + }; + std::optional SubBuffer; }; struct _ur_image final : ur_mem_handle_t_ { diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index f7b9576189..86aa4ec745 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -10,11 +10,13 @@ #pragma once #include "common.hpp" +#include "ze_api.h" struct ur_device_handle_t_; struct ur_platform_handle_t_ : public _ur_platform { - ur_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver} {} + ur_platform_handle_t_(ze_driver_handle_t Driver) + : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. ur_result_t initialize(); diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 92a3c87aea..f118a5b9dd 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -148,9 +148,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) ? ZE_MODULE_FORMAT_IL_SPIRV : ZE_MODULE_FORMAT_NATIVE; + ZeModuleDesc.inputSize = hProgram->CodeLength; ZeModuleDesc.pInputModule = hProgram->Code.get(); - ZeModuleDesc.pBuildFlags = pOptions; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. 
+ std::string ZeBuildOptions{}; + if (pOptions) { + ZeBuildOptions += pOptions; + } + + if (phDevices[0]->useOptimized32bitAccess() == 0) { + ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required"; + } + + ZeModuleDesc.pBuildFlags = ZeBuildOptions.c_str(); ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; @@ -234,8 +249,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( // This produces better code because the driver can do cross-module // optimizations. Therefore, we just remember the compilation flags, so we // can use them later. - if (Options) + if (Options) { Program->BuildFlags = Options; + + // if large allocations are selected, then pass + // ze-opt-greater-than-4GB-buffer-required to disable + // stateful optimizations and be able to use larger than + // 4GB allocations on these kernels. + if (Context->Devices[0]->useOptimized32bitAccess() == 0) { + Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required"; + } + } Program->State = ur_program_handle_t_::Object; return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp old mode 100755 new mode 100644 index 994f595a5d..f07e0df675 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -219,7 +219,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( if (ImmCmdList == Queue->CommandListMap.end()) continue; - auto EventList = ImmCmdList->second.EventList; + const auto &EventList = ImmCmdList->second.EventList; for (auto It = EventList.crbegin(); It != EventList.crend(); It++) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, ((*It)->ZeEvent)); @@ -391,11 +391,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // At this point only the thread creating the queue will have associated // command-lists. Other threads have not accessed the queue yet. So we can // only warmup the initial thread's command-lists. 
- auto QueueGroup = Q->ComputeQueueGroupsByTID.get(); + const auto &QueueGroup = Q->ComputeQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(false, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); if (Q->useCopyEngine()) { - auto QueueGroup = Q->CopyQueueGroupsByTID.get(); + const auto &QueueGroup = Q->CopyQueueGroupsByTID.get(); UR_CALL(warmupQueueGroup(true, QueueGroup.UpperIndex - QueueGroup.LowerIndex + 1)); } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 9c90a999b3..88281925ce 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -424,7 +424,8 @@ struct ur_queue_handle_t_ : _ur_object { // checked. Otherwise, the OpenCommandList containing compute commands is // checked. bool hasOpenCommandList(bool IsCopy) const { - auto CommandBatch = (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; + const auto &CommandBatch = + (IsCopy) ? CopyCommandBatch : ComputeCommandBatch; return CommandBatch.OpenCommandList != CommandListMap.end(); } @@ -515,6 +516,11 @@ struct ur_queue_handle_t_ : _ur_object { // lists in the queue. 
ur_result_t insertStartBarrierIfDiscardEventsMode(ur_command_list_ptr_t &CmdList); + + // returns true if queue has profiling enabled + bool isProfilingEnabled() { + return ((this->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0); + } }; // This helper function creates a ur_event_handle_t and associate a diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index daec0408fb..d2dfc9b37d 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -178,15 +178,24 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, ZeDesc.flags = 0; ZeDesc.ordinal = 0; - ZeStruct RelaxedDesc; - if (Size > Device->ZeDeviceProperties->maxMemAllocSize) { - // Tell Level-Zero to accept Size > maxMemAllocSize + if (Device->useOptimized32bitAccess() == 0 && + (Size > Device->ZeDeviceProperties->maxMemAllocSize)) { + // Tell Level-Zero to accept Size > maxMemAllocSize if + // large allocations are used. + ZeStruct RelaxedDesc; RelaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE; ZeDesc.pNext = &RelaxedDesc; } - ZE2UR_CALL(zeMemAllocDevice, (Context->ZeContext, &ZeDesc, Size, Alignment, - Device->ZeDevice, ResultPtr)); + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeMemAllocDevice, (Context->ZeContext, &ZeDesc, Size, Alignment, + Device->ZeDevice, ResultPtr)); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -224,8 +233,15 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, ZeDevDesc.pNext = &RelaxedDesc; } - ZE2UR_CALL(zeMemAllocShared, (Context->ZeContext, &ZeDevDesc, &ZeHostDesc, - Size, Alignment, Device->ZeDevice, ResultPtr)); + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeMemAllocShared, (Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, + Alignment, Device->ZeDevice, ResultPtr)); + if (ZeResult 
!= ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -252,8 +268,15 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, // TODO: translate PI properties to Level Zero flags ZeStruct ZeHostDesc; ZeHostDesc.flags = 0; - ZE2UR_CALL(zeMemAllocHost, - (Context->ZeContext, &ZeHostDesc, Size, Alignment, ResultPtr)); + ze_result_t ZeResult = + ZE_CALL_NOCHECK(zeMemAllocHost, (Context->ZeContext, &ZeHostDesc, Size, + Alignment, ResultPtr)); + if (ZeResult != ZE_RESULT_SUCCESS) { + if (ZeResult == ZE_RESULT_ERROR_UNSUPPORTED_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + return ze2urResult(ZeResult); + } UR_ASSERT(Alignment == 0 || reinterpret_cast(*ResultPtr) % Alignment == 0, @@ -597,6 +620,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( ZE2UR_CALL(zeMemGetAddressRange, (Context->ZeContext, Ptr, nullptr, &Size)); return ReturnValue(Size); } + case UR_USM_ALLOC_INFO_POOL: { + auto UMFPool = umfPoolByPtr(Ptr); + if (!UMFPool) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + std::shared_lock ContextLock(Context->Mutex); + + auto SearchMatchingPool = + [](std::unordered_map + &PoolMap, + umf_memory_pool_handle_t UMFPool) { + for (auto &PoolPair : PoolMap) { + if (PoolPair.second.get() == UMFPool) { + return true; + } + } + return false; + }; + + for (auto &Pool : Context->UsmPoolHandles) { + if (SearchMatchingPool(Pool->DeviceMemPools, UMFPool)) { + return ReturnValue(Pool); + } + if (SearchMatchingPool(Pool->SharedMemPools, UMFPool)) { + return ReturnValue(Pool); + } + if (Pool->HostMemPool.get() == UMFPool) { + return ReturnValue(Pool); + } + } + + return UR_RESULT_ERROR_INVALID_VALUE; + } default: urPrint("urUSMGetMemAllocInfo: unsupported ParamName\n"); return UR_RESULT_ERROR_INVALID_VALUE; @@ -746,6 +803,7 @@ ur_result_t 
L0HostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) { + this->Context = Context; zeroInit = static_cast(PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK); @@ -829,6 +887,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( try { *Pool = reinterpret_cast( new ur_usm_pool_handle_t_(Context, PoolDesc)); + + std::shared_lock ContextLock(Context->Mutex); + Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); + } catch (const UsmAllocationException &Ex) { return Ex.getError(); } @@ -846,6 +908,8 @@ ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool ) { if (Pool->RefCount.decrementAndTest()) { + std::shared_lock ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); delete Pool; } return UR_RESULT_SUCCESS; @@ -859,13 +923,19 @@ ur_result_t urUSMPoolGetInfo( ///< property size_t *PropSizeRet ///< [out] size in bytes returned in pool property value ) { - std::ignore = Pool; - std::ignore = PropName; - std::ignore = PropSize; - std::ignore = PropValue; - std::ignore = PropSizeRet; - urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case UR_USM_POOL_INFO_REFERENCE_COUNT: { + return ReturnValue(Pool->RefCount.load()); + } + case UR_USM_POOL_INFO_CONTEXT: { + return ReturnValue(Pool->Context); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } } // If indirect access tracking is not enabled then this functions just performs diff --git a/source/adapters/level_zero/usm.hpp b/source/adapters/level_zero/usm.hpp index 01e215c578..958fca9354 100644 --- a/source/adapters/level_zero/usm.hpp +++ b/source/adapters/level_zero/usm.hpp @@ -29,6 +29,8 @@ struct ur_usm_pool_handle_t_ : _ur_object { 
SharedReadOnlyMemPools; umf::pool_unique_handle_t HostMemPool; + ur_context_handle_t Context{}; + ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc); }; diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index f93c648ab7..3432ce780e 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -60,7 +60,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_ASSERT(hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - switch (propName) { + switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: return ReturnValue(UR_DEVICE_TYPE_CPU); case UR_DEVICE_INFO_PARENT_DEVICE: diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index a4e91e3dc0..f016830d11 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -2917,7 +2917,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -2956,7 +2957,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle 
of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -2997,7 +2999,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -3050,7 +3053,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -3105,9 +3109,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< 
[in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -3144,9 +3150,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -3195,10 +3203,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in 
bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -3234,7 +3243,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -3278,7 +3288,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -3322,9 +3333,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + 
ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -3368,7 +3381,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -3445,7 +3459,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. 
@@ -3468,7 +3482,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( // if the driver has created a custom function, then call it instead of using the generic path auto pfnUSMFill = d_context.urDdiTable.Enqueue.pfnUSMFill; if (nullptr != pfnUSMFill) { - result = pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + result = pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } else { // generic implementation @@ -3487,9 +3501,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -3522,9 +3538,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched 
ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -3558,9 +3575,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -3588,7 +3606,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -3635,10 +3654,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. 
size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index f1d710ebb4..8ae1e77755 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -66,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, switch (propName) { case UR_ADAPTER_INFO_BACKEND: - return ReturnValue(UR_ADAPTER_BACKEND_CUDA); + return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); case UR_ADAPTER_INFO_REFERENCE_COUNT: return ReturnValue(adapter.RefCount.load()); default: diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 24d60e62f5..6830a28eec 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -25,77 +25,6 @@ cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { return CLFlags; } -ur_result_t ValidateBufferSize(ur_mem_handle_t Buffer, size_t Size, - size_t Origin) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); - if (Size + Origin > BufferSize) - return UR_RESULT_ERROR_INVALID_SIZE; - return UR_RESULT_SUCCESS; -} - -ur_result_t ValidateBufferRectSize(ur_mem_handle_t Buffer, - ur_rect_region_t Region, - ur_rect_offset_t Offset) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); - if (Offset.x >= BufferSize || Offset.y >= BufferSize || - Offset.z >= BufferSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - if ((Region.width + 
Offset.x) * (Region.height + Offset.y) * - (Region.depth + Offset.z) > - BufferSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t ValidateImageSize(ur_mem_handle_t Image, ur_rect_region_t Region, - ur_rect_offset_t Origin) { - size_t Width = 0; - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), - CL_IMAGE_WIDTH, sizeof(Width), &Width, - nullptr)); - if (Region.width + Origin.x > Width) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - size_t Height = 0; - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), - CL_IMAGE_HEIGHT, sizeof(Height), &Height, - nullptr)); - - // CL returns a height and depth of 0 for images that don't have those - // dimensions, but regions for enqueue operations must set these to 1, so we - // need to make this adjustment to validate. - if (Height == 0) - Height = 1; - - if (Region.height + Origin.y > Height) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - size_t Depth = 0; - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), - CL_IMAGE_DEPTH, sizeof(Depth), &Depth, - nullptr)); - if (Depth == 0) - Depth = 1; - - if (Region.depth + Origin.z > Depth) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, @@ -141,16 +70,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto ClErr = clEnqueueReadBuffer( + CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - 
UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -158,16 +84,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto ClErr = clEnqueueWriteBuffer( + CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( @@ -182,18 +105,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueReadBufferRect( + CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( @@ -208,18 +128,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, 
region.height, region.depth}; - auto ClErr = clEnqueueWriteBufferRect( + CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( @@ -228,18 +145,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto ClErr = clEnqueueCopyBuffer( + CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferSrc, size, srcOffset)); - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferDst, size, dstOffset)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -253,19 +166,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueCopyBufferRect( + CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), SrcOrigin, DstOrigin, Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - 
cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferSrc, region, srcOrigin)); - UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferDst, region, dstOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( @@ -276,16 +185,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - auto ClErr = (clEnqueueFillBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, - numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - if (ClErr != CL_SUCCESS) { - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } - return mapCLErrorToUR(ClErr); + CL_RETURN_ON_FAILURE( + clEnqueueFillBuffer(cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, + patternSize, offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + return UR_RESULT_SUCCESS; } auto NumValues = size / sizeof(uint64_t); @@ -303,7 +209,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; - UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, offset, size)); CL_RETURN_ON_FAILURE(ClErr); } @@ -338,17 +243,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueReadImage( + CL_RETURN_ON_FAILURE(clEnqueueReadImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingRead, Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == 
CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -359,17 +261,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueWriteImage( + CL_RETURN_ON_FAILURE(clEnqueueWriteImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingWrite, Origin, Region, rowPitch, slicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( @@ -382,18 +281,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - auto ClErr = clEnqueueCopyImage( + CL_RETURN_ON_FAILURE(clEnqueueCopyImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), SrcOrigin, DstOrigin, Region, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_adapter::cast(phEvent))); - if (ClErr == CL_INVALID_VALUE) { - UR_RETURN_ON_FAILURE(ValidateImageSize(hImageSrc, region, srcOrigin)); - UR_RETURN_ON_FAILURE(ValidateImageSize(hImageDst, region, dstOrigin)); - } - return mapCLErrorToUR(ClErr); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( @@ -410,9 +305,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent), &Err); - if (Err == CL_INVALID_VALUE) { - 
UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); - } return mapCLErrorToUR(Err); } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 5d46aec2ef..0d64f23d13 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -10,6 +10,75 @@ #include "common.hpp" +inline cl_mem_alloc_flags_intel +hostDescToClFlags(const ur_usm_host_desc_t &desc) { + cl_mem_alloc_flags_intel allocFlags = 0; + if (desc.flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + allocFlags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + return allocFlags; +} + +inline cl_mem_alloc_flags_intel +deviceDescToClFlags(const ur_usm_device_desc_t &desc) { + cl_mem_alloc_flags_intel allocFlags = 0; + if (desc.flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + allocFlags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (desc.flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + allocFlags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + return allocFlags; +} + +ur_result_t +usmDescToCLMemProperties(const ur_base_desc_t *Desc, + std::vector &Properties) { + cl_mem_alloc_flags_intel AllocFlags = 0; + const auto *Next = Desc; + do { + switch (Next->stype) { + case UR_STRUCTURE_TYPE_USM_HOST_DESC: { + auto HostDesc = reinterpret_cast(Next); + if (UR_USM_HOST_MEM_FLAGS_MASK & HostDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + AllocFlags |= hostDescToClFlags(*HostDesc); + break; + } + case UR_STRUCTURE_TYPE_USM_DEVICE_DESC: { + auto DeviceDesc = reinterpret_cast(Next); + if (UR_USM_HOST_MEM_FLAGS_MASK & DeviceDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + AllocFlags |= deviceDescToClFlags(*DeviceDesc); + break; + } + case UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC: { + auto LocationDesc = + reinterpret_cast(Next); + Properties.push_back(CL_MEM_ALLOC_BUFFER_LOCATION_INTEL); + // CL bitfields are cl_ulong + Properties.push_back(static_cast(LocationDesc->location)); + break; + } + default: + return 
UR_RESULT_ERROR_INVALID_VALUE; + } + + Next = Next->pNext ? static_cast(Next->pNext) + : nullptr; + } while (Next); + + if (AllocFlags) { + Properties.push_back(CL_MEM_ALLOC_FLAGS_INTEL); + Properties.push_back(AllocFlags); + } + Properties.push_back(0); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -17,23 +86,10 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - cl_mem_properties_intel Properties[3]; - - if (pUSMDesc && pUSMDesc->pNext && - static_cast(pUSMDesc->pNext)->stype == - UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const auto *HostDesc = - static_cast(pUSMDesc->pNext); - - if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; - } - Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; - Properties[1] = Flags; - Properties[2] = 0; - } else { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -47,7 +103,9 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, Properties, size, Alignment, &ClResult); + Ptr = FuncPtr(CLContext, + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -71,25 +129,10 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? 
pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - cl_mem_properties_intel Properties[3]; - if (pUSMDesc && pUSMDesc->pNext && - static_cast(pUSMDesc->pNext)->stype == - UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const auto *HostDesc = - static_cast(pUSMDesc->pNext); - - if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; - } - if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { - Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; - } - Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; - Properties[1] = Flags; - Properties[2] = 0; - } else { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -104,8 +147,8 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - cl_adapter::cast(Properties), size, - Alignment, &ClResult); + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -129,35 +172,10 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, void *Ptr = nullptr; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; - cl_mem_alloc_flags_intel Flags = 0; - const auto *NextStruct = - (pUSMDesc ? 
static_cast(pUSMDesc->pNext) - : nullptr); - while (NextStruct) { - if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { - const auto *HostDesc = - reinterpret_cast(NextStruct); - if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; - } - } else if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { - const auto *DevDesc = - reinterpret_cast(NextStruct); - if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { - Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; - } - if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { - Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; - } - } - NextStruct = static_cast(NextStruct->pNext); - } - - cl_mem_properties_intel Properties[3] = {CL_MEM_ALLOC_FLAGS_INTEL, Flags, 0}; - - // Passing a flags value of 0 doesn't work, so truncate the properties - if (Flags == 0) { - Properties[0] = 0; + std::vector AllocProperties; + if (pUSMDesc && pUSMDesc->pNext) { + UR_RETURN_ON_FAILURE(usmDescToCLMemProperties( + static_cast(pUSMDesc->pNext), AllocProperties)); } // First we need to look up the function pointer @@ -172,8 +190,8 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&ClResult)); + AllocProperties.empty() ? 
nullptr : AllocProperties.data(), + size, Alignment, cl_adapter::cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index d33a3aaf51..402b64d638 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -3325,7 +3325,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -3367,7 +3368,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -3412,7 +3414,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the 
buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -3479,7 +3482,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -3549,9 +3553,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -3590,9 +3596,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect 
__urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -3646,10 +3654,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -3693,7 +3702,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, 
///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -3741,7 +3751,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -3789,9 +3800,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -3837,7 +3850,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle 
of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -3920,7 +3934,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -3945,14 +3959,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( } ur_enqueue_usm_fill_params_t params = { - &hQueue, &ptr, &patternSize, + &hQueue, &pMem, &patternSize, &pPattern, &size, &numEventsInWaitList, &phEventWaitList, &phEvent}; uint64_t instance = context.notify_begin(UR_FUNCTION_ENQUEUE_USM_FILL, "urEnqueueUSMFill", &params); ur_result_t result = - pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); context.notify_end(UR_FUNCTION_ENQUEUE_USM_FILL, "urEnqueueUSMFill", @@ -3966,9 +3980,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to
be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4004,9 +4020,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4043,9 +4060,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -4074,7 +4092,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL 
urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -4124,10 +4143,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. 
diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index a307bb37de..72e225028c 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -94,7 +94,7 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRetain( ur_result_t result = pfnAdapterRetain(hAdapter); if (context.enableLeakChecking && result == UR_RESULT_SUCCESS) { - refCountContext.decrementRefCount(hAdapter, true); + refCountContext.incrementRefCount(hAdapter, true); } return result; @@ -4084,7 +4084,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -4126,6 +4127,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4146,7 +4152,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool 
blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4190,6 +4197,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4210,7 +4222,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -4304,6 +4317,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = bounds(hBuffer, bufferOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4325,7 +4343,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of 
the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -4423,6 +4442,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = bounds(hBuffer, bufferOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4443,9 +4467,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -4486,6 +4512,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hBufferSrc, srcOffset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hBufferDst, dstOffset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < 
numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4505,9 +4541,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -4593,6 +4631,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = bounds(hBufferSrc, srcOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hBufferDst, dstOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4614,10 +4662,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, 
///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4676,6 +4725,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4696,7 +4750,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -4747,6 +4802,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = boundsImage(hImage, origin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4767,7 +4827,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t 
UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -4819,6 +4880,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = boundsImage(hImage, origin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4838,9 +4904,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -4891,6 +4959,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( return UR_RESULT_ERROR_INVALID_SIZE; } + if (auto boundsError = boundsImage(hImageSrc, srcOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = boundsImage(hImageDst, dstOrigin, region); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if 
(phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -4911,7 +4989,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -4959,6 +5038,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hBuffer, offset, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -5039,7 +5123,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. 
@@ -5068,7 +5152,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (NULL == ptr) { + if (NULL == pMem) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -5100,6 +5184,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hQueue, pMem, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -5110,7 +5199,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( } ur_result_t result = - pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); return result; @@ -5121,9 +5210,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -5165,6 +5256,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hQueue, pDst, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto 
boundsError = bounds(hQueue, pSrc, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -5184,9 +5285,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -5229,6 +5331,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hQueue, pMem, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -5248,9 +5355,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, 
///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -5278,6 +5386,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( if (size == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (auto boundsError = bounds(hQueue, pMem, 0, size); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } } ur_result_t result = pfnUSMAdvise(hQueue, pMem, size, advice, phEvent); @@ -5289,7 +5402,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -5370,6 +5484,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hQueue, pMem, 0, pitch * height); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { @@ -5391,10 +5510,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. 
size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. @@ -5456,6 +5578,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + if (auto boundsError = bounds(hQueue, pDst, 0, dstPitch * height); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + + if (auto boundsError = bounds(hQueue, pSrc, 0, srcPitch * height); + boundsError != UR_RESULT_SUCCESS) { + return boundsError; + } + if (phEventWaitList != NULL && numEventsInWaitList > 0) { for (uint32_t i = 0; i < numEventsInWaitList; ++i) { if (phEventWaitList[i] == NULL) { diff --git a/source/loader/layers/validation/ur_validation_layer.cpp b/source/loader/layers/validation/ur_validation_layer.cpp index 5cd3f8c13a..3e040fcc50 100644 --- a/source/loader/layers/validation/ur_validation_layer.cpp +++ b/source/loader/layers/validation/ur_validation_layer.cpp @@ -11,6 +11,8 @@ */ #include "ur_validation_layer.hpp" +#include <cassert> + namespace ur_validation_layer { context_t context; @@ -20,4 +22,136 @@ context_t::context_t() : logger(logger::create_logger("validation")) {} /////////////////////////////////////////////////////////////////////////////// context_t::~context_t() {} +// Some adapters don't support all the queries yet, we should be lenient and +// just not attempt to validate in those cases to preserve functionality.
+// The expression passed as `result` is evaluated exactly once. +#define RETURN_ON_FAILURE(result) \ + if (const ur_result_t urResult = (result); \ + urResult == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION || \ + urResult == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { \ + return UR_RESULT_SUCCESS; \ + } else if (urResult != UR_RESULT_SUCCESS) { \ + context.logger.error("Unexpected non-success result code from {}", \ + #result); \ + assert(0); \ + return urResult; \ + } + +// Checks that the byte range [offset, offset + size) lies within the buffer. +ur_result_t bounds(ur_mem_handle_t buffer, size_t offset, size_t size) { + auto pfnMemGetInfo = context.urDdiTable.Mem.pfnGetInfo; + + size_t bufferSize = 0; + RETURN_ON_FAILURE(pfnMemGetInfo(buffer, UR_MEM_INFO_SIZE, + sizeof(bufferSize), &bufferSize, nullptr)); + + // Written without computing size + offset, which could wrap around. + if (offset > bufferSize || size > bufferSize - offset) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +// Checks a rectangular access against the buffer size, using the product of +// the per-dimension (region + origin) extents as the required size. +ur_result_t bounds(ur_mem_handle_t buffer, ur_rect_offset_t offset, + ur_rect_region_t region) { + auto pfnMemGetInfo = context.urDdiTable.Mem.pfnGetInfo; + + size_t bufferSize = 0; + RETURN_ON_FAILURE(pfnMemGetInfo(buffer, UR_MEM_INFO_SIZE, + sizeof(bufferSize), &bufferSize, nullptr)); + + if (offset.x >= bufferSize || offset.y >= bufferSize || + offset.z >= bufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((region.width + offset.x) * (region.height + offset.y) * + (region.depth + offset.z) > + bufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +// Checks that [offset, offset + size) fits in the USM allocation size reported +// for ptr; pointers whose USM type is unknown are not validated. +ur_result_t bounds(ur_queue_handle_t queue, const void *ptr, size_t offset, + size_t size) { + auto pfnQueueGetInfo = context.urDdiTable.Queue.pfnGetInfo; + auto pfnUSMGetMemAllocInfo = context.urDdiTable.USM.pfnGetMemAllocInfo; + + ur_context_handle_t urContext = nullptr; + RETURN_ON_FAILURE(pfnQueueGetInfo(queue, UR_QUEUE_INFO_CONTEXT, + sizeof(ur_context_handle_t), &urContext, + nullptr)); + ur_usm_type_t usmType = UR_USM_TYPE_UNKNOWN; + RETURN_ON_FAILURE( + pfnUSMGetMemAllocInfo(urContext, ptr, UR_USM_ALLOC_INFO_TYPE, + sizeof(usmType), &usmType, nullptr)); + + // We can't reliably get size info about pointers that didn't come from the + // USM alloc
entry points. + if (usmType == UR_USM_TYPE_UNKNOWN) { + return UR_RESULT_SUCCESS; + } + + size_t allocSize = 0; + RETURN_ON_FAILURE( + pfnUSMGetMemAllocInfo(urContext, ptr, UR_USM_ALLOC_INFO_SIZE, + sizeof(allocSize), &allocSize, nullptr)); + + if (offset > allocSize || size > allocSize - offset) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +// Checks that origin + region stays within the image in every dimension. +ur_result_t boundsImage(ur_mem_handle_t image, ur_rect_offset_t origin, + ur_rect_region_t region) { + auto pfnMemImageGetInfo = context.urDdiTable.Mem.pfnImageGetInfo; + + size_t width = 0; + RETURN_ON_FAILURE(pfnMemImageGetInfo(image, UR_IMAGE_INFO_WIDTH, + sizeof(width), &width, nullptr)); + if (origin.x > width || region.width > width - origin.x) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t height = 0; + RETURN_ON_FAILURE(pfnMemImageGetInfo(image, UR_IMAGE_INFO_HEIGHT, + sizeof(height), &height, nullptr)); + + // Some adapters return a height and depth of 0 for images that don't have + // those dimensions, but regions for enqueue operations must set these to + // 1, so we need to make this adjustment to properly validate.
+ if (height == 0) { + height = 1; + } + + if (origin.y > height || region.height > height - origin.y) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t depth = 0; + RETURN_ON_FAILURE(pfnMemImageGetInfo(image, UR_IMAGE_INFO_DEPTH, + sizeof(depth), &depth, nullptr)); + if (depth == 0) { + depth = 1; + } + + if (origin.z > depth || region.depth > depth - origin.z) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +#undef RETURN_ON_FAILURE + } // namespace ur_validation_layer diff --git a/source/loader/layers/validation/ur_validation_layer.hpp b/source/loader/layers/validation/ur_validation_layer.hpp index e41c621dc8..d29b64230e 100644 --- a/source/loader/layers/validation/ur_validation_layer.hpp +++ b/source/loader/layers/validation/ur_validation_layer.hpp @@ -44,6 +44,17 @@ class __urdlllocal context_t : public proxy_layer_context_t { const std::string nameLeakChecking = "UR_LAYER_LEAK_CHECKING"; }; +ur_result_t bounds(ur_mem_handle_t buffer, size_t offset, size_t size); + +ur_result_t bounds(ur_mem_handle_t buffer, ur_rect_offset_t offset, + ur_rect_region_t region); + +ur_result_t bounds(ur_queue_handle_t queue, const void *ptr, size_t offset, + size_t size); + +ur_result_t boundsImage(ur_mem_handle_t image, ur_rect_offset_t origin, + ur_rect_region_t region); + extern context_t context; } // namespace ur_validation_layer diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 9327f349c5..6d3dda30f0 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -65,6 +65,9 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( break; } adapterIndex++; + if (adapterIndex == NumEntries) { + break; + } } } @@ -349,14 +352,6 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativePlatform = reinterpret_cast( - ur_native_factory.getInstance(*phNativePlatform, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - 
} - return result; } @@ -670,14 +665,6 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeDevice = reinterpret_cast( - ur_native_factory.getInstance(*phNativeDevice, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -696,17 +683,13 @@ __urdlllocal ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // extract platform's function pointer table auto dditable = - reinterpret_cast(hNativeDevice)->dditable; + reinterpret_cast(hPlatform)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Device.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeDevice = - reinterpret_cast(hNativeDevice)->handle; - // convert loader handle to platform handle hPlatform = reinterpret_cast(hPlatform)->handle; @@ -913,14 +896,6 @@ __urdlllocal ur_result_t UR_APICALL urContextGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeContext = reinterpret_cast( - ur_native_factory.getInstance(*phNativeContext, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -941,17 +916,13 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle( // extract platform's function pointer table auto dditable = - reinterpret_cast(hNativeContext)->dditable; + reinterpret_cast(*phDevices)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Context.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeContext = - reinterpret_cast(hNativeContext)->handle; - // convert loader handles to platform handles auto phDevicesLocal = std::vector(numDevices); for (size_t i = 0; i < numDevices; ++i) { @@ -1204,14 
+1175,6 @@ __urdlllocal ur_result_t UR_APICALL urMemGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeMem = reinterpret_cast( - ur_native_factory.getInstance(*phNativeMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1229,17 +1192,13 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeMem)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnBufferCreateWithNativeHandle = dditable->ur.Mem.pfnBufferCreateWithNativeHandle; if (nullptr == pfnBufferCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeMem = reinterpret_cast(hNativeMem)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -1279,17 +1238,13 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeMem)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnImageCreateWithNativeHandle = dditable->ur.Mem.pfnImageCreateWithNativeHandle; if (nullptr == pfnImageCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeMem = reinterpret_cast(hNativeMem)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -1525,14 +1480,6 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeSampler = reinterpret_cast( - ur_native_factory.getInstance(*phNativeSampler, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; 
- } - return result; } @@ -1550,18 +1497,13 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeSampler)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Sampler.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeSampler = - reinterpret_cast(hNativeSampler)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -2601,14 +2543,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeProgram = reinterpret_cast( - ur_native_factory.getInstance(*phNativeProgram, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2626,18 +2560,13 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeProgram)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Program.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeProgram = - reinterpret_cast(hNativeProgram)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3085,14 +3014,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeKernel = reinterpret_cast( - ur_native_factory.getInstance(*phNativeKernel, dditable)); - } catch (std::bad_alloc &) { - 
result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3112,18 +3033,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeKernel)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Kernel.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeKernel = - reinterpret_cast(hNativeKernel)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3297,14 +3213,6 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeQueue = reinterpret_cast( - ur_native_factory.getInstance(*phNativeQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3323,17 +3231,13 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeQueue)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Queue.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeQueue = reinterpret_cast(hNativeQueue)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3570,14 +3474,6 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( return result; } - try { - // convert platform handle to loader handle - *phNativeEvent = reinterpret_cast( - ur_native_factory.getInstance(*phNativeEvent, dditable)); - } catch 
(std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3595,17 +3491,13 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( ur_result_t result = UR_RESULT_SUCCESS; // extract platform's function pointer table - auto dditable = - reinterpret_cast(hNativeEvent)->dditable; + auto dditable = reinterpret_cast(hContext)->dditable; auto pfnCreateWithNativeHandle = dditable->ur.Event.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) { return UR_RESULT_ERROR_UNINITIALIZED; } - // convert loader handle to platform handle - hNativeEvent = reinterpret_cast(hNativeEvent)->handle; - // convert loader handle to platform handle hContext = reinterpret_cast(hContext)->handle; @@ -3849,7 +3741,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// @brief Intercept function for urEnqueueMemBufferRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -3913,7 +3806,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// @brief Intercept function for urEnqueueMemBufferWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -3979,7 +3873,8 @@ __urdlllocal ur_result_t UR_APICALL 
urEnqueueMemBufferWrite( /// @brief Intercept function for urEnqueueMemBufferReadRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -4056,7 +3951,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// @brief Intercept function for urEnqueueMemBufferWriteRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -4135,9 +4031,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from 
size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -4202,9 +4100,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemBufferCopyRect __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer ur_rect_region_t @@ -4280,10 +4180,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// @brief Intercept function for urEnqueueMemBufferFill __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t 
* @@ -4344,7 +4245,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// @brief Intercept function for urEnqueueMemImageRead __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -4413,7 +4315,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// @brief Intercept function for urEnqueueMemImageWrite __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -4482,9 +4385,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueMemImageCopy __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -4556,7 +4461,8 @@ __urdlllocal 
ur_result_t UR_APICALL urEnqueueMemImageCopy( /// @brief Intercept function for urEnqueueMemBufferMap __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -4683,7 +4589,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// @brief Intercept function for urEnqueueUSMFill __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. 
@@ -4723,7 +4629,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( // forward to device-platform result = - pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitListLocal.data(), phEvent); if (UR_RESULT_SUCCESS != result) { @@ -4748,9 +4654,11 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4806,9 +4714,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMPrefetch __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4864,9 
+4773,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueUSMAdvise __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -4908,7 +4818,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// @brief Intercept function for urEnqueueUSMFill2D __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -4977,10 +4888,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. 
size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index de9e029536..4b7525d92f 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2121,6 +2121,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -2177,6 +2178,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -2236,6 +2238,7 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -4833,7 +4836,8 @@ ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -4894,7 +4898,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferRead( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4967,7 +4972,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -5052,7 +5058,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] 
handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -5125,9 +5132,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -5191,9 +5200,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer 
ur_rect_region_t @@ -5266,10 +5277,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -5329,7 +5341,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -5396,7 +5409,8 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -5458,9 +5472,11 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -5532,7 +5548,8 @@ ur_result_t UR_APICALL urEnqueueMemImageCopy( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -5625,7 +5642,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hQueue` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == ptr` +/// + `NULL == pMem` /// + `NULL == pPattern` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -5644,7 +5661,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the 
size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -5667,7 +5684,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnUSMFill(hQueue, ptr, patternSize, pPattern, size, + return pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } catch (...) { return exceptionToResult(std::current_exception()); @@ -5701,9 +5718,11 @@ ur_result_t UR_APICALL urEnqueueUSMFill( ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -5757,9 +5776,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ 
-5810,9 +5830,10 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -5861,7 +5882,8 @@ ur_result_t UR_APICALL urEnqueueUSMAdvise( /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -5926,10 +5948,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. 
size_t width, ///< [in] the width in bytes of each row to be copied. diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index 0437d719ba..da5ef0d81f 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -106,6 +106,7 @@ class ur_shared_mutex { // nop. class ur_mutex { std::mutex Mutex; + friend class ur_lock; public: void lock() { @@ -121,6 +122,17 @@ class ur_mutex { } }; +class ur_lock { + std::unique_lock Lock; + +public: + explicit ur_lock(ur_mutex &Mutex) { + if (!SingleThreadMode) { + Lock = std::unique_lock(Mutex.Mutex); + } + } +}; + /// SpinLock is a synchronization primitive, that uses atomic variable and /// causes thread trying acquire lock wait in loop while repeatedly check if /// the lock is available. diff --git a/source/ur_api.cpp b/source/ur_api.cpp index ca1f82019c..eeca6c0c95 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -1811,6 +1811,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_host_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -1861,6 +1862,7 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - Any flags/hints passed through pUSMDesc only affect the single /// allocation. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -1914,6 +1916,7 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// allocation. /// - See also ::ur_usm_host_desc_t. /// - See also ::ur_usm_device_desc_t. +/// - See also ::ur_usm_alloc_location_desc_t. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -4094,7 +4097,8 @@ ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object size_t size, ///< [in] size in bytes of data being read @@ -4147,7 +4151,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferRead( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -4212,7 +4217,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region @@ -4287,7 +4293,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] 
handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer @@ -4350,9 +4357,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the src buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into size_t size, ///< [in] size in bytes of data being copied @@ -4407,9 +4416,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBufferSrc, ///< [in] handle of the source buffer object - ur_mem_handle_t hBufferDst, ///< [in] handle of the dest buffer object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer 
ur_rect_region_t @@ -4472,10 +4483,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object - const void *pPattern, ///< [in] pointer to the fill pattern - size_t patternSize, ///< [in] size in bytes of the pattern - size_t offset, ///< [in] offset into the buffer + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4526,7 +4538,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t origin, ///< [in] defines the (x,y,z) offset in pixels in the 1D, 2D, or 3D image @@ -4585,7 +4598,8 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImage, ///< [in] handle of the image object + ur_mem_handle_t + hImage, ///< [in][bounds(origin, region)] handle of the image object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t @@ -4638,9 +4652,11 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hImageSrc, ///< [in] handle of the src image object - ur_mem_handle_t hImageDst, ///< [in] handle of the dest image object + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hImageSrc, ///< [in][bounds(srcOrigin, region)] handle of the src image object + ur_mem_handle_t + hImageDst, ///< [in][bounds(dstOrigin, region)] handle of the dest image object ur_rect_offset_t srcOrigin, ///< [in] defines the (x,y,z) offset in pixels in the source 1D, 2D, or 3D ///< image @@ -4704,7 +4720,8 @@ ur_result_t UR_APICALL urEnqueueMemImageCopy( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t mapFlags, ///< [in] flags for read, write, readwrite mapping size_t offset, ///< [in] offset in bytes of the buffer region being mapped @@ -4782,7 +4799,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hQueue` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == ptr` +/// + `NULL == pMem` /// + `NULL == pPattern` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -4801,7 +4818,7 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, ///< [in] handle of the queue object - void *ptr, ///< [in] pointer to USM memory object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object size_t patternSize, ///< [in] the 
size in bytes of the pattern. Must be a power of 2 and less ///< than or equal to width. @@ -4851,9 +4868,11 @@ ur_result_t UR_APICALL urEnqueueUSMFill( ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, ///< [in] handle of the queue object bool blocking, ///< [in] blocking or non-blocking copy - void *pDst, ///< [in] pointer to the destination USM memory object - const void *pSrc, ///< [in] pointer to the source USM memory object - size_t size, ///< [in] size in bytes to be copied + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4900,9 +4919,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] size in bytes to be fetched + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t * @@ -4946,9 +4966,10 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueUSMAdvise( - ur_queue_handle_t hQueue, ///< [in] handle of the queue object - const void *pMem, ///< [in] pointer to the USM memory object - size_t size, ///< [in] 
size in bytes to be advised + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const void + *pMem, ///< [in][bounds(0, size)] pointer to the USM memory object + size_t size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t advice, ///< [in] USM memory advice ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular @@ -4991,7 +5012,8 @@ ur_result_t UR_APICALL urEnqueueUSMAdvise( /// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. - void *pMem, ///< [in] pointer to memory to be filled. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. size_t pitch, ///< [in] the total width of the destination memory including padding. size_t @@ -5049,10 +5071,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. bool blocking, ///< [in] indicates if this operation should block the host. - void *pDst, ///< [in] pointer to memory where data will be copied. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. size_t dstPitch, ///< [in] the total width of the source memory including padding. - const void *pSrc, ///< [in] pointer to memory to be copied. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. size_t srcPitch, ///< [in] the total width of the source memory including padding. size_t width, ///< [in] the width in bytes of each row to be copied. 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a9fdf2ba37..335fae5e1d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,7 +14,6 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) enable_testing() -add_subdirectory(python) add_subdirectory(loader) add_subdirectory(adapters) add_subdirectory(conformance) diff --git a/test/adapters/hip/test_context.cpp b/test/adapters/hip/test_context.cpp index 90c28b842f..c58dfc5af7 100644 --- a/test/adapters/hip/test_context.cpp +++ b/test/adapters/hip/test_context.cpp @@ -28,7 +28,9 @@ TEST_P(urHipContextTest, ActiveContexts) { hipCtx_t hipContext = nullptr; ASSERT_SUCCESS_HIP(hipCtxGetCurrent(&hipContext)); ASSERT_NE(hipContext, nullptr); - ASSERT_EQ(hipContext, context->getDevice()->getNativeContext()); + if (context->getDevices().size() == 1) { + ASSERT_EQ(hipContext, context->getDevices()[0]->getNativeContext()); + } ASSERT_SUCCESS(urQueueRelease(queue)); ASSERT_SUCCESS(urContextRelease(context)); @@ -60,7 +62,9 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { // check that the first context is now the active HIP context ASSERT_SUCCESS_HIP(hipCtxGetCurrent(¤t)); - ASSERT_EQ(current, context1->getDevice()->getNativeContext()); + if (context1->getDevices().size() == 1) { + ASSERT_EQ(current, context1->getDevices()[0]->getNativeContext()); + } ASSERT_SUCCESS(urQueueRelease(queue)); @@ -87,7 +91,9 @@ TEST_P(urHipContextTest, ActiveContextsThreads) { // check that the second context is now the active HIP context ASSERT_SUCCESS_HIP(hipCtxGetCurrent(¤t)); - ASSERT_EQ(current, context2->getDevice()->getNativeContext()); + if (context2->getDevices().size() == 1) { + ASSERT_EQ(current, context2->getDevices()[0]->getNativeContext()); + } ASSERT_SUCCESS(urQueueRelease(queue)); }); diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index a4c2e8cf94..df80c02681 100644 --- a/test/conformance/CMakeLists.txt +++ 
b/test/conformance/CMakeLists.txt @@ -4,6 +4,8 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception set(UR_CONFORMANCE_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +option(UR_TEST_DEVICES_COUNT "Count of devices on which CTS will be run" 1) +option(UR_TEST_PLATFORMS_COUNT "Count of platforms on which CTS will be run" 1) function(add_test_adapter name adapter) set(TEST_TARGET_NAME test-${name}) @@ -12,7 +14,7 @@ function(add_test_adapter name adapter) add_test(NAME ${TEST_NAME} COMMAND ${CMAKE_COMMAND} -D TEST_FILE=${Python3_EXECUTABLE} - -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${CMAKE_BINARY_DIR}/bin/${TEST_TARGET_NAME}" + -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${CMAKE_BINARY_DIR}/bin/${TEST_TARGET_NAME} --test_devices_count=${UR_TEST_DEVICES_COUNT} --test_platforms_count=${UR_TEST_PLATFORMS_COUNT}" -D MODE=stdout -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}_${adapter}.match -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake diff --git a/test/conformance/README.md b/test/conformance/README.md index db90fc759b..e895a5299d 100644 --- a/test/conformance/README.md +++ b/test/conformance/README.md @@ -8,4 +8,15 @@ In the future, when all bugs are fixed, and the tests pass, this solution will no longer be necessary. When you fix any test, the match file must be updated Empty match files indicate that there are no failing tests -in a particular group for the corresponding adapter. \ No newline at end of file +in a particular group for the corresponding adapter. + +## How to set test device/platform name or limit the test devices/platforms count + +To limit how many devices/platforms you want to run the CTS on, +use CMake option UR_TEST_DEVICES_COUNT or +UR_TEST_PLATFORMS_COUNT. If you want to run the tests on +all available devices/platforms, set 0. The default value is 1. +If you run binaries for the tests, you can use the parameter +`--platforms_count=COUNT` or `--devices_count=COUNT`. 
+To set test device/platform name you want to run the CTS on, use +parameter `--platform=NAME` or `--device=NAME`. \ No newline at end of file diff --git a/test/conformance/context/context_adapter_hip.match b/test/conformance/context/context_adapter_hip.match index 129b8d392c..82d8d71397 100644 --- a/test/conformance/context/context_adapter_hip.match +++ b/test/conformance/context/context_adapter_hip.match @@ -1 +1,2 @@ urContextCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ +urContextGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}} diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py index ce3ca00a20..55ab134b07 100644 --- a/test/conformance/cts_exe.py +++ b/test/conformance/cts_exe.py @@ -20,9 +20,13 @@ parser = ArgumentParser() parser.add_argument("--test_command", help="Ctest test case") + parser.add_argument("--test_devices_count", type=str, help="Number of devices on which tests will be run") + parser.add_argument("--test_platforms_count", type=str, help="Number of platforms on which tests will be run") args = parser.parse_args() - result = subprocess.Popen([args.test_command, '--gtest_brief=1'], stdout = subprocess.PIPE, text = True) # nosec B603 + result = subprocess.Popen([args.test_command, '--gtest_brief=1', f'--devices_count={args.test_devices_count}', + f'--platforms_count={args.test_platforms_count}'], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) # nosec B603 pat = re.compile(r'\[( )*FAILED( )*\]') output_list = [] diff --git a/test/conformance/kernel/kernel_adapter_level_zero.match b/test/conformance/kernel/kernel_adapter_level_zero.match index 75b58b6d48..2668b6821a 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero.match +++ b/test/conformance/kernel/kernel_adapter_level_zero.match @@ -11,6 +11,15 @@ urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runt urKernelSetArgPointerTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
urKernelSetArgPointerTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -Segmentation fault +urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentIndex/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetSpecializationConstantsTest.InvalidSizeCount/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/kernel/kernel_adapter_opencl.match b/test/conformance/kernel/kernel_adapter_opencl.match index 9a890011d8..799225be19 100644 --- a/test/conformance/kernel/kernel_adapter_opencl.match +++ b/test/conformance/kernel/kernel_adapter_opencl.match @@ -1,7 +1,3 @@ 
-urKernelSetArgSamplerTest.Success/Intel_R__OpenCL___{{.*}}_ -urKernelSetArgSamplerTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}}_ -urKernelSetArgSamplerTest.InvalidNullHandleArgValue/Intel_R__OpenCL___{{.*}}_ -urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/Intel_R__OpenCL___{{.*}}_ urKernelSetArgValueTest.InvalidKernelArgumentSize/Intel_R__OpenCL___{{.*}}_ urKernelSetSpecializationConstantsTest.Success/Intel_R__OpenCL___{{.*}}_ urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/Intel_R__OpenCL___{{.*}}_ diff --git a/test/conformance/kernel/urKernelSetArgPointer.cpp b/test/conformance/kernel/urKernelSetArgPointer.cpp index 50396eb2ed..11d26778c5 100644 --- a/test/conformance/kernel/urKernelSetArgPointer.cpp +++ b/test/conformance/kernel/urKernelSetArgPointer.cpp @@ -15,7 +15,7 @@ struct urKernelSetArgPointerTest : uur::urKernelExecutionTest { if (allocation) { ASSERT_SUCCESS(urUSMFree(context, allocation)); } - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); } void ValidateAllocation(void *pointer) { diff --git a/test/conformance/kernel/urKernelSetArgSampler.cpp b/test/conformance/kernel/urKernelSetArgSampler.cpp index 814b79a153..37cb3401f2 100644 --- a/test/conformance/kernel/urKernelSetArgSampler.cpp +++ b/test/conformance/kernel/urKernelSetArgSampler.cpp @@ -5,8 +5,10 @@ #include -struct urKernelSetArgSamplerTest : uur::urKernelTest { +struct urKernelSetArgSamplerTest : uur::urBaseKernelTest { void SetUp() { + program_name = "image_copy"; + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::SetUp()); // Images and samplers are not available on AMD ur_platform_backend_t backend; ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, @@ -14,9 +16,7 @@ struct urKernelSetArgSamplerTest : uur::urKernelTest { if (backend == UR_PLATFORM_BACKEND_HIP) { GTEST_SKIP() << "Sampler are not supported on hip."; } - - program_name = "image_copy"; - 
UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + Build(); ur_sampler_desc_t sampler_desc = { UR_STRUCTURE_TYPE_SAMPLER_DESC, /* sType */ nullptr, /* pNext */ @@ -31,7 +31,7 @@ struct urKernelSetArgSamplerTest : uur::urKernelTest { if (sampler) { ASSERT_SUCCESS(urSamplerRelease(sampler)); } - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::TearDown()); } ur_sampler_handle_t sampler = nullptr; diff --git a/test/conformance/kernel/urKernelSetSpecializationConstants.cpp b/test/conformance/kernel/urKernelSetSpecializationConstants.cpp index 9b2bce7208..665a20de4a 100644 --- a/test/conformance/kernel/urKernelSetSpecializationConstants.cpp +++ b/test/conformance/kernel/urKernelSetSpecializationConstants.cpp @@ -5,10 +5,10 @@ #include -struct urKernelSetSpecializationConstantsTest : uur::urKernelExecutionTest { +struct urKernelSetSpecializationConstantsTest : uur::urBaseKernelExecutionTest { void SetUp() override { program_name = "spec_constant"; - UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelExecutionTest::SetUp()); bool supports_kernel_spec_constant = false; ASSERT_SUCCESS(urDeviceGetInfo( device, UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS, @@ -18,6 +18,7 @@ struct urKernelSetSpecializationConstantsTest : uur::urKernelExecutionTest { GTEST_SKIP() << "Device does not support setting kernel spec constants."; } + Build(); } uint32_t spec_value = 42; diff --git a/test/conformance/memory/memory_adapter_hip.match b/test/conformance/memory/memory_adapter_hip.match index a4ae7d4f8a..02760dcb8a 100644 --- a/test/conformance/memory/memory_adapter_hip.match +++ b/test/conformance/memory/memory_adapter_hip.match @@ -1,5 +1,7 @@ -urMemBufferCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ -{{OPT}}urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}_ -{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_IMAGE_INFO_ROW_PITCH 
-{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}Segmentation fault +{{OPT}}urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemGetInfoTest.InvalidNullPointerParamValue/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} +{{OPT}}urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index e76b84692c..6c917914ed 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -3,6 +3,7 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include @@ -41,6 +42,23 @@ std::ostream &operator<<(std::ostream &out, return out; } +std::ostream &operator<<(std::ostream &out, const ur_device_handle_t &device) { + size_t size; + urDeviceGetInfo(device, UR_DEVICE_INFO_NAME, 0, nullptr, &size); + std::vector name(size); + urDeviceGetInfo(device, UR_DEVICE_INFO_NAME, size, name.data(), nullptr); + out << name.data(); + return out; +} + +std::ostream &operator<<(std::ostream &out, + const std::vector &devices) { + for (auto device : devices) { + out << "\n * \"" << device << "\""; + } + return out; +} + uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) : platform_options{parsePlatformOptions(argc, argv)} { instance = this; @@ -100,14 +118,16 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) } if (platform_options.platform_name.empty()) { - if (platforms.size() == 1) { + + if (platforms.size() == 1 || platform_options.platforms_count == 1) { platform = platforms[0]; } else { std::stringstream ss_error; ss_error << "Select a 
single platform from below using the " "--platform=NAME " "command-line option:" - << platforms; + << platforms << std::endl + << "or set --platforms_count=1."; error = ss_error.str(); return; } @@ -136,7 +156,8 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) << "\" not found. Select a single platform from below " "using the " "--platform=NAME command-line options:" - << platforms; + << platforms << std::endl + << "or set --platforms_count=1."; error = ss_error.str(); return; } @@ -177,6 +198,10 @@ PlatformEnvironment::parsePlatformOptions(int argc, char **argv) { arg, "--platform=", sizeof("--platform=") - 1) == 0) { options.platform_name = std::string(&arg[std::strlen("--platform=")]); + } else if (std::strncmp(arg, "--platforms_count=", + sizeof("--platforms_count=") - 1) == 0) { + options.platforms_count = std::strtoul( + &arg[std::strlen("--platforms_count=")], nullptr, 10); } } @@ -192,10 +217,31 @@ PlatformEnvironment::parsePlatformOptions(int argc, char **argv) { return options; } +DevicesEnvironment::DeviceOptions +DevicesEnvironment::parseDeviceOptions(int argc, char **argv) { + DeviceOptions options; + for (int argi = 1; argi < argc; ++argi) { + const char *arg = argv[argi]; + if (!(std::strcmp(arg, "-h") && std::strcmp(arg, "--help"))) { + // TODO - print help + break; + } else if (std::strncmp(arg, "--device=", sizeof("--device=") - 1) == + 0) { + options.device_name = std::string(&arg[std::strlen("--device=")]); + } else if (std::strncmp(arg, "--devices_count=", + sizeof("--devices_count=") - 1) == 0) { + options.devices_count = std::strtoul( + &arg[std::strlen("--devices_count=")], nullptr, 10); + } + } + return options; +} + DevicesEnvironment *DevicesEnvironment::instance = nullptr; DevicesEnvironment::DevicesEnvironment(int argc, char **argv) - : PlatformEnvironment(argc, argv) { + : PlatformEnvironment(argc, argv), + device_options(parseDeviceOptions(argc, argv)) { instance = this; if (!error.empty()) { return; @@ -209,11 
+255,64 @@ DevicesEnvironment::DevicesEnvironment(int argc, char **argv) error = "Could not find any devices associated with the platform"; return; } - devices.resize(count); - if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), - nullptr)) { - error = "urDeviceGet() failed to get devices."; - return; + + // Get the argument (devices_count) to limit test devices count. + // In case, the devices_count is "0", the variable count will not be changed. + // The CTS will run on all devices. + if (device_options.device_name.empty()) { + if (device_options.devices_count > + (std::numeric_limits::max)()) { + error = "Invalid devices_count argument"; + return; + } else if (device_options.devices_count > 0) { + count = (std::min)( + count, static_cast(device_options.devices_count)); + } + devices.resize(count); + if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), + nullptr)) { + error = "urDeviceGet() failed to get devices."; + return; + } + } else { + devices.resize(count); + if (urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, devices.data(), + nullptr)) { + error = "urDeviceGet() failed to get devices."; + return; + } + for (u_long i = 0; i < count; i++) { + size_t size; + if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, 0, nullptr, + &size)) { + error = "urDeviceGetInfo() failed"; + return; + } + std::vector device_name(size); + if (urDeviceGetInfo(devices[i], UR_DEVICE_INFO_NAME, size, + device_name.data(), nullptr)) { + error = "urDeviceGetInfo() failed"; + return; + } + if (device_options.device_name == device_name.data()) { + device = devices[i]; + devices.clear(); + devices.resize(1); + devices[0] = device; + break; + } + } + if (!device) { + std::stringstream ss_error; + ss_error << "Device \"" << device_options.device_name + << "\" not found. 
Select a single device from below " + "using the " + "--device=NAME command-line options:" + << devices << std::endl + << "or set --devices_count=COUNT."; + error = ss_error.str(); + return; + } } } diff --git a/test/conformance/testing/include/uur/environment.h b/test/conformance/testing/include/uur/environment.h index 5cc6756364..551be76e17 100644 --- a/test/conformance/testing/include/uur/environment.h +++ b/test/conformance/testing/include/uur/environment.h @@ -17,6 +17,7 @@ struct PlatformEnvironment : ::testing::Environment { struct PlatformOptions { std::string platform_name; + unsigned long platforms_count; }; PlatformEnvironment(int argc, char **argv); @@ -36,17 +37,26 @@ struct PlatformEnvironment : ::testing::Environment { struct DevicesEnvironment : PlatformEnvironment { + struct DeviceOptions { + std::string device_name; + unsigned long devices_count; + }; + DevicesEnvironment(int argc, char **argv); virtual ~DevicesEnvironment() override = default; virtual void SetUp() override; virtual void TearDown() override; + DeviceOptions parseDeviceOptions(int argc, char **argv); + inline const std::vector &GetDevices() const { return devices; } + DeviceOptions device_options; std::vector devices; + ur_device_handle_t device = nullptr; static DevicesEnvironment *instance; }; diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index 681db73f05..2ede84d135 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1089,14 +1089,17 @@ template struct urProgramTestWithParam : urContextTestWithParam { ur_program_handle_t program = nullptr; }; -struct urKernelTest : urProgramTest { +struct urBaseKernelTest : urProgramTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTest::SetUp()); - ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); auto kernel_names = uur::KernelsEnvironment::instance->GetEntryPointNames(program_name); 
kernel_name = kernel_names[0]; ASSERT_FALSE(kernel_name.empty()); + } + + void Build() { + ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); ASSERT_SUCCESS(urKernelCreate(program, kernel_name.data(), &kernel)); } @@ -1111,15 +1114,26 @@ struct urKernelTest : urProgramTest { ur_kernel_handle_t kernel = nullptr; }; -template struct urKernelTestWithParam : urProgramTestWithParam { +struct urKernelTest : urBaseKernelTest { + void SetUp() override { + urBaseKernelTest::SetUp(); + Build(); + } +}; + +template +struct urBaseKernelTestWithParam : urProgramTestWithParam { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urProgramTestWithParam::SetUp()); - ASSERT_SUCCESS(urProgramBuild(this->context, this->program, nullptr)); auto kernel_names = uur::KernelsEnvironment::instance->GetEntryPointNames( this->program_name); kernel_name = kernel_names[0]; ASSERT_FALSE(kernel_name.empty()); + } + + void Build() { + ASSERT_SUCCESS(urProgramBuild(this->context, this->program, nullptr)); ASSERT_SUCCESS( urKernelCreate(this->program, kernel_name.data(), &kernel)); } @@ -1135,16 +1149,23 @@ template struct urKernelTestWithParam : urProgramTestWithParam { ur_kernel_handle_t kernel = nullptr; }; -struct urKernelExecutionTest : urKernelTest { +template struct urKernelTestWithParam : urBaseKernelTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTestWithParam::SetUp()); + urBaseKernelTestWithParam::Build(); + } +}; + +struct urBaseKernelExecutionTest : urBaseKernelTest { void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::SetUp()); } void TearDown() override { for (auto &buffer : buffer_args) { ASSERT_SUCCESS(urMemRelease(buffer)); } - UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelTest::TearDown()); } // Adds a kernel arg representing a sycl buffer constructed with a 1D range. 
@@ -1233,6 +1254,13 @@ struct urKernelExecutionTest : urKernelTest { uint32_t current_arg_index = 0; }; +struct urKernelExecutionTest : urBaseKernelExecutionTest { + void SetUp() { + UUR_RETURN_ON_FATAL_FAILURE(urBaseKernelExecutionTest::SetUp()); + Build(); + } +}; + template struct GlobalVar { std::string name; T value; diff --git a/test/conformance/usm/usm_adapter_cuda.match b/test/conformance/usm/usm_adapter_cuda.match index e2ba6b6f63..15b68f5c6c 100644 --- a/test/conformance/usm/usm_adapter_cuda.match +++ b/test/conformance/usm/usm_adapter_cuda.match @@ -1,45 +1,7 @@ -urUSMDeviceAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullPtrResult/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_SIZE -{{OPT}}urUSMAllocInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_POOL -{{OPT}}urUSMHostAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
-{{OPT}}urUSMHostAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMPoolCreateTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urUSMPoolCreateTest.SuccessWithFlag/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPoolDesc/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidNullPointerPool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolCreateTest.InvalidEnumerationFlags/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_CONTEXT -{{OPT}}urUSMPoolGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -{{OPT}}urUSMPoolGetInfoTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidEnumerationProperty/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeZero/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidSizeTooSmall/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropValue/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolDestroyTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMPoolRetainTest.InvalidNullHandlePool/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMSharedAllocTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
-{{OPT}}urUSMSharedAllocTest.SuccessWithDescriptors/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.SuccessWithMultipleAdvices/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleContext/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullHandleDevice/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMSharedAllocTest.InvalidNullPtrMem/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled {{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index 9e275d805e..c036fa785c 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,36 +1,2 @@ -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMFreeTest.SuccessDeviceAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessHostAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMFreeTest.SuccessSharedAlloc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_TYPE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_BASE_PTR -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_SIZE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_DEVICE -urUSMAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidNullPointerMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidEnumeration/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMGetMemAllocInfoTest.InvalidValuePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_CONTEXT -urUSMPoolGetInfoTestWithInfoParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidSizeTooSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urUSMPoolRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match index b9aa3f3bdf..16211ba8e7 100644 --- a/test/conformance/usm/usm_adapter_opencl.match +++ b/test/conformance/usm/usm_adapter_opencl.match @@ -1,6 +1,5 @@ urUSMDeviceAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolDisabled urUSMDeviceAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMDeviceAllocTest.InvalidNullHandleDevice/Intel_R__OpenCL___{{.*}}___UsePoolEnabled 
urUSMDeviceAllocTest.InvalidNullPtrResult/Intel_R__OpenCL___{{.*}}___UsePoolEnabled @@ -9,7 +8,6 @@ urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/Intel_R__OpenCL___{{.*}}___UseP urUSMAllocInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_USM_ALLOC_INFO_POOL urUSMHostAllocTest.Success/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/Intel_R__OpenCL___{{.*}}___UsePoolDisabled urUSMHostAllocTest.InvalidNullHandleContext/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidNullPtrMem/Intel_R__OpenCL___{{.*}}___UsePoolEnabled urUSMHostAllocTest.InvalidUSMSize/Intel_R__OpenCL___{{.*}}___UsePoolEnabled diff --git a/test/python/CMakeLists.txt b/test/python/CMakeLists.txt deleted file mode 100644 index 396d5722b8..0000000000 --- a/test/python/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -function(add_python_test name) - set(TEST_NAME python-${name}) - add_test(NAME ${TEST_NAME} - COMMAND ${Python3_EXECUTABLE} -B -m pytest ${CMAKE_CURRENT_SOURCE_DIR}/${name}.py - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_tests_properties(${TEST_NAME} PROPERTIES LABELS "python") - # python uses LD_LIBRARY_PATH (PATH on Windows) to search for dynamic libraries, - # so set it to the location where it can find the loader. 
- if(UNIX) - set_property(TEST ${TEST_NAME} PROPERTY - ENVIRONMENT "LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - else() - set_property(TEST ${TEST_NAME} PROPERTY - ENVIRONMENT_MODIFICATION "PATH=cmake_list_prepend:${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") - endif() - # this is for importing the include/ur.py module in other python files - set_property(TEST ${TEST_NAME} APPEND PROPERTY - ENVIRONMENT "PYTHONPATH=${PROJECT_SOURCE_DIR}" "UR_ADAPTERS_FORCE_LOAD=\"$\"") -endfunction() - -add_python_test(basic) diff --git a/test/python/basic.py b/test/python/basic.py deleted file mode 100755 index 0d33d235db..0000000000 --- a/test/python/basic.py +++ /dev/null @@ -1,19 +0,0 @@ -#! /usr/bin/env python3 -""" - Copyright (C) 2022 Intel Corporation - - Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. - See LICENSE.TXT - SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -""" - -import pytest -import sys -import os - -import include.ur as ur - -def test_ddi(): - ddi = ur.UR_DDI(ur.ur_api_version_v.CURRENT) - assert True