diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 9edbd459d7..41329949f7 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -288,6 +288,13 @@ jobs: - name: Install prerequisites run: python3 -m pip install -r third_party/requirements.txt + - name: Install doxygen + run: | + $WorkingDir = $PWD.Path + Invoke-WebRequest -Uri https://github.com/doxygen/doxygen/releases/download/Release_1_9_8/doxygen-1.9.8.windows.x64.bin.zip -OutFile "$WorkingDir\doxygen.zip" + Expand-Archive -Path "$WorkingDir\doxygen.zip" + Add-Content $env:GITHUB_PATH "$WorkingDir\doxygen" + - name: Configure CMake run: > cmake diff --git a/.github/workflows/e2e_nightly.yml b/.github/workflows/e2e_nightly.yml new file mode 100644 index 0000000000..8dbb5d3e74 --- /dev/null +++ b/.github/workflows/e2e_nightly.yml @@ -0,0 +1,115 @@ +name: E2E Nightly + +on: + schedule: + # Run every day at 23:00 UTC + - cron: '0 23 * * *' + +jobs: + e2e-build-hw: + name: Build SYCL, UR, run E2E + strategy: + matrix: + adapter: [ + {name: CUDA} + ] + build_type: [Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: ${{matrix.adapter.name}} + + steps: + # Workspace on self-hosted runners is not cleaned automatically. + # We have to delete the files created outside of using actions. 
+ - name: Cleanup self-hosted workspace + if: always() + run: | + ls -la ./ + rm -rf ./* || true + + - name: Checkout UR + uses: actions/checkout@v4 + with: + ref: adapters + path: ur-repo + + - name: Checkout SYCL + uses: actions/checkout@v4 + with: + repository: intel/llvm + ref: sycl + path: sycl-repo + + - name: Install pip packages + working-directory: ${{github.workspace}}/ur-repo + run: pip install -r third_party/requirements.txt + + - name: Configure CMake UR + working-directory: ${{github.workspace}}/ur-repo + run: > + cmake + -B build + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUR_ENABLE_TRACING=ON + -DUR_DEVELOPER_MODE=ON + -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON + + - name: Build UR + run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib + cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc) + + - name: Set env vars & pre setup + run: | + echo "SYCL_PREFER_UR=1" >> $GITHUB_ENV + echo "CUDA_LIB_PATH=/usr/local/cuda/lib64/stubs" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV + source /opt/intel/oneapi/setvars.sh + sycl-ls + + - name: Configure SYCL + run: > + python3 sycl-repo/buildbot/configure.py + -t ${{matrix.build_type}} + -o ${{github.workspace}}/sycl_build + --cmake-gen "Unix Makefiles" + --ci-defaults --cuda --hip + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache + --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache + + - name: Build SYCL + run: cmake --build ${{github.workspace}}/sycl_build + + - name: Run check-sycl + # Remove after fixing SYCL test :: abi/layout_handler.cpp + # This issue does not affect further execution of e2e with UR. 
+ continue-on-error: true + run: cmake --build ${{github.workspace}}/sycl_build --target check-sycl + + - name: Swap UR loader and adapters + run: | + cp ${{github.workspace}}/ur-repo/build/lib/libur_loader.so* ${{github.workspace}}/sycl_build/lib/ + cp ${{github.workspace}}/ur-repo/build/lib/libur_adapter_cuda.so* ${{github.workspace}}/sycl_build/lib/ + + - name: Setup SYCL + run: | + echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV + which clang++ sycl-ls + SYCL_PI_TRACE=-1 sycl-ls + + - name: Build e2e tests + run: > + cmake + -GNinja + -B ${{github.workspace}}/build-e2e/ + -S ${{github.workspace}}/sycl-repo/sycl/test-e2e/ + -DSYCL_TEST_E2E_TARGETS="ext_oneapi_cuda:gpu" + -DCMAKE_CXX_COMPILER="$(which clang++)" + -DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py" + + - name: Run e2e tests + run: ninja -C build-e2e check-sycl-e2e diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 0000000000..527f641a51 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,53 @@ +name: Nightly + +on: + schedule: + # Run every day at 23:00 UTC + - cron: '0 23 * * *' + +jobs: + long-fuzz-test: + name: Run long fuzz tests + strategy: + matrix: + build_type: [Debug, Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: 'ubuntu-22.04' + + steps: + - uses: actions/checkout@v3 + # with-ref part to be removed after merging 'adapters' branch with 'main' + with: + ref: adapters + + - name: Install pip packages + run: pip install -r third_party/requirements.txt + + - name: Download DPC++ + run: | + wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2023-08-31/sycl_linux.tar.gz + mkdir dpcpp_compiler + tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler + + - name: Configure CMake + run: > + cmake + -B${{github.workspace}}/build + 
-DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -DUR_ENABLE_TRACING=ON + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUR_BUILD_TESTS=ON + -DUR_USE_ASAN=ON + -DUR_USE_UBSAN=ON + -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ + + - name: Build + run: > + LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib + cmake --build ${{github.workspace}}/build -j $(nproc) + + - name: Fuzz long test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{matrix.build_type}} --output-on-failure -L "fuzz-long" diff --git a/CMakeLists.txt b/CMakeLists.txt index 1210375dd8..9fad363b76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR) -project(unified-runtime VERSION 0.7.0) +project(unified-runtime VERSION 0.8.0) include(GNUInstallDirs) include(CheckCXXSourceCompiles) @@ -15,7 +15,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(helpers) if(CMAKE_SYSTEM_NAME STREQUAL Darwin) - set(CMAKE_FIND_FRAMEWORK NEVER) + set(Python3_FIND_FRAMEWORK NEVER) + set(Python3_FIND_STRATEGY LOCATION) endif() find_package(Python3 COMPONENTS Interpreter REQUIRED) diff --git a/README.md b/README.md index 6d3c4345e2..9f7773bd77 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ see cmake options for details. - [Contents of the repo](#contents-of-the-repo) - [Integration](#integration) - [Weekly tags](#weekly-tags) - - [Third-Party tools](#third-party-tools) - - [Building](#building) +3. [Third-Party tools](#third-party-tools) +4. [Building](#building) - [Requirements](#requirements) - [Windows](#windows) - [Linux](#linux) @@ -31,7 +31,7 @@ see cmake options for details. - [Adapter naming convention](#adapter-naming-convention) - [Source code generation](#source-code-generation) - [Documentation](#documentation) - +6. 
[Release Process](#release-process) ## Contents of the repo @@ -80,6 +80,12 @@ Tools can be acquired via instructions in [third_party](/third_party/README.md). ## Building +The requirements and instructions below are for building the project from source +without any modifications. To make modifications to the specification, please +see the +[Contribution Guide](https://oneapi-src.github.io/unified-runtime/core/CONTRIB.html) +for more detailed instructions on the correct setup. + ### Requirements Required packages: @@ -150,6 +156,10 @@ It will generate the source code **and** run automated code formatting: $ make generate ``` +This target has additional dependencies which are described in the *Build +Environment* section of the +[Contribution Guide](https://oneapi-src.github.io/unified-runtime/core/CONTRIB.html). + ## Contributions For those who intend to make a contribution to the project please read our @@ -172,3 +182,26 @@ Code is generated using included [Python scripts](/scripts/README.md). Documentation is generated from source code using Sphinx - see [scripts dir](/scripts/README.md) for details. + +## Release Process + +Unified Runtime releases are aligned with oneAPI releases. Once all changes +planned for a release have been accepted, the release process is defined as: + +1. Create a new release branch based on the [main][main-branch] branch taking + the form `v<major>.<minor>.x` where `x` is a placeholder for the patch + version. This branch will always contain the latest patch version for a given + release. +2. Create a PR to increment the CMake project version on the [main][main-branch] + and merge before accepting any other changes. +3. Create a new tag based on the latest commit on the release branch taking the + form `v<major>.<minor>.<patch>`. +4. Create a [new GitHub release][new-github-release] using the tag created in + the previous step. + * Prior to version 1.0, check the *Set as a pre-release* tick box. +5. Update downstream projects to utilize the release tag. 
If any issues arise + from integration, apply any necessary hot fixes to `v<major>.<minor>.x` + branch and go back to step 3. + +[main-branch]: https://github.com/oneapi-src/unified-runtime/tree/main +[new-github-release]: https://github.com/oneapi-src/unified-runtime/releases/new diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 3c90d41236..35c4789432 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -63,6 +63,8 @@ function(add_ur_target_compile_options name) -fPIC -Wall -Wpedantic + -Wempty-body + -Wunused-parameter $<$:-fdiagnostics-color=always> $<$:-fcolor-diagnostics> ) @@ -78,7 +80,7 @@ function(add_ur_target_compile_options name) endif() elseif(MSVC) target_compile_options(${name} PRIVATE - /MP + $<$:/MP> # clang-cl.exe does not support /MP /W3 /MD$<$:d> /GS diff --git a/examples/collector/README.md b/examples/collector/README.md index aaf5eed32a..fbdf18a8ae 100644 --- a/examples/collector/README.md +++ b/examples/collector/README.md @@ -19,7 +19,7 @@ $ mkdir build $ cd build $ cmake .. -DUR_ENABLE_TRACING=ON $ make -$ UR_ADAPTERS_FORCE_LOAD=./lib/libur_adapter_null.so XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=./lib/libxptifw.so XPTI_SUBSCRIBERS=./lib/libcollector.so ./bin/hello_world +$ UR_ADAPTERS_FORCE_LOAD=./lib/libur_adapter_null.so UR_ENABLE_LAYERS=UR_LAYER_TRACING XPTI_TRACE_ENABLE=1 XPTI_FRAMEWORK_DISPATCHER=./lib/libxptifw.so XPTI_SUBSCRIBERS=./lib/libcollector.so ./bin/hello_world ``` See [XPTI framework documentation](https://github.com/intel/llvm/blob/sycl/xptifw/doc/XPTI_Framework.md) for more information. 
diff --git a/examples/collector/collector.cpp b/examples/collector/collector.cpp index 6f2f6d57b1..910964e02c 100644 --- a/examples/collector/collector.cpp +++ b/examples/collector/collector.cpp @@ -34,15 +34,23 @@ constexpr uint16_t TRACE_FN_END = constexpr std::string_view UR_STREAM_NAME = "ur"; /** - * @brief Formats the function parameters and arguments for urInit + * @brief Formats the function parameters and arguments for urAdapterGet */ std::ostream &operator<<(std::ostream &os, - const struct ur_init_params_t *params) { - os << ".device_flags = "; - if (*params->pdevice_flags & UR_DEVICE_INIT_FLAG_GPU) { - os << "UR_DEVICE_INIT_FLAG_GPU"; - } else { - os << "0"; + const struct ur_adapter_get_params_t *params) { + os << ".NumEntries = "; + os << *params->pNumEntries; + os << ", "; + os << ".phAdapters = "; + os << *params->pphAdapters; + if (*params->pphAdapters) { + os << " (" << **params->pphAdapters << ")"; + } + os << ", "; + os << ".pNumAdapters = "; + os << *params->ppNumAdapters; + if (*params->ppNumAdapters) { + os << " (" << **params->ppNumAdapters << ")"; } os << ""; return os; @@ -50,16 +58,17 @@ std::ostream &operator<<(std::ostream &os, /** * A map of functions that format the parameters and arguments for each UR function. - * This example only implements a handler for one function, `urInit`, but it's + * This example only implements a handler for one function, `urAdapterGet`, but it's * trivial to expand it to support more. 
*/ static std::unordered_map< std::string_view, std::function> - handlers = {{"urInit", [](const xpti::function_with_args_t *fn_args, - std::ostream &os) { - auto params = static_cast( - fn_args->args_data); + handlers = {{"urAdapterGet", [](const xpti::function_with_args_t *fn_args, + std::ostream &os) { + auto params = + static_cast( + fn_args->args_data); os << params; }}}; @@ -73,10 +82,9 @@ static std::unordered_map< * On begin, it prints the function declaration with the call arguments specified, * and on end it prints the function name with the result of the call. */ -XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, - xpti::trace_event_data_t *parent, - xpti::trace_event_data_t *event, - uint64_t instance, const void *user_data) { +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, xpti::trace_event_data_t *, + xpti::trace_event_data_t *, uint64_t instance, + const void *user_data) { auto *args = static_cast(user_data); std::ostringstream out; if (trace_type == TRACE_FN_BEGIN) { @@ -110,8 +118,7 @@ XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, * selected trace types. */ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, - unsigned int minor_version, - const char *version_str, + unsigned int minor_version, const char *, const char *stream_name) { if (stream_name == nullptr) { std::cout << "Stream name not provided. Aborting." << std::endl; @@ -149,5 +156,5 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, * * Can be used to cleanup state or resources. 
*/ -XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { /* noop */ +XPTI_CALLBACK_API void xptiTraceFinish(const char *) { /* noop */ } diff --git a/examples/hello_world/hello_world.cpp b/examples/hello_world/hello_world.cpp index 4d903da65a..904ac6d2ef 100644 --- a/examples/hello_world/hello_world.cpp +++ b/examples/hello_world/hello_world.cpp @@ -15,13 +15,14 @@ #include "ur_api.h" ////////////////////////////////////////////////////////////////////////// -int main(int argc, char *argv[]) { +int main(int, char *[]) { ur_result_t status; // Initialize the platform - status = urInit(0, nullptr); + status = urLoaderInit(0, nullptr); if (status != UR_RESULT_SUCCESS) { - std::cout << "urInit failed with return code: " << status << std::endl; + std::cout << "urLoaderInit failed with return code: " << status + << std::endl; return 1; } std::cout << "Platform initialized.\n"; @@ -119,6 +120,6 @@ int main(int argc, char *argv[]) { for (auto adapter : adapters) { urAdapterRelease(adapter); } - urTearDown(nullptr); + urLoaderTearDown(); return status == UR_RESULT_SUCCESS ? 
0 : 1; } diff --git a/include/ur.py b/include/ur.py index 2b49088119..13996b1815 100644 --- a/include/ur.py +++ b/include/ur.py @@ -6,7 +6,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @file ur.py - @version v0.7-r0 + @version v0.8-r0 """ import platform @@ -117,8 +117,6 @@ class ur_function_v(IntEnum): QUEUE_CREATE_WITH_NATIVE_HANDLE = 96 ## Enumerator for ::urQueueCreateWithNativeHandle QUEUE_FINISH = 97 ## Enumerator for ::urQueueFinish QUEUE_FLUSH = 98 ## Enumerator for ::urQueueFlush - INIT = 99 ## Enumerator for ::urInit - TEAR_DOWN = 100 ## Enumerator for ::urTearDown SAMPLER_CREATE = 101 ## Enumerator for ::urSamplerCreate SAMPLER_RETAIN = 102 ## Enumerator for ::urSamplerRetain SAMPLER_RELEASE = 103 ## Enumerator for ::urSamplerRelease @@ -144,9 +142,6 @@ class ur_function_v(IntEnum): COMMAND_BUFFER_FINALIZE_EXP = 123 ## Enumerator for ::urCommandBufferFinalizeExp COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125 ## Enumerator for ::urCommandBufferAppendKernelLaunchExp COMMAND_BUFFER_ENQUEUE_EXP = 128 ## Enumerator for ::urCommandBufferEnqueueExp - COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP = 129 ## Enumerator for ::urCommandBufferAppendMemcpyUSMExp - COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP = 130 ## Enumerator for ::urCommandBufferAppendMembufferCopyExp - COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP = 131 ## Enumerator for ::urCommandBufferAppendMembufferCopyRectExp USM_PITCHED_ALLOC_EXP = 132 ## Enumerator for ::urUSMPitchedAllocExp BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133## Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134 ## Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp @@ -182,10 +177,6 @@ class ur_function_v(IntEnum): USM_P2P_ENABLE_PEER_ACCESS_EXP = 165 ## Enumerator for ::urUsmP2PEnablePeerAccessExp USM_P2P_DISABLE_PEER_ACCESS_EXP = 166 ## Enumerator for ::urUsmP2PDisablePeerAccessExp USM_P2P_PEER_ACCESS_GET_INFO_EXP = 167 ## Enumerator for 
::urUsmP2PPeerAccessGetInfoExp - COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP = 168 ## Enumerator for ::urCommandBufferAppendMembufferWriteExp - COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP = 169 ## Enumerator for ::urCommandBufferAppendMembufferReadExp - COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP = 170## Enumerator for ::urCommandBufferAppendMembufferWriteRectExp - COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP = 171 ## Enumerator for ::urCommandBufferAppendMembufferReadRectExp LOADER_CONFIG_CREATE = 172 ## Enumerator for ::urLoaderConfigCreate LOADER_CONFIG_RELEASE = 173 ## Enumerator for ::urLoaderConfigRelease LOADER_CONFIG_RETAIN = 174 ## Enumerator for ::urLoaderConfigRetain @@ -196,6 +187,22 @@ class ur_function_v(IntEnum): ADAPTER_RETAIN = 179 ## Enumerator for ::urAdapterRetain ADAPTER_GET_LAST_ERROR = 180 ## Enumerator for ::urAdapterGetLastError ADAPTER_GET_INFO = 181 ## Enumerator for ::urAdapterGetInfo + LOADER_INIT = 182 ## Enumerator for ::urLoaderInit + LOADER_TEAR_DOWN = 183 ## Enumerator for ::urLoaderTearDown + COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 184 ## Enumerator for ::urCommandBufferAppendUSMMemcpyExp + COMMAND_BUFFER_APPEND_USM_FILL_EXP = 185 ## Enumerator for ::urCommandBufferAppendUSMFillExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 186 ## Enumerator for ::urCommandBufferAppendMemBufferCopyExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 187## Enumerator for ::urCommandBufferAppendMemBufferWriteExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 188 ## Enumerator for ::urCommandBufferAppendMemBufferReadExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 189## Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 190 ## Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 191## Enumerator for ::urCommandBufferAppendMemBufferReadRectExp + COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 192 ## Enumerator for 
::urCommandBufferAppendMemBufferFillExp + ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 193 ## Enumerator for ::urEnqueueCooperativeKernelLaunchExp + KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 194## Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp + COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 195 ## Enumerator for ::urCommandBufferAppendUSMPrefetchExp + COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 196 ## Enumerator for ::urCommandBufferAppendUSMAdviseExp + LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 197 ## Enumerator for ::urLoaderConfigSetCodeLocationCallback class ur_function_t(c_int): def __str__(self): @@ -245,6 +252,8 @@ class ur_structure_type_v(IntEnum): EXP_INTEROP_SEMAPHORE_DESC = 0x2002 ## ::ur_exp_interop_semaphore_desc_t EXP_FILE_DESCRIPTOR = 0x2003 ## ::ur_exp_file_descriptor_t EXP_WIN32_HANDLE = 0x2004 ## ::ur_exp_win32_handle_t + EXP_LAYERED_IMAGE_PROPERTIES = 0x2005 ## ::ur_exp_layered_image_properties_t + EXP_SAMPLER_ADDR_MODES = 0x2006 ## ::ur_exp_sampler_addr_modes_t class ur_structure_type_t(c_int): def __str__(self): @@ -510,6 +519,24 @@ def __str__(self): return str(ur_loader_config_info_v(self.value)) +############################################################################### +## @brief Code location data +class ur_code_location_t(Structure): + _fields_ = [ + ("functionName", c_char_p), ## [in][out] Function name. + ("sourceFile", c_char_p), ## [in][out] Source code file. + ("lineNumber", c_ulong), ## [in][out] Source code line number. + ("columnNumber", c_ulong) ## [in][out] Source code column number. + ] + +############################################################################### +## @brief Code location callback with user data. 
+def ur_code_location_callback_t(user_defined_callback): + @CFUNCTYPE(ur_code_location_t, c_void_p) + def ur_code_location_callback_t_wrapper(pUserData): + return user_defined_callback(pUserData) + return ur_code_location_callback_t_wrapper + ############################################################################### ## @brief Supported adapter info class ur_adapter_info_v(IntEnum): @@ -570,7 +597,8 @@ def __str__(self): class ur_api_version_v(IntEnum): _0_6 = UR_MAKE_VERSION( 0, 6 ) ## version 0.6 _0_7 = UR_MAKE_VERSION( 0, 7 ) ## version 0.7 - CURRENT = UR_MAKE_VERSION( 0, 7 ) ## latest known version + _0_8 = UR_MAKE_VERSION( 0, 8 ) ## version 0.8 + CURRENT = UR_MAKE_VERSION( 0, 8 ) ## latest known version class ur_api_version_t(c_int): def __str__(self): @@ -2097,10 +2125,10 @@ class ur_event_native_properties_t(Structure): ############################################################################### ## @brief Event states for all events. class ur_execution_info_v(IntEnum): - EXECUTION_INFO_COMPLETE = 0 ## Indicates that the event has completed. - EXECUTION_INFO_RUNNING = 1 ## Indicates that the device has started processing this event. - EXECUTION_INFO_SUBMITTED = 2 ## Indicates that the event has been submitted by the host to the device. - EXECUTION_INFO_QUEUED = 3 ## Indicates that the event has been queued, this is the initial state of + COMPLETE = 0 ## Indicates that the event has completed. + RUNNING = 1 ## Indicates that the device has started processing this event. + SUBMITTED = 2 ## Indicates that the event has been submitted by the host to the device. + QUEUED = 3 ## Indicates that the event has been queued, this is the initial state of ## events. 
class ur_execution_info_t(c_int): @@ -2210,6 +2238,20 @@ class ur_exp_sampler_mip_properties_t(Structure): ("mipFilterMode", ur_sampler_filter_mode_t) ## [in] mipmap filter mode used for filtering between mipmap levels ] +############################################################################### +## @brief Describes unique sampler addressing mode per dimension +## +## @details +## - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t +## as part of a `pNext` chain. +class ur_exp_sampler_addr_modes_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES + ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure + ("addrModes", ur_sampler_addressing_mode_t * 3) ## [in] Specify the address mode of the sampler per dimension + ] + ############################################################################### ## @brief Describes an interop memory resource descriptor class ur_exp_interop_mem_desc_t(Structure): @@ -2228,6 +2270,21 @@ class ur_exp_interop_semaphore_desc_t(Structure): ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure ] +############################################################################### +## @brief Describes layered image properties +## +## @details +## - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp +## or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as +## part of a `pNext` chain. 
+class ur_exp_layered_image_properties_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES + ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure + ("numLayers", c_ulong) ## [in] number of layers the image should have + ] + ############################################################################### ## @brief The extension string which defines support for command-buffers which ## is returned when querying device extensions. @@ -2253,6 +2310,11 @@ class ur_exp_command_buffer_sync_point_t(c_ulong): class ur_exp_command_buffer_handle_t(c_void_p): pass +############################################################################### +## @brief The extension string which defines support for cooperative-kernels +## which is returned when querying device extensions. +UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP = "ur_exp_cooperative_kernels" + ############################################################################### ## @brief Supported peer info class ur_exp_peer_info_v(IntEnum): @@ -2696,6 +2758,21 @@ class ur_kernel_dditable_t(Structure): ("pfnSetSpecializationConstants", c_void_p) ## _urKernelSetSpecializationConstants_t ] +############################################################################### +## @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp +if __use_win_types: + _urKernelSuggestMaxCooperativeGroupCountExp_t = WINFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) +else: + _urKernelSuggestMaxCooperativeGroupCountExp_t = CFUNCTYPE( ur_result_t, ur_kernel_handle_t, POINTER(c_ulong) ) + + +############################################################################### +## @brief Table of KernelExp functions pointers +class ur_kernel_exp_dditable_t(Structure): + _fields_ = [ + ("pfnSuggestMaxCooperativeGroupCountExp", c_void_p) ## _urKernelSuggestMaxCooperativeGroupCountExp_t + ] + 
############################################################################### ## @brief Function-pointer for urSamplerCreate if __use_win_types: @@ -2869,6 +2946,53 @@ class ur_physical_mem_dditable_t(Structure): ("pfnRelease", c_void_p) ## _urPhysicalMemRelease_t ] +############################################################################### +## @brief Function-pointer for urAdapterGet +if __use_win_types: + _urAdapterGet_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) +else: + _urAdapterGet_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) + +############################################################################### +## @brief Function-pointer for urAdapterRelease +if __use_win_types: + _urAdapterRelease_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) +else: + _urAdapterRelease_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) + +############################################################################### +## @brief Function-pointer for urAdapterRetain +if __use_win_types: + _urAdapterRetain_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) +else: + _urAdapterRetain_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) + +############################################################################### +## @brief Function-pointer for urAdapterGetLastError +if __use_win_types: + _urAdapterGetLastError_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) +else: + _urAdapterGetLastError_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) + +############################################################################### +## @brief Function-pointer for urAdapterGetInfo +if __use_win_types: + _urAdapterGetInfo_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) +else: + _urAdapterGetInfo_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, 
c_size_t, c_void_p, POINTER(c_size_t) ) + + +############################################################################### +## @brief Table of Global functions pointers +class ur_global_dditable_t(Structure): + _fields_ = [ + ("pfnAdapterGet", c_void_p), ## _urAdapterGet_t + ("pfnAdapterRelease", c_void_p), ## _urAdapterRelease_t + ("pfnAdapterRetain", c_void_p), ## _urAdapterRetain_t + ("pfnAdapterGetLastError", c_void_p), ## _urAdapterGetLastError_t + ("pfnAdapterGetInfo", c_void_p) ## _urAdapterGetInfo_t + ] + ############################################################################### ## @brief Function-pointer for urEnqueueKernelLaunch if __use_win_types: @@ -3076,6 +3200,21 @@ class ur_enqueue_dditable_t(Structure): ("pfnWriteHostPipe", c_void_p) ## _urEnqueueWriteHostPipe_t ] +############################################################################### +## @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp +if __use_win_types: + _urEnqueueCooperativeKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) +else: + _urEnqueueCooperativeKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_queue_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) + + +############################################################################### +## @brief Table of EnqueueExp functions pointers +class ur_enqueue_exp_dditable_t(Structure): + _fields_ = [ + ("pfnCooperativeKernelLaunchExp", c_void_p) ## _urEnqueueCooperativeKernelLaunchExp_t + ] + ############################################################################### ## @brief Function-pointer for urQueueGetInfo if __use_win_types: @@ -3436,53 +3575,81 @@ class ur_usm_exp_dditable_t(Structure): _urCommandBufferAppendKernelLaunchExp_t = 
CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMemcpyUSMExp +## @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp +if __use_win_types: + _urCommandBufferAppendUSMMemcpyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMMemcpyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMFillExp +if __use_win_types: + _urCommandBufferAppendUSMFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendMemBufferCopyExp +if __use_win_types: + _urCommandBufferAppendMemBufferCopyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), 
POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendMemBufferCopyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp if __use_win_types: - _urCommandBufferAppendMemcpyUSMExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMemcpyUSMExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_void_p, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferCopyExp +## @brief Function-pointer for urCommandBufferAppendMemBufferReadExp if __use_win_types: - _urCommandBufferAppendMembufferCopyExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + 
_urCommandBufferAppendMemBufferReadExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferCopyExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferReadExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferWriteExp +## @brief Function-pointer for urCommandBufferAppendMemBufferCopyRectExp if __use_win_types: - _urCommandBufferAppendMembufferWriteExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferCopyRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferWriteExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferCopyRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, 
ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferReadExp +## @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp if __use_win_types: - _urCommandBufferAppendMembufferReadExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferReadExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferWriteRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferCopyRectExp +## @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp if __use_win_types: - _urCommandBufferAppendMembufferCopyRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, 
ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferReadRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferCopyRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferReadRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferWriteRectExp +## @brief Function-pointer for urCommandBufferAppendMemBufferFillExp if __use_win_types: - _urCommandBufferAppendMembufferWriteRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferFillExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), 
POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferWriteRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendMemBufferFillExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, c_void_p, c_size_t, c_size_t, c_size_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### -## @brief Function-pointer for urCommandBufferAppendMembufferReadRectExp +## @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp if __use_win_types: - _urCommandBufferAppendMembufferReadRectExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendUSMPrefetchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) else: - _urCommandBufferAppendMembufferReadRectExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, c_size_t, c_size_t, c_size_t, c_size_t, c_void_p, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendUSMPrefetchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_migration_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), 
POINTER(ur_exp_command_buffer_sync_point_t) ) + +############################################################################### +## @brief Function-pointer for urCommandBufferAppendUSMAdviseExp +if __use_win_types: + _urCommandBufferAppendUSMAdviseExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) +else: + _urCommandBufferAppendUSMAdviseExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, c_void_p, c_size_t, ur_usm_advice_flags_t, c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) ############################################################################### ## @brief Function-pointer for urCommandBufferEnqueueExp @@ -3501,13 +3668,17 @@ class ur_command_buffer_exp_dditable_t(Structure): ("pfnReleaseExp", c_void_p), ## _urCommandBufferReleaseExp_t ("pfnFinalizeExp", c_void_p), ## _urCommandBufferFinalizeExp_t ("pfnAppendKernelLaunchExp", c_void_p), ## _urCommandBufferAppendKernelLaunchExp_t - ("pfnAppendMemcpyUSMExp", c_void_p), ## _urCommandBufferAppendMemcpyUSMExp_t - ("pfnAppendMembufferCopyExp", c_void_p), ## _urCommandBufferAppendMembufferCopyExp_t - ("pfnAppendMembufferWriteExp", c_void_p), ## _urCommandBufferAppendMembufferWriteExp_t - ("pfnAppendMembufferReadExp", c_void_p), ## _urCommandBufferAppendMembufferReadExp_t - ("pfnAppendMembufferCopyRectExp", c_void_p), ## _urCommandBufferAppendMembufferCopyRectExp_t - ("pfnAppendMembufferWriteRectExp", c_void_p), ## _urCommandBufferAppendMembufferWriteRectExp_t - ("pfnAppendMembufferReadRectExp", c_void_p), ## _urCommandBufferAppendMembufferReadRectExp_t + ("pfnAppendUSMMemcpyExp", c_void_p), ## _urCommandBufferAppendUSMMemcpyExp_t + ("pfnAppendUSMFillExp", c_void_p), ## _urCommandBufferAppendUSMFillExp_t + ("pfnAppendMemBufferCopyExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyExp_t + 
("pfnAppendMemBufferWriteExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteExp_t + ("pfnAppendMemBufferReadExp", c_void_p), ## _urCommandBufferAppendMemBufferReadExp_t + ("pfnAppendMemBufferCopyRectExp", c_void_p), ## _urCommandBufferAppendMemBufferCopyRectExp_t + ("pfnAppendMemBufferWriteRectExp", c_void_p), ## _urCommandBufferAppendMemBufferWriteRectExp_t + ("pfnAppendMemBufferReadRectExp", c_void_p), ## _urCommandBufferAppendMemBufferReadRectExp_t + ("pfnAppendMemBufferFillExp", c_void_p), ## _urCommandBufferAppendMemBufferFillExp_t + ("pfnAppendUSMPrefetchExp", c_void_p), ## _urCommandBufferAppendUSMPrefetchExp_t + ("pfnAppendUSMAdviseExp", c_void_p), ## _urCommandBufferAppendUSMAdviseExp_t ("pfnEnqueueExp", c_void_p) ## _urCommandBufferEnqueueExp_t ] @@ -3542,69 +3713,6 @@ class ur_usm_p2p_exp_dditable_t(Structure): ("pfnPeerAccessGetInfoExp", c_void_p) ## _urUsmP2PPeerAccessGetInfoExp_t ] -############################################################################### -## @brief Function-pointer for urInit -if __use_win_types: - _urInit_t = WINFUNCTYPE( ur_result_t, ur_device_init_flags_t, ur_loader_config_handle_t ) -else: - _urInit_t = CFUNCTYPE( ur_result_t, ur_device_init_flags_t, ur_loader_config_handle_t ) - -############################################################################### -## @brief Function-pointer for urTearDown -if __use_win_types: - _urTearDown_t = WINFUNCTYPE( ur_result_t, c_void_p ) -else: - _urTearDown_t = CFUNCTYPE( ur_result_t, c_void_p ) - -############################################################################### -## @brief Function-pointer for urAdapterGet -if __use_win_types: - _urAdapterGet_t = WINFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) -else: - _urAdapterGet_t = CFUNCTYPE( ur_result_t, c_ulong, POINTER(ur_adapter_handle_t), POINTER(c_ulong) ) - -############################################################################### -## @brief Function-pointer for urAdapterRelease 
-if __use_win_types: - _urAdapterRelease_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRelease_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterRetain -if __use_win_types: - _urAdapterRetain_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t ) -else: - _urAdapterRetain_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t ) - -############################################################################### -## @brief Function-pointer for urAdapterGetLastError -if __use_win_types: - _urAdapterGetLastError_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) -else: - _urAdapterGetLastError_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, POINTER(c_char_p), POINTER(c_long) ) - -############################################################################### -## @brief Function-pointer for urAdapterGetInfo -if __use_win_types: - _urAdapterGetInfo_t = WINFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) -else: - _urAdapterGetInfo_t = CFUNCTYPE( ur_result_t, ur_adapter_handle_t, ur_adapter_info_t, c_size_t, c_void_p, POINTER(c_size_t) ) - - -############################################################################### -## @brief Table of Global functions pointers -class ur_global_dditable_t(Structure): - _fields_ = [ - ("pfnInit", c_void_p), ## _urInit_t - ("pfnTearDown", c_void_p), ## _urTearDown_t - ("pfnAdapterGet", c_void_p), ## _urAdapterGet_t - ("pfnAdapterRelease", c_void_p), ## _urAdapterRelease_t - ("pfnAdapterRetain", c_void_p), ## _urAdapterRetain_t - ("pfnAdapterGetLastError", c_void_p), ## _urAdapterGetLastError_t - ("pfnAdapterGetInfo", c_void_p) ## _urAdapterGetInfo_t - ] - ############################################################################### ## @brief Function-pointer for urVirtualMemGranularityGetInfo if 
__use_win_types: @@ -3755,17 +3863,19 @@ class ur_dditable_t(Structure): ("Event", ur_event_dditable_t), ("Program", ur_program_dditable_t), ("Kernel", ur_kernel_dditable_t), + ("KernelExp", ur_kernel_exp_dditable_t), ("Sampler", ur_sampler_dditable_t), ("Mem", ur_mem_dditable_t), ("PhysicalMem", ur_physical_mem_dditable_t), + ("Global", ur_global_dditable_t), ("Enqueue", ur_enqueue_dditable_t), + ("EnqueueExp", ur_enqueue_exp_dditable_t), ("Queue", ur_queue_dditable_t), ("BindlessImagesExp", ur_bindless_images_exp_dditable_t), ("USM", ur_usm_dditable_t), ("USMExp", ur_usm_exp_dditable_t), ("CommandBufferExp", ur_command_buffer_exp_dditable_t), ("UsmP2PExp", ur_usm_p2p_exp_dditable_t), - ("Global", ur_global_dditable_t), ("VirtualMem", ur_virtual_mem_dditable_t), ("Device", ur_device_dditable_t) ] @@ -3784,7 +3894,7 @@ def __init__(self, version : ur_api_version_t): self.__dditable = ur_dditable_t() # initialize the UR - self.__dll.urInit(0, 0) + self.__dll.urLoaderInit(0, 0) # call driver to get function pointers Platform = ur_platform_dditable_t() @@ -3880,6 +3990,16 @@ def __init__(self, version : ur_api_version_t): self.urKernelSetArgMemObj = _urKernelSetArgMemObj_t(self.__dditable.Kernel.pfnSetArgMemObj) self.urKernelSetSpecializationConstants = _urKernelSetSpecializationConstants_t(self.__dditable.Kernel.pfnSetSpecializationConstants) + # call driver to get function pointers + KernelExp = ur_kernel_exp_dditable_t() + r = ur_result_v(self.__dll.urGetKernelExpProcAddrTable(version, byref(KernelExp))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.KernelExp = KernelExp + + # attach function interface to function address + self.urKernelSuggestMaxCooperativeGroupCountExp = _urKernelSuggestMaxCooperativeGroupCountExp_t(self.__dditable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp) + # call driver to get function pointers Sampler = ur_sampler_dditable_t() r = ur_result_v(self.__dll.urGetSamplerProcAddrTable(version, byref(Sampler))) @@ 
-3926,6 +4046,20 @@ def __init__(self, version : ur_api_version_t): self.urPhysicalMemRetain = _urPhysicalMemRetain_t(self.__dditable.PhysicalMem.pfnRetain) self.urPhysicalMemRelease = _urPhysicalMemRelease_t(self.__dditable.PhysicalMem.pfnRelease) + # call driver to get function pointers + Global = ur_global_dditable_t() + r = ur_result_v(self.__dll.urGetGlobalProcAddrTable(version, byref(Global))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.Global = Global + + # attach function interface to function address + self.urAdapterGet = _urAdapterGet_t(self.__dditable.Global.pfnAdapterGet) + self.urAdapterRelease = _urAdapterRelease_t(self.__dditable.Global.pfnAdapterRelease) + self.urAdapterRetain = _urAdapterRetain_t(self.__dditable.Global.pfnAdapterRetain) + self.urAdapterGetLastError = _urAdapterGetLastError_t(self.__dditable.Global.pfnAdapterGetLastError) + self.urAdapterGetInfo = _urAdapterGetInfo_t(self.__dditable.Global.pfnAdapterGetInfo) + # call driver to get function pointers Enqueue = ur_enqueue_dditable_t() r = ur_result_v(self.__dll.urGetEnqueueProcAddrTable(version, byref(Enqueue))) @@ -3960,6 +4094,16 @@ def __init__(self, version : ur_api_version_t): self.urEnqueueReadHostPipe = _urEnqueueReadHostPipe_t(self.__dditable.Enqueue.pfnReadHostPipe) self.urEnqueueWriteHostPipe = _urEnqueueWriteHostPipe_t(self.__dditable.Enqueue.pfnWriteHostPipe) + # call driver to get function pointers + EnqueueExp = ur_enqueue_exp_dditable_t() + r = ur_result_v(self.__dll.urGetEnqueueExpProcAddrTable(version, byref(EnqueueExp))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.EnqueueExp = EnqueueExp + + # attach function interface to function address + self.urEnqueueCooperativeKernelLaunchExp = _urEnqueueCooperativeKernelLaunchExp_t(self.__dditable.EnqueueExp.pfnCooperativeKernelLaunchExp) + # call driver to get function pointers Queue = ur_queue_dditable_t() r = ur_result_v(self.__dll.urGetQueueProcAddrTable(version, 
byref(Queue))) @@ -4046,13 +4190,17 @@ def __init__(self, version : ur_api_version_t): self.urCommandBufferReleaseExp = _urCommandBufferReleaseExp_t(self.__dditable.CommandBufferExp.pfnReleaseExp) self.urCommandBufferFinalizeExp = _urCommandBufferFinalizeExp_t(self.__dditable.CommandBufferExp.pfnFinalizeExp) self.urCommandBufferAppendKernelLaunchExp = _urCommandBufferAppendKernelLaunchExp_t(self.__dditable.CommandBufferExp.pfnAppendKernelLaunchExp) - self.urCommandBufferAppendMemcpyUSMExp = _urCommandBufferAppendMemcpyUSMExp_t(self.__dditable.CommandBufferExp.pfnAppendMemcpyUSMExp) - self.urCommandBufferAppendMembufferCopyExp = _urCommandBufferAppendMembufferCopyExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferCopyExp) - self.urCommandBufferAppendMembufferWriteExp = _urCommandBufferAppendMembufferWriteExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferWriteExp) - self.urCommandBufferAppendMembufferReadExp = _urCommandBufferAppendMembufferReadExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferReadExp) - self.urCommandBufferAppendMembufferCopyRectExp = _urCommandBufferAppendMembufferCopyRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferCopyRectExp) - self.urCommandBufferAppendMembufferWriteRectExp = _urCommandBufferAppendMembufferWriteRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferWriteRectExp) - self.urCommandBufferAppendMembufferReadRectExp = _urCommandBufferAppendMembufferReadRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMembufferReadRectExp) + self.urCommandBufferAppendUSMMemcpyExp = _urCommandBufferAppendUSMMemcpyExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMMemcpyExp) + self.urCommandBufferAppendUSMFillExp = _urCommandBufferAppendUSMFillExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMFillExp) + self.urCommandBufferAppendMemBufferCopyExp = _urCommandBufferAppendMemBufferCopyExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyExp) + self.urCommandBufferAppendMemBufferWriteExp = 
_urCommandBufferAppendMemBufferWriteExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteExp) + self.urCommandBufferAppendMemBufferReadExp = _urCommandBufferAppendMemBufferReadExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadExp) + self.urCommandBufferAppendMemBufferCopyRectExp = _urCommandBufferAppendMemBufferCopyRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferCopyRectExp) + self.urCommandBufferAppendMemBufferWriteRectExp = _urCommandBufferAppendMemBufferWriteRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferWriteRectExp) + self.urCommandBufferAppendMemBufferReadRectExp = _urCommandBufferAppendMemBufferReadRectExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferReadRectExp) + self.urCommandBufferAppendMemBufferFillExp = _urCommandBufferAppendMemBufferFillExp_t(self.__dditable.CommandBufferExp.pfnAppendMemBufferFillExp) + self.urCommandBufferAppendUSMPrefetchExp = _urCommandBufferAppendUSMPrefetchExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMPrefetchExp) + self.urCommandBufferAppendUSMAdviseExp = _urCommandBufferAppendUSMAdviseExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMAdviseExp) self.urCommandBufferEnqueueExp = _urCommandBufferEnqueueExp_t(self.__dditable.CommandBufferExp.pfnEnqueueExp) # call driver to get function pointers @@ -4067,22 +4215,6 @@ def __init__(self, version : ur_api_version_t): self.urUsmP2PDisablePeerAccessExp = _urUsmP2PDisablePeerAccessExp_t(self.__dditable.UsmP2PExp.pfnDisablePeerAccessExp) self.urUsmP2PPeerAccessGetInfoExp = _urUsmP2PPeerAccessGetInfoExp_t(self.__dditable.UsmP2PExp.pfnPeerAccessGetInfoExp) - # call driver to get function pointers - Global = ur_global_dditable_t() - r = ur_result_v(self.__dll.urGetGlobalProcAddrTable(version, byref(Global))) - if r != ur_result_v.SUCCESS: - raise Exception(r) - self.__dditable.Global = Global - - # attach function interface to function address - self.urInit = _urInit_t(self.__dditable.Global.pfnInit) - self.urTearDown = 
_urTearDown_t(self.__dditable.Global.pfnTearDown) - self.urAdapterGet = _urAdapterGet_t(self.__dditable.Global.pfnAdapterGet) - self.urAdapterRelease = _urAdapterRelease_t(self.__dditable.Global.pfnAdapterRelease) - self.urAdapterRetain = _urAdapterRetain_t(self.__dditable.Global.pfnAdapterRetain) - self.urAdapterGetLastError = _urAdapterGetLastError_t(self.__dditable.Global.pfnAdapterGetLastError) - self.urAdapterGetInfo = _urAdapterGetInfo_t(self.__dditable.Global.pfnAdapterGetInfo) - # call driver to get function pointers VirtualMem = ur_virtual_mem_dditable_t() r = ur_result_v(self.__dll.urGetVirtualMemProcAddrTable(version, byref(VirtualMem))) diff --git a/include/ur_api.h b/include/ur_api.h index 677c31005f..1504c0b1b2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -7,7 +7,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ur_api.h - * @version v0.7-r0 + * @version v0.8-r0 * */ #ifndef UR_API_H_INCLUDED @@ -126,8 +126,6 @@ typedef enum ur_function_t { UR_FUNCTION_QUEUE_CREATE_WITH_NATIVE_HANDLE = 96, ///< Enumerator for ::urQueueCreateWithNativeHandle UR_FUNCTION_QUEUE_FINISH = 97, ///< Enumerator for ::urQueueFinish UR_FUNCTION_QUEUE_FLUSH = 98, ///< Enumerator for ::urQueueFlush - UR_FUNCTION_INIT = 99, ///< Enumerator for ::urInit - UR_FUNCTION_TEAR_DOWN = 100, ///< Enumerator for ::urTearDown UR_FUNCTION_SAMPLER_CREATE = 101, ///< Enumerator for ::urSamplerCreate UR_FUNCTION_SAMPLER_RETAIN = 102, ///< Enumerator for ::urSamplerRetain UR_FUNCTION_SAMPLER_RELEASE = 103, ///< Enumerator for ::urSamplerRelease @@ -153,9 +151,6 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP = 123, ///< Enumerator for ::urCommandBufferFinalizeExp UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125, ///< Enumerator for ::urCommandBufferAppendKernelLaunchExp UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP = 128, ///< Enumerator for ::urCommandBufferEnqueueExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP = 129, ///< 
Enumerator for ::urCommandBufferAppendMemcpyUSMExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP = 130, ///< Enumerator for ::urCommandBufferAppendMembufferCopyExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP = 131, ///< Enumerator for ::urCommandBufferAppendMembufferCopyRectExp UR_FUNCTION_USM_PITCHED_ALLOC_EXP = 132, ///< Enumerator for ::urUSMPitchedAllocExp UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133, ///< Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134, ///< Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp @@ -191,10 +186,6 @@ typedef enum ur_function_t { UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP = 165, ///< Enumerator for ::urUsmP2PEnablePeerAccessExp UR_FUNCTION_USM_P2P_DISABLE_PEER_ACCESS_EXP = 166, ///< Enumerator for ::urUsmP2PDisablePeerAccessExp UR_FUNCTION_USM_P2P_PEER_ACCESS_GET_INFO_EXP = 167, ///< Enumerator for ::urUsmP2PPeerAccessGetInfoExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP = 168, ///< Enumerator for ::urCommandBufferAppendMembufferWriteExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP = 169, ///< Enumerator for ::urCommandBufferAppendMembufferReadExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP = 170, ///< Enumerator for ::urCommandBufferAppendMembufferWriteRectExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP = 171, ///< Enumerator for ::urCommandBufferAppendMembufferReadRectExp UR_FUNCTION_LOADER_CONFIG_CREATE = 172, ///< Enumerator for ::urLoaderConfigCreate UR_FUNCTION_LOADER_CONFIG_RELEASE = 173, ///< Enumerator for ::urLoaderConfigRelease UR_FUNCTION_LOADER_CONFIG_RETAIN = 174, ///< Enumerator for ::urLoaderConfigRetain @@ -205,6 +196,22 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError 
UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_LOADER_INIT = 182, ///< Enumerator for ::urLoaderInit + UR_FUNCTION_LOADER_TEAR_DOWN = 183, ///< Enumerator for ::urLoaderTearDown + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 184, ///< Enumerator for ::urCommandBufferAppendUSMMemcpyExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP = 185, ///< Enumerator for ::urCommandBufferAppendUSMFillExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 186, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 187, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 188, ///< Enumerator for ::urCommandBufferAppendMemBufferReadExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 189, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 190, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 191, ///< Enumerator for ::urCommandBufferAppendMemBufferReadRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 192, ///< Enumerator for ::urCommandBufferAppendMemBufferFillExp + UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 193, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp + UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 194, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 195, ///< Enumerator for ::urCommandBufferAppendUSMPrefetchExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 196, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp + UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 197, ///< Enumerator for ::urLoaderConfigSetCodeLocationCallback /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// 
@endcond @@ -214,46 +221,48 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - 
UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + 
UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< 
::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES = 0x2005, ///< ::ur_exp_layered_image_properties_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2006, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -515,9 +524,9 @@ typedef struct ur_rect_region_t { #if !defined(__GNUC__) #pragma endregion #endif -// Intel 'oneAPI' Unified Runtime APIs for Runtime +// Intel 'oneAPI' Unified Runtime APIs for Loader #if !defined(__GNUC__) -#pragma region runtime +#pragma region loader #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Supported device initialization flags @@ -668,21 +677,64 @@ urLoaderConfigEnableLayer( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Initialize the 'oneAPI' adapter(s) +/// @brief Code location data +typedef struct ur_code_location_t { + const char *functionName; ///< [in][out] Function name. + const char *sourceFile; ///< [in][out] Source code file. + uint32_t lineNumber; ///< [in][out] Source code line number. 
+ uint32_t columnNumber; ///< [in][out] Source code column number. + +} ur_code_location_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Code location callback with user data. +typedef ur_code_location_t (*ur_code_location_callback_t)( + void *pUserData ///< [in][out] pointer to data to be passed to callback +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Set a function callback for use by the loader to retrieve code +/// location information. +/// +/// @details +/// - The code location callback is optional and provides additional +/// information to the tracing layer about the entry point of the current +/// execution flow. +/// - This functionality can be used to match traced unified runtime +/// function calls with higher-level user calls. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hLoaderConfig` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnCodeloc` +UR_APIEXPORT ur_result_t UR_APICALL +urLoaderConfigSetCodeLocationCallback( + ur_loader_config_handle_t hLoaderConfig, ///< [in] Handle to config object the code location callback will be set for. + ur_code_location_callback_t pfnCodeloc, ///< [in] Function pointer to code location callback. + void *pUserData ///< [in][out][optional] pointer to data to be passed to callback. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initialize the 'oneAPI' loader /// /// @details /// - The application must call this function before calling any other /// function. /// - If this function is not called then all other functions will return /// ::UR_RESULT_ERROR_UNINITIALIZED. -/// - Only one instance of each adapter will be initialized per process. 
+/// - Only one instance of the loader will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios -/// where multiple libraries may initialize the adapter(s) simultaneously. +/// where multiple libraries may initialize the loader simultaneously. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -693,28 +745,32 @@ urLoaderConfigEnableLayer( /// + `::UR_DEVICE_INIT_FLAGS_MASK & device_flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL -urInit( +urLoaderInit( ur_device_init_flags_t device_flags, ///< [in] device initialization flags. ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. ur_loader_config_handle_t hLoaderConfig ///< [in][optional] Handle of loader config handle. 
); /////////////////////////////////////////////////////////////////////////////// -/// @brief Tear down the 'oneAPI' instance and release all its resources +/// @brief Tear down the 'oneAPI' loader and release all its resources /// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED /// - ::UR_RESULT_ERROR_DEVICE_LOST /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pParams` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL -urTearDown( - void *pParams ///< [in] pointer to tear down parameters -); +urLoaderTearDown( + void); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime APIs for Adapter +#if !defined(__GNUC__) +#pragma region adapter +#endif /////////////////////////////////////////////////////////////////////////////// /// @brief Retrieves all available adapters /// @@ -753,7 +809,9 @@ urAdapterGet( /// /// @details /// - When the reference count of the adapter reaches zero, the adapter may -/// perform adapter-specififc resource teardown +/// perform adapter-specific resource teardown. Resources must be left in +/// a state where it is safe for the adapter to be subsequently reinitialized +/// with ::urAdapterGet /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -934,6 +992,7 @@ typedef enum ur_adapter_backend_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phAdapters` /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phPlatforms != NULL` UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( ur_adapter_handle_t *phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms. 
@@ -1022,7 +1081,8 @@ urPlatformGetInfo( typedef enum ur_api_version_t { UR_API_VERSION_0_6 = UR_MAKE_VERSION(0, 6), ///< version 0.6 UR_API_VERSION_0_7 = UR_MAKE_VERSION(0, 7), ///< version 0.7 - UR_API_VERSION_CURRENT = UR_MAKE_VERSION(0, 7), ///< latest known version + UR_API_VERSION_0_8 = UR_MAKE_VERSION(0, 8), ///< version 0.8 + UR_API_VERSION_CURRENT = UR_MAKE_VERSION(0, 8), ///< latest known version /// @cond UR_API_VERSION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1292,14 +1352,18 @@ typedef enum ur_device_type_t { /// + `NULL == hPlatform` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_TYPE_VPU < DeviceType` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phDevices != NULL` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NumEntries > 0 && phDevices == NULL` /// - ::UR_RESULT_ERROR_INVALID_VALUE UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t *phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
///< If NumEntries is less than the number of devices available, then @@ -1705,6 +1769,7 @@ typedef struct ur_device_partition_properties_t { /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// + `NULL == pProperties->pProperties` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT UR_APIEXPORT ur_result_t UR_APICALL @@ -2017,6 +2082,8 @@ typedef struct ur_context_properties_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` /// + `NULL == phContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_CONTEXT_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY UR_APIEXPORT ur_result_t UR_APICALL @@ -3261,6 +3328,8 @@ typedef struct ur_usm_pool_limits_desc_t { /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -3305,6 +3374,8 @@ urUSMHostAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -3351,6 +3422,8 @@ urUSMDeviceAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -3793,6 +3866,8 @@ typedef struct 
ur_physical_mem_properties_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phPhysicalMem` /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -4870,6 +4945,8 @@ typedef struct ur_kernel_arg_mem_obj_properties_t { /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_MEM_FLAGS_MASK & pProperties->memoryAccess` /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( @@ -5123,12 +5200,15 @@ typedef struct ur_queue_index_properties_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_QUEUE_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phQueue` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE -/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW` +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -5488,6 +5568,8 @@ urEventGetInfo( /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated queue was not created with 
`UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -5649,11 +5731,11 @@ urEventCreateWithNativeHandle( /////////////////////////////////////////////////////////////////////////////// /// @brief Event states for all events. typedef enum ur_execution_info_t { - UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE = 0, ///< Indicates that the event has completed. - UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING = 1, ///< Indicates that the device has started processing this event. - UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED = 2, ///< Indicates that the event has been submitted by the host to the device. - UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED = 3, ///< Indicates that the event has been queued, this is the initial state of - ///< events. + UR_EXECUTION_INFO_COMPLETE = 0, ///< Indicates that the event has completed. + UR_EXECUTION_INFO_RUNNING = 1, ///< Indicates that the device has started processing this event. + UR_EXECUTION_INFO_SUBMITTED = 2, ///< Indicates that the event has been submitted by the host to the device. + UR_EXECUTION_INFO_QUEUED = 3, ///< Indicates that the event has been queued, this is the initial state of + ///< events. /// @cond UR_EXECUTION_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -5676,6 +5758,8 @@ typedef void (*ur_event_callback_t)( /// - The registered callback function will be called when the execution /// status of command associated with event changes to an execution status /// equal to or past the status specified by command_exec_status. +/// - `execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the +/// initial state of all events. /// - The application may call this function from simultaneous threads for /// the same context. /// - The implementation of this function should be thread-safe. 
@@ -5688,9 +5772,11 @@ typedef void (*ur_event_callback_t)( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus` +/// + `::UR_EXECUTION_INFO_QUEUED < execStatus` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pfnNotify` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + `execStatus == UR_EXECUTION_INFO_QUEUED` UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( ur_event_handle_t hEvent, ///< [in] handle of the event object @@ -6206,6 +6292,11 @@ urEnqueueMemBufferCopyRect( /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + `offset % patternSize != 0` /// + If `offset + size` results in an out-of-bounds access. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6255,6 +6346,8 @@ urEnqueueMemBufferFill( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -6306,6 +6399,8 @@ urEnqueueMemImageRead( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -6351,6 +6446,8 @@ urEnqueueMemImageWrite( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -6588,6 +6685,11 @@ urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to prefetch USM memory /// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -6629,6 +6731,11 @@ urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to set USM memory advice /// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -6883,7 +6990,6 @@ urEnqueueReadHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pipe_symbol` /// + `NULL == pSrc` -/// + `NULL == phEvent` /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` @@ -6904,7 +7010,7 @@ urEnqueueWriteHostPipe( const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. - ur_event_handle_t *phEvent ///< [out] returns an event object that identifies this write command + ur_event_handle_t *phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ); @@ -6986,6 +7092,20 @@ typedef struct ur_exp_sampler_mip_properties_t { } ur_exp_sampler_mip_properties_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Describes unique sampler addressing mode per dimension +/// +/// @details +/// - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t +/// as part of a `pNext` chain. 
+typedef struct ur_exp_sampler_addr_modes_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_sampler_addressing_mode_t addrModes[3]; ///< [in] Specify the address mode of the sampler per dimension + +} ur_exp_sampler_addr_modes_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Describes an interop memory resource descriptor typedef struct ur_exp_interop_mem_desc_t { @@ -7004,6 +7124,21 @@ typedef struct ur_exp_interop_semaphore_desc_t { } ur_exp_interop_semaphore_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Describes layered image properties +/// +/// @details +/// - Specify these properties in ::urBindlessImagesUnsampledImageCreateExp +/// or ::urBindlessImagesSampledImageCreateExp via ::ur_image_desc_t as +/// part of a `pNext` chain. 
+typedef struct ur_exp_layered_image_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + uint32_t numLayers; ///< [in] number of layers the image should have + +} ur_exp_layered_image_properties_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// @@ -7031,6 +7166,8 @@ typedef struct ur_exp_interop_semaphore_desc_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// + `NULL == pResultPitch` @@ -7770,7 +7907,7 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMemcpyUSMExp( +urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. 
@@ -7780,6 +7917,45 @@ urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + /////////////////////////////////////////////////////////////////////////////// /// @brief Append a memory copy command to a command-buffer object /// @@ -7801,7 +7977,7 @@ urCommandBufferAppendMemcpyUSMExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferCopyExp( +urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. @@ -7835,7 +8011,7 @@ urCommandBufferAppendMembufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferWriteExp( +urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. size_t offset, ///< [in] offset in bytes in the buffer object. @@ -7868,7 +8044,7 @@ urCommandBufferAppendMembufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferReadExp( +urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. size_t offset, ///< [in] offset in bytes in the buffer object. 
@@ -7900,7 +8076,7 @@ urCommandBufferAppendMembufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferCopyRectExp( +urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. @@ -7938,7 +8114,7 @@ urCommandBufferAppendMembufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferWriteRectExp( +urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. @@ -7979,7 +8155,7 @@ urCommandBufferAppendMembufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL -urCommandBufferAppendMembufferReadRectExp( +urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
@@ -7997,6 +8173,124 @@ urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a memory fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// + `NULL == hBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + If `offset + size` results in an out-of-bounds access. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Prefetch command to a command-buffer object +/// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_MIGRATION_FLAGS_MASK & flags` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
+ const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Advise command to a command-buffer object +/// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_ADVICE_FLAGS_MASK & advice` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. 
+ ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. +); + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -8030,6 +8324,90 @@ urCommandBufferEnqueueExp( ///< command-buffer execution instance. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental APIs for Cooperative Kernels +#if !defined(__GNUC__) +#pragma region cooperative kernels(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP +/// @brief The extension string which defines support for cooperative-kernels +/// which is returned when querying device extensions. 
+#define UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP "ur_exp_cooperative_kernels" +#endif // UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a command to execute a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGlobalWorkOffset` +/// + `NULL == pGlobalWorkSize` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t *pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t *pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t *pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query the maximum number of work groups for a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGroupCountRet` +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -8284,6 +8662,16 @@ typedef struct ur_loader_config_enable_layer_params_t { const char **ppLayerName; } ur_loader_config_enable_layer_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urLoaderConfigSetCodeLocationCallback +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_loader_config_set_code_location_callback_params_t { + ur_loader_config_handle_t *phLoaderConfig; + ur_code_location_callback_t *ppfnCodeloc; + void **ppUserData; +} ur_loader_config_set_code_location_callback_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urPlatformGet /// @details Each entry is a pointer to the parameter passed to the function; @@ -8798,6 +9186,15 @@ typedef struct ur_kernel_set_specialization_constants_params_t { const ur_specialization_constant_info_t **ppSpecConstants; } ur_kernel_set_specialization_constants_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function 
parameters for urKernelSuggestMaxCooperativeGroupCountExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t { + ur_kernel_handle_t *phKernel; + uint32_t **ppGroupCountRet; +} ur_kernel_suggest_max_cooperative_group_count_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urSamplerCreate /// @details Each entry is a pointer to the parameter passed to the function; @@ -8994,6 +9391,54 @@ typedef struct ur_physical_mem_release_params_t { ur_physical_mem_handle_t *phPhysicalMem; } ur_physical_mem_release_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterGet +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_get_params_t { + uint32_t *pNumEntries; + ur_adapter_handle_t **pphAdapters; + uint32_t **ppNumAdapters; +} ur_adapter_get_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterRelease +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_release_params_t { + ur_adapter_handle_t *phAdapter; +} ur_adapter_release_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterRetain +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_retain_params_t { + ur_adapter_handle_t *phAdapter; +} 
ur_adapter_retain_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterGetLastError +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_get_last_error_params_t { + ur_adapter_handle_t *phAdapter; + const char ***pppMessage; + int32_t **ppError; +} ur_adapter_get_last_error_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urAdapterGetInfo +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_adapter_get_info_params_t { + ur_adapter_handle_t *phAdapter; + ur_adapter_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_adapter_get_info_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueKernelLaunch /// @details Each entry is a pointer to the parameter passed to the function; @@ -9397,6 +9842,22 @@ typedef struct ur_enqueue_write_host_pipe_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_write_host_pipe_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t { + ur_queue_handle_t *phQueue; + ur_kernel_handle_t *phKernel; + uint32_t *pworkDim; + const size_t **ppGlobalWorkOffset; + const size_t **ppGlobalWorkSize; + const size_t **ppLocalWorkSize; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + 
ur_event_handle_t **pphEvent; +} ur_enqueue_cooperative_kernel_launch_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urQueueGetInfo /// @details Each entry is a pointer to the parameter passed to the function; @@ -9863,10 +10324,10 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { } ur_command_buffer_append_kernel_launch_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMemcpyUSMExp +/// @brief Function parameters for urCommandBufferAppendUSMMemcpyExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_memcpy_usm_exp_params_t { +typedef struct ur_command_buffer_append_usm_memcpy_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; void **ppDst; const void **ppSrc; @@ -9874,13 +10335,28 @@ typedef struct ur_command_buffer_append_memcpy_usm_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_memcpy_usm_exp_params_t; +} ur_command_buffer_append_usm_memcpy_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferCopyExp +/// @brief Function parameters for urCommandBufferAppendUSMFillExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_copy_exp_params_t { +typedef struct ur_command_buffer_append_usm_fill_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + void **ppMemory; + const void **ppPattern; + size_t 
*ppatternSize; + size_t *psize; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_fill_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendMemBufferCopyExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_mem_buffer_copy_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phSrcMem; ur_mem_handle_t *phDstMem; @@ -9890,13 +10366,13 @@ typedef struct ur_command_buffer_append_membuffer_copy_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_copy_exp_params_t; +} ur_command_buffer_append_mem_buffer_copy_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferWriteExp +/// @brief Function parameters for urCommandBufferAppendMemBufferWriteExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_write_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_write_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phBuffer; size_t *poffset; @@ -9905,13 +10381,13 @@ typedef struct ur_command_buffer_append_membuffer_write_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} 
ur_command_buffer_append_membuffer_write_exp_params_t; +} ur_command_buffer_append_mem_buffer_write_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferReadExp +/// @brief Function parameters for urCommandBufferAppendMemBufferReadExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_read_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_read_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phBuffer; size_t *poffset; @@ -9920,13 +10396,13 @@ typedef struct ur_command_buffer_append_membuffer_read_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_read_exp_params_t; +} ur_command_buffer_append_mem_buffer_read_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferCopyRectExp +/// @brief Function parameters for urCommandBufferAppendMemBufferCopyRectExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_copy_rect_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phSrcMem; ur_mem_handle_t *phDstMem; @@ -9940,13 +10416,13 @@ typedef struct ur_command_buffer_append_membuffer_copy_rect_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} 
ur_command_buffer_append_membuffer_copy_rect_exp_params_t; +} ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferWriteRectExp +/// @brief Function parameters for urCommandBufferAppendMemBufferWriteRectExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_write_rect_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_write_rect_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phBuffer; ur_rect_offset_t *pbufferOffset; @@ -9960,13 +10436,13 @@ typedef struct ur_command_buffer_append_membuffer_write_rect_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_write_rect_exp_params_t; +} ur_command_buffer_append_mem_buffer_write_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urCommandBufferAppendMembufferReadRectExp +/// @brief Function parameters for urCommandBufferAppendMemBufferReadRectExp /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_command_buffer_append_membuffer_read_rect_exp_params_t { +typedef struct ur_command_buffer_append_mem_buffer_read_rect_exp_params_t { ur_exp_command_buffer_handle_t *phCommandBuffer; ur_mem_handle_t *phBuffer; ur_rect_offset_t *pbufferOffset; @@ -9980,7 +10456,51 @@ typedef struct ur_command_buffer_append_membuffer_read_rect_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; 
ur_exp_command_buffer_sync_point_t **ppSyncPoint; -} ur_command_buffer_append_membuffer_read_rect_exp_params_t; +} ur_command_buffer_append_mem_buffer_read_rect_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendMemBufferFillExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_mem_buffer_fill_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + ur_mem_handle_t *phBuffer; + const void **ppPattern; + size_t *ppatternSize; + size_t *poffset; + size_t *psize; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_mem_buffer_fill_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMPrefetchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_usm_prefetch_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_migration_flags_t *pflags; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_prefetch_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendUSMAdviseExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct 
ur_command_buffer_append_usm_advise_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + const void **ppMemory; + size_t *psize; + ur_usm_advice_flags_t *padvice; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_usm_advise_exp_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp @@ -10026,69 +10546,13 @@ typedef struct ur_usm_p2p_peer_access_get_info_exp_params_t { } ur_usm_p2p_peer_access_get_info_exp_params_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urInit +/// @brief Function parameters for urLoaderInit /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value -typedef struct ur_init_params_t { +typedef struct ur_loader_init_params_t { ur_device_init_flags_t *pdevice_flags; ur_loader_config_handle_t *phLoaderConfig; -} ur_init_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urTearDown -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_tear_down_params_t { - void **ppParams; -} ur_tear_down_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterGet -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_get_params_t { - uint32_t *pNumEntries; - ur_adapter_handle_t **pphAdapters; - uint32_t **ppNumAdapters; -} ur_adapter_get_params_t; - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterRelease -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_release_params_t { - ur_adapter_handle_t *phAdapter; -} ur_adapter_release_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterRetain -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_retain_params_t { - ur_adapter_handle_t *phAdapter; -} ur_adapter_retain_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterGetLastError -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_get_last_error_params_t { - ur_adapter_handle_t *phAdapter; - const char ***pppMessage; - int32_t **ppError; -} ur_adapter_get_last_error_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urAdapterGetInfo -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_adapter_get_info_params_t { - ur_adapter_handle_t *phAdapter; - ur_adapter_info_t *ppropName; - size_t *ppropSize; - void **ppPropValue; - size_t **ppPropSizeRet; -} ur_adapter_get_info_params_t; +} ur_loader_init_params_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urVirtualMemGranularityGetInfo diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 
a0c2a5012d..246ffc200d 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -7,7 +7,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ur_ddi.h - * @version v0.7-r0 + * @version v0.8-r0 * */ #ifndef UR_DDI_H_INCLUDED @@ -567,6 +567,39 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetKernelProcAddrTable_t)( ur_api_version_t, ur_kernel_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urKernelSuggestMaxCooperativeGroupCountExp +typedef ur_result_t(UR_APICALL *ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t)( + ur_kernel_handle_t, + uint32_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of KernelExp functions pointers +typedef struct ur_kernel_exp_dditable_t { + ur_pfnKernelSuggestMaxCooperativeGroupCountExp_t pfnSuggestMaxCooperativeGroupCountExp; +} ur_kernel_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetKernelExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetKernelExpProcAddrTable_t)( + ur_api_version_t, + ur_kernel_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urSamplerCreate typedef ur_result_t(UR_APICALL *ur_pfnSamplerCreate_t)( @@ -803,6 
+836,70 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetPhysicalMemProcAddrTable_t)( ur_api_version_t, ur_physical_mem_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterGet +typedef ur_result_t(UR_APICALL *ur_pfnAdapterGet_t)( + uint32_t, + ur_adapter_handle_t *, + uint32_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterRelease +typedef ur_result_t(UR_APICALL *ur_pfnAdapterRelease_t)( + ur_adapter_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterRetain +typedef ur_result_t(UR_APICALL *ur_pfnAdapterRetain_t)( + ur_adapter_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterGetLastError +typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetLastError_t)( + ur_adapter_handle_t, + const char **, + int32_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urAdapterGetInfo +typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetInfo_t)( + ur_adapter_handle_t, + ur_adapter_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of Global functions pointers +typedef struct ur_global_dditable_t { + ur_pfnAdapterGet_t pfnAdapterGet; + ur_pfnAdapterRelease_t pfnAdapterRelease; + ur_pfnAdapterRetain_t pfnAdapterRetain; + ur_pfnAdapterGetLastError_t pfnAdapterGetLastError; + ur_pfnAdapterGetInfo_t pfnAdapterGetInfo; +} ur_global_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Global table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - 
::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetGlobalProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_global_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetGlobalProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetGlobalProcAddrTable_t)( + ur_api_version_t, + ur_global_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEnqueueKernelLaunch typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunch_t)( @@ -1182,6 +1279,46 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueProcAddrTable_t)( ur_api_version_t, ur_enqueue_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)( + ur_queue_handle_t, + ur_kernel_handle_t, + uint32_t, + const size_t *, + const size_t *, + const size_t *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of EnqueueExp functions pointers +typedef struct ur_enqueue_exp_dditable_t { + ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; +} ur_enqueue_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT 
ur_result_t UR_APICALL +urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetEnqueueExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueExpProcAddrTable_t)( + ur_api_version_t, + ur_enqueue_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urQueueGetInfo typedef ur_result_t(UR_APICALL *ur_pfnQueueGetInfo_t)( @@ -1663,19 +1800,31 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMemcpyUSMExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemcpyUSMExp_t)( +/// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMMemcpyExp_t)( + ur_exp_command_buffer_handle_t, + void *, + const void *, + size_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendUSMFillExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMFillExp_t)( ur_exp_command_buffer_handle_t, void *, const void *, size_t, + size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferCopyExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyExp_t)( +/// @brief Function-pointer for 
urCommandBufferAppendMemBufferCopyExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferCopyExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, @@ -1687,8 +1836,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferWriteExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferWriteExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, @@ -1699,8 +1848,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferReadExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferReadExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferReadExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, @@ -1711,8 +1860,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadExp_t)( ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferCopyRectExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferCopyRectExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferCopyRectExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferCopyRectExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, @@ -1728,8 +1877,8 @@ typedef ur_result_t(UR_APICALL 
*ur_pfnCommandBufferAppendMembufferCopyRectExp_t) ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferWriteRectExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteRectExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferWriteRectExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, @@ -1745,8 +1894,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferWriteRectExp_t ur_exp_command_buffer_sync_point_t *); /////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urCommandBufferAppendMembufferReadRectExp -typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadRectExp_t)( +/// @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferReadRectExp_t)( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, @@ -1761,6 +1910,41 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMembufferReadRectExp_t) const ur_exp_command_buffer_sync_point_t *, ur_exp_command_buffer_sync_point_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendMemBufferFillExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferFillExp_t)( + ur_exp_command_buffer_handle_t, + ur_mem_handle_t, + const void *, + size_t, + size_t, + size_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp +typedef ur_result_t(UR_APICALL 
*ur_pfnCommandBufferAppendUSMPrefetchExp_t)( + ur_exp_command_buffer_handle_t, + const void *, + size_t, + ur_usm_migration_flags_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendUSMAdviseExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMAdviseExp_t)( + ur_exp_command_buffer_handle_t, + const void *, + size_t, + ur_usm_advice_flags_t, + uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferEnqueueExp typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( @@ -1778,13 +1962,17 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferReleaseExp_t pfnReleaseExp; ur_pfnCommandBufferFinalizeExp_t pfnFinalizeExp; ur_pfnCommandBufferAppendKernelLaunchExp_t pfnAppendKernelLaunchExp; - ur_pfnCommandBufferAppendMemcpyUSMExp_t pfnAppendMemcpyUSMExp; - ur_pfnCommandBufferAppendMembufferCopyExp_t pfnAppendMembufferCopyExp; - ur_pfnCommandBufferAppendMembufferWriteExp_t pfnAppendMembufferWriteExp; - ur_pfnCommandBufferAppendMembufferReadExp_t pfnAppendMembufferReadExp; - ur_pfnCommandBufferAppendMembufferCopyRectExp_t pfnAppendMembufferCopyRectExp; - ur_pfnCommandBufferAppendMembufferWriteRectExp_t pfnAppendMembufferWriteRectExp; - ur_pfnCommandBufferAppendMembufferReadRectExp_t pfnAppendMembufferReadRectExp; + ur_pfnCommandBufferAppendUSMMemcpyExp_t pfnAppendUSMMemcpyExp; + ur_pfnCommandBufferAppendUSMFillExp_t pfnAppendUSMFillExp; + ur_pfnCommandBufferAppendMemBufferCopyExp_t pfnAppendMemBufferCopyExp; + ur_pfnCommandBufferAppendMemBufferWriteExp_t pfnAppendMemBufferWriteExp; + ur_pfnCommandBufferAppendMemBufferReadExp_t pfnAppendMemBufferReadExp; + 
ur_pfnCommandBufferAppendMemBufferCopyRectExp_t pfnAppendMemBufferCopyRectExp; + ur_pfnCommandBufferAppendMemBufferWriteRectExp_t pfnAppendMemBufferWriteRectExp; + ur_pfnCommandBufferAppendMemBufferReadRectExp_t pfnAppendMemBufferReadRectExp; + ur_pfnCommandBufferAppendMemBufferFillExp_t pfnAppendMemBufferFillExp; + ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; + ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; } ur_command_buffer_exp_dditable_t; @@ -1860,83 +2048,6 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetUsmP2PExpProcAddrTable_t)( ur_api_version_t, ur_usm_p2p_exp_dditable_t *); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urInit -typedef ur_result_t(UR_APICALL *ur_pfnInit_t)( - ur_device_init_flags_t, - ur_loader_config_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urTearDown -typedef ur_result_t(UR_APICALL *ur_pfnTearDown_t)( - void *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterGet -typedef ur_result_t(UR_APICALL *ur_pfnAdapterGet_t)( - uint32_t, - ur_adapter_handle_t *, - uint32_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterRelease -typedef ur_result_t(UR_APICALL *ur_pfnAdapterRelease_t)( - ur_adapter_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterRetain -typedef ur_result_t(UR_APICALL *ur_pfnAdapterRetain_t)( - ur_adapter_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterGetLastError -typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetLastError_t)( - ur_adapter_handle_t, - const char **, - 
int32_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urAdapterGetInfo -typedef ur_result_t(UR_APICALL *ur_pfnAdapterGetInfo_t)( - ur_adapter_handle_t, - ur_adapter_info_t, - size_t, - void *, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of Global functions pointers -typedef struct ur_global_dditable_t { - ur_pfnInit_t pfnInit; - ur_pfnTearDown_t pfnTearDown; - ur_pfnAdapterGet_t pfnAdapterGet; - ur_pfnAdapterRelease_t pfnAdapterRelease; - ur_pfnAdapterRetain_t pfnAdapterRetain; - ur_pfnAdapterGetLastError_t pfnAdapterGetLastError; - ur_pfnAdapterGetInfo_t pfnAdapterGetInfo; -} ur_global_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's Global table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL -urGetGlobalProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_global_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers -); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetGlobalProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetGlobalProcAddrTable_t)( - ur_api_version_t, - ur_global_dditable_t *); - /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urVirtualMemGranularityGetInfo typedef ur_result_t(UR_APICALL *ur_pfnVirtualMemGranularityGetInfo_t)( @@ -2140,17 +2251,19 @@ typedef struct ur_dditable_t { ur_event_dditable_t Event; ur_program_dditable_t Program; ur_kernel_dditable_t Kernel; + ur_kernel_exp_dditable_t KernelExp; 
ur_sampler_dditable_t Sampler; ur_mem_dditable_t Mem; ur_physical_mem_dditable_t PhysicalMem; + ur_global_dditable_t Global; ur_enqueue_dditable_t Enqueue; + ur_enqueue_exp_dditable_t EnqueueExp; ur_queue_dditable_t Queue; ur_bindless_images_exp_dditable_t BindlessImagesExp; ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; ur_command_buffer_exp_dditable_t CommandBufferExp; ur_usm_p2p_exp_dditable_t UsmP2PExp; - ur_global_dditable_t Global; ur_virtual_mem_dditable_t VirtualMem; ur_device_dditable_t Device; } ur_dditable_t; diff --git a/scripts/Doxyfile b/scripts/Doxyfile index c038d5276d..0134f27418 100644 --- a/scripts/Doxyfile +++ b/scripts/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "Intel One API Unified Runtime API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = v0.7 +PROJECT_NUMBER = v0.8 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/scripts/ci.py b/scripts/ci.py deleted file mode 100644 index 05a9141248..0000000000 --- a/scripts/ci.py +++ /dev/null @@ -1,80 +0,0 @@ -import os -import sys -import argparse -import re -import fileinput -from distutils import dir_util -import util - -script_dir = os.path.dirname(os.path.abspath(__file__)) -root_dir = os.path.dirname(script_dir) - - -""" -Entry-point: - publishes HTML for GitLab pages -""" -def publish_gitlab_html(): - src_html_dir = os.path.join(root_dir, "docs", "html") - src_img_dir = os.path.join(root_dir, "images") - tmp_dir = os.path.join(root_dir, ".public") - tmp_img_dir = os.path.join(root_dir, ".public/images") - publishing_dir = os.path.join(root_dir, "public") - - # Remove dest dirs - if os.path.exists(tmp_dir): - print("Deleting temp dir: %s" % tmp_dir) - util.removePath(tmp_dir) - if os.path.exists(publishing_dir): - print("Deleting publishing dir: %s" % publishing_dir) - util.removePath(publishing_dir) 
- - # Copy over generated content to new folder - print("Copying html files from '%s' to '%s'" % (src_html_dir, tmp_dir)) - dir_util.copy_tree(src_html_dir, tmp_dir) - - # Fixes html files by converting paths relative to root html folder instead of repo - print("Fixing paths in html files in '%s' to be relative to root..." % (tmp_dir)) - regex_pattern = re.compile(r'\.\.[\/|\\]images') - files = util.findFiles(tmp_dir, "*.html") - print("Found %s files" % (len(files))) - with fileinput.FileInput(files=files, inplace=True) as f: - for line in f: - print(re.sub(regex_pattern, './images', line), end='') - - # Publish new folder to GitLab Pages folder (/public) - print("Publishing to GitLab pages by renaming '%s' to '%s'" % (tmp_dir, publishing_dir)) - os.rename(tmp_dir, publishing_dir) - - -""" -Entry-point: - main() -""" -def main(args=sys.argv[1:]): - # Define args - parser = argparse.ArgumentParser() - parser.add_argument( - "--publish-html", - help="Publish html", - action="store_true") - - # Parse args - options = parser.parse_args(args) - - # Publish GitLab html - if options.publish_html: - try: - publish_gitlab_html() - except Exception as e: - print(e) - print("Failed") - return 1 - - print("Done") - return 0 - - -if __name__ == '__main__': - sys.exit(main()) -# END OF FILE diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index f40307e34e..b9d4130d5a 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -13,9 +13,19 @@ accepted into the project. .. important:: - Before making a contribution you *should* determine if the change should be - made directly to the core specification or introduced as an experimental - feature. The criteria we use to make this distinction are as follows: + Any contributions that fall into the following criteria *must* follow the + `Adapter Change Process`_: + + * Changing the API/ABI of the specification and/or loader. + + * Changing the implementation of an adapter.
+ + * Changing the implementation of shared/common code used by an adapter. + + Before making a contribution to the specification you *should* determine if + the change should be made directly to the core specification or introduced + as an experimental feature. The criteria we use to make this distinction + are as follows: * The feature exists to enable an experimental feature in a parallel language runtime being built on top of Unified Runtime. @@ -39,6 +49,114 @@ accepted into the project. Runtime team via the `GitHub issue tracker `_. +Adapter Change Process +====================== + +1. Create a pull request containing the adapter changes in the + `oneapi-src/unified-runtime`_ project targeting the `adapters + `_ branch. + +2. Create a draft pull request in the `intel/llvm`_ project to take advantage + of the pre-merge testing. Add any required implementation changes in + addition to changing: + + * `UNIFIED_RUNTIME_REPO`_ to point at your fork of Unified Runtime. + + * `UNIFIED_RUNTIME_TAG`_ to point at your development branch name used to + create the Unified Runtime pull request in step 1. + +3. Add a comment in the *oneapi-src/unified-runtime* pull request linking to + the *intel/llvm* pull request created in step 2. + +4. Code reviews for the adapter changes are carried out in the + *oneapi-src/unified-runtime* pull request. + +5. Any new commits to the *oneapi-src/unified-runtime* pull request *must* be + accompanied by a corresponding update in the *intel/llvm* pull request as + indicated in step 2, so the testing is always up-to-date. + +6. The Unified Runtime maintainers *must* ensure that step 5 has been carried + out and that all pre-merge testing has passed before accepting the + *oneapi-src/unified-runtime* pull request. + +7. Once the *oneapi-src/unified-runtime* pull request is accepted: + + * Reverse the change to `UNIFIED_RUNTIME_REPO`_ made in step 2. 
+ * Update the `UNIFIED_RUNTIME_TAG`_ to point at the + *oneapi-src/unified-runtime* commit/tag containing the merged adapter + changes. + * Update the pull request description, linking to any other *intel/llvm* + pull requests whose changes have been merged into + *oneapi-src/unified-runtime* but have not yet been merged into + *intel/llvm*. + * Mark the *intel/llvm* pull request as ready for review and follow their + review process. + +.. _oneapi-src/unified-runtime: + https://github.com/oneapi-src/unified-runtime +.. _intel/llvm: + https://github.com/intel/llvm +.. _UNIFIED_RUNTIME_REPO: + https://github.com/intel/llvm/blob/sycl/sycl/plugins/unified_runtime/CMakeLists.txt#L7 +.. _UNIFIED_RUNTIME_TAG: + https://github.com/intel/llvm/blob/sycl/sycl/plugins/unified_runtime/CMakeLists.txt#L8 + +Build Environment +================= + +To be able to generate the source from the YAML files, the build environment +must be configured correctly and all dependencies must be installed. The +instructions for a basic setup are available in the `README +`_. + +The following additional dependencies are required to support the ``generate`` +target: + +* Doxygen (>= 1.8) + +* The Python script requirements listed in `thirdparty/requirements.txt`_ + +Doxygen can be installed via your system's package manager, e.g. on Ubuntu +``sudo apt install doxygen``, or by downloading it from the Doxygen website. It +must be available on the current ``PATH`` when the script is run. + +One way to install the requirements for the script is using a Python virtual +environment. This can be set up by running the following commands from the +project root: + +.. code-block:: console + + $ python3 -m venv .local + $ source .local/bin/activate + $ pip install -r third_party/requirements.txt + +The virtual environment can be subsequently reactivated before any builds +without needing to reinstall the requirements: + +..
code-block:: console + + $ source .local/bin/activate + +Alternatively, a Docker container can be used instead of a virtual environment. +Instructions on building and using a Docker image can be found in +`.github/docker`_ + +You *must* also enable the ``UR_FORMAT_CPP_STYLE`` CMake option to allow +formatting of the generated code, or the ``generate`` target will not be +available. + +.. code-block:: console + + $ cmake build/ -DUR_FORMAT_CPP_STYLE=ON + +You can then follow the instructions below to use the ``generate`` target to +regenerate the source. + +.. _thirdparty/requirements.txt: + https://github.com/oneapi-src/unified-runtime/blob/main/third_party/requirements.txt +.. _.github/docker: + https://github.com/oneapi-src/unified-runtime/blob/main/.github/docker + Generating Source ================= @@ -46,10 +164,9 @@ The specification and many other components in the Unified Runtime repository are generated from a set of YAML_ files which are used as inputs to a Mako_ based templating system. The YAML file syntax is defined in `YAML syntax`_. To generate the outputs of the Mako templates a build directory must be -configured, instructions are available in the `README -`_ file. -Upon successfully configuring a build directory, generate the outputs with the -following command (or suitable build system equivalent): +configured as detailed above. Upon successfully configuring a build directory, +generate the outputs with the following command (or suitable build system +equivalent): .. code-block:: console @@ -137,8 +254,8 @@ defined within them, with the following exceptions: enumerations, and structure type enumerations. * `scripts/core/enqueue.yml`_ defines commands which can be enqueued on a queue object. -* `scripts/core/runtime.yml`_ defines global symbols pertaining to - initialization and tear down of the entire runtime. +* `scripts/core/loader.yml`_ defines global symbols pertaining to + initialization and tear down of the loader. 
* `scripts/core/registry.yml`_ contains an enumeration of all entry-points, past and present, for use in the XPTI tracing framework. It is automatically updated so shouldn't require manual editing. @@ -148,8 +265,8 @@ defined within them, with the following exceptions: https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/common.yml .. _scripts/core/enqueue.yml: https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/enqueue.yml -.. _scripts/core/runtime.yml: - https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/runtime.yml +.. _scripts/core/loader.yml: + https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/loader.yml .. _scripts/core/registry.yml: https://github.com/oneapi-src/unified-runtime/blob/main/scripts/core/registry.yml diff --git a/scripts/core/CUDA.rst b/scripts/core/CUDA.rst new file mode 100644 index 0000000000..52b68c010f --- /dev/null +++ b/scripts/core/CUDA.rst @@ -0,0 +1,159 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +========================== +CUDA UR Reference Document +========================== + +This document gives general guidelines of how to use UR to load and build +programs, and execute kernels on a CUDA device. + +Device code +=========== + +A CUDA device image may be made of PTX and/or SASS, two different kinds of +device code for NVIDIA GPUs. + +CUDA device images can be generated by a CUDA-capable compiler toolchain. Most +CUDA compiler toolchains are capable of generating PTX, SASS and/or bundles of +PTX and SASS. + +PTX +--- + +PTX is a high level NVIDIA ISA which can be JIT compiled at runtime by the CUDA +driver. In UR, this JIT compilation happens at ${x}ProgramBuild, where PTX is +assembled into device specific SASS which then can run on device. + +PTX is forward compatible, so PTX generated for ``.target sm_52`` will be JIT +compiled without issue for devices with a greater compute capability than +``sm_52``. 
Whereas PTX generated for ``sm_80`` cannot be JIT compiled for an +``sm_60`` device. + +An advantage of using PTX over SASS is that one code can run on multiple +devices. However, PTX generated for an older arch may not give access to newer +hardware instructions, such as new atomic operations, or tensor core +instructions. + +JIT compilation has some overhead at ${x}ProgramBuild, especially if the program +that is being loaded contains multiple kernels. The ``ptxjitcompiler`` keeps a +JIT cache, however, so this overhead is only paid the first time that a program +is built. JIT caching may be turned off by setting the environment variable +``CUDA_CACHE_DISABLE=1``. + +SASS +---- + +SASS is a device specific binary which may be produced by ``ptxas`` or some +other tool. SASS is specific to an individual arch and is not portable across +arches. + +A SASS file may be stored as a ``.cubin`` file by NVIDIA tools. + +UR Programs +=========== + +A ${x}_program_handle_t has a one to one mapping with the CUDA driver object +`CUModule `_. + +In UR for CUDA, a ${x}_program_handle_t can be created using +${x}ProgramCreateWithBinary with: + +* A single PTX module, stored as a null terminated ``uint8_t`` buffer. +* A single SASS module, stored as an opaque ``uint8_t`` buffer. +* A mixed PTX/SASS module, where the SASS module is the assembled PTX module. + +A ${x}_program_handle_t is valid only for a single architecture. If a CUDA +compatible binary contains device code for multiple NVIDIA architectures, it is +the user's responsibility to split these separate device images so that +${x}ProgramCreateWithBinary is only called with a device binary for a single +device arch. + +If a program is large and contains many kernels, loading and/or JIT compiling +the program may have a high overhead. This can be mitigated by splitting a +program into multiple smaller programs (corresponding to PTX/SASS files). 
In +this way, an application will only pay the overhead of loading/compiling +kernels that it will likely use. + +Using PTX Modules in UR +----------------------- + +A PTX module will be loaded and JIT compiled for the necessary architecture at +${x}ProgramBuild. If the PTX module has been generated for a compute capability +greater than the compute capability of the device, then ${x}ProgramBuild will +fail with the error ``CUDA_ERROR_NO_BINARY_FOR_GPU``. + +A PTX module passed to ${x}ProgramBuild must contain only one PTX file. +Separate PTX files are to be handled separately. + +Arguments may be passed to the ``ptxjitcompiler`` via ${x}ProgramBuild. +Currently ``maxrregcount`` is the only supported argument. + +.. parsed-literal:: + + ${x}ProgramBuild(ctx, program, "maxrregcount=128"); + + +Using SASS Modules in UR +------------------------ + +A SASS module will be loaded and checked for compatibility at ${x}ProgramBuild. +If the SASS module is incompatible with the device arch then ${x}ProgramBuild +will fail with the error ``CUDA_ERROR_NO_BINARY_FOR_GPU``. + +Using Mixed PTX/SASS Bundles in UR +---------------------------------- + +Mixed PTX/SASS modules can be used to make a program with +${x}ProgramCreateWithBinary. At ${x}ProgramBuild the CUDA driver will check +whether the bundled SASS is compatible with the active device. If the SASS is +compatible then the ${x}_program_handle_t will be built from the SASS, and if +not then the PTX will be used as a fallback and JIT compiled by the CUDA +driver. If both PTX and SASS are incompatible with the active device then +${x}ProgramBuild will fail with the error ``CUDA_ERROR_NO_BINARY_FOR_GPU``. + +UR Kernels +========== + +Once ${x}ProgramCreateWithBinary and ${x}ProgramBuild have succeeded, kernels +can be fetched from programs with ${x}KernelCreate. ${x}KernelCreate must be +called with the exact name of the kernel in the PTX/SASS module. 
This name will +depend on the mangling used when compiling the kernel, so it is recommended to +examine the symbols in the PTX/SASS module before trying to extract kernels in +UR. + +.. code-block:: console + + $ cuobjdump --dump-elf-symbols hello.cubin | grep mykernel + _Z13mykernelv + +At present it is not possible to query the names of the kernels in a UR program +for CUDA, so it is necessary to know the (mangled or otherwise) names of kernels +in advance or by some other means. + +UR kernels can be dispatched with ${x}EnqueueKernelLaunch. The argument +``pGlobalWorkOffset`` can only be used if the kernels have been instrumented to +take the extra global offset argument. Use of the global offset is not +recommended for non SYCL compiler toolchains. This parameter can be ignored if +the user does not wish to use the global offset. + +Other Notes +=========== + +- The environment variable ``SYCL_PI_CUDA_MAX_LOCAL_MEM_SIZE`` can be set in + order to exceed the default max dynamic local memory size. More information + can be found + `here `_. +- The size of primitive datatypes may differ in host and device code. For + instance, NVCC treats ``long double`` as 8 bytes for device and 16 bytes for + host. +- In kernel ``printf`` for NVPTX targets does not support the ``%z`` modifier. 
+ +Contributors +------------ + +* Hugh Delaney `hugh.delaney@codeplay.com `_ + diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index 071fe799fd..c794c199d9 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -68,6 +68,8 @@ Enums ${X}_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC ${X}_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR ${X}_STRUCTURE_TYPE_EXP_WIN32_HANDLE + ${X}_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES + ${X}_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES * ${x}_device_info_t * ${X}_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP @@ -127,6 +129,8 @@ Types * ${x}_exp_interop_semaphore_desc_t * ${x}_exp_file_descriptor_t * ${x}_exp_win32_handle_t +* ${x}_exp_layered_image_properties_t +* ${x}_exp_sampler_addr_modes_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -176,6 +180,10 @@ Changelog +----------+-------------------------------------------------------------+ | 6.0 | Fix semaphore import function parameter name. | +----------+-------------------------------------------------------------+ +| 7.0 | Add layered image properties struct. | ++----------+-------------------------------------------------------------+ +| 8.0 | Added structure for sampler addressing modes per dimension. | ++----------+-------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a169117022..a6a32a66a1 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -92,13 +92,17 @@ of event handles.
Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp -* ${x}CommandBufferAppendMemcpyUSMExp -* ${x}CommandBufferAppendMembufferCopyExp -* ${x}CommandBufferAppendMembufferCopyRectExp -* ${x}CommandBufferAppendMembufferReadExp -* ${x}CommandBufferAppendMembufferReadRectExp -* ${x}CommandBufferAppendMembufferWriteExp -* ${x}CommandBufferAppendMembufferWriteRectExp +* ${x}CommandBufferAppendUSMMemcpyExp +* ${x}CommandBufferAppendUSMFillExp +* ${x}CommandBufferAppendMemBufferCopyExp +* ${x}CommandBufferAppendMemBufferCopyRectExp +* ${x}CommandBufferAppendMemBufferReadExp +* ${x}CommandBufferAppendMemBufferReadRectExp +* ${x}CommandBufferAppendMemBufferWriteExp +* ${x}CommandBufferAppendMemBufferWriteRectExp +* ${x}CommandBufferAppendMemBufferFillExp +* ${x}CommandBufferAppendUSMPrefetchExp +* ${x}CommandBufferAppendUSMAdviseExp It is planned to eventually support any command type from the Core API which can actually be appended to the equiavalent adapter native constructs. @@ -118,7 +122,7 @@ were obtained from. 
// Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); // Append a kernel launch with syncPoint as a dependency, ignore returned @@ -167,13 +171,17 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP - * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP + * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP @@ -191,13 +199,17 @@ Functions * ${x}CommandBufferReleaseExp * ${x}CommandBufferFinalizeExp * ${x}CommandBufferAppendKernelLaunchExp -* ${x}CommandBufferAppendMemcpyUSMExp -* ${x}CommandBufferAppendMembufferCopyExp -* ${x}CommandBufferAppendMembufferCopyRectExp -* ${x}CommandBufferAppendMembufferReadExp -* ${x}CommandBufferAppendMembufferReadRectExp -* ${x}CommandBufferAppendMembufferWriteExp -* 
${x}CommandBufferAppendMembufferWriteRectExp +* ${x}CommandBufferAppendUSMMemcpyExp +* ${x}CommandBufferAppendUSMFillExp +* ${x}CommandBufferAppendMemBufferCopyExp +* ${x}CommandBufferAppendMemBufferCopyRectExp +* ${x}CommandBufferAppendMemBufferReadExp +* ${x}CommandBufferAppendMemBufferReadRectExp +* ${x}CommandBufferAppendMemBufferWriteExp +* ${x}CommandBufferAppendMemBufferWriteRectExp +* ${x}CommandBufferAppendMemBufferFillExp +* ${x}CommandBufferAppendUSMPrefetchExp +* ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp Changelog @@ -208,7 +220,12 @@ Changelog +===========+=======================================================+ | 1.0 | Initial Draft | +-----------+-------------------------------------------------------+ -| 1.1 | add function definitions for buffer read and write | +| 1.1 | Add function definitions for buffer read and write | ++-----------+-------------------------------------------------------+ +| 1.2 | Add function definitions for fill commands | ++-----------+-------------------------------------------------------+ +| 1.3 | Add function definitions for Prefetch and Advise | +| | commands | +-----------+-------------------------------------------------------+ Contributors diff --git a/scripts/core/EXP-COOPERATIVE-KERNELS.rst b/scripts/core/EXP-COOPERATIVE-KERNELS.rst new file mode 100644 index 0000000000..c6b64ef669 --- /dev/null +++ b/scripts/core/EXP-COOPERATIVE-KERNELS.rst @@ -0,0 +1,68 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-cooperative-kernels: + +================================================================================ +Cooperative Kernels +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. 
+ + +Motivation +-------------------------------------------------------------------------------- +Cooperative kernels are kernels that use cross-workgroup synchronization +features. All enqueued workgroups must run concurrently for cooperative kernels +to execute without hanging. This experimental feature provides an API for +querying the maximum number of workgroups and launching cooperative kernels. + +Any device can support cooperative kernels by restricting the maximum number of +workgroups to 1. Devices that support cross-workgroup synchronization can +specify a larger maximum for a given cooperative kernel. + +The functions defined here align with those specified in Level Zero. + +API +-------------------------------------------------------------------------------- + +Macros +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${X}_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueCooperativeKernelLaunchExp +* ${x}KernelSuggestMaxCooperativeGroupCountExp + +Changelog +-------------------------------------------------------------------------------- ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return the valid string +defined in ``${X}_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP`` +as one of the options from ${x}DeviceGetInfo when querying for +${X}_DEVICE_INFO_EXTENSIONS. Conversely, before using any of the +functionality defined in this experimental feature the user *must* use the +device query to determine if the adapter supports this feature. 
+ +Contributors +-------------------------------------------------------------------------------- +* Michael Aziz `michael.aziz@intel.com `_ diff --git a/scripts/core/HIP.rst b/scripts/core/HIP.rst new file mode 100644 index 0000000000..6e8304fe85 --- /dev/null +++ b/scripts/core/HIP.rst @@ -0,0 +1,95 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +============================= +AMD HIP UR Reference Document +============================= + +This document gives general guidelines of how to use UR to execute kernels on +an AMD HIP device. + +Device code +=========== + +Unlike the NVPTX platform, AMDGPU does not use a device IR that can be JIT +compiled at runtime. Therefore, all device binaries must be precompiled for a +particular arch. + +The naming of AMDGPU device code files may vary across different generations +of devices. ``.hsa`` or ``.hsaco`` are common extensions as of 2023. + +HIPCC can generate device code for a particular arch using the ``--genco`` flag + +.. code-block:: console + + $ hipcc --genco hello.cu --amdgpu-target=gfx906 -o hello.hsaco + +UR Programs +=========== + +A ${x}_program_handle_t has a one to one mapping with the HIP runtime object +`hipModule_t `__ + +In UR for HIP, a ${x}_program_handle_t can be created using +${x}ProgramCreateWithBinary with: + +* A single device code module + +A ${x}_program_handle_t is valid only for a single architecture. If a HIP +compatible binary contains device code for multiple AMDGPU architectures, it is +the user's responsibility to split these separate device images so that +${x}ProgramCreateWithBinary is only called with a device binary for a single +device arch. + +If the AMDGPU module is incompatible with the device arch then ${x}ProgramBuild +will fail with the error ``hipErrorNoBinaryForGpu``. + +If a program is large and contains many kernels, loading the program may have a +high overhead. This can be mitigated by splitting a program into multiple +smaller programs.
In this way, an application will only pay the overhead of +loading kernels that it will likely use. + +Kernels +======= + +Once ${x}ProgramCreateWithBinary and ${x}ProgramBuild have succeeded, kernels +can be fetched from programs with ${x}KernelCreate. ${x}KernelCreate must be +called with the exact name of the kernel in the AMDGPU device code module. This +name will depend on the mangling used when compiling the kernel, so it is +recommended to examine the symbols in the AMDGPU device code module before +trying to extract kernels in UR code. + +``llvm-objdump`` or ``readelf`` may not correctly view the symbols in an AMDGPU +device module. It may be necessary to call ``clang-offload-bundler`` first in +order to extract the ``ELF`` file that can be passed to ``readelf``. + +.. code-block:: console + + $ clang-offload-bundler --unbundle --input=hello.hsaco --output=hello.o \ + --targets=hipv4-amdgcn-amd-amdhsa--gfx906 --type=o + $ readelf hello.o -s | grep mykernel + _Z13mykernelv + +At present it is not possible to query the names of the kernels in a UR program +for HIP, so it is necessary to know the (mangled or otherwise) names of kernels +in advance or by some other means. + +UR kernels can be dispatched with ${x}EnqueueKernelLaunch. The argument +``pGlobalWorkOffset`` can only be used if the kernels have been instrumented to +take the extra global offset argument. Use of the global offset is not +recommended for non SYCL compiler toolchains. This parameter can be ignored if +the user does not wish to use the global offset. + +Other Notes +=========== + +- In kernel ``printf`` may not work for certain ROCm versions. 
+ +Contributors +------------ + +* Hugh Delaney `hugh.delaney@codeplay.com `_ + diff --git a/scripts/core/INTRO.rst b/scripts/core/INTRO.rst index 4c3a1a9d2d..d3a862ad87 100644 --- a/scripts/core/INTRO.rst +++ b/scripts/core/INTRO.rst @@ -296,9 +296,18 @@ Specific environment variables can be set to control the behavior of unified run This environment variable is ignored when :envvar:`UR_ADAPTERS_FORCE_LOAD` environment variable is used. +.. envvar:: UR_ADAPTERS_DEEP_BIND + + If set, the loader will use `RTLD_DEEPBIND` when opening adapter libraries. This might be useful if an adapter + requires a different version of a shared library compared to the rest of the application. + + .. note:: + + This environment variable is Linux-only. + .. envvar:: UR_ENABLE_LAYERS - Holds a comma-separated list of layers to enable in addition to any specified via ``urInit``. + Holds a comma-separated list of layers to enable in addition to any specified via ``urLoaderInit``. .. note:: diff --git a/scripts/core/runtime.yml b/scripts/core/adapter.yml similarity index 52% rename from scripts/core/runtime.yml rename to scripts/core/adapter.yml index c14f939cc2..a2331244e1 100644 --- a/scripts/core/runtime.yml +++ b/scripts/core/adapter.yml @@ -1,5 +1,5 @@ # -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2023 Intel Corporation # # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT @@ -9,176 +9,8 @@ # --- #-------------------------------------------------------------------------- type: header -desc: "Intel $OneApi Unified Runtime APIs for Runtime" -ordinal: "0" ---- #-------------------------------------------------------------------------- -type: enum -desc: "Supported device initialization flags" -class: $x -name: $x_device_init_flags_t -etors: - - name: GPU - desc: "initialize GPU device adapters." - - name: CPU - desc: "initialize CPU device adapters." - - name: FPGA - desc: "initialize FPGA device adapters."
- - name: MCA - desc: "initialize MCA device adapters." - - name: VPU - desc: "initialize VPU device adapters." ---- #-------------------------------------------------------------------------- -type: function -desc: "Create a loader config object." -class: $xLoaderConfig -loader_only: True -name: Create -decl: static -params: - - type: $x_loader_config_handle_t* - name: phLoaderConfig - desc: "[out] Pointer to handle of loader config object created." ---- #-------------------------------------------------------------------------- -type: function -desc: "Get a reference to the loader config object." -class: $xLoaderConfig -loader_only: True -name: Retain -decl: static -details: - - "Get a reference to the loader config handle. Increment its reference count" - - "The application may call this function from simultaneous threads." - - "The implementation of this function should be lock-free." -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] loader config handle to retain" ---- #-------------------------------------------------------------------------- -type: function -desc: "Release config handle." -class: $xLoaderConfig -loader_only: True -name: Release -decl: static -details: - - "Decrement reference count and destroy the config handle if reference count becomes zero." - - "The application may call this function from simultaneous threads." - - "The implementation of this function should be lock-free." -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] config handle to release" ---- #-------------------------------------------------------------------------- -type: enum -desc: "Supported loader info" -class: $xLoaderConfig -name: $x_loader_config_info_t -typed_etors: True -etors: - - name: AVAILABLE_LAYERS - desc: "[char[]] Null-terminated, semi-colon separated list of available layers." - - name: REFERENCE_COUNT - desc: "[uint32_t] Reference count of the loader config object." 
---- #-------------------------------------------------------------------------- -type: function -desc: "Retrieves various information about the loader." -class: $xLoaderConfig -loader_only: True -name: GetInfo -decl: static -details: - - "The application may call this function from simultaneous threads." - - "The implementation of this function should be lock-free." -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] handle of the loader config object" - - type: $x_loader_config_info_t - name: propName - desc: "[in] type of the info to retrieve" - - type: "size_t" - name: propSize - desc: | - [in] the number of bytes pointed to by pPropValue. - - type: "void*" - name: pPropValue - desc: | - [out][optional][typename(propName, propSize)] array of bytes holding the info. - If propSize is not equal to or greater than the real number of bytes needed to return the info - then the $X_RESULT_ERROR_INVALID_SIZE error is returned and pPropValue is not used. - - type: "size_t*" - name: pPropSizeRet - desc: | - [out][optional] pointer to the actual size in bytes of the queried propName. -returns: - - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: - - "If `propName` is not supported by the loader." - - $X_RESULT_ERROR_INVALID_SIZE: - - "`propSize == 0 && pPropValue != NULL`" - - "If `propSize` is less than the real number of bytes needed to return the info." - - $X_RESULT_ERROR_INVALID_NULL_POINTER: - - "`propSize != 0 && pPropValue == NULL`" - - "`pPropValue == NULL && pPropSizeRet == NULL`" - - $X_RESULT_ERROR_INVALID_DEVICE - - $X_RESULT_ERROR_OUT_OF_RESOURCES - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY ---- #-------------------------------------------------------------------------- -type: function -desc: "Enable a layer for the specified loader config." 
-class: $xLoaderConfig -loader_only: True -name: EnableLayer -decl: static -params: - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in] Handle to config object the layer will be enabled for." - - type: const char* - name: pLayerName - desc: "[in] Null terminated string containing the name of the layer to enable." -returns: - - $X_RESULT_ERROR_LAYER_NOT_PRESENT: - - "If layer specified with `pLayerName` can't be found by the loader." ---- #-------------------------------------------------------------------------- -type: function -desc: "Initialize the $OneApi adapter(s)" -class: $x -name: Init -decl: static -ordinal: "0" -details: - - "The application must call this function before calling any other function." - - "If this function is not called then all other functions will return $X_RESULT_ERROR_UNINITIALIZED." - - "Only one instance of each adapter will be initialized per process." - - "The application may call this function multiple times with different flags or environment variables enabled." - - "The application must call this function after forking new processes. Each forked process must call this function." - - "The application may call this function from simultaneous threads." - - "The implementation of this function must be thread-safe for scenarios where multiple libraries may initialize the adapter(s) simultaneously." -params: - - type: $x_device_init_flags_t - name: device_flags - desc: | - [in] device initialization flags. - must be 0 (default) or a combination of $x_device_init_flag_t. - init: "0" - - type: $x_loader_config_handle_t - name: hLoaderConfig - desc: "[in][optional] Handle of loader config handle." 
-returns: - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY ---- #-------------------------------------------------------------------------- -type: function -desc: "Tear down the $OneApi instance and release all its resources" -class: $x -name: TearDown -decl: static +desc: "Intel $OneApi Unified Runtime APIs for Adapter" ordinal: "1" -params: - - type: void* - name: pParams - desc: "[in] pointer to tear down parameters" -returns: - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY --- #-------------------------------------------------------------------------- type: function desc: "Retrieves all available adapters" @@ -218,7 +50,8 @@ name: AdapterRelease decl: static ordinal: "3" details: - - "When the reference count of the adapter reaches zero, the adapter may perform adapter-specififc resource teardown" + - "When the reference count of the adapter reaches zero, the adapter may perform adapter-specific resource teardown. Resources + must be left in a state where it is safe for the adapter to be subsequently reinitialized with $xAdapterGet" params: - type: "$x_adapter_handle_t" name: hAdapter diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 27f2100feb..3999fa70f2 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -131,7 +131,7 @@ params: name: NumEntries desc: | [in] the number of devices to be added to phDevices. - If phDevices in not NULL then NumEntries should be greater than zero, otherwise $X_RESULT_ERROR_INVALID_VALUE, + If phDevices is not NULL, then NumEntries should be greater than zero. Otherwise $X_RESULT_ERROR_INVALID_SIZE will be returned. - type: "$x_device_handle_t*" name: phDevices @@ -144,6 +144,10 @@ params: [out][optional] pointer to the number of devices. pNumDevices will be updated with the total number of devices available.
returns: + - $X_RESULT_ERROR_INVALID_SIZE: + - "`NumEntries == 0 && phDevices != NULL`" + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`NumEntries > 0 && phDevices == NULL`" - $X_RESULT_ERROR_INVALID_VALUE --- #-------------------------------------------------------------------------- type: enum diff --git a/scripts/core/enqueue.yml b/scripts/core/enqueue.yml index aef1d8023b..7da1c8f680 100644 --- a/scripts/core/enqueue.yml +++ b/scripts/core/enqueue.yml @@ -571,6 +571,11 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: + - "`patternSize == 0 || size == 0`" + - "`patternSize > size`" + - "`(patternSize & (patternSize - 1)) != 0`" + - "`size % patternSize != 0`" + - "`offset % patternSize != 0`" - "If `offset + size` results in an out-of-bounds access." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -629,6 +634,8 @@ returns: - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`region.width == 0 || region.height == 0 || region.depth == 0`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -686,6 +693,8 @@ returns: - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`region.width == 0 || region.height == 0 || region.depth == 0`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -735,6 +744,8 @@ returns: - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`region.width == 0 || region.height == 0 || region.depth == 0`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -1077,6 +1088,9 @@ desc: "Enqueue a command to prefetch USM memory" class: $xEnqueue name: USMPrefetch ordinal: "0" +details: + - "Prefetching may not be supported for all devices or allocation types. If memory prefetching + is not supported, the prefetch hint will be ignored." params: - type: $x_queue_handle_t name: hQueue @@ -1117,10 +1131,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Enqueue a command to set USM memory advice" +desc: "Enqueue a command to set USM memory advice" class: $xEnqueue name: USMAdvise ordinal: "0" +details: + - "Not all memory advice hints may be supported for all devices or allocation types. + If a memory advice hint is not supported, it will be ignored." params: - type: $x_queue_handle_t name: hQueue @@ -1441,7 +1458,7 @@ params: - type: $x_event_handle_t* name: phEvent desc: | - [out] returns an event object that identifies this write command + [out][optional] returns an event object that identifies this write command and can be used to query or queue a wait for this command to complete. returns: - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: diff --git a/scripts/core/event.yml b/scripts/core/event.yml index c20a8e04c9..ba0ae968c8 100644 --- a/scripts/core/event.yml +++ b/scripts/core/event.yml @@ -185,6 +185,8 @@ params: name: pPropSizeRet desc: "[out][optional] pointer to the actual size in bytes returned in propValue" returns: + - $X_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: + - "If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`." 
- $X_RESULT_ERROR_INVALID_VALUE: - "`pPropValue && propSize == 0`" - $X_RESULT_ERROR_INVALID_EVENT @@ -317,13 +319,13 @@ desc: "Event states for all events." class: $xEvent name: $x_execution_info_t etors: - - name: EXECUTION_INFO_COMPLETE + - name: COMPLETE desc: "Indicates that the event has completed." - - name: EXECUTION_INFO_RUNNING + - name: RUNNING desc: "Indicates that the device has started processing this event." - - name: EXECUTION_INFO_SUBMITTED + - name: SUBMITTED desc: "Indicates that the event has been submitted by the host to the device." - - name: EXECUTION_INFO_QUEUED + - name: QUEUED desc: "Indicates that the event has been queued, this is the initial state of events." --- #-------------------------------------------------------------------------- type: fptr_typedef @@ -349,6 +351,7 @@ decl: static ordinal: "0" details: - "The registered callback function will be called when the execution status of command associated with event changes to an execution status equal to or past the status specified by command_exec_status." + - "`execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the initial state of all events." - "The application may call this function from simultaneous threads for the same context." - "The implementation of this function should be thread-safe." params: @@ -364,3 +367,6 @@ params: - type: void* name: pUserData desc: "[in][out][optional] pointer to data to be passed to callback." 
+returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "`execStatus == UR_EXECUTION_INFO_QUEUED`" diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index 846e97ac61..b5f87a6633 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -107,6 +107,12 @@ etors: - name: EXP_WIN32_HANDLE desc: $x_exp_win32_handle_t value: "0x2004" + - name: EXP_LAYERED_IMAGE_PROPERTIES + desc: $x_exp_layered_image_properties_t + value: "0x2005" + - name: EXP_SAMPLER_ADDR_MODES + desc: $x_exp_sampler_addr_modes_t + value: "0x2006" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -173,6 +179,19 @@ members: desc: "[in] mipmap filter mode used for filtering between mipmap levels" --- #-------------------------------------------------------------------------- type: struct +desc: "Describes unique sampler addressing mode per dimension" +details: + - Specify these properties in $xSamplerCreate via $x_sampler_desc_t as part + of a `pNext` chain. +class: $xBindlessImages +name: $x_exp_sampler_addr_modes_t +base: $x_base_properties_t +members: + - type: $x_sampler_addressing_mode_t[3] + name: addrModes + desc: "[in] Specify the address mode of the sampler per dimension" +--- #-------------------------------------------------------------------------- +type: struct desc: "Describes an interop memory resource descriptor" class: $xBindlessImages name: $x_exp_interop_mem_desc_t @@ -186,6 +205,20 @@ name: $x_exp_interop_semaphore_desc_t base: $x_base_desc_t members: [] --- #-------------------------------------------------------------------------- +type: struct +desc: "Describes layered image properties" +details: + - Specify these properties in $xBindlessImagesUnsampledImageCreateExp or + $xBindlessImagesSampledImageCreateExp via $x_image_desc_t as part of a + `pNext` chain. 
+class: $xBindlessImages +name: $x_exp_layered_image_properties_t +base: $x_base_properties_t +members: + - type: uint32_t + name: numLayers + desc: "[in] number of layers the image should have" +--- #-------------------------------------------------------------------------- type: function desc: "USM allocate pitched memory" class: $xUSM diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index e8c5417831..7d1b686aab 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -180,7 +180,7 @@ returns: type: function desc: "Append a USM memcpy command to a command-buffer object" class: $xCommandBuffer -name: AppendMemcpyUSMExp +name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -217,9 +217,54 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function +desc: "Append a USM fill command to a command-buffer object" +class: $xCommandBuffer +name: AppendUSMFillExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: "void*" + name: pMemory + desc: "[in] pointer to USM allocated memory to fill." + - type: "const void*" + name: pPattern + desc: "[in] pointer to the fill pattern." + - type: "size_t" + name: patternSize + desc: "[in] size in bytes of the pattern." + - type: "size_t" + name: size + desc: "[in] fill size in bytes, must be a multiple of patternSize." + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." 
+returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_SIZE: + - "`patternSize == 0 || size == 0`" + - "`patternSize > size`" + - "`(patternSize & (patternSize - 1)) != 0`" + - "`size % patternSize != 0`" + - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function desc: "Append a memory copy command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferCopyExp +name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -261,7 +306,7 @@ returns: type: function desc: "Append a memory write command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferWriteExp +name: AppendMemBufferWriteExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -300,7 +345,7 @@ returns: type: function desc: "Append a memory read command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferReadExp +name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -339,7 +384,7 @@ returns: type: function desc: "Append a rectangular memory copy command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferCopyRectExp +name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -393,7 +438,7 @@ returns: type: function desc: "Append a rectangular memory write command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferWriteRectExp +name: AppendMemBufferWriteRectExp
params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -447,7 +492,7 @@ returns: type: function desc: "Append a rectangular memory read command to a command-buffer object" class: $xCommandBuffer -name: AppendMembufferReadRectExp +name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -499,6 +544,134 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function +desc: "Append a memory fill command to a command-buffer object" +class: $xCommandBuffer +name: AppendMemBufferFillExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: $x_mem_handle_t + name: hBuffer + desc: "[in] handle of the buffer object." + - type: "const void*" + name: pPattern + desc: "[in] pointer to the fill pattern." + - type: "size_t" + name: patternSize + desc: "[in] size in bytes of the pattern." + - type: "size_t" + name: offset + desc: "[in] offset into the buffer." + - type: "size_t" + name: size + desc: "[in] fill size in bytes, must be a multiple of patternSize." + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: $x_exp_command_buffer_sync_point_t* + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." 
+returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "If `offset + size` results in an out-of-bounds access." + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Append a USM Prefetch command to a command-buffer object" +class: $xCommandBuffer +name: AppendUSMPrefetchExp +details: + - "Prefetching may not be supported for all devices or allocation types. If memory prefetching + is not supported, the prefetch hint will be ignored." +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: "const void*" + name: pMemory + desc: "[in] pointer to USM allocated memory to prefetch." + - type: "size_t" + name: size + desc: "[in] size in bytes to be fetched." + - type: $x_usm_migration_flags_t + name: flags + desc: "[in] USM prefetch flags" + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." 
+returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`size == 0`" + - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Append a USM Advise command to a command-buffer object" +class: $xCommandBuffer +name: AppendUSMAdviseExp +details: + - "Not all memory advice hints may be supported for all devices or allocation types. + If a memory advice hint is not supported, it will be ignored." +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: "const void*" + name: pMemory + desc: "[in] pointer to the USM memory object." + - type: "size_t" + name: size + desc: "[in] size in bytes to be advised." + - type: $x_usm_advice_flags_t + name: advice + desc: "[in] USM memory advice" + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on." + - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." 
+returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`size == 0`" + - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function desc: "Submit a command-buffer for execution on a queue." class: $xCommandBuffer name: EnqueueExp diff --git a/scripts/core/exp-cooperative-kernels.yml b/scripts/core/exp-cooperative-kernels.yml new file mode 100644 index 0000000000..fb2c6b3a4a --- /dev/null +++ b/scripts/core/exp-cooperative-kernels.yml @@ -0,0 +1,85 @@ +# +# Copyright (C) 2023 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental APIs for Cooperative Kernels" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: macro +desc: | + The extension string which defines support for cooperative-kernels + which is returned when querying device extensions. 
+name: $X_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP +value: "\"$x_exp_cooperative_kernels\"" +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue a command to execute a cooperative kernel" +class: $xEnqueue +name: CooperativeKernelLaunchExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_kernel_handle_t + name: hKernel + desc: "[in] handle of the kernel object" + - type: uint32_t + name: workDim + desc: "[in] number of dimensions, from 1 to 3, to specify the global and work-group work-items" + - type: "const size_t*" + name: pGlobalWorkOffset + desc: "[in] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item" + - type: "const size_t*" + name: pGlobalWorkSize + desc: "[in] pointer to an array of workDim unsigned values that specify the number of global work-items in workDim that will execute the kernel function" + - type: "const size_t*" + name: pLocalWorkSize + desc: | + [in][optional] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function. + If nullptr, the runtime implementation will choose the work-group size. + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. + - type: $x_event_handle_t* + name: phEvent + desc: | + [out][optional] return an event object that identifies this particular kernel execution instance. 
+returns: + - $X_RESULT_ERROR_INVALID_QUEUE + - $X_RESULT_ERROR_INVALID_KERNEL + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Query the maximum number of work groups for a cooperative kernel" +class: $xKernel +name: SuggestMaxCooperativeGroupCountExp +params: + - type: $x_kernel_handle_t + name: hKernel + desc: "[in] handle of the kernel object" + - type: "uint32_t*" + name: "pGroupCountRet" + desc: "[out] pointer to maximum number of groups" +returns: + - $X_RESULT_ERROR_INVALID_KERNEL diff --git a/scripts/core/loader.yml b/scripts/core/loader.yml new file mode 100644 index 0000000000..b5ad1eadec --- /dev/null +++ b/scripts/core/loader.yml @@ -0,0 +1,227 @@ +# +# Copyright (C) 2022-2023 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime APIs for Loader" +ordinal: "0" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Supported device initialization flags" +class: $x +name: $x_device_init_flags_t +etors: + - name: GPU + desc: "initialize GPU device adapters." + - name: CPU + desc: "initialize CPU device adapters." + - name: FPGA + desc: "initialize FPGA device adapters." 
+ - name: MCA + desc: "initialize MCA device adapters." + - name: VPU + desc: "initialize VPU device adapters." +--- #-------------------------------------------------------------------------- +type: function +desc: "Create a loader config object." +class: $xLoaderConfig +loader_only: True +name: Create +decl: static +params: + - type: $x_loader_config_handle_t* + name: phLoaderConfig + desc: "[out] Pointer to handle of loader config object created." +--- #-------------------------------------------------------------------------- +type: function +desc: "Get a reference to the loader config object." +class: $xLoaderConfig +loader_only: True +name: Retain +decl: static +details: + - "Get a reference to the loader config handle. Increment its reference count" + - "The application may call this function from simultaneous threads." + - "The implementation of this function should be lock-free." +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] loader config handle to retain" +--- #-------------------------------------------------------------------------- +type: function +desc: "Release config handle." +class: $xLoaderConfig +loader_only: True +name: Release +decl: static +details: + - "Decrement reference count and destroy the config handle if reference count becomes zero." + - "The application may call this function from simultaneous threads." + - "The implementation of this function should be lock-free." +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] config handle to release" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Supported loader info" +class: $xLoaderConfig +name: $x_loader_config_info_t +typed_etors: True +etors: + - name: AVAILABLE_LAYERS + desc: "[char[]] Null-terminated, semi-colon separated list of available layers." + - name: REFERENCE_COUNT + desc: "[uint32_t] Reference count of the loader config object." 
+--- #-------------------------------------------------------------------------- +type: function +desc: "Retrieves various information about the loader." +class: $xLoaderConfig +loader_only: True +name: GetInfo +decl: static +details: + - "The application may call this function from simultaneous threads." + - "The implementation of this function should be lock-free." +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] handle of the loader config object" + - type: $x_loader_config_info_t + name: propName + desc: "[in] type of the info to retrieve" + - type: "size_t" + name: propSize + desc: | + [in] the number of bytes pointed to by pPropValue. + - type: "void*" + name: pPropValue + desc: | + [out][optional][typename(propName, propSize)] array of bytes holding the info. + If propSize is not equal to or greater than the real number of bytes needed to return the info + then the $X_RESULT_ERROR_INVALID_SIZE error is returned and pPropValue is not used. + - type: "size_t*" + name: pPropSizeRet + desc: | + [out][optional] pointer to the actual size in bytes of the queried propName. +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "If `propName` is not supported by the loader." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`propSize == 0 && pPropValue != NULL`" + - "If `propSize` is less than the real number of bytes needed to return the info." + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`propSize != 0 && pPropValue == NULL`" + - "`pPropValue == NULL && pPropSizeRet == NULL`" + - $X_RESULT_ERROR_INVALID_DEVICE + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Enable a layer for the specified loader config." 
+class: $xLoaderConfig +loader_only: True +name: EnableLayer +decl: static +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] Handle to config object the layer will be enabled for." + - type: const char* + name: pLayerName + desc: "[in] Null terminated string containing the name of the layer to enable." +returns: + - $X_RESULT_ERROR_LAYER_NOT_PRESENT: + - "If layer specified with `pLayerName` can't be found by the loader." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Code location data" +class: $xLoaderConfig +name: $x_code_location_t +members: + - type: const char* + name: functionName + desc: "[in][out] Function name." + - type: const char* + name: sourceFile + desc: "[in][out] Source code file." + - type: uint32_t + name: lineNumber + desc: "[in][out] Source code line number." + - type: uint32_t + name: columnNumber + desc: "[in][out] Source code column number." +--- #-------------------------------------------------------------------------- +type: fptr_typedef +desc: "Code location callback with user data." +name: $x_code_location_callback_t +return: $x_code_location_t +params: + - type: void* + name: pUserData + desc: "[in][out] pointer to data to be passed to callback" +--- #-------------------------------------------------------------------------- +type: function +desc: "Set a function callback for use by the loader to retrieve code location information." +details: + - "The code location callback is optional and provides additional information to the tracing layer about the entry point of the current execution flow." + - "This functionality can be used to match traced unified runtime function calls with higher-level user calls." +class: $xLoaderConfig +loader_only: True +name: SetCodeLocationCallback +decl: static +params: + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in] Handle to config object the code location callback will be set for."
+ - type: $x_code_location_callback_t + name: pfnCodeloc + desc: "[in] Function pointer to code location callback." + - type: void* + name: pUserData + desc: "[in][out][optional] pointer to data to be passed to callback." +--- #-------------------------------------------------------------------------- +type: function +desc: "Initialize the $OneApi loader" +class: $xLoader +loader_only: True +name: Init +decl: static +ordinal: "0" +details: + - "The application must call this function before calling any other function." + - "If this function is not called then all other functions will return $X_RESULT_ERROR_UNINITIALIZED." + - "Only one instance of the loader will be initialized per process." + - "The application may call this function multiple times with different flags or environment variables enabled." + - "The application must call this function after forking new processes. Each forked process must call this function." + - "The application may call this function from simultaneous threads." + - "The implementation of this function must be thread-safe for scenarios where multiple libraries may initialize the loader simultaneously." +params: + - type: $x_device_init_flags_t + name: device_flags + desc: | + [in] device initialization flags. + must be 0 (default) or a combination of $x_device_init_flag_t. + init: "0" + - type: $x_loader_config_handle_t + name: hLoaderConfig + desc: "[in][optional] Handle of the loader config object."
+returns: + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Tear down the $OneApi loader and release all its resources" +class: $xLoader +loader_only: True +name: TearDown +decl: static +ordinal: "1" +params: [] +returns: + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY diff --git a/scripts/core/platform.yml b/scripts/core/platform.yml index f7020b4138..f583f7d007 100644 --- a/scripts/core/platform.yml +++ b/scripts/core/platform.yml @@ -46,7 +46,8 @@ params: desc: | [out][optional] returns the total number of platforms available. returns: - - $X_RESULT_ERROR_INVALID_SIZE + - $X_RESULT_ERROR_INVALID_SIZE: + - "`NumEntries == 0 && phPlatforms != NULL`" --- #-------------------------------------------------------------------------- type: enum desc: "Supported platform info" @@ -133,6 +134,9 @@ etors: - name: "0_7" value: "$X_MAKE_VERSION( 0, 7 )" desc: "version 0.7" + - name: "0_8" + value: "$X_MAKE_VERSION( 0, 8 )" + desc: "version 0.8" --- #-------------------------------------------------------------------------- type: function desc: "Returns the API version supported by the specified platform" diff --git a/scripts/core/queue.yml b/scripts/core/queue.yml index 88fe153165..15934c0e2f 100644 --- a/scripts/core/queue.yml +++ b/scripts/core/queue.yml @@ -161,8 +161,9 @@ params: returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE - - $X_RESULT_ERROR_INVALID_VALUE - - $X_RESULT_ERROR_INVALID_QUEUE_PROPERTIES + - $X_RESULT_ERROR_INVALID_QUEUE_PROPERTIES: + - "`pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW`" + - "`pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- 
#-------------------------------------------------------------------------- diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 30596ec14e..61fb2aa690 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -292,12 +292,6 @@ etors: - name: QUEUE_FLUSH desc: Enumerator for $xQueueFlush value: '98' -- name: INIT - desc: Enumerator for $xInit - value: '99' -- name: TEAR_DOWN - desc: Enumerator for $xTearDown - value: '100' - name: SAMPLER_CREATE desc: Enumerator for $xSamplerCreate value: '101' @@ -373,15 +367,6 @@ etors: - name: COMMAND_BUFFER_ENQUEUE_EXP desc: Enumerator for $xCommandBufferEnqueueExp value: '128' -- name: COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP - desc: Enumerator for $xCommandBufferAppendMemcpyUSMExp - value: '129' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP - desc: Enumerator for $xCommandBufferAppendMembufferCopyExp - value: '130' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMembufferCopyRectExp - value: '131' - name: USM_PITCHED_ALLOC_EXP desc: Enumerator for $xUSMPitchedAllocExp value: '132' @@ -487,18 +472,6 @@ etors: - name: USM_P2P_PEER_ACCESS_GET_INFO_EXP desc: Enumerator for $xUsmP2PPeerAccessGetInfoExp value: '167' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP - desc: Enumerator for $xCommandBufferAppendMembufferWriteExp - value: '168' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP - desc: Enumerator for $xCommandBufferAppendMembufferReadExp - value: '169' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMembufferWriteRectExp - value: '170' -- name: COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMembufferReadRectExp - value: '171' - name: LOADER_CONFIG_CREATE desc: Enumerator for $xLoaderConfigCreate value: '172' @@ -529,6 +502,54 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: LOADER_INIT + desc: 
Enumerator for $xLoaderInit + value: '182' +- name: LOADER_TEAR_DOWN + desc: Enumerator for $xLoaderTearDown + value: '183' +- name: COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP + desc: Enumerator for $xCommandBufferAppendUSMMemcpyExp + value: '184' +- name: COMMAND_BUFFER_APPEND_USM_FILL_EXP + desc: Enumerator for $xCommandBufferAppendUSMFillExp + value: '185' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferCopyExp + value: '186' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferWriteExp + value: '187' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferReadExp + value: '188' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferCopyRectExp + value: '189' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferWriteRectExp + value: '190' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferReadRectExp + value: '191' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferFillExp + value: '192' +- name: ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xEnqueueCooperativeKernelLaunchExp + value: '193' +- name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP + desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp + value: '194' +- name: COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP + desc: Enumerator for $xCommandBufferAppendUSMPrefetchExp + value: '195' +- name: COMMAND_BUFFER_APPEND_USM_ADVISE_EXP + desc: Enumerator for $xCommandBufferAppendUSMAdviseExp + value: '196' +- name: LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK + desc: Enumerator for $xLoaderConfigSetCodeLocationCallback + value: '197' --- type: enum desc: Defines structure types diff --git a/scripts/ctest_parser.py 
b/scripts/ctest_parser.py new file mode 100644 index 0000000000..f41ba5ea60 --- /dev/null +++ b/scripts/ctest_parser.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +""" + Copyright (C) 2022 Intel Corporation + + Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + See LICENSE.TXT + SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +""" + +from subprocess import Popen, DEVNULL, PIPE +import argparse +import os +import json + +TMP_RESULTS_FILE = "tmp-results-file.json" + +def get_cts_test_suite_names(working_directory): + process = Popen(["ctest", "--show-only=json-v1"], cwd=working_directory, + stdout=PIPE, env=os.environ.copy()) + out,_ = process.communicate() + testsuites = json.loads(out) + return [test['name']for test in testsuites['tests']] + +def percent(amount, total): + return round((amount / total) * 100, 2) if total else 0.0 # report 0% instead of raising ZeroDivisionError when total is 0 (no tests counted, or every test skipped) + +def summarize_results(results): + total = results['Total'] + total_passed = len(results['Passed']) + total_skipped = len(results['Skipped']) + total_failed = len(results['Failed']) + total_crashed = total - (total_passed + total_skipped + total_failed) + + pass_rate_incl_skipped = percent(total_passed, total) + pass_rate_excl_skipped = percent(total_passed, total - total_skipped) + + skipped_rate = percent(total_skipped, total) + failed_rate = percent(total_failed, total) + crash_rate = percent(total_crashed, total) + + ljust_param = len(str(total)) + + print( +f"""[CTest Parser] Results: + Total [{str(total).ljust(ljust_param)}] + Passed [{str(total_passed).ljust(ljust_param)}] ({pass_rate_incl_skipped}%) - ({pass_rate_excl_skipped}% with skipped tests excluded) + Skipped [{str(total_skipped).ljust(ljust_param)}] ({skipped_rate}%) + Failed [{str(total_failed).ljust(ljust_param)}] ({failed_rate}%) + Crashed [{str(total_crashed).ljust(ljust_param)}] ({crash_rate}%) +""" + ) + +def parse_results(results): + parsed_results = {"Passed": {}, "Skipped":{}, "Failed": {}, 'Crashed': {}, 'Total':0, 'Success':True} +
for _, result in results.items(): + if result['actual'] is None: + parsed_results['Success'] = False + parsed_results['Total'] += result.get('expected', {}).get('tests', 0) # 'expected' is absent when test discovery failed for this suite in run() + continue + + parsed_results['Total'] += result['actual']['tests'] + for testsuite in result['actual'].get('testsuites', []): + for test in testsuite.get('testsuite', []): + test_name = f"{testsuite['name']}.{test['name']}" + test_time = test['time'] + if 'failures' in test: + parsed_results['Failed'][test_name] = {'time': test_time} + elif test['result'] == 'SKIPPED': + parsed_results['Skipped'][test_name] = {'time': test_time} + else: + parsed_results['Passed'][test_name] = {'time': test_time} + return parsed_results + +def run(args): + results = {} + + tmp_results_file = f"{args.ctest_path}/{TMP_RESULTS_FILE}" + env = os.environ.copy() + env['GTEST_OUTPUT'] = f"json:{tmp_results_file}" + + test_suite_names = get_cts_test_suite_names(f"{args.ctest_path}/test/conformance/") + + ## try and list all the available tests + for suite in test_suite_names: + results[suite] = {} + test_executable = f"{args.ctest_path}/bin/test-{suite}" + process = Popen([test_executable, "--gtest_list_tests"], env=env, + stdout=DEVNULL if args.quiet else None, + stderr=DEVNULL if args.quiet else None) + process.wait() + try: + with open(tmp_results_file,'r') as test_list: + all_tests = json.load(test_list) + results[suite]['expected'] = all_tests + os.remove(tmp_results_file) + except FileNotFoundError: + print(f"Could not discover tests for {suite}") + + for suite in test_suite_names: + ctest_path = f"{args.ctest_path}/test/conformance/{suite}" + process = Popen(['ctest',ctest_path], env=env, cwd=ctest_path, + stdout=DEVNULL if args.quiet else None, + stderr=DEVNULL if args.quiet else None) + process.wait() + + try: + with open(tmp_results_file, 'r') as results_file: + json_data = json.load(results_file) + results[suite]['actual'] = json_data + os.remove(tmp_results_file) + except FileNotFoundError: + results[suite]['actual'] = None +
print('\033[91m' + f"Conformance test suite '{suite}' : likely crashed!" + '\033[0m') + + return results + +def dir_path(string): + if os.path.isdir(string): + return os.path.abspath(string) + else: + raise NotADirectoryError(string) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('ctest_path', type=dir_path, nargs='?', default='.', + help='Optional path to test directory containing ' + 'CTestTestfile. Defaults to current directory.') + parser.add_argument('-q', '--quiet', action='store_true', + help='Output only failed tests.') + args = parser.parse_args() + + raw_results = run(args) + parsed_results = parse_results(raw_results) + summarize_results(parsed_results) + +if __name__ == '__main__': + try: + main() + except KeyboardInterrupt: + exit(130) diff --git a/scripts/parse_specs.py b/scripts/parse_specs.py index 07ae086efd..9c8a331e76 100644 --- a/scripts/parse_specs.py +++ b/scripts/parse_specs.py @@ -18,8 +18,8 @@ import ctypes import itertools -default_version = "0.7" -all_versions = ["0.6", "0.7"] +default_version = "0.8" +all_versions = ["0.6", "0.7", "0.8"] """ preprocess object @@ -97,7 +97,7 @@ def __validate_ordinal(d): ordinal = None if ordinal != d['ordinal']: - raise Exception("'ordinal' invalid value: '%s'"%d['ordinal']) + raise Exception("'ordinal' invalid value: '%s'"%d['ordinal']) def __validate_version(d, prefix="", base_version=default_version): if 'version' in d: @@ -333,7 +333,7 @@ def __validate_params(d, tags): if item['type'].endswith("flag_t"): raise Exception(prefix+"'type' must not be '*_flag_t': %s"%item['type']) - + if type_traits.is_pointer(item['type']) and "_handle_t" in item['type'] and "[in]" in item['desc']: if not param_traits.is_range(item): raise Exception(prefix+"handle type must include a range(start, end) as part of 'desc'") @@ -342,11 +342,11 @@ def __validate_params(d, tags): if ver < max_ver: raise Exception(prefix+"'version' must be increasing: %s"%item['version']) max_ver = ver - + def 
__validate_union_tag(d): if d.get('tag') is None: raise Exception(f"{d['name']} must include a 'tag' part of the union.") - + try: if 'type' not in d: raise Exception("every document must have 'type'") @@ -466,7 +466,7 @@ def __filter_desc(d): return d flt = [] - type = d['type'] + type = d['type'] if 'enum' == type: for e in d['etors']: ver = float(e.get('version', default_version)) @@ -706,58 +706,54 @@ def _append(lst, key, val): if val and val not in rets[idx][key]: rets[idx][key].append(val) + def append_nullchecks(param, accessor: str): + if type_traits.is_pointer(param['type']): + _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`" % accessor) + + elif type_traits.is_funcptr(param['type'], meta): + _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`" % accessor) + + elif type_traits.is_handle(param['type']) and not type_traits.is_ipc_handle(param['type']): # test the entity being checked (param), not the enclosing loop's item, so struct members get the same IPC-handle exclusion + _append(rets, "$X_RESULT_ERROR_INVALID_NULL_HANDLE", "`NULL == %s`" % accessor) + + def append_enum_checks(param, accessor: str): + ptypename = type_traits.base(param['type']) + + prefix = "`" + if param_traits.is_optional(item): + prefix = "`NULL != %s && " % item['name'] + + if re.match(r"stype", param['name']): + _append(rets, "$X_RESULT_ERROR_UNSUPPORTED_VERSION", prefix + "%s != %s`"%(re.sub(r"(\$\w)_(.*)_t.*", r"\1_STRUCTURE_TYPE_\2", typename).upper(), accessor)) + else: + if type_traits.is_flags(param['type']) and 'bit_mask' in meta['enum'][ptypename].keys(): + _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", prefix + "%s & %s`"%(ptypename.upper()[:-2]+ "_MASK", accessor)) + else: + _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", prefix + "%s < %s`"%(meta['enum'][ptypename]['max'], accessor)) + # generate results based on parameters for item in obj['params']: if param_traits.is_nocheck(item): continue if not param_traits.is_optional(item): + append_nullchecks(item, item['name']) + + if type_traits.is_enum(item['type'], meta) and not
type_traits.is_pointer(item['type']): + append_enum_checks(item, item['name']) + + if type_traits.is_descriptor(item['type']) or type_traits.is_properties(item['type']): typename = type_traits.base(item['type']) + # walk each entry in the desc for pointers and enums + for i, m in enumerate(meta['struct'][typename]['members']): + if param_traits.is_nocheck(m): + continue + + if not param_traits.is_optional(m): + append_nullchecks(m, "%s->%s" % (item['name'], m['name'])) - if type_traits.is_pointer(item['type']): - _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`"%item['name']) - - elif type_traits.is_funcptr(item['type'], meta): - _append(rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", "`NULL == %s`"%item['name']) - - elif type_traits.is_handle(item['type']) and not type_traits.is_ipc_handle(item['type']): - _append(rets, "$X_RESULT_ERROR_INVALID_NULL_HANDLE", "`NULL == %s`"%item['name']) - - elif type_traits.is_enum(item['type'], meta): - if type_traits.is_flags(item['type']) and 'bit_mask' in meta['enum'][typename].keys(): - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s & %s`"%(typename.upper()[:-2]+ "_MASK", item['name'])) - else: - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s < %s`"%(meta['enum'][typename]['max'], item['name'])) - - if type_traits.is_descriptor(item['type']): - # walk each entry in the desc for pointers and enums - for i, m in enumerate(meta['struct'][typename]['members']): - if param_traits.is_nocheck(m): - continue - mtypename = type_traits.base(m['type']) - - if type_traits.is_pointer(m['type']) and not param_traits.is_optional({'desc': m['desc']}): - _append(rets, - "$X_RESULT_ERROR_INVALID_NULL_POINTER", - "`NULL == %s->%s`"%(item['name'], m['name'])) - - elif type_traits.is_enum(m['type'], meta): - if re.match(r"stype", m['name']): - _append(rets, "$X_RESULT_ERROR_UNSUPPORTED_VERSION", "`%s != %s->stype`"%(re.sub(r"(\$\w)_(.*)_t.*", r"\1_STRUCTURE_TYPE_\2", typename).upper(), item['name'])) - else: 
- if type_traits.is_flags(m['type']) and 'bit_mask' in meta['enum'][mtypename].keys(): - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s & %s->%s`"%(mtypename.upper()[:-2]+ "_MASK", item['name'], m['name'])) - else: - _append(rets, "$X_RESULT_ERROR_INVALID_ENUMERATION", "`%s < %s->%s`"%(meta['enum'][mtypename]['max'], item['name'], m['name'])) - - elif type_traits.is_properties(item['type']): - # walk each entry in the properties - for i, m in enumerate(meta['struct'][typename]['members']): - if param_traits.is_nocheck(m): - continue - if type_traits.is_enum(m['type'], meta): - if re.match(r"stype", m['name']): - _append(rets, "$X_RESULT_ERROR_UNSUPPORTED_VERSION", "`%s != %s->stype`"%(re.sub(r"(\$\w)_(.*)_t.*", r"\1_STRUCTURE_TYPE_\2", typename).upper(), item['name'])) + if type_traits.is_enum(m['type'], meta) and not type_traits.is_pointer(m['type']): + append_enum_checks(m, "%s->%s" % (item['name'], m['name'])) # finally, append all user entries for item in obj.get('returns', []): @@ -823,7 +819,7 @@ def _refresh_enum_meta(obj, meta): ## remove the existing meta records if obj.get('class'): meta['class'][obj['class']]['enum'].remove(obj['name']) - + if meta['enum'].get(obj['name']): del meta['enum'][obj['name']] ## re-generate meta @@ -851,13 +847,13 @@ def _extend_enums(enum_extensions, specs, meta): if not _validate_ext_enum_range(extension, matching_enum): raise Exception(f"Invalid enum values.") matching_enum['etors'].extend(extension['etors']) - + _refresh_enum_meta(matching_enum, meta) ## Sort the etors value = -1 def sort_etors(x): - nonlocal value + nonlocal value value = _get_etor_value(x.get('value'), value) return value matching_enum['etors'] = sorted(matching_enum['etors'], key=sort_etors) @@ -872,6 +868,7 @@ def parse(section, version, tags, meta, ref): specs = [] files = util.findFiles(path, "*.yml") + files.sort(key = lambda f: 0 if f.endswith('common.yml') else 1) registry = [f for f in files if f.endswith('registry.yml')][0] 
enum_extensions = [] diff --git a/scripts/templates/api.h.mako b/scripts/templates/api.h.mako index 74ba84beeb..41d6d8f456 100644 --- a/scripts/templates/api.h.mako +++ b/scripts/templates/api.h.mako @@ -151,6 +151,7 @@ typedef struct ${th.subt(n, tags, obj['name'])}_ *${th.subt(n, tags, obj['name'] #endif %for tbl in th.get_pfncbtables(specs, meta, n, tags): %for obj in tbl['functions']: +%if obj['params']: /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for ${th.make_func_name(n, tags, obj)} /// @details Each entry is a pointer to the parameter passed to the function; @@ -167,6 +168,7 @@ typedef struct ${th.make_pfncb_param_type(n, tags, obj)} %if 'condition' in obj: #endif // ${th.subt(n, tags, obj['condition'])} %endif +%endif %endfor %endfor diff --git a/scripts/templates/api.py.mako b/scripts/templates/api.py.mako index 35a2fd6d27..7815f2cf53 100644 --- a/scripts/templates/api.py.mako +++ b/scripts/templates/api.py.mako @@ -175,7 +175,7 @@ class ${N}_DDI: self.__dditable = ${n}_dditable_t() # initialize the UR - self.__dll.${x}Init(0, 0) + self.__dll.${x}LoaderInit(0, 0) %for tbl in tables: # call driver to get function pointers diff --git a/scripts/templates/api_listing.mako b/scripts/templates/api_listing.mako index 722a803915..252c5ee887 100644 --- a/scripts/templates/api_listing.mako +++ b/scripts/templates/api_listing.mako @@ -115,7 +115,7 @@ ${title} ## ------------------------- <%isempty = True%> %for obj in objects: -%if re.match(r"typedef", obj['type']): +%if re.match(r"typedef", obj['type']) or re.match(r"fptr_typedef", obj['type']): %if isempty: # only display section title if there is content. 
%if needstitle: <%needstitle = False%> @@ -245,7 +245,7 @@ ${th.make_type_name(n, tags, obj)} ## ------------------------- <%isempty = True%> %for obj in objects: -%if re.match(r"typedef", obj['type']): +%if re.match(r"typedef", obj['type']) or re.match(r"fptr_typedef", obj['type']): %if isempty: # only display section title if there is content. ${title} Typedefs -------------------------------------------------------------------------------- diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index 2b283b8119..4fbb2ca47b 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -105,6 +105,7 @@ class type_traits: RE_DESC = r"(.*)desc_t.*" RE_PROPS = r"(.*)properties_t.*" RE_FLAGS = r"(.*)flags_t" + RE_ARRAY = r"(.*)\[([1-9][0-9]*)\]" @staticmethod def base(name): @@ -217,6 +218,29 @@ def find_class_name(name, meta): except: return None + @classmethod + def is_array(cls, name): + try: + return True if re.match(cls.RE_ARRAY, name) else False + except: + return False + + @classmethod + def get_array_length(cls, name): + if not cls.is_array(name): + raise Exception("Cannot find array length of non-array type.") + + match = re.match(cls.RE_ARRAY, name) + return match.groups()[1] + + @classmethod + def get_array_element_type(cls, name): + if not cls.is_array(name): + raise Exception("Cannot find array type of non-array type.") + + match = re.match(cls.RE_ARRAY, name) + return match.groups()[0] + """ Extracts traits from a value name """ @@ -729,7 +753,10 @@ def make_etor_lines(namespace, tags, obj, py=False, meta=None): returns c/c++ name of any type """ def _get_type_name(namespace, tags, obj, item): - name = subt(namespace, tags, item['type'],) + type = item['type'] + if type_traits.is_array(type): + type = type_traits.get_array_element_type(type) + name = subt(namespace, tags, type,) return name """ @@ -763,9 +790,9 @@ def get_ctype_name(namespace, tags, item): while type_traits.is_pointer(name): name = 
"POINTER(%s)"%_remove_ptr(name) - if 'name' in item and value_traits.is_array(item['name']): - length = subt(namespace, tags, value_traits.get_array_length(item['name'])) - name = "%s * %s"%(name, length) + if 'name' in item and type_traits.is_array(item['type']): + length = subt(namespace, tags, type_traits.get_array_length(item['type'])) + name = "%s * %s"%(type_traits.get_array_element_type(name), length) return name @@ -804,7 +831,8 @@ def make_member_lines(namespace, tags, obj, prefix="", py=False, meta=None): delim = "," if i < (len(obj['members'])-1) else "" prologue = "(\"%s\", %s)%s"%(name, tname, delim) else: - prologue = "%s %s;"%(tname, name) + array_suffix = f"[{type_traits.get_array_length(item['type'])}]" if type_traits.is_array(item['type']) else "" + prologue = "%s %s %s;"%(tname, name, array_suffix) comment_style = "##" if py else "///<" ws_count = 64 if py else 48 @@ -1300,3 +1328,14 @@ def get_create_retain_release_functions(specs, namespace, tags): ) return {"create": create_funcs, "retain": retain_funcs, "release": release_funcs} + + +def get_event_wait_list_functions(specs, namespace, tags): + funcs = [] + for s in specs: + for obj in s['objects']: + if re.match(r"function", obj['type']): + if any(x['name'] == 'phEventWaitList' for x in obj['params']) and any( + x['name'] == 'numEventsInWaitList' for x in obj['params']): + funcs.append(make_func_name(namespace, tags, obj)) + return funcs diff --git a/scripts/templates/index.rst.mako b/scripts/templates/index.rst.mako index 8a53ba0427..1d5ba6a9b0 100644 --- a/scripts/templates/index.rst.mako +++ b/scripts/templates/index.rst.mako @@ -14,5 +14,7 @@ core/INTRO.rst core/PROG.rst core/CONTRIB.rst + core/CUDA.rst + core/HIP.rst exp-features.rst api.rst diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 0498ba00dc..0c9a3ed8b0 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -51,22 +51,7 @@ namespace ur_loader add_local = 
False %> - %if re.match(r"Init", obj['name']): - for( auto& platform : context->platforms ) - { - if(platform.initStatus != ${X}_RESULT_SUCCESS) - continue; - platform.initStatus = platform.dditable.${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - } - - %elif re.match(r"\w+TearDown$", th.make_func_name(n, tags, obj)): - - for( auto& platform : context->platforms ) - { - platform.dditable.${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - } - - %elif re.match(r"\w+AdapterGet$", th.make_func_name(n, tags, obj)): + %if re.match(r"\w+AdapterGet$", th.make_func_name(n, tags, obj)): size_t adapterIndex = 0; if( nullptr != ${obj['params'][1]['name']} && ${obj['params'][0]['name']} !=0) diff --git a/scripts/templates/libapi.cpp.mako b/scripts/templates/libapi.cpp.mako index d269d62241..6fe1f3992b 100644 --- a/scripts/templates/libapi.cpp.mako +++ b/scripts/templates/libapi.cpp.mako @@ -56,19 +56,27 @@ ${th.make_func_name(n, tags, obj)}( %endfor ) try { -%if th.obj_traits.is_loader_only(obj): - return ur_lib::${th.make_func_name(n, tags, obj)}(${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); -%else: %if re.match("Init", obj['name']): + <% + param_checks=th.make_param_checks(n, tags, obj, meta=meta).items() + %> + %for key, values in param_checks: + %for val in values: + if( ${val} ) + return ${key}; + + %endfor + %endfor + static ${x}_result_t result = ${X}_RESULT_SUCCESS; std::call_once(${x}_lib::context->initOnce, [device_flags, hLoaderConfig]() { result = ${x}_lib::context->Init(device_flags, hLoaderConfig); }); - if( ${X}_RESULT_SUCCESS != result ) - return result; - -%endif + return result; +%elif th.obj_traits.is_loader_only(obj): + return ur_lib::${th.make_func_name(n, tags, obj)}(${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); +%else: auto 
${th.make_pfn_name(n, tags, obj)} = ${x}_lib::context->${n}DdiTable.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) return ${X}_RESULT_ERROR_UNINITIALIZED; diff --git a/scripts/templates/libddi.cpp.mako b/scripts/templates/libddi.cpp.mako index de73cc2fc7..eae178324c 100644 --- a/scripts/templates/libddi.cpp.mako +++ b/scripts/templates/libddi.cpp.mako @@ -28,7 +28,7 @@ namespace ${x}_lib /////////////////////////////////////////////////////////////////////////////// - __${x}dlllocal ${x}_result_t context_t::${n}Init() + __${x}dlllocal ${x}_result_t context_t::${n}LoaderInit() { ${x}_result_t result = ${X}_RESULT_SUCCESS; diff --git a/scripts/templates/params.hpp.mako b/scripts/templates/params.hpp.mako index 863c3d37ea..42fbf61d1f 100644 --- a/scripts/templates/params.hpp.mako +++ b/scripts/templates/params.hpp.mako @@ -36,6 +36,8 @@ from templates import helper as th ${x}_params::serializePtr(os, ${caller.body()}); %elif th.type_traits.is_handle(itype): ${x}_params::serializePtr(os, ${caller.body()}); + %elif iname and iname.startswith("pfn"): + os << reinterpret_cast(${caller.body()}); %else: os << ${caller.body()}; %endif @@ -82,6 +84,17 @@ def findMemberType(_item): %elif findMemberType(item) is not None and findMemberType(item)['type'] == "union": os << ".${iname} = "; ${x}_params::serializeUnion(os, ${deref}(params${access}${item['name']}), params${access}${th.param_traits.tagged_member(item)}); + %elif th.type_traits.is_array(item['type']): + os << ".${iname} = {"; + for(auto i = 0; i < ${th.type_traits.get_array_length(item['type'])}; i++){ + if(i != 0){ + os << ", "; + } + <%call expr="member(iname, itype, True)"> + ${deref}(params${access}${item['name']}[i]) + + } + os << "}"; %elif typename is not None: os << ".${iname} = "; ${x}_params::serializeTagged(os, ${deref}(params${access}${pname}), ${deref}(params${access}${prefix}${typename}), 
${deref}(params${access}${prefix}${typename_size})); @@ -104,7 +117,7 @@ template <> struct is_handle<${th.make_type_name(n, tags, obj)}> : std::true_typ %endfor template inline constexpr bool is_handle_v = is_handle::value; -template inline void serializePtr(std::ostream &os, T *ptr); +template inline void serializePtr(std::ostream &os, const T *ptr); template inline void serializeFlag(std::ostream &os, uint32_t flag); template inline void serializeTagged(std::ostream &os, const void *ptr, T value, size_t size); @@ -142,7 +155,7 @@ template inline void serializeTagged(std::ostream &os, const void * %if re.match(r"enum", obj['type']): inline std::ostream &operator<<(std::ostream &os, enum ${th.make_enum_name(n, tags, obj)} value); %elif re.match(r"struct", obj['type']): - inline std::ostream &operator<<(std::ostream &os, const ${obj['type']} ${th.make_type_name(n, tags, obj)} params); + inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const ${obj['type']} ${th.make_type_name(n, tags, obj)} params); %endif %endfor # obj in spec['objects'] %endfor @@ -192,7 +205,11 @@ template inline void serializeTagged(std::ostream &os, const void * case ${ename}: { %if th.value_traits.is_array(vtype): <% atype = th.value_traits.get_array_name(vtype) %> + %if 'void' in atype: + const ${atype} const *tptr = (const ${atype} const*)ptr; + %else: const ${atype} *tptr = (const ${atype} *)ptr; + %endif %if "char" in atype: ## print char* arrays as simple NULL-terminated strings serializePtr(os, tptr); %else: @@ -209,12 +226,16 @@ template inline void serializeTagged(std::ostream &os, const void * os << "}"; %endif %else: + %if 'void' in vtype: + const ${vtype} const *tptr = (const ${vtype} const *)ptr; + %else: const ${vtype} *tptr = (const ${vtype} *)ptr; + %endif if (sizeof(${vtype}) > size) { os << "invalid size (is: " << size << ", expected: >=" << sizeof(${vtype}) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; <%call 
expr="member(tptr, vtype, False)"> *tptr @@ -237,7 +258,7 @@ template inline void serializeTagged(std::ostream &os, const void * } ## structure type enum value must be first - enum ${th.make_enum_name(n, tags, obj)} *value = (enum ${th.make_enum_name(n, tags, obj)} *)ptr; + const enum ${th.make_enum_name(n, tags, obj)} *value = (const enum ${th.make_enum_name(n, tags, obj)} *)ptr; switch (*value) { %for n, item in enumerate(obj['etors']): <% @@ -343,7 +364,7 @@ for item in obj['members']: %for tbl in th.get_pfncbtables(specs, meta, n, tags): %for obj in tbl['functions']: -inline std::ostream &operator<<(std::ostream &os, const struct ${th.make_pfncb_param_type(n, tags, obj)} *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ${th.make_pfncb_param_type(n, tags, obj)} *params) { <% params_dict = dict() for item in obj['params']: @@ -362,21 +383,21 @@ inline std::ostream &operator<<(std::ostream &os, const struct ${th.make_pfncb_p namespace ${x}_params { -template inline void serializePtr(std::ostream &os, T *ptr) { +template inline void serializePtr(std::ostream &os, const T *ptr) { if (ptr == nullptr) { os << "nullptr"; } else if constexpr (std::is_pointer_v) { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; serializePtr(os, *ptr); os << ")"; } else if constexpr (std::is_void_v || is_handle_v) { - os << (void *)ptr; + os << (const void *)ptr; } else if constexpr (std::is_same_v, char>) { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << ptr; os << ")"; } else { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << *ptr; os << ")"; } diff --git a/scripts/templates/trcddi.cpp.mako b/scripts/templates/trcddi.cpp.mako index 9a2eb3e319..2ace43072b 100644 --- a/scripts/templates/trcddi.cpp.mako +++ b/scripts/templates/trcddi.cpp.mako @@ -104,13 +104,16 @@ namespace ur_tracing_layer ${x}_result_t context_t::init(ur_dditable_t *dditable, - const std::set 
&enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data codelocData) { ${x}_result_t result = ${X}_RESULT_SUCCESS; if(!enabledLayerNames.count(name)) { return result; } + ur_tracing_layer::context.codelocData = codelocData; + %for tbl in th.get_pfntables(specs, meta, n, tags): if( ${X}_RESULT_SUCCESS == result ) { diff --git a/scripts/templates/valddi.cpp.mako b/scripts/templates/valddi.cpp.mako index 862c8b81a5..f3ec24bfb9 100644 --- a/scripts/templates/valddi.cpp.mako +++ b/scripts/templates/valddi.cpp.mako @@ -60,11 +60,36 @@ namespace ur_validation_layer %endfor %endfor + %if func_name in th.get_event_wait_list_functions(specs, n, tags): + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + %endif + } ${x}_result_t result = ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - %if func_name in create_retain_release_funcs["create"]: + %if func_name == n + "AdapterRelease": + if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) + { + refCountContext.decrementRefCount(${object_param}, true); + } + %elif func_name == n + "AdapterRetain": + if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) + { + refCountContext.incrementRefCount(${object_param}, true); + } + %elif func_name == n + "AdapterGet": + if( context.enableLeakChecking && phAdapters && result == UR_RESULT_SUCCESS ) + { + refCountContext.createOrIncrementRefCount(*phAdapters, true); + } + %elif func_name in create_retain_release_funcs["create"]: if( context.enableLeakChecking && result == UR_RESULT_SUCCESS ) { refCountContext.createRefCount(*${object_param}); @@ -79,12 +104,6 @@ namespace ur_validation_layer { refCountContext.decrementRefCount(${object_param}); } - %elif func_name == n + "TearDown": - if ( context.enableLeakChecking ) - { - 
refCountContext.logInvalidReferences(); - refCountContext.clear(); - } %endif return result; @@ -141,9 +160,10 @@ namespace ur_validation_layer %endfor ${x}_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data) { ${x}_result_t result = ${X}_RESULT_SUCCESS; - + if (enabledLayerNames.count(nameFullValidation)) { enableParameterValidation = true; enableLeakChecking = true; @@ -170,4 +190,14 @@ namespace ur_validation_layer return result; } + ${x}_result_t context_t::tearDown() { + ${x}_result_t result = ${X}_RESULT_SUCCESS; + + if (enableLeakChecking) { + refCountContext.logInvalidReferences(); + refCountContext.clear(); + } + return result; + } + } // namespace ur_validation_layer diff --git a/source/adapters/null/ur_null.cpp b/source/adapters/null/ur_null.cpp index 5a62761b67..1a5d0aa373 100644 --- a/source/adapters/null/ur_null.cpp +++ b/source/adapters/null/ur_null.cpp @@ -38,21 +38,20 @@ context_t::context_t() { return UR_RESULT_SUCCESS; }; ////////////////////////////////////////////////////////////////////////// - urDdiTable.Platform.pfnGet = [](ur_adapter_handle_t *phAdapters, - uint32_t NumAdapters, uint32_t NumEntries, - ur_platform_handle_t *phPlatforms, - uint32_t *pNumPlatforms) { - if (phPlatforms != nullptr && NumEntries != 1) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - if (pNumPlatforms != nullptr) { - *pNumPlatforms = 1; - } - if (nullptr != phPlatforms) { - *reinterpret_cast(phPlatforms) = d_context.get(); - } - return UR_RESULT_SUCCESS; - }; + urDdiTable.Platform.pfnGet = + [](ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { + if (phPlatforms != nullptr && NumEntries != 1) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + if (pNumPlatforms != nullptr) { + *pNumPlatforms = 1; + } + if (nullptr != phPlatforms) { + *reinterpret_cast(phPlatforms) = d_context.get(); + } + return UR_RESULT_SUCCESS; + 
}; ////////////////////////////////////////////////////////////////////////// urDdiTable.Platform.pfnGetApiVersion = [](ur_platform_handle_t, @@ -122,8 +121,8 @@ context_t::context_t() { ////////////////////////////////////////////////////////////////////////// urDdiTable.Device.pfnGetInfo = - [](ur_device_handle_t hDevice, ur_device_info_t infoType, - size_t propSize, void *pDeviceInfo, size_t *pPropSizeRet) { + [](ur_device_handle_t, ur_device_info_t infoType, size_t propSize, + void *pDeviceInfo, size_t *pPropSizeRet) { switch (infoType) { case UR_DEVICE_INFO_TYPE: if (pDeviceInfo && propSize != sizeof(ur_device_type_t)) { diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index f9b8fb4d11..a8a4883aa1 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -12,49 +12,6 @@ #include "ur_null.hpp" namespace driver { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. - ) try { - ur_result_t result = UR_RESULT_SUCCESS; - - // if the driver has created a custom function, then call it instead of using the generic path - auto pfnInit = d_context.urDdiTable.Global.pfnInit; - if (nullptr != pfnInit) { - result = pfnInit(device_flags, hLoaderConfig); - } else { - // generic implementation - } - - return result; -} catch (...) 
{ - return exceptionToResult(std::current_exception()); -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters - ) try { - ur_result_t result = UR_RESULT_SUCCESS; - - // if the driver has created a custom function, then call it instead of using the generic path - auto pfnTearDown = d_context.urDdiTable.Global.pfnTearDown; - if (nullptr != pfnTearDown) { - result = pfnTearDown(pParams); - } else { - // generic implementation - } - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -363,8 +320,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. @@ -3875,7 +3832,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -3888,7 +3845,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( phEvent); } else { // generic implementation - *phEvent = reinterpret_cast(d_context.get()); + if (nullptr != phEvent) { + *phEvent = reinterpret_cast(d_context.get()); + } } return result; @@ -4588,8 +4547,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. @@ -4605,10 +4564,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMemcpyUSMExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; - if (nullptr != pfnAppendMemcpyUSMExp) { - result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + auto pfnAppendUSMMemcpyExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; + if (nullptr != pfnAppendUSMMemcpyExp) { + result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4621,8 +4580,43 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for 
urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendUSMFillExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + if (nullptr != pfnAppendUSMFillExp) { + result = pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, + patternSize, size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -4640,10 +4634,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferCopyExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; - if (nullptr != pfnAppendMembufferCopyExp) { - result = pfnAppendMembufferCopyExp( + auto pfnAppendMemBufferCopyExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; + if (nullptr != pfnAppendMemBufferCopyExp) { + result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4656,8 +4650,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4675,10 +4669,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferWriteExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; - if (nullptr != pfnAppendMembufferWriteExp) { - result = pfnAppendMembufferWriteExp(hCommandBuffer, hBuffer, offset, + auto pfnAppendMemBufferWriteExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; + if (nullptr != pfnAppendMemBufferWriteExp) { + result = pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4691,8 +4685,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4709,10 +4703,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferReadExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; - if (nullptr != pfnAppendMembufferReadExp) { - result = pfnAppendMembufferReadExp(hCommandBuffer, hBuffer, offset, + auto pfnAppendMemBufferReadExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; + if (nullptr != pfnAppendMemBufferReadExp) { + result = pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } else { @@ -4725,8 +4719,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -4751,10 +4745,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferCopyRectExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyRectExp; - if (nullptr != pfnAppendMembufferCopyRectExp) { - result = pfnAppendMembufferCopyRectExp( + auto pfnAppendMemBufferCopyRectExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; + if (nullptr != pfnAppendMemBufferCopyRectExp) { + result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -4768,8 +4762,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4800,10 +4794,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferWriteRectExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteRectExp; - if (nullptr != pfnAppendMembufferWriteRectExp) { - result = pfnAppendMembufferWriteRectExp( + auto pfnAppendMemBufferWriteRectExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; + if (nullptr != pfnAppendMemBufferWriteRectExp) { + result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -4817,8 +4811,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -4847,10 +4841,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_result_t result = UR_RESULT_SUCCESS; // if the driver has created a custom function, then call it instead of using the generic path - auto pfnAppendMembufferReadRectExp = - d_context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadRectExp; - if (nullptr != pfnAppendMembufferReadRectExp) { - result = pfnAppendMembufferReadRectExp( + auto pfnAppendMemBufferReadRectExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; + if (nullptr != pfnAppendMemBufferReadRectExp) { + result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -4863,6 +4857,108 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendMemBufferFillExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + if (nullptr != pfnAppendMemBufferFillExp) { + result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendUSMPrefetchExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + if (nullptr != pfnAppendUSMPrefetchExp) { + result = pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnAppendUSMAdviseExp = + d_context.urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + if (nullptr != pfnAppendUSMAdviseExp) { + result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -4899,6 +4995,80 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. 
+ ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnCooperativeKernelLaunchExp = + d_context.urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + if (nullptr != pfnCooperativeKernelLaunchExp) { + result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); + } else { + // generic implementation + if (nullptr != phEvent) { + *phEvent = reinterpret_cast<ur_event_handle_t>(d_context.get()); + } + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnSuggestMaxCooperativeGroupCountExp = + d_context.urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + if (nullptr != pfnSuggestMaxCooperativeGroupCountExp) { + result = pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -5052,10 +5222,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnInit = driver::urInit; - - pDdiTable->pfnTearDown = driver::urTearDown; - pDdiTable->pfnAdapterGet = driver::urAdapterGet; pDdiTable->pfnAdapterRelease = driver::urAdapterRelease; @@ -5176,26 +5342,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendKernelLaunchExp = driver::urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = - driver::urCommandBufferAppendMemcpyUSMExp; + pDdiTable->pfnAppendUSMMemcpyExp = + driver::urCommandBufferAppendUSMMemcpyExp; + + pDdiTable->pfnAppendUSMFillExp = driver::urCommandBufferAppendUSMFillExp; + + pDdiTable->pfnAppendMemBufferCopyExp = + driver::urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMembufferCopyExp = - driver::urCommandBufferAppendMembufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + driver::urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMembufferWriteExp = - driver::urCommandBufferAppendMembufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + driver::urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMembufferReadExp = - driver::urCommandBufferAppendMembufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + driver::urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - driver::urCommandBufferAppendMembufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + driver::urCommandBufferAppendMemBufferWriteRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - driver::urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + 
driver::urCommandBufferAppendMemBufferReadRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - driver::urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + driver::urCommandBufferAppendMemBufferFillExp; + + pDdiTable->pfnAppendUSMPrefetchExp = + driver::urCommandBufferAppendUSMPrefetchExp; + + pDdiTable->pfnAppendUSMAdviseExp = + driver::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; @@ -5328,6 +5505,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnCooperativeKernelLaunchExp = + driver::urEnqueueCooperativeKernelLaunchExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses @@ -5433,6 +5641,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + driver::urKernelSuggestMaxCooperativeGroupCountExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index f240f9908b..5c6fb231da 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -3,28 +3,25 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -add_library(ur_common INTERFACE) +add_subdirectory(unified_malloc_framework) +add_subdirectory(umf_pools) + +add_ur_library(ur_common STATIC + umf_helpers.hpp + ur_pool_manager.hpp + $<$:windows/ur_lib_loader.cpp> + $<$:linux/ur_lib_loader.cpp> +) add_library(${PROJECT_NAME}::common ALIAS ur_common) -target_include_directories(ur_common INTERFACE +target_include_directories(ur_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/include ) -add_subdirectory(unified_malloc_framework) -add_subdirectory(umf_pools) -target_link_libraries(ur_common INTERFACE unified_malloc_framework disjoint_pool ${CMAKE_DL_LIBS} ${PROJECT_NAME}::headers) - -if(WIN32) - target_sources(ur_common - INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/windows/ur_lib_loader.cpp - umf_helpers.hpp ur_pool_manager.hpp - ) -else() - target_sources(ur_common - INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/linux/ur_lib_loader.cpp - umf_helpers.hpp ur_pool_manager.hpp - ) -endif() +target_link_libraries(ur_common PUBLIC + unified_malloc_framework + disjoint_pool + ${CMAKE_DL_LIBS} + ${PROJECT_NAME}::headers +) diff --git a/source/common/linux/ur_lib_loader.cpp b/source/common/linux/ur_lib_loader.cpp index 1c5e0af89b..d316e48b74 100644 --- a/source/common/linux/ur_lib_loader.cpp +++ b/source/common/linux/ur_lib_loader.cpp @@ -12,12 +12,7 @@ #include "logger/ur_logger.hpp" #include "ur_lib_loader.hpp" -#if defined(SANITIZER_ANY) || defined(__APPLE__) -#define LOAD_DRIVER_LIBRARY(NAME) dlopen(NAME, 
RTLD_LAZY | RTLD_LOCAL) -#else -#define LOAD_DRIVER_LIBRARY(NAME) \ - dlopen(NAME, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND) -#endif +#define DEEP_BIND_ENV "UR_ADAPTERS_DEEP_BIND" namespace ur_loader { @@ -34,8 +29,21 @@ void LibLoader::freeAdapterLibrary(HMODULE handle) { std::unique_ptr LibLoader::loadAdapterLibrary(const char *name) { - return std::unique_ptr( - LOAD_DRIVER_LIBRARY(name)); + int mode = RTLD_LAZY | RTLD_LOCAL; +#if !defined(__APPLE__) + bool deepbind = getenv_tobool(DEEP_BIND_ENV); + if (deepbind) { +#if defined(SANITIZER_ANY) + logger::warning( + "Enabling RTLD_DEEPBIND while running under a sanitizer is likely " + "to cause issues. Consider disabling {} environment variable.", + DEEP_BIND_ENV); +#endif + mode |= RTLD_DEEPBIND; + } +#endif + + return std::unique_ptr(dlopen(name, mode)); } void *LibLoader::getFunctionPtr(HMODULE handle, const char *func_name) { diff --git a/source/common/umf_pools/disjoint_pool.cpp b/source/common/umf_pools/disjoint_pool.cpp index 5c6c3a852a..dcccb25c3b 100644 --- a/source/common/umf_pools/disjoint_pool.cpp +++ b/source/common/umf_pools/disjoint_pool.cpp @@ -381,6 +381,15 @@ Slab::~Slab() { } catch (MemoryProviderError &e) { std::cout << "DisjointPool: error from memory provider: " << e.code << "\n"; + if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { + const char *message = ""; + int error = 0; + + umfMemoryProviderGetLastNativeError( + umfGetLastFailedMemoryProvider(), &message, &error); + std::cout << "Native error msg: " << message + << ", native error code: " << error << std::endl; + } } } @@ -885,6 +894,12 @@ umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t *providers, if (numProviders != 1 || !providers[0]) { return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + // MinBucketSize parameter must be a power of 2 for bucket sizes + // to generate correctly. 
+ if (!parameters.MinBucketSize || + !((parameters.MinBucketSize & (parameters.MinBucketSize - 1)) == 0)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } impl = std::make_unique(providers[0], parameters); return UMF_RESULT_SUCCESS; diff --git a/source/common/umf_pools/disjoint_pool.hpp b/source/common/umf_pools/disjoint_pool.hpp index a8c9487ef2..f5b67ef3ed 100644 --- a/source/common/umf_pools/disjoint_pool.hpp +++ b/source/common/umf_pools/disjoint_pool.hpp @@ -45,7 +45,8 @@ class DisjointPoolConfig { size_t Capacity = 0; // Holds the minimum bucket size valid for allocation of a memory type. - size_t MinBucketSize = 0; + // This value must be a power of 2. + size_t MinBucketSize = 1; // Holds size of the pool managed by the allocator. size_t CurPoolSize = 0; diff --git a/source/common/unified_malloc_framework/CMakeLists.txt b/source/common/unified_malloc_framework/CMakeLists.txt index 15744605ec..a71b688b74 100644 --- a/source/common/unified_malloc_framework/CMakeLists.txt +++ b/source/common/unified_malloc_framework/CMakeLists.txt @@ -6,10 +6,17 @@ set(UMF_SOURCES src/memory_pool.c src/memory_provider.c - src/memory_tracker.cpp + src/memory_tracker.c src/memory_provider_get_last_failed.cpp + src/critnib/critnib.c ) +if(MSVC) + set(UMF_SOURCES ${UMF_SOURCES} src/utils/utils_windows.cpp src/memory_tracker_windows.cpp) +else() + set(UMF_SOURCES ${UMF_SOURCES} src/utils/utils_posix.c) +endif() + if(UMF_BUILD_SHARED_LIBRARY) message(WARNING "Unified Malloc Framework is still an early work in progress." "There are no API/ABI backward compatibility guarantees. There will be breakages." 
diff --git a/source/common/unified_malloc_framework/src/critnib/critnib.c b/source/common/unified_malloc_framework/src/critnib/critnib.c new file mode 100644 index 0000000000..8a11a9c3f5 --- /dev/null +++ b/source/common/unified_malloc_framework/src/critnib/critnib.c @@ -0,0 +1,813 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * critnib.c -- implementation of critnib tree + * + * It offers identity lookup (like a hashmap) and <= lookup (like a search + * tree). Unlike some hashing algorithms (cuckoo hash, perfect hashing) the + * complexity isn't constant, but for data sizes we expect it's several + * times as fast as cuckoo, and has no "stop the world" cases that would + * cause latency (ie, better worst case behaviour). + */ + +/* + * STRUCTURE DESCRIPTION + * + * Critnib is a hybrid between a radix tree and DJ Bernstein's critbit: + * it skips nodes for uninteresting radix nodes (ie, ones that would have + * exactly one child), this requires adding to every node a field that + * describes the slice (4-bit in our case) that this radix level is for. + * + * This implementation also stores each node's path (ie, bits that are + * common to every key in that subtree) -- this doesn't help with lookups + * at all (unused in == match, could be reconstructed at no cost in <= + * after first dive) but simplifies inserts and removes. If we ever want + * that piece of memory it's easy to trim it down. + */ + +/* + * CONCURRENCY ISSUES + * + * Reads are completely lock-free sync-free, but only almost wait-free: + * if for some reason a read thread gets pathologically stalled, it will + * notice the data being stale and restart the work. In usual cases, + * the structure having been modified does _not_ cause a restart. 
+ * + * Writes could be easily made lock-free as well (with only a cmpxchg + * sync), but this leads to problems with removes. A possible solution + * would be doing removes by overwriting by NULL w/o freeing -- yet this + * would lead to the structure growing without bounds. Complex per-node + * locks would increase concurrency but they slow down individual writes + * enough that in practice a simple global write lock works faster. + * + * Removes are the only operation that can break reads. The structure + * can do local RCU well -- the problem being knowing when it's safe to + * free. Any synchronization with reads would kill their speed, thus + * instead we have a remove count. The grace period is DELETED_LIFE, + * after which any read will notice staleness and restart its work. + */ +#include +#include +#include + +#include "../utils/utils.h" +#include "critnib.h" + +/* + * A node that has been deleted is left untouched for this many delete + * cycles. Reads have guaranteed correctness if they took no longer than + * DELETED_LIFE concurrent deletes, otherwise they notice something is + * wrong and restart. The memory of deleted nodes is never freed to + * malloc nor their pointers lead anywhere wrong, thus a stale read will + * (temporarily) get a wrong answer but won't crash. + * + * There's no need to count writes as they never interfere with reads. + * + * Allowing stale reads (of arbitrarily old writes or of deletes less than + * DELETED_LIFE old) might sound counterintuitive, but it doesn't affect + * semantics in any way: the thread could have been stalled just after + * returning from our code. Thus, the guarantee is: the result of get() or + * find_le() is a value that was current at any point between the call + * start and end. 
+ */ +#define DELETED_LIFE 16 + +#define SLICE 4 +#define NIB ((1ULL << SLICE) - 1) /* ULL: unsigned long is only 32 bits on LLP64 (Win64) */ +#define SLNODES (1 << SLICE) + +typedef uintptr_t word; +typedef unsigned char sh_t; + +struct critnib_node { + /* + * path is the part of a tree that's already traversed (be it through + * explicit nodes or collapsed links) -- ie, any subtree below has all + * those bits set to this value. + * + * nib is a 4-bit slice that's an index into the node's children. + * + * shift is the length (in bits) of the part of the key below this node. + * + * nib + * |XXXXXXXXXX|?|*****| + * path ^ + * +-----+ + * shift + */ + struct critnib_node *child[SLNODES]; + word path; + sh_t shift; +}; + +struct critnib_leaf { + word key; + void *value; +}; + +struct critnib { + struct critnib_node *root; + + /* pool of freed nodes: singly linked list, next at child[0] */ + struct critnib_node *deleted_node; + struct critnib_leaf *deleted_leaf; + + /* nodes removed but not yet eligible for reuse */ + struct critnib_node *pending_del_nodes[DELETED_LIFE]; + struct critnib_leaf *pending_del_leaves[DELETED_LIFE]; + + uint64_t remove_count; + + struct os_mutex_t *mutex; /* writes/removes */ +}; + +/* + * atomic load + */ +static void load(void *src, void *dst) { + util_atomic_load_acquire((word *)src, (word *)dst); +} + +static void load64(uint64_t *src, uint64_t *dst) { + util_atomic_load_acquire(src, dst); +} + +/* + * atomic store + */ +static void store(void *dst, void *src) { + util_atomic_store_release((word *)dst, (word)src); +} + +/* + * internal: is_leaf -- check tagged pointer for leafness + */ +static inline bool is_leaf(struct critnib_node *n) { return (word)n & 1; } + +/* + * internal: to_leaf -- untag a leaf pointer (mask built at full word width) + */ +static inline struct critnib_leaf *to_leaf(struct critnib_node *n) { + return (void *)((word)n & ~(word)1); +} + +/* + * internal: path_mask -- return bit mask of a path above a subtree [shift] + * bits tall + */ +static inline word path_mask(sh_t shift) { return ~NIB << shift; } + 
+/* + * internal: slice_index -- return index of child at the given nib + */ +static inline unsigned slice_index(word key, sh_t shift) { + return (unsigned)((key >> shift) & NIB); +} + +/* + * critnib_new -- allocates a new critnib structure (NULL on allocation or mutex failure) + */ +struct critnib *critnib_new(void) { + struct critnib *c = Zalloc(sizeof(struct critnib)); + if (!c) { + return NULL; + } + + c->mutex = util_mutex_create(); + if (!c->mutex) { + Free(c); /* pair with Zalloc, as critnib_delete does */ + return NULL; + } + + VALGRIND_HG_DRD_DISABLE_CHECKING(&c->root, sizeof(c->root)); + VALGRIND_HG_DRD_DISABLE_CHECKING(&c->remove_count, sizeof(c->remove_count)); + + return c; +} + +/* + * internal: delete_node -- recursively free (to malloc) a subtree + */ +static void delete_node(struct critnib_node *__restrict n) { + if (is_leaf(n)) { + Free(to_leaf(n)); + } else { + for (int i = 0; i < SLNODES; i++) { + if (n->child[i]) { + delete_node(n->child[i]); + } + } + + Free(n); + } +} + +/* + * critnib_delete -- destroy and free a critnib struct + */ +void critnib_delete(struct critnib *c) { + if (c->root) { + delete_node(c->root); + } + + util_mutex_destroy(c->mutex); + + for (struct critnib_node *m = c->deleted_node; m;) { + struct critnib_node *mm = m->child[0]; + Free(m); + m = mm; + } + + for (struct critnib_leaf *k = c->deleted_leaf; k;) { + struct critnib_leaf *kk = k->value; + Free(k); + k = kk; + } + + for (int i = 0; i < DELETED_LIFE; i++) { + Free(c->pending_del_nodes[i]); + Free(c->pending_del_leaves[i]); + } + + Free(c); +} + +/* + * internal: free_node -- free (to internal pool, not malloc) a node. + * + * We cannot free them to malloc as a stalled reader thread may still walk + * through such nodes; it will notice the result being bogus but only after + * completing the walk, thus we need to ensure any freed nodes still point + * to within the critnib structure. 
+ */ +static void free_node(struct critnib *__restrict c, + struct critnib_node *__restrict n) { + if (!n) { + return; + } + + ASSERT(!is_leaf(n)); + n->child[0] = c->deleted_node; + c->deleted_node = n; +} + +/* + * internal: alloc_node -- allocate a node from our pool or from malloc + */ +static struct critnib_node *alloc_node(struct critnib *__restrict c) { + if (!c->deleted_node) { + return Malloc(sizeof(struct critnib_node)); + } + + struct critnib_node *n = c->deleted_node; + + c->deleted_node = n->child[0]; + VALGRIND_ANNOTATE_NEW_MEMORY(n, sizeof(*n)); + + return n; +} + +/* + * internal: free_leaf -- free (to internal pool, not malloc) a leaf. + * + * See free_node(). + */ +static void free_leaf(struct critnib *__restrict c, + struct critnib_leaf *__restrict k) { + if (!k) { + return; + } + + k->value = c->deleted_leaf; + c->deleted_leaf = k; +} + +/* + * internal: alloc_leaf -- allocate a leaf from our pool or from malloc + */ +static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { + if (!c->deleted_leaf) { + return Malloc(sizeof(struct critnib_leaf)); + } + + struct critnib_leaf *k = c->deleted_leaf; + + c->deleted_leaf = k->value; + VALGRIND_ANNOTATE_NEW_MEMORY(k, sizeof(*k)); + + return k; +} + +/* + * critnib_insert -- write a key:value pair to the critnib structure + * + * Returns: + * • 0 on success + * • EEXIST if such a key already exists and update is 0 (a non-zero update overwrites the value and returns 0) + * • ENOMEM if we're out of memory + * + * Takes a global write lock but doesn't stall any readers. 
+ */ +int critnib_insert(struct critnib *c, word key, void *value, int update) { + util_mutex_lock(c->mutex); + + struct critnib_leaf *k = alloc_leaf(c); + if (!k) { + util_mutex_unlock(c->mutex); + + return ENOMEM; + } + + VALGRIND_HG_DRD_DISABLE_CHECKING(k, sizeof(struct critnib_leaf)); + + k->key = key; + k->value = value; + + struct critnib_node *kn = (void *)((word)k | 1); + + struct critnib_node *n = c->root; + if (!n) { + c->root = kn; + + util_mutex_unlock(c->mutex); + + return 0; + } + + struct critnib_node **parent = &c->root; + struct critnib_node *prev = c->root; + + while (n && !is_leaf(n) && (key & path_mask(n->shift)) == n->path) { + prev = n; + parent = &n->child[slice_index(key, n->shift)]; + n = *parent; + } + + if (!n) { + n = prev; + store(&n->child[slice_index(key, n->shift)], kn); + + util_mutex_unlock(c->mutex); + + return 0; + } + + word path = is_leaf(n) ? to_leaf(n)->key : n->path; + /* Find where the path differs from our key. */ + word at = path ^ key; + if (!at) { + ASSERT(is_leaf(n)); + free_leaf(c, to_leaf(kn)); + + if (update) { + to_leaf(n)->value = value; + util_mutex_unlock(c->mutex); + return 0; + } else { + util_mutex_unlock(c->mutex); + return EEXIST; + } + } + + /* and convert that to an index. 
*/ + sh_t sh = util_mssb_index(at) & (sh_t) ~(SLICE - 1); + + struct critnib_node *m = alloc_node(c); + if (!m) { + free_leaf(c, to_leaf(kn)); + + util_mutex_unlock(c->mutex); + + return ENOMEM; + } + VALGRIND_HG_DRD_DISABLE_CHECKING(m, sizeof(struct critnib_node)); + + for (int i = 0; i < SLNODES; i++) { + m->child[i] = NULL; + } + + m->child[slice_index(key, sh)] = kn; + m->child[slice_index(path, sh)] = n; + m->shift = sh; + m->path = key & path_mask(sh); + store(parent, m); + + util_mutex_unlock(c->mutex); + + return 0; +} + +/* + * critnib_remove -- delete a key from the critnib structure, return its value + */ +void *critnib_remove(struct critnib *c, word key) { + struct critnib_leaf *k; + void *value = NULL; + + util_mutex_lock(c->mutex); + + struct critnib_node *n = c->root; + if (!n) { + goto not_found; + } + + word del = (util_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; + free_node(c, c->pending_del_nodes[del]); + free_leaf(c, c->pending_del_leaves[del]); + c->pending_del_nodes[del] = NULL; + c->pending_del_leaves[del] = NULL; + + if (is_leaf(n)) { + k = to_leaf(n); + if (k->key == key) { + store(&c->root, NULL); + goto del_leaf; + } + + goto not_found; + } + /* + * n and k are a parent:child pair (after the first iteration); k is the + * leaf that holds the key we're deleting. + */ + struct critnib_node **k_parent = &c->root; + struct critnib_node **n_parent = &c->root; + struct critnib_node *kn = n; + + while (!is_leaf(kn)) { + n_parent = k_parent; + n = kn; + k_parent = &kn->child[slice_index(key, kn->shift)]; + kn = *k_parent; + + if (!kn) { + goto not_found; + } + } + + k = to_leaf(kn); + if (k->key != key) { + goto not_found; + } + + store(&n->child[slice_index(key, n->shift)], NULL); + + /* Remove the node if there's only one remaining child. 
*/ + int ochild = -1; + for (int i = 0; i < SLNODES; i++) { + if (n->child[i]) { + if (ochild != -1) { + goto del_leaf; + } + + ochild = i; + } + } + + ASSERTne(ochild, -1); + + store(n_parent, n->child[ochild]); + c->pending_del_nodes[del] = n; + +del_leaf: + value = k->value; + c->pending_del_leaves[del] = k; + +not_found: + util_mutex_unlock(c->mutex); + return value; +} + +/* + * critnib_get -- query for a key ("==" match), returns value or NULL + * + * Doesn't need a lock but if many deletes happened while our thread was + * somehow stalled the query is restarted (as freed nodes remain unused only + * for a grace period). + * + * Counterintuitively, it's pointless to return the most current answer, + * we need only one that was valid at any point after the call started. + */ +void *critnib_get(struct critnib *c, word key) { + uint64_t wrs1, wrs2; + void *res; + + do { + struct critnib_node *n; + + load64(&c->remove_count, &wrs1); + load(&c->root, &n); + + /* + * critbit algorithm: dive into the tree, looking at nothing but + * each node's critical bit^H^H^Hnibble. This means we risk + * going wrong way if our path is missing, but that's ok... + */ + while (n && !is_leaf(n)) { + load(&n->child[slice_index(key, n->shift)], &n); + } + + /* ... as we check it at the end. */ + struct critnib_leaf *k = to_leaf(n); + res = (n && k->key == key) ? 
k->value : NULL; + load64(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + return res; +} + +/* + * internal: find_predecessor -- return the rightmost leaf in a subtree + */ +static struct critnib_leaf * +find_predecessor(struct critnib_node *__restrict n) { + while (1) { + int nib; + for (nib = NIB; nib >= 0; nib--) { + if (n->child[nib]) { + break; + } + } + + if (nib < 0) { + return NULL; + } + + n = n->child[nib]; + if (is_leaf(n)) { + return to_leaf(n); + } + } +} + +/* + * internal: find_le -- recursively search <= in a subtree + */ +static struct critnib_leaf *find_le(struct critnib_node *__restrict n, + word key) { + if (!n) { + return NULL; + } + + if (is_leaf(n)) { + struct critnib_leaf *k = to_leaf(n); + return (k->key <= key) ? k : NULL; + } + + /* + * is our key outside the subtree we're in? + * + * If we're inside, all bits above the nib will be identical; note + * that shift points at the nib's lower rather than upper edge, so it + * needs to be masked away as well. + */ + if ((key ^ n->path) >> (n->shift) & ~NIB) { + /* + * subtree is too far to the left? + * -> its rightmost value is good + */ + if (n->path < key) { + return find_predecessor(n); + } + + /* + * subtree is too far to the right? + * -> it has nothing of interest to us + */ + return NULL; + } + + unsigned nib = slice_index(key, n->shift); + /* recursive call: follow the path */ + { + struct critnib_node *m; + load(&n->child[nib], &m); + struct critnib_leaf *k = find_le(m, key); + if (k) { + return k; + } + } + + /* + * nothing in that subtree? We strayed from the path at this point, + * thus need to search every subtree to our left in this node. No + * need to dive into any but the first non-null, though. 
+ */ + for (; nib > 0; nib--) { + struct critnib_node *m; + load(&n->child[nib - 1], &m); + if (m) { + n = m; + if (is_leaf(n)) { + return to_leaf(n); + } + + return find_predecessor(n); + } + } + + return NULL; +} + +/* + * critnib_find_le -- query for a key ("<=" match), returns value or NULL + * + * Same guarantees as critnib_get(). + */ +void *critnib_find_le(struct critnib *c, word key) { + uint64_t wrs1, wrs2; + void *res; + + do { + load64(&c->remove_count, &wrs1); + struct critnib_node *n; /* avoid a subtle TOCTOU */ + load(&c->root, &n); + struct critnib_leaf *k = n ? find_le(n, key) : NULL; + res = k ? k->value : NULL; + load64(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + return res; +} + +/* + * internal: find_successor -- return the leftmost leaf in a subtree + */ +static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) { + while (1) { + unsigned nib; + for (nib = 0; nib <= NIB; nib++) { + if (n->child[nib]) { + break; + } + } + + if (nib > NIB) { + return NULL; + } + + n = n->child[nib]; + if (is_leaf(n)) { + return to_leaf(n); + } + } +} + +/* + * internal: find_ge -- recursively search >= in a subtree + */ +static struct critnib_leaf *find_ge(struct critnib_node *__restrict n, + word key) { + if (!n) { + return NULL; + } + + if (is_leaf(n)) { + struct critnib_leaf *k = to_leaf(n); + return (k->key >= key) ?
k : NULL; + } + + if ((key ^ n->path) >> (n->shift) & ~NIB) { + if (n->path > key) { + return find_successor(n); + } + + return NULL; + } + + unsigned nib = slice_index(key, n->shift); + { + struct critnib_node *m; + load(&n->child[nib], &m); + struct critnib_leaf *k = find_ge(m, key); + if (k) { + return k; + } + } + + for (; nib < NIB; nib++) { + struct critnib_node *m; + load(&n->child[nib + 1], &m); + if (m) { + n = m; + if (is_leaf(n)) { + return to_leaf(n); + } + + return find_successor(n); + } + } + + return NULL; +} + +/* + * critnib_find -- parametrized query, returns 1 if found + */ +int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, + uintptr_t *rkey, void **rvalue) { + uint64_t wrs1, wrs2; + struct critnib_leaf *k; + uintptr_t _rkey; + void **_rvalue; + + /* <42 ≡ ≤41 */ + if (dir < -1) { + if (!key) { + return 0; /* no key is <0 */ + } + key--; + } else if (dir > +1) { + if (key == (uintptr_t)-1) { + return 0; /* no key is >(unsigned)∞ */ + } + key++; + } + + do { + load64(&c->remove_count, &wrs1); + struct critnib_node *n; + load(&c->root, &n); + + if (dir < 0) { + k = find_le(n, key); + } else if (dir > 0) { + k = find_ge(n, key); + } else { + while (n && !is_leaf(n)) { + load(&n->child[slice_index(key, n->shift)], &n); + } + + struct critnib_leaf *kk = to_leaf(n); + k = (n && kk->key == key) ? kk : NULL; + } + if (k) { + _rkey = k->key; + _rvalue = k->value; + } + load64(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + if (k) { + if (rkey) { + *rkey = _rkey; + } + if (rvalue) { + *rvalue = _rvalue; + } + return 1; + } + + return 0; +} + +/* + * critnib_iter -- iterator, [min..max], calls func(key, value, privdata) + * + * If func() returns non-zero, the search is aborted. 
+ */ +static int iter(struct critnib_node *__restrict n, word min, word max, + int (*func)(word key, void *value, void *privdata), + void *privdata) { + if (is_leaf(n)) { + word k = to_leaf(n)->key; + if (k >= min && k <= max) { + return func(to_leaf(n)->key, to_leaf(n)->value, privdata); + } + return 0; + } + + if (n->path > max) { + return 1; + } + if ((n->path | path_mask(n->shift)) < min) { + return 0; + } + + for (int i = 0; i < SLNODES; i++) { + struct critnib_node *__restrict m = n->child[i]; + if (m && iter(m, min, max, func, privdata)) { + return 1; + } + } + + return 0; +} + +void critnib_iter(critnib *c, uintptr_t min, uintptr_t max, + int (*func)(uintptr_t key, void *value, void *privdata), + void *privdata) { + util_mutex_lock(c->mutex); + if (c->root) { + iter(c->root, min, max, func, privdata); + } + util_mutex_unlock(c->mutex); +} diff --git a/source/common/unified_malloc_framework/src/critnib/critnib.h b/source/common/unified_malloc_framework/src/critnib/critnib.h new file mode 100644 index 0000000000..b7ce850871 --- /dev/null +++ b/source/common/unified_malloc_framework/src/critnib/critnib.h @@ -0,0 +1,48 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#ifndef CRITNIB_H +#define CRITNIB_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct critnib; +typedef struct critnib critnib; + +enum find_dir_t { + FIND_L = -2, + FIND_LE = -1, + FIND_EQ = 0, + FIND_GE = +1, + FIND_G = +2, +}; + +critnib *critnib_new(void); +void critnib_delete(critnib *c); + +int critnib_insert(critnib *c, uintptr_t key, void *value, int update); +void *critnib_remove(critnib *c, uintptr_t key); +void *critnib_get(critnib *c, uintptr_t key); +void *critnib_find_le(critnib *c, uintptr_t key); +int critnib_find(critnib *c, uintptr_t key, enum find_dir_t dir, + uintptr_t *rkey, void **rvalue); +void critnib_iter(critnib *c, uintptr_t min, uintptr_t max, + int (*func)(uintptr_t key, void *value, void *privdata), + void *privdata); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/source/common/unified_malloc_framework/src/memory_pool_default.c b/source/common/unified_malloc_framework/src/memory_pool_default.c index be7c4c9c57..b997b090cd 100644 --- a/source/common/unified_malloc_framework/src/memory_pool_default.c +++ b/source/common/unified_malloc_framework/src/memory_pool_default.c @@ -69,9 +69,15 @@ void umfPoolDestroy(umf_memory_pool_handle_t hPool) { free(hPool); } -enum umf_result_t umfFree(void *ptr) { return UMF_RESULT_ERROR_NOT_SUPPORTED; } +enum umf_result_t umfFree(void *ptr) { + (void)ptr; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} -umf_memory_pool_handle_t umfPoolByPtr(const void *ptr) { return NULL; } +umf_memory_pool_handle_t umfPoolByPtr(const void *ptr) { + (void)ptr; + return NULL; +} enum umf_result_t umfPoolGetMemoryProviders(umf_memory_pool_handle_t hPool, size_t numProviders, diff --git a/source/common/unified_malloc_framework/src/memory_tracker.cpp b/source/common/unified_malloc_framework/src/memory_tracker.c similarity index 69% rename from source/common/unified_malloc_framework/src/memory_tracker.cpp rename 
to source/common/unified_malloc_framework/src/memory_tracker.c index adbe2aa5e9..76b0b7b745 100644 --- a/source/common/unified_malloc_framework/src/memory_tracker.cpp +++ b/source/common/unified_malloc_framework/src/memory_tracker.c @@ -9,112 +9,103 @@ */ #include "memory_tracker.h" +#include "critnib/critnib.h" + +#include #include #include -#include -#include -#include -#include +#include +#include #include -#ifdef _WIN32 -#include -#endif - -// TODO: reimplement in C and optimize... -struct umf_memory_tracker_t { - enum umf_result_t add(void *pool, const void *ptr, size_t size) { - std::unique_lock lock(mtx); +#if !defined(_WIN32) +critnib *TRACKER = NULL; +void __attribute__((constructor)) createLibTracker(void) { + TRACKER = critnib_new(); +} +void __attribute__((destructor)) deleteLibTracker(void) { + critnib_delete(TRACKER); +} - if (size == 0) { - return UMF_RESULT_SUCCESS; - } +umf_memory_tracker_handle_t umfMemoryTrackerGet(void) { + return (umf_memory_tracker_handle_t)TRACKER; +} +#endif - auto ret = - map.try_emplace(reinterpret_cast(ptr), size, pool); - return ret.second ?
UMF_RESULT_SUCCESS : UMF_RESULT_ERROR_UNKNOWN; - } +struct tracker_value_t { + umf_memory_pool_handle_t pool; + size_t size; +}; - enum umf_result_t remove(const void *ptr, size_t size) { - std::unique_lock lock(mtx); +/* Records ptr -> (pool, size) in the tracker; the entry is heap-allocated and freed again if the insert fails. */ +static enum umf_result_t +umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, + umf_memory_pool_handle_t pool, const void *ptr, + size_t size) { + assert(ptr); - map.erase(reinterpret_cast(ptr)); + struct tracker_value_t *value = + (struct tracker_value_t *)malloc(sizeof(struct tracker_value_t)); + if (!value) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + value->pool = pool; + value->size = size; - // TODO: handle removing part of the range - (void)size; + int ret = critnib_insert((critnib *)hTracker, (uintptr_t)ptr, value, 0); + if (ret == 0) { return UMF_RESULT_SUCCESS; } - void *find(const void *ptr) { - std::shared_lock lock(mtx); - - auto intptr = reinterpret_cast(ptr); - auto it = map.upper_bound(intptr); - if (it == map.begin()) { - return nullptr; - } - - --it; - - auto address = it->first; - auto size = it->second.first; - auto pool = it->second.second; - - if (intptr >= address && intptr < address + size) { - return pool; - } + free(value); - return nullptr; + if (ret == ENOMEM) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - private: - std::shared_mutex mtx; - std::map> map; -}; - -static enum umf_result_t -umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, void *pool, - const void *ptr, size_t size) { - return hTracker->add(pool, ptr, size); + // This should not happen + // TODO: add logging here + return UMF_RESULT_ERROR_UNKNOWN; } static enum umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, const void *ptr, size_t size) { - return hTracker->remove(ptr, size); -} + assert(ptr); + + // TODO: there is no support for removing partial ranges (or multiple entries + // in a single remove call) yet. + // Every umfMemoryTrackerAdd(..., ptr, ...) should have a corresponding + // umfMemoryTrackerRemove call with the same ptr value.
+ (void)size; + + void *value = critnib_remove((critnib *)hTracker, (uintptr_t)ptr); + if (!value) { + // This should not happen + // TODO: add logging here + return UMF_RESULT_ERROR_UNKNOWN; + } -extern "C" { + free(value); -#if defined(_WIN32) && defined(UMF_SHARED_LIBRARY) -umf_memory_tracker_t *tracker = nullptr; -BOOL APIENTRY DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { - if (fdwReason == DLL_PROCESS_DETACH) { - delete tracker; - } else if (fdwReason == DLL_PROCESS_ATTACH) { - tracker = new umf_memory_tracker_t; - } - return TRUE; -} -#elif defined(_WIN32) -umf_memory_tracker_t trackerInstance; -umf_memory_tracker_t *tracker = &trackerInstance; -#else -umf_memory_tracker_t *tracker = nullptr; -void __attribute__((constructor)) createLibTracker() { - tracker = new umf_memory_tracker_t; + return UMF_RESULT_SUCCESS; } -void __attribute__((destructor)) deleteLibTracker() { delete tracker; } -#endif +umf_memory_pool_handle_t +umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, const void *ptr) { + assert(ptr); -umf_memory_tracker_handle_t umfMemoryTrackerGet(void) { return tracker; } + uintptr_t rkey; + struct tracker_value_t *rvalue; + int found = critnib_find((critnib *)hTracker, (uintptr_t)ptr, FIND_LE, + (void *)&rkey, (void **)&rvalue); + if (!found) { + return NULL; + } -void *umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, - const void *ptr) { - return hTracker->find(ptr); + return (rkey + rvalue->size > (uintptr_t)ptr) ? rvalue->pool : NULL; /* tracked range is [rkey, rkey + size): the end address is excluded */ } struct umf_tracking_memory_provider_t { @@ -136,7 +123,7 @@ static enum umf_result_t trackingAlloc(void *hProvider, size_t size, } ret = umfMemoryProviderAlloc(p->hUpstream, size, alignment, ptr); - if (ret != UMF_RESULT_SUCCESS) { + if (ret != UMF_RESULT_SUCCESS || !*ptr) { return ret; } @@ -159,9 +146,11 @@ static enum umf_result_t trackingFree(void *hProvider, void *ptr, size_t size) { // to avoid a race condition.
If the order would be different, other thread // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove // resulting in inconsistent state. - ret = umfMemoryTrackerRemove(p->hTracker, ptr, size); - if (ret != UMF_RESULT_SUCCESS) { - return ret; + if (ptr) { + ret = umfMemoryTrackerRemove(p->hTracker, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + return ret; + } } ret = umfMemoryProviderFree(p->hUpstream, ptr, size); @@ -267,4 +256,3 @@ void umfTrackingMemoryProviderGetUpstreamProvider( (umf_tracking_memory_provider_t *)hTrackingProvider; *hUpstream = p->hUpstream; } -} diff --git a/source/common/unified_malloc_framework/src/memory_tracker.h b/source/common/unified_malloc_framework/src/memory_tracker.h index 43a95cf0cd..c16844928e 100644 --- a/source/common/unified_malloc_framework/src/memory_tracker.h +++ b/source/common/unified_malloc_framework/src/memory_tracker.h @@ -22,8 +22,8 @@ extern "C" { typedef struct umf_memory_tracker_t *umf_memory_tracker_handle_t; umf_memory_tracker_handle_t umfMemoryTrackerGet(void); -void *umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, - const void *ptr); +umf_memory_pool_handle_t +umfMemoryTrackerGetPool(umf_memory_tracker_handle_t hTracker, const void *ptr); // Creates a memory provider that tracks each allocation/deallocation through umf_memory_tracker_handle_t and // forwards all requests to hUpstream memory Provider. hUpstream lifetime should be managed by the user of this function. diff --git a/source/common/unified_malloc_framework/src/memory_tracker_windows.cpp b/source/common/unified_malloc_framework/src/memory_tracker_windows.cpp new file mode 100644 index 0000000000..b5545f3490 --- /dev/null +++ b/source/common/unified_malloc_framework/src/memory_tracker_windows.cpp @@ -0,0 +1,37 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include "critnib/critnib.h" +#include "memory_tracker.h" + +#include +#if defined(UMF_SHARED_LIBRARY) +critnib *TRACKER = NULL; +BOOL APIENTRY DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpvReserved) { + if (fdwReason == DLL_PROCESS_DETACH) { + critnib_delete(TRACKER); + } else if (fdwReason == DLL_PROCESS_ATTACH) { + TRACKER = critnib_new(); + } + return TRUE; +} +#else +struct tracker_t { + tracker_t() { map = critnib_new(); } + ~tracker_t() { critnib_delete(map); } + critnib *map; +}; +tracker_t TRACKER_INSTANCE; +critnib *TRACKER = TRACKER_INSTANCE.map; +#endif + +umf_memory_tracker_handle_t umfMemoryTrackerGet(void) { + return (umf_memory_tracker_handle_t)TRACKER; +} diff --git a/source/common/unified_malloc_framework/src/utils/utils.h b/source/common/unified_malloc_framework/src/utils/utils.h new file mode 100644 index 0000000000..4dd779c57b --- /dev/null +++ b/source/common/unified_malloc_framework/src/utils/utils.h @@ -0,0 +1,110 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include +#if defined(_WIN32) +#include +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct os_mutex_t; + +struct os_mutex_t *util_mutex_create(void); +void util_mutex_destroy(struct os_mutex_t *mutex); +int util_mutex_lock(struct os_mutex_t *mutex); +int util_mutex_unlock(struct os_mutex_t *mutex); + +#if defined(_WIN32) +static __inline unsigned char util_lssb_index(long long value) { + unsigned long ret; + _BitScanForward64(&ret, value); + return (unsigned char)ret; +} +static __inline unsigned char util_mssb_index(long long value) { + unsigned long ret; + _BitScanReverse64(&ret, value); + return (unsigned char)ret; +} + +// There is no good way to do atomic_load on windows, so OR 0 into `object` (which leaves it unchanged) and copy the value it returns into *dest. +#define util_atomic_load_acquire(object, dest) \ + do { \ + *dest = InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ + } while (0) + +#define util_atomic_store_release(object, desired) \ + InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) +#define util_atomic_increment(object) \ + InterlockedIncrement64((LONG64 volatile *)object) +#else +#define util_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) +#define util_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) +#define util_atomic_load_acquire(object, dest) \ + __atomic_load(object, dest, memory_order_acquire) +#define util_atomic_store_release(object, desired) \ + __atomic_store_n(object, desired, memory_order_release) +#define util_atomic_increment(object) \ + __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) +#endif + +#define Malloc malloc +#define Free free + +static inline void *Zalloc(size_t s) { + void *m = Malloc(s); + if (m) { + memset(m, 0, s); + } + return m; +} + +#define NOFUNCTION \ + do { \ + } while (0) +#define VALGRIND_ANNOTATE_NEW_MEMORY(p, s) NOFUNCTION +#define VALGRIND_HG_DRD_DISABLE_CHECKING(p, s) NOFUNCTION + +#ifdef NDEBUG +#define ASSERT(x)
NOFUNCTION +#define ASSERTne(x, y) ASSERT(x != y) +#else +#define ASSERT(x) \ + do \ + if (!(x)) { \ + fprintf(stderr, \ + "Assertion failed: " #x " at " __FILE__ " line %d.\n", \ + __LINE__); \ + abort(); \ + } \ + while (0) +#define ASSERTne(x, y) \ + do { \ + long X = (x); \ + long Y = (y); \ + if (X == Y) { \ + fprintf(stderr, \ + "Assertion failed: " #x " != " #y \ + ", both are %ld, at " __FILE__ " line %d.\n", \ + X, __LINE__); \ + abort(); \ + } \ + } while (0) +#endif + +#ifdef __cplusplus +} +#endif diff --git a/source/common/unified_malloc_framework/src/utils/utils_posix.c b/source/common/unified_malloc_framework/src/utils/utils_posix.c new file mode 100644 index 0000000000..d03bb366a1 --- /dev/null +++ b/source/common/unified_malloc_framework/src/utils/utils_posix.c @@ -0,0 +1,42 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include + +#include "utils.h" + +/* Allocates and initializes a mutex; returns NULL if either the allocation or pthread_mutex_init() fails. */ +struct os_mutex_t *util_mutex_create(void) { + pthread_mutex_t *mutex = (pthread_mutex_t *)malloc(sizeof(pthread_mutex_t)); + if (!mutex) { + return NULL; + } + if (pthread_mutex_init(mutex, NULL) != 0) { + free(mutex); + return NULL; + } + return (struct os_mutex_t *)mutex; +}
+ +void util_mutex_destroy(struct os_mutex_t *m) { + pthread_mutex_t *mutex = (pthread_mutex_t *)m; + int ret = pthread_mutex_destroy(mutex); + (void)ret; // TODO: add logging + free(m); +} + +int util_mutex_lock(struct os_mutex_t *m) { + return pthread_mutex_lock((pthread_mutex_t *)m); +} + +int util_mutex_unlock(struct os_mutex_t *m) { + return pthread_mutex_unlock((pthread_mutex_t *)m); +} diff --git a/source/common/unified_malloc_framework/src/utils/utils_windows.cpp b/source/common/unified_malloc_framework/src/utils/utils_windows.cpp new file mode 100644 index 0000000000..b5db557c77 --- /dev/null +++ b/source/common/unified_malloc_framework/src/utils/utils_windows.cpp @@ -0,0 +1,33 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include + +#include "utils.h" + +struct os_mutex_t *util_mutex_create(void) { + return reinterpret_cast(new std::mutex); +} + +void util_mutex_destroy(struct os_mutex_t *mutex) { + delete reinterpret_cast(mutex); +} + +int util_mutex_lock(struct os_mutex_t *mutex) try { + reinterpret_cast(mutex)->lock(); + return 0; +} catch (std::system_error &err) { + return err.code().value(); +} + +int util_mutex_unlock(struct os_mutex_t *mutex) { + reinterpret_cast(mutex)->unlock(); + return 0; +} diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 4c1c90e993..9d7779ea2d 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -40,7 +40,7 @@ struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; -template inline void serializePtr(std::ostream &os, T *ptr); +template inline void serializePtr(std::ostream &os, const T *ptr); template inline void serializeFlag(std::ostream &os,
uint32_t flag); template @@ -224,31 +224,39 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_result_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_base_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_base_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_rect_offset_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_rect_region_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_base_properties_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_base_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_rect_offset_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_rect_region_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_device_init_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_loader_config_info_t value); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_code_location_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_adapter_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_adapter_backend_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_platform_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_api_version_t value); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_native_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_platform_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, 
enum ur_platform_backend_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_device_binary_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_device_binary_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_device_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value); inline std::ostream &operator<<(std::ostream &os, @@ -257,10 +265,10 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_partition_t value); inline std::ostream & operator<<(std::ostream &os, - const struct ur_device_partition_property_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_partition_properties_t params); + [[maybe_unused]] const struct ur_device_partition_property_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_device_partition_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_device_fp_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, @@ -270,7 +278,8 @@ inline std::ostream &operator<<(std::ostream &os, inline std::ostream &operator<<(std::ostream &os, enum ur_device_exec_capability_flag_t value); inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_native_properties_t params); +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_device_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_memory_order_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, @@ -278,12 +287,13 @@ inline std::ostream &operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, enum ur_device_usm_access_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_context_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_context_properties_t params); +inline 
std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_context_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_context_info_t value); inline std::ostream & operator<<(std::ostream &os, - const struct ur_context_native_properties_t params); + [[maybe_unused]] const struct ur_context_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_mem_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_mem_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_mem_info_t value); @@ -292,34 +302,40 @@ inline std::ostream &operator<<(std::ostream &os, inline std::ostream &operator<<(std::ostream &os, enum ur_image_channel_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_image_info_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_image_format_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_image_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_buffer_properties_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_buffer_channel_properties_t params); + [[maybe_unused]] const struct ur_image_format_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_buffer_alloc_location_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_buffer_region_t params); + [[maybe_unused]] const struct ur_image_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_buffer_properties_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_buffer_channel_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_buffer_alloc_location_properties_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] 
const struct ur_buffer_region_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_buffer_create_type_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_native_properties_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_mem_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_sampler_filter_mode_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_sampler_addressing_mode_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_sampler_info_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_sampler_desc_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_sampler_native_properties_t params); + [[maybe_unused]] const struct ur_sampler_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_sampler_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_host_mem_flag_t value); inline std::ostream &operator<<(std::ostream &os, @@ -331,16 +347,21 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_usm_alloc_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_advice_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_host_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_device_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_pool_desc_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_pool_limits_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct 
ur_usm_host_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_device_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_limits_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_pool_info_t value); inline std::ostream &operator<<(std::ostream &os, @@ -352,13 +373,16 @@ inline std::ostream &operator<<(std::ostream &os, inline std::ostream &operator<<(std::ostream &os, enum ur_physical_mem_flag_t value); inline std::ostream & -operator<<(std::ostream &os, const struct ur_physical_mem_properties_t params); +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_physical_mem_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_program_metadata_type_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_metadata_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_properties_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_metadata_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_program_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_program_build_status_t value); @@ -366,18 +390,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_program_binary_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_program_build_info_t value); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_specialization_constant_info_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_specialization_constant_info_t params); 
-inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_native_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_value_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_arg_local_properties_t params); + [[maybe_unused]] const struct ur_program_native_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_arg_value_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_arg_local_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_group_info_t value); @@ -387,37 +411,43 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_cache_config_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_kernel_exec_info_t value); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_arg_pointer_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_exec_info_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_arg_sampler_properties_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_arg_mem_obj_properties_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_kernel_arg_pointer_properties_t params); + [[maybe_unused]] const struct ur_kernel_native_properties_t params); +inline std::ostream &operator<<(std::ostream &os, enum ur_queue_info_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value); inline std::ostream & operator<<(std::ostream &os, - const struct ur_kernel_exec_info_properties_t params); + 
[[maybe_unused]] const struct ur_queue_properties_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_kernel_arg_sampler_properties_t params); + [[maybe_unused]] const struct ur_queue_index_properties_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_kernel_arg_mem_obj_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_native_properties_t params); -inline std::ostream &operator<<(std::ostream &os, enum ur_queue_info_t value); -inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_properties_t params); -inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_index_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_native_desc_t params); + [[maybe_unused]] const struct ur_queue_native_desc_t params); inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_native_properties_t params); +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_queue_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_event_status_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_event_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_profiling_info_t value); inline std::ostream & -operator<<(std::ostream &os, const struct ur_event_native_properties_t params); +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_event_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value); @@ -425,20 +455,30 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value); inline std::ostream 
&operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_file_descriptor_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_win32_handle_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_exp_sampler_mip_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_exp_interop_mem_desc_t params); + [[maybe_unused]] const struct ur_exp_file_descriptor_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_exp_win32_handle_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_exp_sampler_mip_properties_t params); +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t params); inline std::ostream & operator<<(std::ostream &os, - const struct ur_exp_interop_semaphore_desc_t params); + [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_exp_layered_image_properties_t params); inline std::ostream & -operator<<(std::ostream &os, const struct ur_exp_command_buffer_desc_t params); +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); @@ -825,14 +865,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { os << "UR_FUNCTION_QUEUE_FLUSH"; break; - case UR_FUNCTION_INIT: - os << "UR_FUNCTION_INIT"; - break; - - case UR_FUNCTION_TEAR_DOWN: - os << "UR_FUNCTION_TEAR_DOWN"; - break; - case UR_FUNCTION_SAMPLER_CREATE: os << "UR_FUNCTION_SAMPLER_CREATE"; break; @@ -933,18 +965,6 @@ inline 
std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { os << "UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP"; - break; - case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: os << "UR_FUNCTION_USM_PITCHED_ALLOC_EXP"; break; @@ -1086,22 +1106,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { os << "UR_FUNCTION_USM_P2P_PEER_ACCESS_GET_INFO_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP"; - break; - - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP"; - break; - case UR_FUNCTION_LOADER_CONFIG_CREATE: os << "UR_FUNCTION_LOADER_CONFIG_CREATE"; break; @@ -1141,6 +1145,70 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + + case UR_FUNCTION_LOADER_INIT: + os << "UR_FUNCTION_LOADER_INIT"; + break; + + case UR_FUNCTION_LOADER_TEAR_DOWN: + os << "UR_FUNCTION_LOADER_TEAR_DOWN"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: + os << 
"UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP"; + break; + + case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP"; + break; + + case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: + os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP"; + break; + + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP"; + break; + + case UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK: + os << "UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK"; + break; default: os << "unknown enumerator"; break; @@ -1310,6 +1378,14 @@ inline std::ostream &operator<<(std::ostream &os, case UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE: os << "UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE"; break; + + case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: 
+ os << "UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES"; + break; + + case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: + os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES"; + break; default: os << "unknown enumerator"; break; @@ -1323,7 +1399,8 @@ inline void serializeStruct(std::ostream &os, const void *ptr) { return; } - enum ur_structure_type_t *value = (enum ur_structure_type_t *)ptr; + const enum ur_structure_type_t *value = + (const enum ur_structure_type_t *)ptr; switch (*value) { case UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES: { @@ -1557,6 +1634,18 @@ inline void serializeStruct(std::ostream &os, const void *ptr) { (const ur_exp_win32_handle_t *)ptr; ur_params::serializePtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES: { + const ur_exp_layered_image_properties_t *pstruct = + (const ur_exp_layered_image_properties_t *)ptr; + ur_params::serializePtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: { + const ur_exp_sampler_addr_modes_t *pstruct = + (const ur_exp_sampler_addr_modes_t *)ptr; + ur_params::serializePtr(os, pstruct); + } break; default: os << "unknown enumerator"; break; @@ -2076,7 +2165,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2088,6 +2177,32 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } } } // namespace ur_params +inline std::ostream &operator<<(std::ostream &os, + const struct ur_code_location_t params) { + os << "(struct ur_code_location_t){"; + + os << ".functionName = "; + + ur_params::serializePtr(os, (params.functionName)); + + os << ", "; + os << ".sourceFile = "; + + ur_params::serializePtr(os, (params.sourceFile)); + + os << ", "; + os << ".lineNumber = "; + + os << (params.lineNumber); + + os << ", "; + os << ".columnNumber = "; + + os << (params.columnNumber); + + os << "}"; + return os; +} 
inline std::ostream &operator<<(std::ostream &os, enum ur_adapter_info_t value) { switch (value) { @@ -2123,7 +2238,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_adapter_backend_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2137,7 +2252,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2263,7 +2378,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_platform_backend_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2945,7 +3060,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_device_type_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2959,7 +3074,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2973,7 +3088,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -2987,7 +3102,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3001,7 +3116,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3030,7 +3145,7 @@ inline void 
serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3046,7 +3161,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3062,7 +3177,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3078,7 +3193,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3092,7 +3207,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3106,7 +3221,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3120,7 +3235,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3134,7 +3249,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3148,7 +3263,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3162,7 
+3277,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3176,7 +3291,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3190,7 +3305,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3204,7 +3319,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3218,7 +3333,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3232,7 +3347,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3246,7 +3361,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3260,7 +3375,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3274,7 +3389,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void 
*)(tptr) << " ("; os << *tptr; @@ -3288,7 +3403,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3302,7 +3417,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3316,7 +3431,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3330,7 +3445,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3344,7 +3459,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3358,7 +3473,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3372,7 +3487,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3386,7 +3501,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3400,7 +3515,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void 
*)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3414,7 +3529,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3428,7 +3543,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3442,7 +3557,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3456,7 +3571,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3470,7 +3585,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3484,7 +3599,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3498,7 +3613,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3512,7 +3627,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3526,7 +3641,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; 
return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3540,7 +3655,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3555,7 +3670,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_device_mem_cache_type_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3569,7 +3684,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3583,7 +3698,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3597,7 +3712,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3611,7 +3726,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3625,7 +3740,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3639,7 +3754,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3654,7 +3769,7 @@ inline void serializeTagged(std::ostream &os, const void 
*ptr, << ", expected: >=" << sizeof(ur_device_local_mem_type_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3668,7 +3783,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3682,7 +3797,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3696,7 +3811,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3710,7 +3825,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3724,7 +3839,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3738,7 +3853,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3752,7 +3867,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3766,7 +3881,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3782,7 +3897,7 @@ 
inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3796,7 +3911,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3810,7 +3925,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -3830,7 +3945,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_platform_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -3844,7 +3959,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3906,7 +4021,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3920,7 +4035,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3934,7 +4049,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -3963,7 +4078,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", 
expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -3979,7 +4094,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4009,7 +4124,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4023,7 +4138,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4053,7 +4168,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << sizeof(ur_device_usm_access_capability_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4069,7 +4184,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << sizeof(ur_device_usm_access_capability_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4085,7 +4200,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << sizeof(ur_device_usm_access_capability_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4101,7 +4216,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << sizeof(ur_device_usm_access_capability_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4117,7 +4232,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << sizeof(ur_device_usm_access_capability_flags_t) << ")"; return; } - 
os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4144,7 +4259,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4158,7 +4273,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4172,7 +4287,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4186,7 +4301,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4200,7 +4315,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4214,7 +4329,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4228,7 +4343,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4242,7 +4357,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4256,7 +4371,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, 
<< ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4270,7 +4385,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4286,7 +4401,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4302,7 +4417,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4318,7 +4433,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4334,7 +4449,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -4348,7 +4463,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4362,7 +4477,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4376,7 +4491,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4390,7 +4505,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << 
sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4419,7 +4534,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4433,7 +4548,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4447,7 +4562,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4461,7 +4576,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4475,7 +4590,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4489,7 +4604,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4503,7 +4618,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4517,7 +4632,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4531,7 +4646,7 @@ inline void serializeTagged(std::ostream &os, 
const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4545,7 +4660,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4559,7 +4674,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4573,7 +4688,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4587,7 +4702,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4601,7 +4716,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4615,7 +4730,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4629,7 +4744,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4643,7 +4758,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4657,7 +4772,7 @@ inline void 
serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4671,7 +4786,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4685,7 +4800,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4699,7 +4814,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4713,7 +4828,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -4727,7 +4842,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -5648,7 +5763,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -5677,7 +5792,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -5691,7 +5806,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << 
*tptr; @@ -5705,7 +5820,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -5721,7 +5836,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -5737,7 +5852,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -5753,7 +5868,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -5769,7 +5884,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -5985,7 +6100,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -5999,7 +6114,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -6203,7 +6318,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_image_format_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6217,7 +6332,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " 
("; os << *tptr; @@ -6231,7 +6346,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6245,7 +6360,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6259,7 +6374,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6273,7 +6388,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6287,7 +6402,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6594,7 +6709,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6608,7 +6723,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -6622,7 +6737,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6638,7 +6753,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const 
void *)(tptr) << " ("; os << *tptr; @@ -6653,7 +6768,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_sampler_filter_mode_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6944,7 +7059,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_usm_type_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6952,13 +7067,13 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } break; case UR_USM_ALLOC_INFO_BASE_PTR: { - const void **tptr = (const void **)ptr; + const void *const *tptr = (const void *const *)ptr; if (sizeof(void *) > size) { os << "invalid size (is: " << size << ", expected: >=" << sizeof(void *) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6972,7 +7087,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -6986,7 +7101,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -7000,7 +7115,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_usm_pool_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -7414,7 +7529,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -7428,7 +7543,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", 
expected: >=" << sizeof(ur_context_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -7476,7 +7591,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -7490,7 +7605,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -7607,7 +7722,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -7869,7 +7984,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -7883,7 +7998,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -7897,7 +8012,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -7953,7 +8068,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8065,7 +8180,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_program_build_status_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << 
*tptr; @@ -8092,7 +8207,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_program_binary_type_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8242,7 +8357,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8256,7 +8371,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8270,7 +8385,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -8284,7 +8399,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_program_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -8304,7 +8419,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8382,7 +8497,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8411,7 +8526,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8425,7 +8540,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" 
<< sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8439,7 +8554,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(size_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8494,7 +8609,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8508,7 +8623,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8522,7 +8637,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8536,7 +8651,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8608,7 +8723,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8617,7 +8732,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, case UR_KERNEL_EXEC_INFO_USM_PTRS: { - const void **tptr = (const void **)ptr; + const void *const *tptr = (const void *const *)ptr; os << "{"; size_t nelems = size / sizeof(void *); for (size_t i = 0; i < nelems; ++i) { @@ -8638,7 +8753,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_kernel_cache_config_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) 
<< " ("; os << *tptr; @@ -8799,7 +8914,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -8813,7 +8928,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_device_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -8827,7 +8942,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -8841,7 +8956,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_flags_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializeFlag(os, *tptr); @@ -8855,7 +8970,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8869,7 +8984,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -8883,7 +8998,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_bool_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9359,7 +9474,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_queue_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ 
-9373,7 +9488,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_context_handle_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; ur_params::serializePtr(os, *tptr); @@ -9387,7 +9502,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_command_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9401,7 +9516,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(ur_event_status_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9415,7 +9530,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9474,7 +9589,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9488,7 +9603,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9502,7 +9617,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9516,7 +9631,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9530,7 +9645,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint64_t) << ")"; return; } - os 
<< (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9567,20 +9682,20 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value) { switch (value) { - case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE"; + case UR_EXECUTION_INFO_COMPLETE: + os << "UR_EXECUTION_INFO_COMPLETE"; break; - case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING"; + case UR_EXECUTION_INFO_RUNNING: + os << "UR_EXECUTION_INFO_RUNNING"; break; - case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED"; + case UR_EXECUTION_INFO_SUBMITTED: + os << "UR_EXECUTION_INFO_SUBMITTED"; break; - case UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED: - os << "UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED"; + case UR_EXECUTION_INFO_QUEUED: + os << "UR_EXECUTION_INFO_QUEUED"; break; default: os << "unknown enumerator"; @@ -9849,6 +9964,33 @@ operator<<(std::ostream &os, os << "}"; return os; } +inline std::ostream & +operator<<(std::ostream &os, const struct ur_exp_sampler_addr_modes_t params) { + os << "(struct ur_exp_sampler_addr_modes_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur_params::serializeStruct(os, (params.pNext)); + + os << ", "; + os << ".addrModes = {"; + for (auto i = 0; i < 3; i++) { + if (i != 0) { + os << ", "; + } + + os << (params.addrModes[i]); + } + os << "}"; + + os << "}"; + return os; +} inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_mem_desc_t params) { os << "(struct ur_exp_interop_mem_desc_t){"; @@ -9883,6 +10025,28 @@ operator<<(std::ostream &os, return os; } inline std::ostream & +operator<<(std::ostream &os, + const struct ur_exp_layered_image_properties_t params) { + os << "(struct ur_exp_layered_image_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + 
ur_params::serializeStruct(os, (params.pNext)); + + os << ", "; + os << ".numLayers = "; + + os << (params.numLayers); + + os << "}"; + return os; +} +inline std::ostream & operator<<(std::ostream &os, const struct ur_exp_command_buffer_desc_t params) { os << "(struct ur_exp_command_buffer_desc_t){"; @@ -9933,7 +10097,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9947,7 +10111,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, << ", expected: >=" << sizeof(uint32_t) << ")"; return; } - os << (void *)(tptr) << " ("; + os << (const void *)(tptr) << " ("; os << *tptr; @@ -9960,34 +10124,9 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } } // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - const struct ur_init_params_t *params) { - - os << ".device_flags = "; - - ur_params::serializeFlag(os, - *(params->pdevice_flags)); - - os << ", "; - os << ".hLoaderConfig = "; - - ur_params::serializePtr(os, *(params->phLoaderConfig)); - - return os; -} - -inline std::ostream &operator<<(std::ostream &os, - const struct ur_tear_down_params_t *params) { - - os << ".pParams = "; - - ur_params::serializePtr(os, *(params->ppParams)); - - return os; -} - -inline std::ostream &operator<<(std::ostream &os, - const struct ur_adapter_get_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_adapter_get_params_t *params) { os << ".NumEntries = "; @@ -10014,7 +10153,8 @@ inline std::ostream &operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_adapter_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_adapter_release_params_t *params) { os << ".hAdapter = "; @@ -10024,7 +10164,8 @@ operator<<(std::ostream &os, const struct 
ur_adapter_release_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_adapter_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_adapter_retain_params_t *params) { os << ".hAdapter = "; @@ -10033,9 +10174,9 @@ operator<<(std::ostream &os, const struct ur_adapter_retain_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_adapter_get_last_error_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_adapter_get_last_error_params_t *params) { os << ".hAdapter = "; @@ -10056,7 +10197,7 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_adapter_get_info_params_t *params) { + [[maybe_unused]] const struct ur_adapter_get_info_params_t *params) { os << ".hAdapter = "; @@ -10086,9 +10227,8 @@ operator<<(std::ostream &os, } inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t - *params) { + std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params) { os << ".hContext = "; @@ -10108,9 +10248,8 @@ inline std::ostream &operator<<( } inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t - *params) { + std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params) { os << ".hContext = "; @@ -10131,7 +10270,8 @@ inline std::ostream &operator<<( inline std::ostream &operator<<( std::ostream &os, - const struct ur_bindless_images_image_allocate_exp_params_t *params) { + [[maybe_unused]] const struct ur_bindless_images_image_allocate_exp_params_t + *params) { os << ".hContext = "; @@ -10160,9 +10300,10 @@ inline std::ostream &operator<<( return os; } -inline std::ostream & 
-operator<<(std::ostream &os, - const struct ur_bindless_images_image_free_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_image_free_exp_params_t + *params) { os << ".hContext = "; @@ -10182,9 +10323,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_unsampled_image_create_exp_params_t - *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_unsampled_image_create_exp_params_t *params) { os << ".hContext = "; @@ -10223,9 +10363,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_sampled_image_create_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_sampled_image_create_exp_params_t *params) { os << ".hContext = "; @@ -10269,9 +10409,10 @@ inline std::ostream &operator<<( return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_image_copy_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_image_copy_exp_params_t + *params) { os << ".hQueue = "; @@ -10351,7 +10492,8 @@ operator<<(std::ostream &os, inline std::ostream &operator<<( std::ostream &os, - const struct ur_bindless_images_image_get_info_exp_params_t *params) { + [[maybe_unused]] const struct ur_bindless_images_image_get_info_exp_params_t + *params) { os << ".hImageMem = "; @@ -10375,9 +10517,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_mipmap_get_level_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_mipmap_get_level_exp_params_t *params) { os << ".hContext = "; @@ -10406,9 
+10548,10 @@ inline std::ostream &operator<<( return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_mipmap_free_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_bindless_images_mipmap_free_exp_params_t + *params) { os << ".hContext = "; @@ -10427,9 +10570,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_import_opaque_fd_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_import_opaque_fd_exp_params_t *params) { os << ".hContext = "; @@ -10458,9 +10601,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_map_external_array_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_map_external_array_exp_params_t *params) { os << ".hContext = "; @@ -10494,9 +10637,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_release_interop_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_release_interop_exp_params_t *params) { os << ".hContext = "; @@ -10516,7 +10659,7 @@ inline std::ostream &operator<<( } inline std::ostream & -operator<<(std::ostream &os, const struct +operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_external_semaphore_opaque_fd_exp_params_t *params) { @@ -10542,10 +10685,9 @@ operator<<(std::ostream &os, const struct return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_destroy_external_semaphore_exp_params_t - *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] 
const struct + ur_bindless_images_destroy_external_semaphore_exp_params_t *params) { os << ".hContext = "; @@ -10565,9 +10707,8 @@ inline std::ostream &operator<<( } inline std::ostream & -operator<<(std::ostream &os, - const struct ur_bindless_images_wait_external_semaphore_exp_params_t - *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_wait_external_semaphore_exp_params_t *params) { os << ".hQueue = "; @@ -10604,10 +10745,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_bindless_images_signal_external_semaphore_exp_params_t - *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_bindless_images_signal_external_semaphore_exp_params_t *params) { os << ".hQueue = "; @@ -10646,7 +10786,8 @@ inline std::ostream &operator<<( inline std::ostream & operator<<(std::ostream &os, - const struct ur_command_buffer_create_exp_params_t *params) { + [[maybe_unused]] const struct ur_command_buffer_create_exp_params_t + *params) { os << ".hContext = "; @@ -10672,7 +10813,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_command_buffer_retain_exp_params_t *params) { + [[maybe_unused]] const struct ur_command_buffer_retain_exp_params_t + *params) { os << ".hCommandBuffer = "; @@ -10683,7 +10825,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_command_buffer_release_exp_params_t *params) { + [[maybe_unused]] const struct ur_command_buffer_release_exp_params_t + *params) { os << ".hCommandBuffer = "; @@ -10694,7 +10837,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_command_buffer_finalize_exp_params_t *params) { + [[maybe_unused]] const struct ur_command_buffer_finalize_exp_params_t + *params) { os << ".hCommandBuffer = "; @@ -10703,9 +10847,9 @@ operator<<(std::ostream 
&os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_kernel_launch_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_kernel_launch_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -10754,9 +10898,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_memcpy_usm_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_usm_memcpy_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -10797,7 +10941,54 @@ inline std::ostream &operator<<( inline std::ostream &operator<<( std::ostream &os, - const struct ur_command_buffer_append_membuffer_copy_exp_params_t *params) { + [[maybe_unused]] const struct ur_command_buffer_append_usm_fill_exp_params_t + *params) { + + os << ".hCommandBuffer = "; + + ur_params::serializePtr(os, *(params->phCommandBuffer)); + + os << ", "; + os << ".pMemory = "; + + ur_params::serializePtr(os, *(params->ppMemory)); + + os << ", "; + os << ".pPattern = "; + + ur_params::serializePtr(os, *(params->ppPattern)); + + os << ", "; + os << ".patternSize = "; + + os << *(params->ppatternSize); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".numSyncPointsInWaitList = "; + + os << *(params->pnumSyncPointsInWaitList); + + os << ", "; + os << ".pSyncPointWaitList = "; + + ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + + os << ", "; + os << ".pSyncPoint = "; + + ur_params::serializePtr(os, *(params->ppSyncPoint)); + + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_copy_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -10847,9 +11038,8 @@ inline std::ostream &operator<<( } inline std::ostream & 
-operator<<(std::ostream &os, - const struct ur_command_buffer_append_membuffer_write_exp_params_t - *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_write_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -10893,9 +11083,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_read_exp_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_read_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -10939,10 +11129,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_copy_rect_exp_params_t - *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -11012,9 +11201,8 @@ inline std::ostream &operator<<( } inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_write_rect_exp_params_t - *params) { + std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_write_rect_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -11083,10 +11271,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_command_buffer_append_membuffer_read_rect_exp_params_t - *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_read_rect_exp_params_t *params) { os << ".hCommandBuffer = "; @@ -11155,9 +11342,143 @@ inline std::ostream &operator<<( return os; } +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_mem_buffer_fill_exp_params_t 
*params) { + + os << ".hCommandBuffer = "; + + ur_params::serializePtr(os, *(params->phCommandBuffer)); + + os << ", "; + os << ".hBuffer = "; + + ur_params::serializePtr(os, *(params->phBuffer)); + + os << ", "; + os << ".pPattern = "; + + ur_params::serializePtr(os, *(params->ppPattern)); + + os << ", "; + os << ".patternSize = "; + + os << *(params->ppatternSize); + + os << ", "; + os << ".offset = "; + + os << *(params->poffset); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".numSyncPointsInWaitList = "; + + os << *(params->pnumSyncPointsInWaitList); + + os << ", "; + os << ".pSyncPointWaitList = "; + + ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + + os << ", "; + os << ".pSyncPoint = "; + + ur_params::serializePtr(os, *(params->ppSyncPoint)); + + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_usm_prefetch_exp_params_t *params) { + + os << ".hCommandBuffer = "; + + ur_params::serializePtr(os, *(params->phCommandBuffer)); + + os << ", "; + os << ".pMemory = "; + + ur_params::serializePtr(os, *(params->ppMemory)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".flags = "; + + ur_params::serializeFlag(os, *(params->pflags)); + + os << ", "; + os << ".numSyncPointsInWaitList = "; + + os << *(params->pnumSyncPointsInWaitList); + + os << ", "; + os << ".pSyncPointWaitList = "; + + ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + + os << ", "; + os << ".pSyncPoint = "; + + ur_params::serializePtr(os, *(params->ppSyncPoint)); + + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_usm_advise_exp_params_t *params) { + + os << ".hCommandBuffer = "; + + ur_params::serializePtr(os, *(params->phCommandBuffer)); + + os << ", "; + os << ".pMemory = "; + + ur_params::serializePtr(os, *(params->ppMemory)); + + os 
<< ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".advice = "; + + ur_params::serializeFlag(os, *(params->padvice)); + + os << ", "; + os << ".numSyncPointsInWaitList = "; + + os << *(params->pnumSyncPointsInWaitList); + + os << ", "; + os << ".pSyncPointWaitList = "; + + ur_params::serializePtr(os, *(params->ppSyncPointWaitList)); + + os << ", "; + os << ".pSyncPoint = "; + + ur_params::serializePtr(os, *(params->ppSyncPoint)); + + return os; +} + inline std::ostream & operator<<(std::ostream &os, - const struct ur_command_buffer_enqueue_exp_params_t *params) { + [[maybe_unused]] const struct ur_command_buffer_enqueue_exp_params_t + *params) { os << ".hCommandBuffer = "; @@ -11195,7 +11516,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_context_create_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_context_create_params_t *params) { os << ".DeviceCount = "; @@ -11227,7 +11549,8 @@ operator<<(std::ostream &os, const struct ur_context_create_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_context_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_context_retain_params_t *params) { os << ".hContext = "; @@ -11237,7 +11560,8 @@ operator<<(std::ostream &os, const struct ur_context_retain_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_context_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_context_release_params_t *params) { os << ".hContext = "; @@ -11248,7 +11572,7 @@ operator<<(std::ostream &os, const struct ur_context_release_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_context_get_info_params_t *params) { + [[maybe_unused]] const struct ur_context_get_info_params_t *params) { os << ".hContext = "; @@ -11279,7 +11603,8 @@ 
operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_context_get_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_context_get_native_handle_params_t + *params) { os << ".hContext = "; @@ -11293,9 +11618,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_context_create_with_native_handle_params_t + *params) { os << ".hNativeContext = "; @@ -11331,9 +11657,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_context_set_extended_deleter_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_context_set_extended_deleter_params_t + *params) { os << ".hContext = "; @@ -11342,7 +11669,7 @@ operator<<(std::ostream &os, os << ", "; os << ".pfnDeleter = "; - os << *(params->ppfnDeleter); + os << reinterpret_cast<void *>(*(params->ppfnDeleter)); os << ", "; os << ".pUserData = "; @@ -11352,9 +11679,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_kernel_launch_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_kernel_launch_params_t *params) { os << ".hQueue = "; @@ -11411,9 +11738,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_events_wait_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_events_wait_params_t *params) { os << ".hQueue = "; @@ -11445,9 +11772,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct
ur_enqueue_events_wait_with_barrier_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_events_wait_with_barrier_params_t + *params) { os << ".hQueue = "; @@ -11479,9 +11807,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_read_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_read_params_t *params) { os << ".hQueue = "; @@ -11540,7 +11868,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_write_params_t *params) { + [[maybe_unused]] const struct ur_enqueue_mem_buffer_write_params_t + *params) { os << ".hQueue = "; @@ -11597,9 +11926,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_read_rect_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_read_rect_params_t + *params) { os << ".hQueue = "; @@ -11681,9 +12011,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_write_rect_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_write_rect_params_t + *params) { os << ".hQueue = "; @@ -11765,9 +12096,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_copy_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_copy_params_t *params) { os << ".hQueue = "; @@ -11824,9 +12155,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct 
ur_enqueue_mem_buffer_copy_rect_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_copy_rect_params_t + *params) { os << ".hQueue = "; @@ -11903,9 +12235,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_fill_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_fill_params_t *params) { os << ".hQueue = "; @@ -11962,9 +12294,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_image_read_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_image_read_params_t *params) { os << ".hQueue = "; @@ -12031,9 +12363,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_image_write_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_image_write_params_t *params) { os << ".hQueue = "; @@ -12100,9 +12432,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_image_copy_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_image_copy_params_t *params) { os << ".hQueue = "; @@ -12159,9 +12491,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_mem_buffer_map_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_buffer_map_params_t *params) { os << ".hQueue = "; @@ -12223,9 +12555,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - 
const struct ur_enqueue_mem_unmap_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_mem_unmap_params_t *params) { os << ".hQueue = "; @@ -12269,7 +12601,7 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_enqueue_usm_fill_params_t *params) { + [[maybe_unused]] const struct ur_enqueue_usm_fill_params_t *params) { os << ".hQueue = "; @@ -12321,9 +12653,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_memcpy_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_usm_memcpy_params_t *params) { os << ".hQueue = "; @@ -12375,9 +12707,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_prefetch_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_usm_prefetch_params_t *params) { os << ".hQueue = "; @@ -12424,9 +12756,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_advise_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_usm_advise_params_t *params) { os << ".hQueue = "; @@ -12455,9 +12787,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_fill_2d_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_usm_fill_2d_params_t *params) { os << ".hQueue = "; @@ -12519,9 +12851,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_usm_memcpy_2d_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + 
[[maybe_unused]] const struct ur_enqueue_usm_memcpy_2d_params_t *params) { os << ".hQueue = "; @@ -12588,9 +12920,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_enqueue_device_global_variable_write_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_enqueue_device_global_variable_write_params_t *params) { os << ".hQueue = "; @@ -12652,9 +12984,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_enqueue_device_global_variable_read_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_enqueue_device_global_variable_read_params_t *params) { os << ".hQueue = "; @@ -12716,9 +13048,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_read_host_pipe_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_read_host_pipe_params_t *params) { os << ".hQueue = "; @@ -12775,9 +13107,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_enqueue_write_host_pipe_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_enqueue_write_host_pipe_params_t *params) { os << ".hQueue = "; @@ -12835,7 +13167,67 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_event_get_info_params_t *params) { +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_enqueue_cooperative_kernel_launch_exp_params_t *params) { + + os << ".hQueue = "; + + ur_params::serializePtr(os, *(params->phQueue)); + + os << ", "; + os << ".hKernel = "; + + ur_params::serializePtr(os, *(params->phKernel)); + + os << ", "; + os << ".workDim = "; + + os << 
*(params->pworkDim); + + os << ", "; + os << ".pGlobalWorkOffset = "; + + ur_params::serializePtr(os, *(params->ppGlobalWorkOffset)); + + os << ", "; + os << ".pGlobalWorkSize = "; + + ur_params::serializePtr(os, *(params->ppGlobalWorkSize)); + + os << ", "; + os << ".pLocalWorkSize = "; + + ur_params::serializePtr(os, *(params->ppLocalWorkSize)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && + i < *params->pnumEventsInWaitList; + ++i) { + if (i != 0) { + os << ", "; + } + + ur_params::serializePtr(os, (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".phEvent = "; + + ur_params::serializePtr(os, *(params->pphEvent)); + + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_event_get_info_params_t *params) { os << ".hEvent = "; @@ -12866,7 +13258,8 @@ operator<<(std::ostream &os, const struct ur_event_get_info_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_event_get_profiling_info_params_t *params) { + [[maybe_unused]] const struct ur_event_get_profiling_info_params_t + *params) { os << ".hEvent = "; @@ -12895,8 +13288,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_event_wait_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_event_wait_params_t *params) { os << ".numEvents = "; @@ -12917,8 +13311,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_event_retain_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_event_retain_params_t *params) { os << ".hEvent = "; @@ -12928,7 +13323,8 @@ inline std::ostream &operator<<(std::ostream 
&os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_event_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_event_release_params_t *params) { os << ".hEvent = "; @@ -12937,9 +13333,9 @@ operator<<(std::ostream &os, const struct ur_event_release_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_get_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_event_get_native_handle_params_t *params) { os << ".hEvent = "; @@ -12953,9 +13349,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_event_create_with_native_handle_params_t + *params) { os << ".hNativeEvent = "; @@ -12979,9 +13376,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_event_set_callback_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_event_set_callback_params_t *params) { os << ".hEvent = "; @@ -12995,7 +13392,7 @@ operator<<(std::ostream &os, os << ", "; os << ".pfnNotify = "; - os << *(params->ppfnNotify); + os << reinterpret_cast<void *>(*(params->ppfnNotify)); os << ", "; os << ".pUserData = "; @@ -13006,7 +13403,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_create_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_kernel_create_params_t *params) { os << ".hProgram = "; @@ -13026,7 +13424,8 @@ operator<<(std::ostream &os, const struct ur_kernel_create_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_get_info_params_t *params) {
+operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_kernel_get_info_params_t *params) { os << ".hKernel = "; @@ -13055,9 +13454,9 @@ operator<<(std::ostream &os, const struct ur_kernel_get_info_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_get_group_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_get_group_info_params_t *params) { os << ".hKernel = "; @@ -13093,7 +13492,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_kernel_get_sub_group_info_params_t *params) { + [[maybe_unused]] const struct ur_kernel_get_sub_group_info_params_t + *params) { os << ".hKernel = "; @@ -13128,7 +13528,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_kernel_retain_params_t *params) { os << ".hKernel = "; @@ -13138,7 +13539,8 @@ operator<<(std::ostream &os, const struct ur_kernel_retain_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_kernel_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_kernel_release_params_t *params) { os << ".hKernel = "; @@ -13149,7 +13551,8 @@ operator<<(std::ostream &os, const struct ur_kernel_release_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_kernel_get_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_kernel_get_native_handle_params_t + *params) { os << ".hKernel = "; @@ -13163,9 +13566,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct 
ur_kernel_create_with_native_handle_params_t + *params) { os << ".hNativeKernel = "; @@ -13194,9 +13598,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_value_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_set_arg_value_params_t *params) { os << ".hKernel = "; @@ -13225,9 +13629,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_local_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_set_arg_local_params_t *params) { os << ".hKernel = "; @@ -13251,9 +13655,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_pointer_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_set_arg_pointer_params_t *params) { os << ".hKernel = "; @@ -13277,9 +13681,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_exec_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_set_exec_info_params_t *params) { os << ".hKernel = "; @@ -13308,9 +13712,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_sampler_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_kernel_set_arg_sampler_params_t *params) { os << ".hKernel = "; @@ -13334,9 +13738,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_kernel_set_arg_mem_obj_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const 
struct ur_kernel_set_arg_mem_obj_params_t *params) { os << ".hKernel = "; @@ -13360,9 +13764,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_kernel_set_specialization_constants_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_kernel_set_specialization_constants_params_t *params) { os << ".hKernel = "; @@ -13376,14 +13780,54 @@ inline std::ostream &operator<<( os << ", "; os << ".pSpecConstants = "; - ur_params::serializePtr(os, *(params->ppSpecConstants)); + ur_params::serializePtr(os, *(params->ppSpecConstants)); + + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_kernel_suggest_max_cooperative_group_count_exp_params_t *params) { + + os << ".hKernel = "; + + ur_params::serializePtr(os, *(params->phKernel)); + + os << ", "; + os << ".pGroupCountRet = "; + + ur_params::serializePtr(os, *(params->ppGroupCountRet)); + + return os; +} + +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_loader_init_params_t *params) { + + os << ".device_flags = "; + + ur_params::serializeFlag(os, + *(params->pdevice_flags)); + + os << ", "; + os << ".hLoaderConfig = "; + + ur_params::serializePtr(os, *(params->phLoaderConfig)); return os; } inline std::ostream & operator<<(std::ostream &os, - const struct ur_loader_config_create_params_t *params) { + [[maybe_unused]] const struct ur_loader_tear_down_params_t *params) { + + return os; +} + +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_loader_config_create_params_t *params) { os << ".phLoaderConfig = "; @@ -13392,9 +13836,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_retain_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct 
ur_loader_config_retain_params_t *params) { os << ".hLoaderConfig = "; @@ -13403,9 +13847,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_release_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_loader_config_release_params_t *params) { os << ".hLoaderConfig = "; @@ -13414,9 +13858,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_loader_config_get_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_loader_config_get_info_params_t *params) { os << ".hLoaderConfig = "; @@ -13447,7 +13891,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_loader_config_enable_layer_params_t *params) { + [[maybe_unused]] const struct ur_loader_config_enable_layer_params_t + *params) { os << ".hLoaderConfig = "; @@ -13461,9 +13906,30 @@ operator<<(std::ostream &os, return os; } +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_loader_config_set_code_location_callback_params_t *params) { + + os << ".hLoaderConfig = "; + + ur_params::serializePtr(os, *(params->phLoaderConfig)); + + os << ", "; + os << ".pfnCodeloc = "; + + os << reinterpret_cast<void *>(*(params->ppfnCodeloc)); + + os << ", "; + os << ".pUserData = "; + + ur_params::serializePtr(os, *(params->ppUserData)); + + return os; +} + inline std::ostream & operator<<(std::ostream &os, - const struct ur_mem_image_create_params_t *params) { + [[maybe_unused]] const struct ur_mem_image_create_params_t *params) { os << ".hContext = "; @@ -13497,9 +13963,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_buffer_create_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const
struct ur_mem_buffer_create_params_t *params) { os << ".hContext = "; @@ -13528,8 +13994,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_retain_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_mem_retain_params_t *params) { os << ".hMem = "; @@ -13538,8 +14005,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_release_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_mem_release_params_t *params) { os << ".hMem = "; @@ -13548,9 +14016,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_buffer_partition_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_mem_buffer_partition_params_t *params) { os << ".hBuffer = "; @@ -13579,9 +14047,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_get_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_mem_get_native_handle_params_t *params) { os << ".hMem = "; @@ -13595,9 +14063,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_mem_buffer_create_with_native_handle_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_mem_buffer_create_with_native_handle_params_t *params) { os << ".hNativeMem = "; @@ -13621,9 +14089,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_mem_image_create_with_native_handle_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, 
[[maybe_unused]] const struct + ur_mem_image_create_with_native_handle_params_t *params) { os << ".hNativeMem = "; @@ -13657,8 +14125,9 @@ inline std::ostream &operator<<( return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_mem_get_info_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_mem_get_info_params_t *params) { os << ".hMemory = "; @@ -13687,9 +14156,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_mem_image_get_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_mem_image_get_info_params_t *params) { os << ".hMemory = "; @@ -13718,9 +14187,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_physical_mem_create_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_physical_mem_create_params_t *params) { os << ".hContext = "; @@ -13749,9 +14218,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_physical_mem_retain_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_physical_mem_retain_params_t *params) { os << ".hPhysicalMem = "; @@ -13760,9 +14229,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_physical_mem_release_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_physical_mem_release_params_t *params) { os << ".hPhysicalMem = "; @@ -13771,8 +14240,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_platform_get_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + 
[[maybe_unused]] const struct ur_platform_get_params_t *params) { os << ".phAdapters = {"; for (size_t i = 0; @@ -13815,9 +14285,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_platform_get_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_platform_get_info_params_t *params) { os << ".hPlatform = "; @@ -13848,7 +14318,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_platform_get_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_platform_get_native_handle_params_t + *params) { os << ".hPlatform = "; @@ -13864,7 +14335,8 @@ operator<<(std::ostream &os, inline std::ostream &operator<<( std::ostream &os, - const struct ur_platform_create_with_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_platform_create_with_native_handle_params_t + *params) { os << ".hNativePlatform = "; @@ -13885,7 +14357,8 @@ inline std::ostream &operator<<( inline std::ostream & operator<<(std::ostream &os, - const struct ur_platform_get_api_version_params_t *params) { + [[maybe_unused]] const struct ur_platform_get_api_version_params_t + *params) { os << ".hPlatform = "; @@ -13901,7 +14374,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_platform_get_backend_option_params_t *params) { + [[maybe_unused]] const struct ur_platform_get_backend_option_params_t + *params) { os << ".hPlatform = "; @@ -13920,9 +14394,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_create_with_il_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_program_create_with_il_params_t *params) { os << ".hContext = "; @@ -13953,7 +14427,8 @@ operator<<(std::ostream &os, inline std::ostream & 
operator<<(std::ostream &os, - const struct ur_program_create_with_binary_params_t *params) { + [[maybe_unused]] const struct ur_program_create_with_binary_params_t + *params) { os << ".hContext = "; @@ -13988,7 +14463,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_build_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_build_params_t *params) { os << ".hContext = "; @@ -14008,7 +14484,8 @@ operator<<(std::ostream &os, const struct ur_program_build_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_compile_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_compile_params_t *params) { os << ".hContext = "; @@ -14027,8 +14504,9 @@ operator<<(std::ostream &os, const struct ur_program_compile_params_t *params) { return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_program_link_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_link_params_t *params) { os << ".hContext = "; @@ -14065,7 +14543,8 @@ inline std::ostream &operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_retain_params_t *params) { os << ".hProgram = "; @@ -14075,7 +14554,8 @@ operator<<(std::ostream &os, const struct ur_program_retain_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_program_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_release_params_t *params) { os << ".hProgram = "; @@ -14084,9 +14564,10 @@ operator<<(std::ostream &os, const struct ur_program_release_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct 
ur_program_get_function_pointer_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_program_get_function_pointer_params_t + *params) { os << ".hDevice = "; @@ -14112,7 +14593,7 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_program_get_info_params_t *params) { + [[maybe_unused]] const struct ur_program_get_info_params_t *params) { os << ".hProgram = "; @@ -14141,9 +14622,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_get_build_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_program_get_build_info_params_t *params) { os << ".hProgram = "; @@ -14177,9 +14658,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<( - std::ostream &os, - const struct ur_program_set_specialization_constants_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_program_set_specialization_constants_params_t *params) { os << ".hProgram = "; @@ -14207,7 +14688,8 @@ inline std::ostream &operator<<( inline std::ostream & operator<<(std::ostream &os, - const struct ur_program_get_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_program_get_native_handle_params_t + *params) { os << ".hProgram = "; @@ -14221,9 +14703,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_program_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_program_create_with_native_handle_params_t + *params) { os << ".hNativeProgram = "; @@ -14248,7 +14731,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_get_info_params_t *params) { +operator<<(std::ostream &os, + 
[[maybe_unused]] const struct ur_queue_get_info_params_t *params) { os << ".hQueue = "; @@ -14277,8 +14761,9 @@ operator<<(std::ostream &os, const struct ur_queue_get_info_params_t *params) { return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_create_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_queue_create_params_t *params) { os << ".hContext = "; @@ -14302,8 +14787,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_retain_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_queue_retain_params_t *params) { os << ".hQueue = "; @@ -14313,7 +14799,8 @@ inline std::ostream &operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_queue_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_queue_release_params_t *params) { os << ".hQueue = "; @@ -14322,9 +14809,9 @@ operator<<(std::ostream &os, const struct ur_queue_release_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_queue_get_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_queue_get_native_handle_params_t *params) { os << ".hQueue = "; @@ -14343,9 +14830,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_queue_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_queue_create_with_native_handle_params_t + *params) { os << ".hNativeQueue = "; @@ -14374,8 +14862,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_finish_params_t *params) { +inline 
std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_queue_finish_params_t *params) { os << ".hQueue = "; @@ -14384,8 +14873,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_queue_flush_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_queue_flush_params_t *params) { os << ".hQueue = "; @@ -14395,7 +14885,8 @@ inline std::ostream &operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_sampler_create_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_sampler_create_params_t *params) { os << ".hContext = "; @@ -14415,7 +14906,8 @@ operator<<(std::ostream &os, const struct ur_sampler_create_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_sampler_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_sampler_retain_params_t *params) { os << ".hSampler = "; @@ -14425,7 +14917,8 @@ operator<<(std::ostream &os, const struct ur_sampler_retain_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_sampler_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_sampler_release_params_t *params) { os << ".hSampler = "; @@ -14436,7 +14929,7 @@ operator<<(std::ostream &os, const struct ur_sampler_release_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_sampler_get_info_params_t *params) { + [[maybe_unused]] const struct ur_sampler_get_info_params_t *params) { os << ".hSampler = "; @@ -14467,7 +14960,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_sampler_get_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_sampler_get_native_handle_params_t + *params) { os << ".hSampler 
= "; @@ -14481,9 +14975,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_sampler_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_sampler_create_with_native_handle_params_t + *params) { os << ".hNativeSampler = "; @@ -14508,7 +15003,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_host_alloc_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_host_alloc_params_t *params) { os << ".hContext = "; @@ -14539,7 +15035,7 @@ operator<<(std::ostream &os, const struct ur_usm_host_alloc_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_usm_device_alloc_params_t *params) { + [[maybe_unused]] const struct ur_usm_device_alloc_params_t *params) { os << ".hContext = "; @@ -14575,7 +15071,7 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_usm_shared_alloc_params_t *params) { + [[maybe_unused]] const struct ur_usm_shared_alloc_params_t *params) { os << ".hContext = "; @@ -14609,8 +15105,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_usm_free_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_free_params_t *params) { os << ".hContext = "; @@ -14624,9 +15121,9 @@ inline std::ostream &operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_get_mem_alloc_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_get_mem_alloc_info_params_t *params) { os << ".hContext = "; @@ -14661,7 +15158,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct 
ur_usm_pool_create_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_create_params_t *params) { os << ".hContext = "; @@ -14681,7 +15179,8 @@ operator<<(std::ostream &os, const struct ur_usm_pool_create_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_pool_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_retain_params_t *params) { os << ".pPool = "; @@ -14692,7 +15191,7 @@ operator<<(std::ostream &os, const struct ur_usm_pool_retain_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_usm_pool_release_params_t *params) { + [[maybe_unused]] const struct ur_usm_pool_release_params_t *params) { os << ".pPool = "; @@ -14701,9 +15200,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_pool_get_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_pool_get_info_params_t *params) { os << ".hPool = "; @@ -14732,9 +15231,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_pitched_alloc_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_pitched_alloc_exp_params_t *params) { os << ".hContext = "; @@ -14784,7 +15283,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_import_exp_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_usm_import_exp_params_t *params) { os << ".hContext = "; @@ -14804,7 +15304,8 @@ operator<<(std::ostream &os, const struct ur_usm_import_exp_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_usm_release_exp_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct 
ur_usm_release_exp_params_t *params) { os << ".hContext = "; @@ -14818,9 +15319,10 @@ operator<<(std::ostream &os, const struct ur_usm_release_exp_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_p2p_enable_peer_access_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_p2p_enable_peer_access_exp_params_t + *params) { os << ".commandDevice = "; @@ -14834,9 +15336,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_p2p_disable_peer_access_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_p2p_disable_peer_access_exp_params_t + *params) { os << ".commandDevice = "; @@ -14850,9 +15353,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_usm_p2p_peer_access_get_info_exp_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_usm_p2p_peer_access_get_info_exp_params_t + *params) { os << ".commandDevice = "; @@ -14886,9 +15390,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_granularity_get_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_virtual_mem_granularity_get_info_params_t + *params) { os << ".hContext = "; @@ -14922,9 +15427,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_reserve_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_virtual_mem_reserve_params_t *params) { os << ".hContext = "; @@ -14950,7 +15455,7 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct 
ur_virtual_mem_free_params_t *params) { + [[maybe_unused]] const struct ur_virtual_mem_free_params_t *params) { os << ".hContext = "; @@ -14970,7 +15475,8 @@ operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_virtual_mem_map_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_virtual_mem_map_params_t *params) { os << ".hContext = "; @@ -15005,9 +15511,9 @@ operator<<(std::ostream &os, const struct ur_virtual_mem_map_params_t *params) { return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_unmap_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_virtual_mem_unmap_params_t *params) { os << ".hContext = "; @@ -15026,9 +15532,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_set_access_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_virtual_mem_set_access_params_t *params) { os << ".hContext = "; @@ -15053,9 +15559,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_virtual_mem_get_info_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_virtual_mem_get_info_params_t *params) { os << ".hContext = "; @@ -15094,8 +15600,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream &operator<<(std::ostream &os, - const struct ur_device_get_params_t *params) { +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_device_get_params_t *params) { os << ".hPlatform = "; @@ -15132,7 +15639,8 @@ inline std::ostream &operator<<(std::ostream &os, } inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_get_info_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const 
struct ur_device_get_info_params_t *params) { os << ".hDevice = "; @@ -15162,7 +15670,8 @@ operator<<(std::ostream &os, const struct ur_device_get_info_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_retain_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_device_retain_params_t *params) { os << ".hDevice = "; @@ -15172,7 +15681,8 @@ operator<<(std::ostream &os, const struct ur_device_retain_params_t *params) { } inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_release_params_t *params) { +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_device_release_params_t *params) { os << ".hDevice = "; @@ -15183,7 +15693,7 @@ operator<<(std::ostream &os, const struct ur_device_release_params_t *params) { inline std::ostream & operator<<(std::ostream &os, - const struct ur_device_partition_params_t *params) { + [[maybe_unused]] const struct ur_device_partition_params_t *params) { os << ".hDevice = "; @@ -15219,9 +15729,9 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_select_binary_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_device_select_binary_params_t *params) { os << ".hDevice = "; @@ -15247,7 +15757,8 @@ operator<<(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, - const struct ur_device_get_native_handle_params_t *params) { + [[maybe_unused]] const struct ur_device_get_native_handle_params_t + *params) { os << ".hDevice = "; @@ -15261,9 +15772,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_create_with_native_handle_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_device_create_with_native_handle_params_t + *params) { os << ".hNativeDevice = "; @@ 
-15287,9 +15799,10 @@ operator<<(std::ostream &os, return os; } -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_device_get_global_timestamps_params_t *params) { +inline std::ostream &operator<<( + std::ostream &os, + [[maybe_unused]] const struct ur_device_get_global_timestamps_params_t + *params) { os << ".hDevice = "; @@ -15310,21 +15823,21 @@ operator<<(std::ostream &os, namespace ur_params { -template inline void serializePtr(std::ostream &os, T *ptr) { +template inline void serializePtr(std::ostream &os, const T *ptr) { if (ptr == nullptr) { os << "nullptr"; } else if constexpr (std::is_pointer_v) { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; serializePtr(os, *ptr); os << ")"; } else if constexpr (std::is_void_v || is_handle_v) { - os << (void *)ptr; + os << (const void *)ptr; } else if constexpr (std::is_same_v, char>) { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << ptr; os << ")"; } else { - os << (void *)(ptr) << " ("; + os << (const void *)(ptr) << " ("; os << *ptr; os << ")"; } @@ -15333,12 +15846,6 @@ template inline void serializePtr(std::ostream &os, T *ptr) { inline int serializeFunctionParams(std::ostream &os, uint32_t function, const void *params) { switch ((enum ur_function_t)function) { - case UR_FUNCTION_INIT: { - os << (const struct ur_init_params_t *)params; - } break; - case UR_FUNCTION_TEAR_DOWN: { - os << (const struct ur_tear_down_params_t *)params; - } break; case UR_FUNCTION_ADAPTER_GET: { os << (const struct ur_adapter_get_params_t *)params; } break; @@ -15441,35 +15948,51 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_command_buffer_append_kernel_launch_exp_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP: { - os << (const struct ur_command_buffer_append_memcpy_usm_exp_params_t *) + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: { + os << (const struct 
ur_command_buffer_append_usm_memcpy_exp_params_t *) params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP: { - os << (const struct ur_command_buffer_append_membuffer_copy_exp_params_t - *)params; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: { + os << (const struct ur_command_buffer_append_usm_fill_exp_params_t *) + params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP: { + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP: { os << (const struct - ur_command_buffer_append_membuffer_write_exp_params_t *)params; + ur_command_buffer_append_mem_buffer_copy_exp_params_t *)params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP: { - os << (const struct ur_command_buffer_append_membuffer_read_exp_params_t - *)params; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP: { + os << (const struct + ur_command_buffer_append_mem_buffer_write_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP: { + os << (const struct + ur_command_buffer_append_mem_buffer_read_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP: { + os << (const struct + ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t *) + params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP: { + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP: { os << (const struct - ur_command_buffer_append_membuffer_copy_rect_exp_params_t *) + ur_command_buffer_append_mem_buffer_write_rect_exp_params_t *) params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP: { + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP: { os << (const struct - ur_command_buffer_append_membuffer_write_rect_exp_params_t *) + ur_command_buffer_append_mem_buffer_read_rect_exp_params_t *) params; } break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP: { + case 
UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP: { os << (const struct - ur_command_buffer_append_membuffer_read_rect_exp_params_t *) + ur_command_buffer_append_mem_buffer_fill_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP: { + os << (const struct ur_command_buffer_append_usm_prefetch_exp_params_t + *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP: { + os << (const struct ur_command_buffer_append_usm_advise_exp_params_t *) params; } break; case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { @@ -15575,6 +16098,10 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE: { os << (const struct ur_enqueue_write_host_pipe_params_t *)params; } break; + case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { + os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *) + params; + } break; case UR_FUNCTION_EVENT_GET_INFO: { os << (const struct ur_event_get_info_params_t *)params; } break; @@ -15647,6 +16174,17 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, os << (const struct ur_kernel_set_specialization_constants_params_t *) params; } break; + case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: { + os << (const struct + ur_kernel_suggest_max_cooperative_group_count_exp_params_t *) + params; + } break; + case UR_FUNCTION_LOADER_INIT: { + os << (const struct ur_loader_init_params_t *)params; + } break; + case UR_FUNCTION_LOADER_TEAR_DOWN: { + os << (const struct ur_loader_tear_down_params_t *)params; + } break; case UR_FUNCTION_LOADER_CONFIG_CREATE: { os << (const struct ur_loader_config_create_params_t *)params; } break; @@ -15662,6 +16200,10 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_LOADER_CONFIG_ENABLE_LAYER: { os << (const struct ur_loader_config_enable_layer_params_t *)params; } break; + case 
UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK: { + os << (const struct ur_loader_config_set_code_location_callback_params_t + *)params; + } break; case UR_FUNCTION_MEM_IMAGE_CREATE: { os << (const struct ur_mem_image_create_params_t *)params; } break; diff --git a/source/common/ur_pool_manager.hpp b/source/common/ur_pool_manager.hpp index c4da5d149f..2215bd0575 100644 --- a/source/common/ur_pool_manager.hpp +++ b/source/common/ur_pool_manager.hpp @@ -11,11 +11,17 @@ #ifndef USM_POOL_MANAGER_HPP #define USM_POOL_MANAGER_HPP 1 +#include "logger/ur_logger.hpp" +#include "umf_helpers.hpp" +#include "umf_pools/disjoint_pool.hpp" #include "ur_api.h" -#include "ur_pool_manager.hpp" #include "ur_util.hpp" +#include +#include + #include +#include #include namespace usm { @@ -29,8 +35,9 @@ struct pool_descriptor { ur_usm_type_t type; bool deviceReadOnly; - static bool equal(const pool_descriptor &lhs, const pool_descriptor &rhs); - static std::size_t hash(const pool_descriptor &desc); + bool operator==(const pool_descriptor &other) const; + friend std::ostream &operator<<(std::ostream &os, + const pool_descriptor &desc); static std::pair> create(ur_usm_pool_handle_t poolHandle, ur_context_handle_t hContext); }; @@ -45,8 +52,8 @@ urGetSubDevices(ur_device_handle_t hDevice) { } ur_device_partition_property_t prop; - prop.type = UR_DEVICE_PARTITION_EQUALLY; - prop.value.equally = nComputeUnits; + prop.type = UR_DEVICE_PARTITION_BY_CSLICE; + prop.value.affinity_domain = 0; ur_device_partition_properties_t properties{ UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, @@ -75,10 +82,10 @@ urGetSubDevices(ur_device_handle_t hDevice) { inline std::pair> urGetAllDevicesAndSubDevices(ur_context_handle_t hContext) { - size_t deviceCount; + size_t deviceCount = 0; auto ret = urContextGetInfo(hContext, UR_CONTEXT_INFO_NUM_DEVICES, sizeof(deviceCount), &deviceCount, nullptr); - if (ret != UR_RESULT_SUCCESS) { + if (ret != UR_RESULT_SUCCESS || deviceCount == 0) { return {ret, {}}; } @@ 
-110,6 +117,11 @@ urGetAllDevicesAndSubDevices(ur_context_handle_t hContext) { for (size_t i = 0; i < deviceCount; i++) { ret = addPoolsForDevicesRec(devices[i]); if (ret != UR_RESULT_SUCCESS) { + if (ret == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + // Return main devices when sub-devices are unsupported. + return {ret, std::move(devices)}; + } + return {ret, {}}; } } @@ -122,22 +134,28 @@ isSharedAllocationReadOnlyOnDevice(const pool_descriptor &desc) { return desc.type == UR_USM_TYPE_SHARED && desc.deviceReadOnly; } -inline bool pool_descriptor::equal(const pool_descriptor &lhs, - const pool_descriptor &rhs) { - ur_native_handle_t lhsNative, rhsNative; +inline bool pool_descriptor::operator==(const pool_descriptor &other) const { + const pool_descriptor &lhs = *this; + const pool_descriptor &rhs = other; + ur_native_handle_t lhsNative = nullptr, rhsNative = nullptr; // We want to share a memory pool for sub-devices and sub-sub devices. // Sub-devices and sub-sub-devices might be represented by different ur_device_handle_t but // they share the same native_handle_t (which is used by UMF provider). // Ref: https://github.com/intel/llvm/commit/86511c5dc84b5781dcfd828caadcb5cac157eae1 // TODO: is this L0 specific? 
- auto ret = urDeviceGetNativeHandle(lhs.hDevice, &lhsNative); - if (ret != UR_RESULT_SUCCESS) { - throw ret; + if (lhs.hDevice) { + auto ret = urDeviceGetNativeHandle(lhs.hDevice, &lhsNative); + if (ret != UR_RESULT_SUCCESS) { + throw ret; + } } - ret = urDeviceGetNativeHandle(rhs.hDevice, &rhsNative); - if (ret != UR_RESULT_SUCCESS) { - throw ret; + + if (rhs.hDevice) { + auto ret = urDeviceGetNativeHandle(rhs.hDevice, &rhsNative); + if (ret != UR_RESULT_SUCCESS) { + throw ret; + } } return lhsNative == rhsNative && lhs.type == rhs.type && @@ -146,16 +164,12 @@ inline bool pool_descriptor::equal(const pool_descriptor &lhs, lhs.poolHandle == rhs.poolHandle; } -inline std::size_t pool_descriptor::hash(const pool_descriptor &desc) { - ur_native_handle_t native; - auto ret = urDeviceGetNativeHandle(desc.hDevice, &native); - if (ret != UR_RESULT_SUCCESS) { - throw ret; - } - - return combine_hashes(0, desc.type, native, - isSharedAllocationReadOnlyOnDevice(desc), - desc.poolHandle); +inline std::ostream &operator<<(std::ostream &os, const pool_descriptor &desc) { + os << "pool handle: " << desc.poolHandle + << " context handle: " << desc.hContext + << " device handle: " << desc.hDevice << " memory type: " << desc.type + << " is read only: " << desc.deviceReadOnly; + return os; } inline std::pair> @@ -177,6 +191,7 @@ pool_descriptor::create(ur_usm_pool_handle_t poolHandle, pool_descriptor &desc = descriptors.emplace_back(); desc.poolHandle = poolHandle; desc.hContext = hContext; + desc.hDevice = device; desc.type = UR_USM_TYPE_DEVICE; } { @@ -200,6 +215,69 @@ pool_descriptor::create(ur_usm_pool_handle_t poolHandle, return {ret, descriptors}; } +template struct pool_manager { + private: + using desc_to_pool_map_t = std::unordered_map; + + desc_to_pool_map_t descToPoolMap; + + public: + static std::pair + create(desc_to_pool_map_t descToHandleMap = {}) { + auto manager = pool_manager(); + + for (auto &[desc, hPool] : descToHandleMap) { + auto ret = manager.addPool(desc, 
hPool); + if (ret != UR_RESULT_SUCCESS) { + return {ret, pool_manager()}; + } + } + + return {UR_RESULT_SUCCESS, std::move(manager)}; + } + + ur_result_t addPool(const D &desc, + umf::pool_unique_handle_t &hPool) noexcept { + if (!descToPoolMap.try_emplace(desc, std::move(hPool)).second) { + logger::error("Pool for pool descriptor: {}, already exists", desc); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UR_RESULT_SUCCESS; + } + + std::optional getPool(const D &desc) noexcept { + auto it = descToPoolMap.find(desc); + if (it == descToPoolMap.end()) { + logger::error("Pool descriptor doesn't match any existing pool: {}", + desc); + return std::nullopt; + } + + return it->second.get(); + } +}; + } // namespace usm +namespace std { +/// @brief hash specialization for usm::pool_descriptor +template <> struct hash { + inline size_t operator()(const usm::pool_descriptor &desc) const { + ur_native_handle_t native = nullptr; + if (desc.hDevice) { + auto ret = urDeviceGetNativeHandle(desc.hDevice, &native); + if (ret != UR_RESULT_SUCCESS) { + throw ret; + } + } + + return combine_hashes(0, desc.type, native, + isSharedAllocationReadOnlyOnDevice(desc), + desc.poolHandle); + } +}; + +} // namespace std + #endif /* USM_POOL_MANAGER_HPP */ diff --git a/source/common/ur_singleton.hpp b/source/common/ur_singleton.hpp index d757bb197c..6440e3ac7f 100644 --- a/source/common/ur_singleton.hpp +++ b/source/common/ur_singleton.hpp @@ -31,7 +31,8 @@ template class singleton_factory_t { ////////////////////////////////////////////////////////////////////////// /// extract the key from parameter list and if necessary, convert type - template key_t getKey(key_tn key, Ts &&...params) { + template + key_t getKey(key_tn key, [[maybe_unused]] Ts &&...params) { return reinterpret_cast(key); } diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index db796612ea..d4f5bc73a5 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -88,6 +88,7 
@@ target_sources(ur_loader ${CMAKE_CURRENT_SOURCE_DIR}/ur_libddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_lib.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_lib.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ur_codeloc.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/validation/ur_valddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/validation/ur_validation_layer.cpp ) diff --git a/source/loader/layers/tracing/ur_tracing_layer.cpp b/source/loader/layers/tracing/ur_tracing_layer.cpp index b022ae831f..dd36b286f2 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.cpp +++ b/source/loader/layers/tracing/ur_tracing_layer.cpp @@ -14,6 +14,7 @@ #include "ur_util.hpp" #include "xpti/xpti_data_types.h" #include "xpti/xpti_trace_framework.h" +#include #include namespace ur_tracing_layer { @@ -23,6 +24,8 @@ constexpr auto CALL_STREAM_NAME = "ur"; constexpr auto STREAM_VER_MAJOR = UR_MAJOR_VERSION(UR_API_VERSION_CURRENT); constexpr auto STREAM_VER_MINOR = UR_MINOR_VERSION(UR_API_VERSION_CURRENT); +static thread_local xpti_td *activeEvent; + /////////////////////////////////////////////////////////////////////////////// context_t::context_t() { xptiFrameworkInitialize(); @@ -39,11 +42,21 @@ bool context_t::isAvailable() const { return xptiTraceEnabled(); } void context_t::notify(uint16_t trace_type, uint32_t id, const char *name, void *args, ur_result_t *resultp, uint64_t instance) { xpti::function_with_args_t payload{id, name, args, resultp, nullptr}; - xptiNotifySubscribers(call_stream_id, trace_type, nullptr, nullptr, + xptiNotifySubscribers(call_stream_id, trace_type, nullptr, activeEvent, instance, &payload); } uint64_t context_t::notify_begin(uint32_t id, const char *name, void *args) { + if (auto loc = codelocData.get_codeloc()) { + xpti::payload_t payload = + xpti::payload_t(loc->functionName, loc->sourceFile, loc->lineNumber, + loc->columnNumber, nullptr); + uint64_t InstanceNumber{}; + activeEvent = xptiMakeEvent("Unified Runtime call", &payload, + xpti::trace_graph_event, xpti_at::active, + 
&InstanceNumber); + } + uint64_t instance = xptiGetUniqueId(); notify((uint16_t)xpti::trace_point_type_t::function_with_args_begin, id, name, args, nullptr, instance); diff --git a/source/loader/layers/tracing/ur_tracing_layer.hpp b/source/loader/layers/tracing/ur_tracing_layer.hpp index b00d12d301..ddda493c05 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.hpp +++ b/source/loader/layers/tracing/ur_tracing_layer.hpp @@ -24,6 +24,7 @@ namespace ur_tracing_layer { class __urdlllocal context_t : public proxy_layer_context_t { public: ur_dditable_t urDdiTable = {}; + codeloc_data codelocData; context_t(); ~context_t(); @@ -32,7 +33,9 @@ class __urdlllocal context_t : public proxy_layer_context_t { std::vector getNames() const override { return {name}; } ur_result_t init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) override; + const std::set &enabledLayerNames, + codeloc_data codelocData) override; + ur_result_t tearDown() override { return UR_RESULT_SUCCESS; } uint64_t notify_begin(uint32_t id, const char *name, void *args); void notify_end(uint32_t id, const char *name, void *args, ur_result_t *resultp, uint64_t instance); diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f30fac3807..4cd712dbbe 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -15,54 +15,6 @@ #include namespace ur_tracing_layer { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. 
-) { - auto pfnInit = context.urDdiTable.Global.pfnInit; - - if (nullptr == pfnInit) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - ur_init_params_t params = {&device_flags, &hLoaderConfig}; - uint64_t instance = - context.notify_begin(UR_FUNCTION_INIT, "urInit", ¶ms); - - ur_result_t result = pfnInit(device_flags, hLoaderConfig); - - context.notify_end(UR_FUNCTION_INIT, "urInit", ¶ms, &result, instance); - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { - auto pfnTearDown = context.urDdiTable.Global.pfnTearDown; - - if (nullptr == pfnTearDown) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - ur_tear_down_params_t params = {&pParams}; - uint64_t instance = - context.notify_begin(UR_FUNCTION_TEAR_DOWN, "urTearDown", ¶ms); - - ur_result_t result = pfnTearDown(pParams); - - context.notify_end(UR_FUNCTION_TEAR_DOWN, "urTearDown", ¶ms, &result, - instance); - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -405,8 +357,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -4386,7 +4338,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) { auto pfnWriteHostPipe = context.urDdiTable.Enqueue.pfnWriteHostPipe; @@ -5232,8 +5184,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -5246,34 +5198,77 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMemcpyUSMExp = - context.urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; + auto pfnAppendUSMMemcpyExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_memcpy_usm_exp_params_t params = { + ur_command_buffer_append_usm_memcpy_exp_params_t params = { &hCommandBuffer, &pDst, &pSrc, &size, &numSyncPointsInWaitList, &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = - context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP, - "urCommandBufferAppendMemcpyUSMExp", ¶ms); + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP, + "urCommandBufferAppendUSMMemcpyExp", ¶ms); - ur_result_t result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + ur_result_t result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMCPY_USM_EXP, - "urCommandBufferAppendMemcpyUSMExp", ¶ms, &result, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP, + "urCommandBufferAppendUSMMemcpyExp", ¶ms, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. 
+ size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_usm_fill_exp_params_t params = { + &hCommandBuffer, &pMemory, &pPattern, + &patternSize, &size, &numSyncPointsInWaitList, + &pSyncPointWaitList, &pSyncPoint}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP, + "urCommandBufferAppendUSMFillExp", ¶ms); + + ur_result_t result = pfnAppendUSMFillExp( + hCommandBuffer, pMemory, pPattern, patternSize, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP, + "urCommandBufferAppendUSMFillExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -5288,14 +5283,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; + auto pfnAppendMemBufferCopyExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_copy_exp_params_t params = { + ur_command_buffer_append_mem_buffer_copy_exp_params_t params = { &hCommandBuffer, &hSrcMem, &hDstMem, @@ -5306,23 +5301,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP, - "urCommandBufferAppendMembufferCopyExp", ¶ms); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP, + "urCommandBufferAppendMemBufferCopyExp", ¶ms); - ur_result_t result = pfnAppendMembufferCopyExp( + ur_result_t result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_EXP, - "urCommandBufferAppendMembufferCopyExp", ¶ms, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP, + "urCommandBufferAppendMemBufferCopyExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( 
ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -5337,14 +5332,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; + auto pfnAppendMemBufferWriteExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_write_exp_params_t params = { + ur_command_buffer_append_mem_buffer_write_exp_params_t params = { &hCommandBuffer, &hBuffer, &offset, @@ -5354,23 +5349,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP, - "urCommandBufferAppendMembufferWriteExp", ¶ms); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP, + "urCommandBufferAppendMemBufferWriteExp", ¶ms); - ur_result_t result = pfnAppendMembufferWriteExp( + ur_result_t result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_EXP, - "urCommandBufferAppendMembufferWriteExp", ¶ms, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP, + "urCommandBufferAppendMemBufferWriteExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( 
+/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -5384,14 +5379,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; + auto pfnAppendMemBufferReadExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_read_exp_params_t params = { + ur_command_buffer_append_mem_buffer_read_exp_params_t params = { &hCommandBuffer, &hBuffer, &offset, @@ -5401,23 +5396,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP, - "urCommandBufferAppendMembufferReadExp", ¶ms); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP, + "urCommandBufferAppendMemBufferReadExp", ¶ms); - ur_result_t result = pfnAppendMembufferReadExp( + ur_result_t result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_EXP, - "urCommandBufferAppendMembufferReadExp", ¶ms, + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP, + "urCommandBufferAppendMemBufferReadExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief 
Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. @@ -5439,14 +5434,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyRectExp; + auto pfnAppendMemBufferCopyRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_copy_rect_exp_params_t params = { + ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t params = { &hCommandBuffer, &hSrcMem, &hDstMem, @@ -5461,25 +5456,25 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP, - "urCommandBufferAppendMembufferCopyRectExp", ¶ms); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP, + "urCommandBufferAppendMemBufferCopyRectExp", ¶ms); - ur_result_t result = pfnAppendMembufferCopyRectExp( + ur_result_t result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); context.notify_end( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_COPY_RECT_EXP, - 
"urCommandBufferAppendMembufferCopyRectExp", ¶ms, &result, + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP, + "urCommandBufferAppendMemBufferCopyRectExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -5507,14 +5502,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteRectExp; + auto pfnAppendMemBufferWriteRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_write_rect_exp_params_t params = { + ur_command_buffer_append_mem_buffer_write_rect_exp_params_t params = { &hCommandBuffer, &hBuffer, &bufferOffset, @@ -5529,25 +5524,25 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP, - "urCommandBufferAppendMembufferWriteRectExp", ¶ms); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP, + "urCommandBufferAppendMemBufferWriteRectExp", ¶ms); - ur_result_t result = 
pfnAppendMembufferWriteRectExp( + ur_result_t result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); context.notify_end( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_WRITE_RECT_EXP, - "urCommandBufferAppendMembufferWriteRectExp", ¶ms, &result, + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP, + "urCommandBufferAppendMemBufferWriteRectExp", ¶ms, &result, instance); return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -5573,14 +5568,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadRectExp; + auto pfnAppendMemBufferReadRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_command_buffer_append_membuffer_read_rect_exp_params_t params = { + ur_command_buffer_append_mem_buffer_read_rect_exp_params_t params = { &hCommandBuffer, &hBuffer, &bufferOffset, @@ -5595,22 +5590,162 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( &pSyncPointWaitList, &pSyncPoint}; uint64_t instance = context.notify_begin( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP, - "urCommandBufferAppendMembufferReadRectExp", ¶ms); + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP, + "urCommandBufferAppendMemBufferReadRectExp", ¶ms); - ur_result_t result = pfnAppendMembufferReadRectExp( + ur_result_t result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); context.notify_end( - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEMBUFFER_READ_RECT_EXP, - "urCommandBufferAppendMembufferReadRectExp", ¶ms, &result, + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP, + "urCommandBufferAppendMemBufferReadRectExp", ¶ms, &result, instance); return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + 
ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendMemBufferFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_mem_buffer_fill_exp_params_t params = { + &hCommandBuffer, + &hBuffer, + &pPattern, + &patternSize, + &offset, + &size, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP, + "urCommandBufferAppendMemBufferFillExp", ¶ms); + + ur_result_t result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP, + "urCommandBufferAppendMemBufferFillExp", ¶ms, + &result, instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the 
command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMPrefetchExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_usm_prefetch_exp_params_t params = { + &hCommandBuffer, + &pMemory, + &size, + &flags, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP, + "urCommandBufferAppendUSMPrefetchExp", ¶ms); + + ur_result_t result = pfnAppendUSMPrefetchExp( + hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP, + "urCommandBufferAppendUSMPrefetchExp", ¶ms, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. 
+ ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMAdviseExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_append_usm_advise_exp_params_t params = { + &hCommandBuffer, + &pMemory, + &size, + &advice, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP, + "urCommandBufferAppendUSMAdviseExp", ¶ms); + + ur_result_t result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, + advice, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP, + "urCommandBufferAppendUSMAdviseExp", ¶ms, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -5650,6 +5785,99 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global 
and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+) { + auto pfnCooperativeKernelLaunchExp = + context.urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_cooperative_kernel_launch_exp_params_t params = { + &hQueue, + &hKernel, + &workDim, + &pGlobalWorkOffset, + &pGlobalWorkSize, + &pLocalWorkSize, + &numEventsInWaitList, + &phEventWaitList, + &phEvent}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP, + "urEnqueueCooperativeKernelLaunchExp", &params); + + ur_result_t result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); + + context.notify_end(UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP, + "urEnqueueCooperativeKernelLaunchExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + auto pfnSuggestMaxCooperativeGroupCountExp = + context.urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_kernel_suggest_max_cooperative_group_count_exp_params_t params = { + &hKernel, &pGroupCountRet}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP, + "urKernelSuggestMaxCooperativeGroupCountExp", &params); + + ur_result_t result = + pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + + context.notify_end( + UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP, + 
"urKernelSuggestMaxCooperativeGroupCountExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -5828,12 +6056,6 @@ __urdlllocal ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - dditable.pfnInit = pDdiTable->pfnInit; - pDdiTable->pfnInit = ur_tracing_layer::urInit; - - dditable.pfnTearDown = pDdiTable->pfnTearDown; - pDdiTable->pfnTearDown = ur_tracing_layer::urTearDown; - dditable.pfnAdapterGet = pDdiTable->pfnAdapterGet; pDdiTable->pfnAdapterGet = ur_tracing_layer::urAdapterGet; @@ -5997,36 +6219,52 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendKernelLaunchExp = ur_tracing_layer::urCommandBufferAppendKernelLaunchExp; - dditable.pfnAppendMemcpyUSMExp = pDdiTable->pfnAppendMemcpyUSMExp; - pDdiTable->pfnAppendMemcpyUSMExp = - ur_tracing_layer::urCommandBufferAppendMemcpyUSMExp; + dditable.pfnAppendUSMMemcpyExp = pDdiTable->pfnAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur_tracing_layer::urCommandBufferAppendUSMMemcpyExp; + + dditable.pfnAppendUSMFillExp = pDdiTable->pfnAppendUSMFillExp; + pDdiTable->pfnAppendUSMFillExp = + ur_tracing_layer::urCommandBufferAppendUSMFillExp; + + dditable.pfnAppendMemBufferCopyExp = pDdiTable->pfnAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur_tracing_layer::urCommandBufferAppendMemBufferCopyExp; + + dditable.pfnAppendMemBufferWriteExp = pDdiTable->pfnAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur_tracing_layer::urCommandBufferAppendMemBufferWriteExp; + + dditable.pfnAppendMemBufferReadExp = pDdiTable->pfnAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur_tracing_layer::urCommandBufferAppendMemBufferReadExp; - dditable.pfnAppendMembufferCopyExp = pDdiTable->pfnAppendMembufferCopyExp; - 
pDdiTable->pfnAppendMembufferCopyExp = - ur_tracing_layer::urCommandBufferAppendMembufferCopyExp; + dditable.pfnAppendMemBufferCopyRectExp = + pDdiTable->pfnAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur_tracing_layer::urCommandBufferAppendMemBufferCopyRectExp; - dditable.pfnAppendMembufferWriteExp = pDdiTable->pfnAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteExp = - ur_tracing_layer::urCommandBufferAppendMembufferWriteExp; + dditable.pfnAppendMemBufferWriteRectExp = + pDdiTable->pfnAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur_tracing_layer::urCommandBufferAppendMemBufferWriteRectExp; - dditable.pfnAppendMembufferReadExp = pDdiTable->pfnAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadExp = - ur_tracing_layer::urCommandBufferAppendMembufferReadExp; + dditable.pfnAppendMemBufferReadRectExp = + pDdiTable->pfnAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur_tracing_layer::urCommandBufferAppendMemBufferReadRectExp; - dditable.pfnAppendMembufferCopyRectExp = - pDdiTable->pfnAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - ur_tracing_layer::urCommandBufferAppendMembufferCopyRectExp; + dditable.pfnAppendMemBufferFillExp = pDdiTable->pfnAppendMemBufferFillExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur_tracing_layer::urCommandBufferAppendMemBufferFillExp; - dditable.pfnAppendMembufferWriteRectExp = - pDdiTable->pfnAppendMembufferWriteRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - ur_tracing_layer::urCommandBufferAppendMembufferWriteRectExp; + dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_tracing_layer::urCommandBufferAppendUSMPrefetchExp; - dditable.pfnAppendMembufferReadRectExp = - pDdiTable->pfnAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - ur_tracing_layer::urCommandBufferAppendMembufferReadRectExp; + 
dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; @@ -6200,6 +6438,41 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.EnqueueExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnCooperativeKernelLaunchExp = + pDdiTable->pfnCooperativeKernelLaunchExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur_tracing_layer::urEnqueueCooperativeKernelLaunchExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses /// @@ -6333,6 +6606,41 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling 
application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.KernelExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnSuggestMaxCooperativeGroupCountExp = + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur_tracing_layer::urKernelSuggestMaxCooperativeGroupCountExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses /// @@ -6914,13 +7222,16 @@ __urdlllocal ur_result_t UR_APICALL urGetDeviceProcAddrTable( } ur_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data codelocData) { ur_result_t result = UR_RESULT_SUCCESS; if (!enabledLayerNames.count(name)) { return result; } + ur_tracing_layer::context.codelocData = codelocData; + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetGlobalProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Global); @@ -6946,6 +7257,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Enqueue); } + if (UR_RESULT_SUCCESS == result) { + result = 
ur_tracing_layer::urGetEnqueueExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EnqueueExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetEventProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Event); @@ -6956,6 +7272,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Kernel); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetKernelExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->KernelExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &dditable->Mem); diff --git a/source/loader/layers/ur_proxy_layer.hpp b/source/loader/layers/ur_proxy_layer.hpp index 782a7e241b..2b710f3287 100644 --- a/source/loader/layers/ur_proxy_layer.hpp +++ b/source/loader/layers/ur_proxy_layer.hpp @@ -12,6 +12,7 @@ #ifndef UR_PROXY_LAYER_H #define UR_PROXY_LAYER_H 1 +#include "ur_codeloc.hpp" #include "ur_ddi.h" #include "ur_util.hpp" @@ -24,9 +25,10 @@ class __urdlllocal proxy_layer_context_t { virtual std::vector getNames() const = 0; virtual bool isAvailable() const = 0; - virtual ur_result_t - init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) = 0; + virtual ur_result_t init(ur_dditable_t *dditable, + const std::set &enabledLayerNames, + codeloc_data codelocData) = 0; + virtual ur_result_t tearDown() = 0; }; #endif /* UR_PROXY_LAYER_H */ diff --git a/source/loader/layers/validation/ur_leak_check.hpp b/source/loader/layers/validation/ur_leak_check.hpp index 475742fc75..3f3fb80b0b 100644 --- a/source/loader/layers/validation/ur_leak_check.hpp +++ b/source/loader/layers/validation/ur_leak_check.hpp @@ -24,6 +24,7 @@ struct RefCountContext { }; enum RefCountUpdateType { + REFCOUNT_CREATE_OR_INCREASE, REFCOUNT_CREATE, REFCOUNT_INCREASE, REFCOUNT_DECREASE, @@ -31,13 +32,25 @@ struct RefCountContext { std::mutex mutex; std::unordered_map counts; + int64_t adapterCount = 0; - void updateRefCount(void *ptr, 
enum RefCountUpdateType type) { + void updateRefCount(void *ptr, enum RefCountUpdateType type, + bool isAdapterHandle = false) { std::unique_lock ulock(mutex); auto it = counts.find(ptr); switch (type) { + case REFCOUNT_CREATE_OR_INCREASE: + if (it == counts.end()) { + counts[ptr] = {1, getCurrentBacktrace()}; + if (isAdapterHandle) { + adapterCount++; + } + } else { + counts[ptr].refCount++; + } + break; case REFCOUNT_CREATE: if (it == counts.end()) { counts[ptr] = {1, getCurrentBacktrace()}; @@ -65,6 +78,8 @@ struct RefCountContext { if (counts[ptr].refCount < 0) { context.logger.error( "Attempting to release nonexistent handle {}", ptr); + } else if (counts[ptr].refCount == 0 && isAdapterHandle) { + adapterCount--; } break; } @@ -75,17 +90,27 @@ struct RefCountContext { if (counts[ptr].refCount == 0) { counts.erase(ptr); } + + // No more active adapters, so any references still held are leaked + if (adapterCount == 0) { + logInvalidReferences(); + clear(); + } } public: void createRefCount(void *ptr) { updateRefCount(ptr, REFCOUNT_CREATE); } - void incrementRefCount(void *ptr) { - updateRefCount(ptr, REFCOUNT_INCREASE); + void incrementRefCount(void *ptr, bool isAdapterHandle = false) { + updateRefCount(ptr, REFCOUNT_INCREASE, isAdapterHandle); + } + + void decrementRefCount(void *ptr, bool isAdapterHandle = false) { + updateRefCount(ptr, REFCOUNT_DECREASE, isAdapterHandle); } - void decrementRefCount(void *ptr) { - updateRefCount(ptr, REFCOUNT_DECREASE); + void createOrIncrementRefCount(void *ptr, bool isAdapterHandle = false) { + updateRefCount(ptr, REFCOUNT_CREATE_OR_INCREASE, isAdapterHandle); } void clear() { counts.clear(); } diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 46b0eef491..dca284e0d1 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -14,58 +14,6 @@ namespace ur_validation_layer { 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. - ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. -) { - auto pfnInit = context.urDdiTable.Global.pfnInit; - - if (nullptr == pfnInit) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (context.enableParameterValidation) { - if (UR_DEVICE_INIT_FLAGS_MASK & device_flags) { - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - ur_result_t result = pfnInit(device_flags, hLoaderConfig); - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { - auto pfnTearDown = context.urDdiTable.Global.pfnTearDown; - - if (nullptr == pfnTearDown) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (context.enableParameterValidation) { - if (NULL == pParams) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - } - - ur_result_t result = pfnTearDown(pParams); - - if (context.enableLeakChecking) { - refCountContext.logInvalidReferences(); - refCountContext.clear(); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -92,6 +40,11 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( ur_result_t result = pfnAdapterGet(NumEntries, phAdapters, pNumAdapters); + if (context.enableLeakChecking && phAdapters && + result == UR_RESULT_SUCCESS) { + refCountContext.createOrIncrementRefCount(*phAdapters, true); + } + return result; } @@ -115,7 +68,7 @@ __urdlllocal ur_result_t 
UR_APICALL urAdapterRelease( ur_result_t result = pfnAdapterRelease(hAdapter); if (context.enableLeakChecking && result == UR_RESULT_SUCCESS) { - refCountContext.decrementRefCount(hAdapter); + refCountContext.decrementRefCount(hAdapter, true); } return result; @@ -141,7 +94,7 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRetain( ur_result_t result = pfnAdapterRetain(hAdapter); if (context.enableLeakChecking && result == UR_RESULT_SUCCESS) { - refCountContext.incrementRefCount(hAdapter); + refCountContext.incrementRefCount(hAdapter, true); } return result; @@ -261,6 +214,10 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGet( if (NULL == phAdapters) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NumEntries == 0 && phPlatforms != NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } ur_result_t result = @@ -445,8 +402,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -466,9 +423,17 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + if (NumEntries > 0 && phDevices == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + if (UR_DEVICE_TYPE_VPU < DeviceType) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + if (NumEntries == 0 && phDevices != NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } ur_result_t result = @@ -609,6 +574,10 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( if (NULL == pProperties) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL == pProperties->pProperties) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } } ur_result_t result = pfnPartition(hDevice, pProperties, NumDevices, @@ -782,6 +751,10 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate( if (NULL == phContext) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL != pProperties && UR_CONTEXT_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } ur_result_t result = @@ -1659,6 +1632,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -1706,6 +1683,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -1754,6 +1735,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return 
UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -2279,6 +2264,11 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemCreate( if (NULL == phPhysicalMem) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL != pProperties && + UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } ur_result_t result = @@ -3251,6 +3241,11 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( if (NULL == hKernel) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + + if (NULL != pProperties && + UR_MEM_FLAGS_MASK & pProperties->memoryAccess) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } } ur_result_t result = @@ -3441,6 +3436,22 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate( if (NULL == phQueue) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (NULL != pProperties && UR_QUEUE_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pProperties != NULL && + pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && + pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + } + + if (pProperties != NULL && + pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && + pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + } } ur_result_t result = pfnCreate(hContext, hDevice, pProperties, phQueue); @@ -3877,9 +3888,13 @@ __urdlllocal ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus) { + if (UR_EXECUTION_INFO_QUEUED < execStatus) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + if (execStatus == UR_EXECUTION_INFO_QUEUED) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } } ur_result_t result = @@ -3949,6 +3964,14 @@ __urdlllocal ur_result_t 
UR_APICALL urEnqueueKernelLaunch( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnKernelLaunch( @@ -3991,6 +4014,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWait( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4033,6 +4064,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnEventsWaitWithBarrier(hQueue, numEventsInWaitList, @@ -4086,6 +4125,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4142,6 +4189,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) 
{ + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4248,6 +4303,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferReadRect( @@ -4359,6 +4422,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferWriteRect( @@ -4414,6 +4485,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4513,6 +4592,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferCopyRect( @@ -4568,6 +4655,34 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (patternSize == 0 
|| size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (patternSize > size) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((patternSize & (patternSize - 1)) != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (size % patternSize != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (offset % patternSize != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4627,6 +4742,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (region.width == 0 || region.height == 0 || region.depth == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemImageRead( @@ -4687,6 +4814,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (region.width == 0 || region.height == 0 || region.depth == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemImageWrite( @@ -4747,6 +4886,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (region.width == 0 || region.height == 0 || region.depth 
== 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4807,6 +4958,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, @@ -4859,6 +5018,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4932,6 +5099,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -4989,6 +5164,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return 
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5045,6 +5228,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5178,6 +5369,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5256,6 +5455,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5319,6 +5526,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnDeviceGlobalVariableWrite( @@ -5382,6 +5597,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return 
UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnDeviceGlobalVariableRead( @@ -5448,6 +5671,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueReadHostPipe( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5481,7 +5712,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. 
) { auto pfnWriteHostPipe = context.urDdiTable.Enqueue.pfnWriteHostPipe; @@ -5507,10 +5738,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (NULL == phEvent) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - if (phEventWaitList == NULL && numEventsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } @@ -5518,6 +5745,14 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = @@ -5567,6 +5802,10 @@ __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + if (NULL != pUSMDesc && UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + if (pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align - 1)) != 0)) { return UR_RESULT_ERROR_INVALID_VALUE; @@ -5935,6 +6174,14 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( if (pImageDesc && UR_MEM_TYPE_IMAGE1D_BUFFER < pImageDesc->type) { return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnImageCopyExp( @@ -6301,6 +6548,14 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( if (NULL == hSemaphore) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + 
return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnWaitExternalSemaphoreExp( @@ -6341,6 +6596,14 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( if (NULL == hSemaphore) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnSignalExternalSemaphoreExp( @@ -6528,8 +6791,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -6542,10 +6805,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMemcpyUSMExp = - context.urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; + auto pfnAppendUSMMemcpyExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6575,7 +6838,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } } - ur_result_t result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + ur_result_t result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6583,8 +6846,77 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendUSMFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMemory) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pPattern) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (patternSize == 0 || size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (patternSize > size) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((patternSize & (patternSize - 1)) != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (size % patternSize != 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + } + + ur_result_t result = pfnAppendUSMFillExp( + hCommandBuffer, pMemory, pPattern, patternSize, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6599,10 +6931,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; + auto pfnAppendMemBufferCopyExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6628,7 +6960,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } } - ur_result_t result = pfnAppendMembufferCopyExp( + ur_result_t result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6636,8 +6968,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6652,10 +6984,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; + auto pfnAppendMemBufferWriteExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6681,7 +7013,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } } - ur_result_t result = pfnAppendMembufferWriteExp( + ur_result_t result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6689,8 +7021,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6704,10 +7036,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; + auto pfnAppendMemBufferReadExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6733,7 +7065,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } } - ur_result_t result = pfnAppendMembufferReadExp( + ur_result_t result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6741,8 +7073,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6764,10 +7096,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferCopyRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferCopyRectExp; + auto pfnAppendMemBufferCopyRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6793,7 +7125,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } } - ur_result_t result = pfnAppendMembufferCopyRectExp( + ur_result_t result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6802,8 +7134,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6831,10 +7163,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferWriteRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferWriteRectExp; + auto pfnAppendMemBufferWriteRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6860,7 +7192,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } } - ur_result_t result = pfnAppendMembufferWriteRectExp( + ur_result_t result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6869,8 +7201,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6896,10 +7228,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) { - auto pfnAppendMembufferReadRectExp = - context.urDdiTable.CommandBufferExp.pfnAppendMembufferReadRectExp; + auto pfnAppendMemBufferReadRectExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6925,7 +7257,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( } } - ur_result_t result = pfnAppendMembufferReadRectExp( + ur_result_t result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6933,6 +7265,170 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + auto pfnAppendMemBufferFillExp = + context.urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pPattern) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + } + + ur_result_t result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + auto pfnAppendUSMPrefetchExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMemory) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_USM_MIGRATION_FLAGS_MASK & flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnAppendUSMPrefetchExp( + hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + auto pfnAppendUSMAdviseExp = + context.urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMemory) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_USM_ADVICE_FLAGS_MASK & advice) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + if (size == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, + advice, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -6972,6 +7468,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( if (phEventWaitList != NULL && numEventsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnEnqueueExp( @@ -6980,6 +7484,114 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL 
urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+) { + auto pfnCooperativeKernelLaunchExp = + context.urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hKernel) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pGlobalWorkOffset) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pGlobalWorkSize) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + ur_result_t result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + auto pfnSuggestMaxCooperativeGroupCountExp = + context.urDdiTable.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hKernel) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pGroupCountRet) { + 
return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -7183,12 +7795,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - dditable.pfnInit = pDdiTable->pfnInit; - pDdiTable->pfnInit = ur_validation_layer::urInit; - - dditable.pfnTearDown = pDdiTable->pfnTearDown; - pDdiTable->pfnTearDown = ur_validation_layer::urTearDown; - dditable.pfnAdapterGet = pDdiTable->pfnAdapterGet; pDdiTable->pfnAdapterGet = ur_validation_layer::urAdapterGet; @@ -7357,36 +7963,52 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendKernelLaunchExp = ur_validation_layer::urCommandBufferAppendKernelLaunchExp; - dditable.pfnAppendMemcpyUSMExp = pDdiTable->pfnAppendMemcpyUSMExp; - pDdiTable->pfnAppendMemcpyUSMExp = - ur_validation_layer::urCommandBufferAppendMemcpyUSMExp; + dditable.pfnAppendUSMMemcpyExp = pDdiTable->pfnAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur_validation_layer::urCommandBufferAppendUSMMemcpyExp; + + dditable.pfnAppendUSMFillExp = pDdiTable->pfnAppendUSMFillExp; + pDdiTable->pfnAppendUSMFillExp = + ur_validation_layer::urCommandBufferAppendUSMFillExp; + + dditable.pfnAppendMemBufferCopyExp = pDdiTable->pfnAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur_validation_layer::urCommandBufferAppendMemBufferCopyExp; - dditable.pfnAppendMembufferCopyExp = pDdiTable->pfnAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferCopyExp = - ur_validation_layer::urCommandBufferAppendMembufferCopyExp; + dditable.pfnAppendMemBufferWriteExp = pDdiTable->pfnAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferWriteExp = + 
ur_validation_layer::urCommandBufferAppendMemBufferWriteExp; - dditable.pfnAppendMembufferWriteExp = pDdiTable->pfnAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferWriteExp = - ur_validation_layer::urCommandBufferAppendMembufferWriteExp; + dditable.pfnAppendMemBufferReadExp = pDdiTable->pfnAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur_validation_layer::urCommandBufferAppendMemBufferReadExp; - dditable.pfnAppendMembufferReadExp = pDdiTable->pfnAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferReadExp = - ur_validation_layer::urCommandBufferAppendMembufferReadExp; + dditable.pfnAppendMemBufferCopyRectExp = + pDdiTable->pfnAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur_validation_layer::urCommandBufferAppendMemBufferCopyRectExp; - dditable.pfnAppendMembufferCopyRectExp = - pDdiTable->pfnAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - ur_validation_layer::urCommandBufferAppendMembufferCopyRectExp; + dditable.pfnAppendMemBufferWriteRectExp = + pDdiTable->pfnAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur_validation_layer::urCommandBufferAppendMemBufferWriteRectExp; - dditable.pfnAppendMembufferWriteRectExp = - pDdiTable->pfnAppendMembufferWriteRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - ur_validation_layer::urCommandBufferAppendMembufferWriteRectExp; + dditable.pfnAppendMemBufferReadRectExp = + pDdiTable->pfnAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur_validation_layer::urCommandBufferAppendMemBufferReadRectExp; - dditable.pfnAppendMembufferReadRectExp = - pDdiTable->pfnAppendMembufferReadRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - ur_validation_layer::urCommandBufferAppendMembufferReadRectExp; + dditable.pfnAppendMemBufferFillExp = pDdiTable->pfnAppendMemBufferFillExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur_validation_layer::urCommandBufferAppendMemBufferFillExp; + + 
dditable.pfnAppendUSMPrefetchExp = pDdiTable->pfnAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_validation_layer::urCommandBufferAppendUSMPrefetchExp; + + dditable.pfnAppendUSMAdviseExp = pDdiTable->pfnAppendUSMAdviseExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_validation_layer::urCommandBufferAppendUSMAdviseExp; dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; @@ -7563,6 +8185,42 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.EnqueueExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnCooperativeKernelLaunchExp = + pDdiTable->pfnCooperativeKernelLaunchExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur_validation_layer::urEnqueueCooperativeKernelLaunchExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses @@ -7701,6 +8359,42 @@ UR_DLLEXPORT 
ur_result_t UR_APICALL urGetKernelProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.KernelExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnSuggestMaxCooperativeGroupCountExp = + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur_validation_layer::urKernelSuggestMaxCooperativeGroupCountExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses @@ -8297,7 +8991,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( } ur_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) { + const std::set &enabledLayerNames, + codeloc_data) { ur_result_t result = UR_RESULT_SUCCESS; if (enabledLayerNames.count(nameFullValidation)) { @@ -8341,6 +9036,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Enqueue); } + if (UR_RESULT_SUCCESS == result) { + result = 
ur_validation_layer::urGetEnqueueExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EnqueueExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetEventProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Event); @@ -8351,6 +9051,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Kernel); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetKernelExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->KernelExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetMemProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Mem); @@ -8409,4 +9114,14 @@ ur_result_t context_t::init(ur_dditable_t *dditable, return result; } +ur_result_t context_t::tearDown() { + ur_result_t result = UR_RESULT_SUCCESS; + + if (enableLeakChecking) { + refCountContext.logInvalidReferences(); + refCountContext.clear(); + } + return result; +} + } // namespace ur_validation_layer diff --git a/source/loader/layers/validation/ur_validation_layer.hpp b/source/loader/layers/validation/ur_validation_layer.hpp index 3201a5345e..e41c621dc8 100644 --- a/source/loader/layers/validation/ur_validation_layer.hpp +++ b/source/loader/layers/validation/ur_validation_layer.hpp @@ -34,7 +34,9 @@ class __urdlllocal context_t : public proxy_layer_context_t { return {nameFullValidation, nameParameterValidation, nameLeakChecking}; } ur_result_t init(ur_dditable_t *dditable, - const std::set &enabledLayerNames) override; + const std::set &enabledLayerNames, + codeloc_data codelocData) override; + ur_result_t tearDown() override; private: const std::string nameFullValidation = "UR_LAYER_FULL_VALIDATION"; diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 877206c062..67ddca9890 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -113,10 +113,11 @@ class AdapterRegistry { // to load the adapter. 
std::vector> adaptersLoadPaths; - static constexpr std::array knownAdapterNames{ + static constexpr std::array knownAdapterNames{ MAKE_LIBRARY_NAME("ur_adapter_level_zero", "0"), - MAKE_LIBRARY_NAME("ur_adapter_cuda", "0"), - MAKE_LIBRARY_NAME("ur_adapter_hip", "0")}; + MAKE_LIBRARY_NAME("ur_adapter_hip", "0"), + MAKE_LIBRARY_NAME("ur_adapter_opencl", "0"), + MAKE_LIBRARY_NAME("ur_adapter_cuda", "0")}; std::optional> getEnvAdapterSearchPaths() { std::optional> pathStringsOpt; diff --git a/source/loader/ur_codeloc.hpp b/source/loader/ur_codeloc.hpp new file mode 100644 index 0000000000..176ba0b13c --- /dev/null +++ b/source/loader/ur_codeloc.hpp @@ -0,0 +1,35 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ur_codeloc.hpp + * + */ + +#ifndef UR_CODELOC_HPP +#define UR_CODELOC_HPP 1 + +#include "ur_api.h" +#include + +struct codeloc_data { + codeloc_data() { + codelocCb = nullptr; + codelocUserdata = nullptr; + } + ur_code_location_callback_t codelocCb; + void *codelocUserdata; + + std::optional get_codeloc() { + if (!codelocCb) { + return std::nullopt; + } + return codelocCb(codelocUserdata); + } +}; + +#endif /* UR_CODELOC_HPP */ diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index e192088bbc..54c8f9b4c1 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -33,41 +33,6 @@ ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urInit -__urdlllocal ur_result_t UR_APICALL urInit( - ur_device_init_flags_t device_flags, ///< [in] device initialization flags. 
- ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. - ur_loader_config_handle_t - hLoaderConfig ///< [in][optional] Handle of loader config handle. -) { - ur_result_t result = UR_RESULT_SUCCESS; - - for (auto &platform : context->platforms) { - if (platform.initStatus != UR_RESULT_SUCCESS) { - continue; - } - platform.initStatus = - platform.dditable.ur.Global.pfnInit(device_flags, hLoaderConfig); - } - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTearDown -__urdlllocal ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { - ur_result_t result = UR_RESULT_SUCCESS; - - for (auto &platform : context->platforms) { - platform.dditable.ur.Global.pfnTearDown(pParams); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet __urdlllocal ur_result_t UR_APICALL urAdapterGet( @@ -475,8 +440,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. @@ -5306,7 +5271,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. 
ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -5343,8 +5308,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( try { // convert platform handle to loader handle - *phEvent = reinterpret_cast( - ur_event_factory.getInstance(*phEvent, dditable)); + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + ur_event_factory.getInstance(*phEvent, dditable)); + } } catch (std::bad_alloc &) { result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -6380,8 +6347,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMemcpyUSMExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +/// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
@@ -6400,9 +6367,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMemcpyUSMExp = - dditable->ur.CommandBufferExp.pfnAppendMemcpyUSMExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + auto pfnAppendUSMMemcpyExp = + dditable->ur.CommandBufferExp.pfnAppendUSMMemcpyExp; + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6412,7 +6379,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ->handle; // forward to device-platform - result = pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6420,8 +6387,50 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +/// @brief Intercept function for urCommandBufferAppendUSMFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendUSMFillExp = + dditable->ur.CommandBufferExp.pfnAppendUSMFillExp; + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, + size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6442,9 +6451,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferCopyExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + auto pfnAppendMemBufferCopyExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyExp; + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6460,7 +6469,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( hDstMem = reinterpret_cast(hDstMem)->handle; // forward to device-platform - result = pfnAppendMembufferCopyExp( + result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6468,8 +6477,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6490,9 +6499,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferWriteExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + auto pfnAppendMemBufferWriteExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteExp; + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6505,7 +6514,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferWriteExp(hCommandBuffer, hBuffer, offset, size, + result = pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6513,8 +6522,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6534,9 +6543,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferReadExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + auto pfnAppendMemBufferReadExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferReadExp; + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6549,7 +6558,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferReadExp(hCommandBuffer, hBuffer, offset, size, + result = pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6557,8 +6566,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferCopyRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6586,9 +6595,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferCopyRectExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + auto pfnAppendMemBufferCopyRectExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyRectExp; + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6604,7 +6613,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( hDstMem = reinterpret_cast(hDstMem)->handle; // forward to device-platform - result = pfnAppendMembufferCopyRectExp( + result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6613,8 +6622,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferWriteRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6648,9 +6657,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferWriteRectExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + auto pfnAppendMemBufferWriteRectExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteRectExp; + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6663,7 +6672,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferWriteRectExp( + result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6672,8 +6681,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urCommandBufferAppendMembufferReadRectExp -__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +/// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6705,9 +6714,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( auto dditable = reinterpret_cast(hCommandBuffer) ->dditable; - auto pfnAppendMembufferReadRectExp = - dditable->ur.CommandBufferExp.pfnAppendMembufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + auto pfnAppendMemBufferReadRectExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferReadRectExp; + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } @@ -6720,7 +6729,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( hBuffer = reinterpret_cast(hBuffer)->handle; // forward to device-platform - result = pfnAppendMembufferReadRectExp( + result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -6728,6 +6737,132 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendMemBufferFillExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendMemBufferFillExp = + dditable->ur.CommandBufferExp.pfnAppendMemBufferFillExp; + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // convert loader handle to platform handle + hBuffer = reinterpret_cast(hBuffer)->handle; + + // forward to device-platform + result = pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMPrefetchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendUSMPrefetchExp = + dditable->ur.CommandBufferExp.pfnAppendUSMPrefetchExp; + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendUSMAdviseExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendUSMAdviseExp = + dditable->ur.CommandBufferExp.pfnAppendUSMAdviseExp; + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -6793,6 +6928,109 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< 
execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnCooperativeKernelLaunchExp = + dditable->ur.EnqueueExp.pfnCooperativeKernelLaunchExp; + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + hKernel = reinterpret_cast(hKernel)->handle; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitListLocal.data(), + phEvent); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + ur_event_factory.getInstance(*phEvent, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSuggestMaxCooperativeGroupCountExp +__urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hKernel)->dditable; + auto pfnSuggestMaxCooperativeGroupCountExp = + dditable->ur.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hKernel = reinterpret_cast(hKernel)->handle; + + // forward to device-platform + result = pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -6994,8 +7232,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (ur_loader::context->platforms.size() != 1 || ur_loader::context->forceIntercept) { // return pointers to loader's DDIs - pDdiTable->pfnInit = ur_loader::urInit; - pDdiTable->pfnTearDown = ur_loader::urTearDown; pDdiTable->pfnAdapterGet = ur_loader::urAdapterGet; pDdiTable->pfnAdapterRelease = ur_loader::urAdapterRelease; pDdiTable->pfnAdapterRetain = ur_loader::urAdapterRetain; @@ -7152,20 +7388,28 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnFinalizeExp = ur_loader::urCommandBufferFinalizeExp; pDdiTable->pfnAppendKernelLaunchExp = ur_loader::urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendMemcpyUSMExp = - ur_loader::urCommandBufferAppendMemcpyUSMExp; - pDdiTable->pfnAppendMembufferCopyExp = 
- ur_loader::urCommandBufferAppendMembufferCopyExp; - pDdiTable->pfnAppendMembufferWriteExp = - ur_loader::urCommandBufferAppendMembufferWriteExp; - pDdiTable->pfnAppendMembufferReadExp = - ur_loader::urCommandBufferAppendMembufferReadExp; - pDdiTable->pfnAppendMembufferCopyRectExp = - ur_loader::urCommandBufferAppendMembufferCopyRectExp; - pDdiTable->pfnAppendMembufferWriteRectExp = - ur_loader::urCommandBufferAppendMembufferWriteRectExp; - pDdiTable->pfnAppendMembufferReadRectExp = - ur_loader::urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur_loader::urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = + ur_loader::urCommandBufferAppendUSMFillExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur_loader::urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur_loader::urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur_loader::urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur_loader::urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur_loader::urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur_loader::urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur_loader::urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur_loader::urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; } else { // return pointers directly to platform's DDIs @@ -7321,6 +7565,61 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EnqueueExp table +/// with current process' addresses +/// +/// @returns 
+/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_enqueue_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetEnqueueExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.EnqueueExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnCooperativeKernelLaunchExp = + ur_loader::urEnqueueCooperativeKernelLaunchExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.EnqueueExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Event table /// with current process' addresses @@ -7451,6 +7750,61 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's KernelExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - 
::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_kernel_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetKernelExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.KernelExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur_loader::urKernelSuggestMaxCooperativeGroupCountExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.KernelExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Mem table /// with current process' addresses diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 964da234f1..34531ca8b1 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -55,15 +55,22 @@ void context_t::parseEnvEnabledLayers() { void context_t::initLayers() const { for (auto &l : layers) { if (l->isAvailable()) { - l->init(&context->urDdiTable, enabledLayerNames); + 
l->init(&context->urDdiTable, enabledLayerNames, codelocData); + } + } +} + +void context_t::tearDownLayers() const { + for (auto &l : layers) { + if (l->isAvailable()) { + l->tearDown(); } } } ////////////////////////////////////////////////////////////////////////// -__urdlllocal ur_result_t -context_t::Init(ur_device_init_flags_t device_flags, - ur_loader_config_handle_t hLoaderConfig) { +__urdlllocal ur_result_t context_t::Init( + ur_device_init_flags_t, ur_loader_config_handle_t hLoaderConfig) { ur_result_t result; const char *logger_name = "loader"; logger::init(logger_name); @@ -72,10 +79,11 @@ context_t::Init(ur_device_init_flags_t device_flags, result = ur_loader::context->init(); if (UR_RESULT_SUCCESS == result) { - result = urInit(); + result = urLoaderInit(); } if (hLoaderConfig) { + codelocData = hLoaderConfig->codelocData; enabledLayerNames.merge(hLoaderConfig->getEnabledLayerNames()); } @@ -174,4 +182,28 @@ ur_result_t urLoaderConfigEnableLayer(ur_loader_config_handle_t hLoaderConfig, hLoaderConfig->enabledLayers.insert(pLayerName); return UR_RESULT_SUCCESS; } + +ur_result_t urLoaderTearDown() { + context->tearDownLayers(); + + return UR_RESULT_SUCCESS; +} + +ur_result_t +urLoaderConfigSetCodeLocationCallback(ur_loader_config_handle_t hLoaderConfig, + ur_code_location_callback_t pfnCodeloc, + void *pUserData) { + if (!hLoaderConfig) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + if (!pfnCodeloc) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + hLoaderConfig->codelocData.codelocCb = pfnCodeloc; + hLoaderConfig->codelocData.codelocUserdata = pUserData; + + return UR_RESULT_SUCCESS; +} + } // namespace ur_lib diff --git a/source/loader/ur_lib.hpp b/source/loader/ur_lib.hpp index 1f0f23658b..9d1e02a67e 100644 --- a/source/loader/ur_lib.hpp +++ b/source/loader/ur_lib.hpp @@ -14,6 +14,7 @@ #define UR_LOADER_LIB_H 1 #include "ur_api.h" +#include "ur_codeloc.hpp" #include "ur_ddi.h" #include "ur_proxy_layer.hpp" #include "ur_util.hpp" @@ -42,6 
+43,8 @@ struct ur_loader_config_handle_t_ { return refCount.load(std::memory_order_acquire); } std::set &getEnabledLayerNames() { return enabledLayers; } + + codeloc_data codelocData; }; namespace ur_lib { @@ -60,7 +63,7 @@ class __urdlllocal context_t { ur_result_t Init(ur_device_init_flags_t dflags, ur_loader_config_handle_t hLoaderConfig); - ur_result_t urInit(); + ur_result_t urLoaderInit(); ur_dditable_t urDdiTable = {}; const std::vector layers = { @@ -72,9 +75,12 @@ class __urdlllocal context_t { std::string availableLayers; std::set enabledLayerNames; + codeloc_data codelocData; + bool layerExists(const std::string &layerName) const; void parseEnvEnabledLayers(); void initLayers() const; + void tearDownLayers() const; }; extern context_t *context; @@ -87,5 +93,11 @@ ur_result_t urLoaderConfigGetInfo(ur_loader_config_handle_t hLoaderConfig, size_t *pPropSizeRet); ur_result_t urLoaderConfigEnableLayer(ur_loader_config_handle_t hLoaderConfig, const char *pLayerName); +ur_result_t urLoaderTearDown(); +ur_result_t +urLoaderConfigSetCodeLocationCallback(ur_loader_config_handle_t hLoaderConfig, + ur_code_location_callback_t pfnCodeloc, + void *pUserData); + } // namespace ur_lib #endif /* UR_LOADER_LIB_H */ diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 7a64efd088..374f193902 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -158,21 +158,55 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Initialize the 'oneAPI' adapter(s) +/// @brief Set a function callback for use by the loader to retrieve code +/// location information. +/// +/// @details +/// - The code location callback is optional and provides additional +/// information to the tracing layer about the entry point of the current +/// execution flow. 
+/// - This functionality can be used to match traced unified runtime +/// function calls with higher-level user calls. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hLoaderConfig` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnCodeloc` +ur_result_t UR_APICALL urLoaderConfigSetCodeLocationCallback( + ur_loader_config_handle_t + hLoaderConfig, ///< [in] Handle to config object the layer will be enabled for. + ur_code_location_callback_t + pfnCodeloc, ///< [in] Function pointer to code location callback. + void * + pUserData ///< [in][out][optional] pointer to data to be passed to callback. + ) try { + return ur_lib::urLoaderConfigSetCodeLocationCallback(hLoaderConfig, + pfnCodeloc, pUserData); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initialize the 'oneAPI' loader /// /// @details /// - The application must call this function before calling any other /// function. /// - If this function is not called then all other functions will return /// ::UR_RESULT_ERROR_UNINITIALIZED. -/// - Only one instance of each adapter will be initialized per process. +/// - Only one instance of the loader will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios -/// where multiple libraries may initialize the adapter(s) simultaneously. +/// where multiple libraries may initialize the loader simultaneously. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -182,51 +216,38 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_INIT_FLAGS_MASK & device_flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urInit( +ur_result_t UR_APICALL urLoaderInit( ur_device_init_flags_t device_flags, ///< [in] device initialization flags. ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. ur_loader_config_handle_t hLoaderConfig ///< [in][optional] Handle of loader config handle. ) try { + + if (UR_DEVICE_INIT_FLAGS_MASK & device_flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + static ur_result_t result = UR_RESULT_SUCCESS; std::call_once(ur_lib::context->initOnce, [device_flags, hLoaderConfig]() { result = ur_lib::context->Init(device_flags, hLoaderConfig); }); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - auto pfnInit = ur_lib::context->urDdiTable.Global.pfnInit; - if (nullptr == pfnInit) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - return pfnInit(device_flags, hLoaderConfig); + return result; } catch (...) 
{ return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Tear down the 'oneAPI' instance and release all its resources +/// @brief Tear down the 'oneAPI' loader and release all its resources /// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED /// - ::UR_RESULT_ERROR_DEVICE_LOST /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pParams` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters - ) try { - auto pfnTearDown = ur_lib::context->urDdiTable.Global.pfnTearDown; - if (nullptr == pfnTearDown) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - return pfnTearDown(pParams); +ur_result_t UR_APICALL urLoaderTearDown(void) try { + return ur_lib::urLoaderTearDown(); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -280,7 +301,9 @@ ur_result_t UR_APICALL urAdapterGet( /// /// @details /// - When the reference count of the adapter reaches zero, the adapter may -/// perform adapter-specififc resource teardown +/// perform adapter-specififc resource teardown. Resources must be left in +/// a state where it safe for the adapter to be subsequently reinitialized +/// with ::urAdapterGet /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -464,6 +487,7 @@ ur_result_t UR_APICALL urAdapterGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phAdapters` /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phPlatforms != NULL` ur_result_t UR_APICALL urPlatformGet( ur_adapter_handle_t * phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms. 
@@ -723,14 +747,18 @@ ur_result_t UR_APICALL urPlatformGetBackendOption( /// + `NULL == hPlatform` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_TYPE_VPU < DeviceType` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phDevices != NULL` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NumEntries > 0 && phDevices == NULL` /// - ::UR_RESULT_ERROR_INVALID_VALUE ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -905,6 +933,7 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// + `NULL == pProperties->pProperties` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT ur_result_t UR_APICALL urDevicePartition( @@ -1124,6 +1153,8 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` /// + `NULL == phContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_CONTEXT_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ur_result_t UR_APICALL urContextCreate( @@ -2098,6 +2129,8 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2153,6 +2186,8 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2210,6 +2245,8 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2718,6 +2755,8 @@ ur_result_t UR_APICALL 
urVirtualMemGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phPhysicalMem` /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -3791,6 +3830,8 @@ ur_result_t UR_APICALL urKernelSetArgSampler( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_MEM_FLAGS_MASK & pProperties->memoryAccess` /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object @@ -4005,12 +4046,15 @@ ur_result_t UR_APICALL urQueueGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_QUEUE_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phQueue` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE -/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW` +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urQueueCreate( @@ -4323,6 +4367,8 @@ ur_result_t UR_APICALL urEventGetInfo( /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated 
queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -4538,6 +4584,8 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - The registered callback function will be called when the execution /// status of command associated with event changes to an execution status /// equal to or past the status specified by command_exec_status. +/// - `execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the +/// initial state of all events. /// - The application may call this function from simultaneous threads for /// the same context. /// - The implementation of this function should be thread-safe. @@ -4550,9 +4598,11 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus` +/// + `::UR_EXECUTION_INFO_QUEUED < execStatus` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pfnNotify` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + `execStatus == UR_EXECUTION_INFO_QUEUED` ur_result_t UR_APICALL urEventSetCallback( ur_event_handle_t hEvent, ///< [in] handle of the event object ur_execution_info_t execStatus, ///< [in] execution status of the event @@ -5206,6 +5256,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + `offset % patternSize != 0` /// + If `offset + size` results in an out-of-bounds access. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5268,6 +5323,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageRead( @@ -5333,6 +5390,8 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -5394,6 +5453,8 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageCopy( @@ -5667,6 +5728,11 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to prefetch USM memory /// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -5719,6 +5785,11 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to set USM memory advice /// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -6059,7 +6130,6 @@ ur_result_t UR_APICALL urEnqueueReadHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pipe_symbol` /// + `NULL == pSrc` -/// + `NULL == phEvent` /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` @@ -6086,7 +6156,7 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. 
) try { auto pfnWriteHostPipe = @@ -6128,6 +6198,8 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// + `NULL == pResultPitch` @@ -7116,7 +7188,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. @@ -7129,19 +7201,74 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMemcpyUSMExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemcpyUSMExp; - if (nullptr == pfnAppendMemcpyUSMExp) { + auto pfnAppendUSMMemcpyExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; + if (nullptr == pfnAppendUSMMemcpyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, + return pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) 
{ return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + If `size` is higher than the allocation size of `ptr` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + auto pfnAppendUSMFillExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; + if (nullptr == pfnAppendUSMFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, + size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Append a memory copy command to a command-buffer object /// @@ -7162,7 +7289,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. @@ -7177,13 +7304,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferCopyExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMembufferCopyExp; - if (nullptr == pfnAppendMembufferCopyExp) { + auto pfnAppendMemBufferCopyExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; + if (nullptr == pfnAppendMemBufferCopyExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferCopyExp( + return pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) 
{ @@ -7211,7 +7338,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -7226,13 +7353,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferWriteExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMembufferWriteExp; - if (nullptr == pfnAppendMembufferWriteExp) { + auto pfnAppendMemBufferWriteExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; + if (nullptr == pfnAppendMemBufferWriteExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferWriteExp(hCommandBuffer, hBuffer, offset, size, + return pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) { @@ -7260,7 +7387,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7274,13 +7401,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferReadExp = - ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMembufferReadExp; - if (nullptr == pfnAppendMembufferReadExp) { + auto pfnAppendMemBufferReadExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; + if (nullptr == pfnAppendMemBufferReadExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferReadExp(hCommandBuffer, hBuffer, offset, size, + return pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); } catch (...) { @@ -7307,7 +7434,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -7329,14 +7456,14 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferCopyRectExp = + auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp - .pfnAppendMembufferCopyRectExp; - if (nullptr == pfnAppendMembufferCopyRectExp) { + .pfnAppendMemBufferCopyRectExp; + if (nullptr == pfnAppendMemBufferCopyRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferCopyRectExp( + return pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -7365,7 +7492,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7393,14 +7520,14 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferWriteRectExp = + auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp - .pfnAppendMembufferWriteRectExp; - if (nullptr == pfnAppendMembufferWriteRectExp) { + .pfnAppendMemBufferWriteRectExp; + if (nullptr == pfnAppendMemBufferWriteRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferWriteRectExp( + return pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -7429,7 +7556,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -7455,14 +7582,14 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command ) try { - auto pfnAppendMembufferReadRectExp = + auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp - .pfnAppendMembufferReadRectExp; - if (nullptr == pfnAppendMembufferReadRectExp) { + .pfnAppendMemBufferReadRectExp; + if (nullptr == pfnAppendMemBufferReadRectExp) { return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMembufferReadRectExp( + return pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); @@ -7470,6 +7597,170 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a memory fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// + `NULL == hBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + If `offset + size` results in an out-of-bounds access. 
+/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + auto pfnAppendMemBufferFillExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; + if (nullptr == pfnAppendMemBufferFillExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendMemBufferFillExp( + hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Prefetch command to a command-buffer object +/// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_MIGRATION_FLAGS_MASK & flags` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. 
+ ) try { + auto pfnAppendUSMPrefetchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; + if (nullptr == pfnAppendUSMPrefetchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Advise command to a command-buffer object +/// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_ADVICE_FLAGS_MASK & advice` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. 
+ ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. + ) try { + auto pfnAppendUSMAdviseExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; + if (nullptr == pfnAppendUSMAdviseExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -7517,6 +7808,103 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a command to execute a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGlobalWorkOffset` +/// + `NULL == pGlobalWorkSize` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+ ) try { + auto pfnCooperativeKernelLaunchExp = + ur_lib::context->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp; + if (nullptr == pfnCooperativeKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query the maximum number of work groups for a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGroupCountRet` +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups + ) try { + auto pfnSuggestMaxCooperativeGroupCountExp = + ur_lib::context->urDdiTable.KernelExp + .pfnSuggestMaxCooperativeGroupCountExp; + if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnSuggestMaxCooperativeGroupCountExp(hKernel, pGroupCountRet); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Import memory into USM /// diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index 1328a2b071..9d0a2566f4 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -17,7 +17,7 @@ namespace ur_lib { /////////////////////////////////////////////////////////////////////////////// -__urdlllocal ur_result_t context_t::urInit() { +__urdlllocal ur_result_t context_t::urLoaderInit() { ur_result_t result = UR_RESULT_SUCCESS; if (UR_RESULT_SUCCESS == result) { @@ -45,6 +45,11 @@ __urdlllocal ur_result_t context_t::urInit() { &urDdiTable.Enqueue); } + if (UR_RESULT_SUCCESS == result) { + result = urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.EnqueueExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Event); @@ -55,6 +60,11 @@ __urdlllocal ur_result_t context_t::urInit() { &urDdiTable.Kernel); } + if (UR_RESULT_SUCCESS == result) { + result = urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.KernelExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Mem); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index fac4d47c2d..d465a83cfa 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -7,7 +7,7 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * @file ur_api.cpp - * @version v0.7-r0 + * @version v0.8-r0 * */ #include "ur_api.h" @@ -149,21 +149,53 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Initialize the 'oneAPI' adapter(s) +/// @brief Set a function callback for use by the loader to retrieve code +/// location information. 
+/// +/// @details +/// - The code location callback is optional and provides additional +/// information to the tracing layer about the entry point of the current +/// execution flow. +/// - This functionality can be used to match traced unified runtime +/// function calls with higher-level user calls. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hLoaderConfig` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnCodeloc` +ur_result_t UR_APICALL urLoaderConfigSetCodeLocationCallback( + ur_loader_config_handle_t + hLoaderConfig, ///< [in] Handle to config object the layer will be enabled for. + ur_code_location_callback_t + pfnCodeloc, ///< [in] Function pointer to code location callback. + void * + pUserData ///< [in][out][optional] pointer to data to be passed to callback. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Initialize the 'oneAPI' loader /// /// @details /// - The application must call this function before calling any other /// function. /// - If this function is not called then all other functions will return /// ::UR_RESULT_ERROR_UNINITIALIZED. -/// - Only one instance of each adapter will be initialized per process. +/// - Only one instance of the loader will be initialized per process. /// - The application may call this function multiple times with different /// flags or environment variables enabled. /// - The application must call this function after forking new processes. /// Each forked process must call this function. /// - The application may call this function from simultaneous threads. /// - The implementation of this function must be thread-safe for scenarios -/// where multiple libraries may initialize the adapter(s) simultaneously. 
+/// where multiple libraries may initialize the loader simultaneously. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -173,7 +205,7 @@ ur_result_t UR_APICALL urLoaderConfigEnableLayer( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_INIT_FLAGS_MASK & device_flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urInit( +ur_result_t UR_APICALL urLoaderInit( ur_device_init_flags_t device_flags, ///< [in] device initialization flags. ///< must be 0 (default) or a combination of ::ur_device_init_flag_t. ur_loader_config_handle_t @@ -184,19 +216,15 @@ ur_result_t UR_APICALL urInit( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Tear down the 'oneAPI' instance and release all its resources +/// @brief Tear down the 'oneAPI' loader and release all its resources /// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED /// - ::UR_RESULT_ERROR_DEVICE_LOST /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pParams` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -ur_result_t UR_APICALL urTearDown( - void *pParams ///< [in] pointer to tear down parameters -) { +ur_result_t UR_APICALL urLoaderTearDown(void) { ur_result_t result = UR_RESULT_SUCCESS; return result; } @@ -244,7 +272,9 @@ ur_result_t UR_APICALL urAdapterGet( /// /// @details /// - When the reference count of the adapter reaches zero, the adapter may -/// perform adapter-specififc resource teardown +/// perform adapter-specififc resource teardown. 
Resources must be left in +/// a state where it safe for the adapter to be subsequently reinitialized +/// with ::urAdapterGet /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -400,6 +430,7 @@ ur_result_t UR_APICALL urAdapterGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phAdapters` /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phPlatforms != NULL` ur_result_t UR_APICALL urPlatformGet( ur_adapter_handle_t * phAdapters, ///< [in][range(0, NumAdapters)] array of adapters to query for platforms. @@ -618,14 +649,18 @@ ur_result_t UR_APICALL urPlatformGetBackendOption( /// + `NULL == hPlatform` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_DEVICE_TYPE_VPU < DeviceType` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `NumEntries == 0 && phDevices != NULL` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NumEntries > 0 && phDevices == NULL` /// - ::UR_RESULT_ERROR_INVALID_VALUE ur_result_t UR_APICALL urDeviceGet( ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to phDevices. - ///< If phDevices in not NULL then NumEntries should be greater than zero, - ///< otherwise ::UR_RESULT_ERROR_INVALID_VALUE, + ///< If phDevices is not NULL, then NumEntries should be greater than zero. + ///< Otherwise ::UR_RESULT_ERROR_INVALID_SIZE ///< will be returned. ur_device_handle_t * phDevices, ///< [out][optional][range(0, NumEntries)] array of handle of devices. 
@@ -776,6 +811,7 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// + `NULL == pProperties->pProperties` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT ur_result_t UR_APICALL urDevicePartition( @@ -960,6 +996,8 @@ ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` /// + `NULL == phContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_CONTEXT_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY ur_result_t UR_APICALL urContextCreate( @@ -1781,6 +1819,8 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -1830,6 +1870,8 @@ ur_result_t UR_APICALL urUSMHostAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -1881,6 +1923,8 @@ ur_result_t UR_APICALL urUSMDeviceAlloc( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT @@ -2300,6 +2344,8 @@ ur_result_t UR_APICALL 
urVirtualMemGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_PHYSICAL_MEM_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phPhysicalMem` /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -3199,6 +3245,8 @@ ur_result_t UR_APICALL urKernelSetArgSampler( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_MEM_FLAGS_MASK & pProperties->memoryAccess` /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object @@ -3379,12 +3427,15 @@ ur_result_t UR_APICALL urQueueGetInfo( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_QUEUE_FLAGS_MASK & pProperties->flags` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phQueue` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE -/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_HIGH && pProperties->flags & UR_QUEUE_FLAG_PRIORITY_LOW` +/// + `pProperties != NULL && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_BATCHED && pProperties->flags & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urQueueCreate( @@ -3646,6 +3697,8 @@ ur_result_t UR_APICALL urEventGetInfo( /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated 
queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT @@ -3820,6 +3873,8 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - The registered callback function will be called when the execution /// status of command associated with event changes to an execution status /// equal to or past the status specified by command_exec_status. +/// - `execStatus` must not be `UR_EXECUTION_INFO_QUEUED` as this is the +/// initial state of all events. /// - The application may call this function from simultaneous threads for /// the same context. /// - The implementation of this function should be thread-safe. @@ -3832,9 +3887,11 @@ ur_result_t UR_APICALL urEventCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hEvent` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED < execStatus` +/// + `::UR_EXECUTION_INFO_QUEUED < execStatus` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pfnNotify` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + `execStatus == UR_EXECUTION_INFO_QUEUED` ur_result_t UR_APICALL urEventSetCallback( ur_event_handle_t hEvent, ///< [in] handle of the event object ur_execution_info_t execStatus, ///< [in] execution status of the event @@ -4405,6 +4462,11 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + `offset % patternSize != 0` /// + If `offset + size` results in an out-of-bounds access. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4458,6 +4520,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageRead( @@ -4515,6 +4579,8 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -4567,6 +4633,8 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `region.width == 0 || region.height == 0 || region.depth == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urEnqueueMemImageCopy( @@ -4803,6 +4871,11 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to prefetch USM memory /// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. 
+/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -4848,6 +4921,11 @@ ur_result_t UR_APICALL urEnqueueUSMPrefetch( /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to set USM memory advice /// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. +/// /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_UNINITIALIZED @@ -5142,7 +5220,6 @@ ur_result_t UR_APICALL urEnqueueReadHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pipe_symbol` /// + `NULL == pSrc` -/// + `NULL == phEvent` /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` @@ -5169,7 +5246,7 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( ///< events that must be complete before the host pipe write. ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. ur_event_handle_t * - phEvent ///< [out] returns an event object that identifies this write command + phEvent ///< [out][optional] returns an event object that identifies this write command ///< and can be used to query or queue a wait for this command to complete. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -5203,6 +5280,8 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` /// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pUSMDesc && ::UR_USM_ADVICE_FLAGS_MASK & pUSMDesc->hints` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == ppMem` /// + `NULL == pResultPitch` @@ -6007,7 +6086,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. @@ -6024,6 +6103,52 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `patternSize == 0 || size == 0` +/// + `patternSize > size` +/// + `(patternSize & (patternSize - 1)) != 0` +/// + `size % patternSize != 0` +/// + If `size` is higher than the allocation size of `ptr` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + 
`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + void *pMemory, ///< [in] pointer to USM allocated memory to fill. + const void *pPattern, ///< [in] pointer to the fill pattern. + size_t patternSize, ///< [in] size in bytes of the pattern. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Append a memory copy command to a command-buffer object /// @@ -6044,7 +6169,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. 
@@ -6084,7 +6209,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -6124,7 +6249,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -6162,7 +6287,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. @@ -6209,7 +6334,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
@@ -6262,7 +6387,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( +ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. @@ -6292,6 +6417,143 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a memory fill command to a command-buffer object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// + `NULL == hBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPattern` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + If `offset + size` results in an out-of-bounds access. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + const void *pPattern, ///< [in] pointer to the fill pattern. 
+ size_t patternSize, ///< [in] size in bytes of the pattern. + size_t offset, ///< [in] offset into the buffer. + size_t + size, ///< [in] fill size in bytes, must be a multiple of patternSize. + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Prefetch command to a command-buffer object +/// +/// @details +/// - Prefetching may not be supported for all devices or allocation types. +/// If memory prefetching is not supported, the prefetch hint will be +/// ignored. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_MIGRATION_FLAGS_MASK & flags` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( + 
ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to USM allocated memory to prefetch. + size_t size, ///< [in] size in bytes to be fetched. + ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append a USM Advise command to a command-buffer object +/// +/// @details +/// - Not all memory advice hints may be supported for all devices or +/// allocation types. If a memory advice hint is not supported, it will be +/// ignored. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMemory` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_ADVICE_FLAGS_MASK & advice` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `size == 0` +/// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object. + const void *pMemory, ///< [in] pointer to the USM memory object. + size_t size, ///< [in] size in bytes to be advised. + ur_usm_advice_flags_t advice, ///< [in] USM memory advice + uint32_t + numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. + const ur_exp_command_buffer_sync_point_t * + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] sync point associated with this command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. 
/// @@ -6331,6 +6593,86 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a command to execute a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGlobalWorkOffset` +/// + `NULL == pGlobalWorkSize` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the 
number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Query the maximum number of work groups for a cooperative kernel +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hKernel` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pGroupCountRet` +/// - ::UR_RESULT_ERROR_INVALID_KERNEL +ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Import memory into USM /// diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index e90b74f4cd..993419bfb0 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -85,7 +85,7 @@ endfunction() add_subdirectory(testing) add_subdirectory(adapters) -add_subdirectory(runtime) +add_subdirectory(adapter) 
add_subdirectory(platform) add_subdirectory(device) add_subdirectory(context) diff --git a/test/conformance/runtime/CMakeLists.txt b/test/conformance/adapter/CMakeLists.txt similarity index 77% rename from test/conformance/runtime/CMakeLists.txt rename to test/conformance/adapter/CMakeLists.txt index 8c46abd82b..8d71a3cf6a 100644 --- a/test/conformance/runtime/CMakeLists.txt +++ b/test/conformance/adapter/CMakeLists.txt @@ -3,11 +3,9 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -add_conformance_test(runtime +add_conformance_test(adapter urAdapterGet.cpp urAdapterGetInfo.cpp urAdapterGetLastError.cpp urAdapterRetain.cpp - urAdapterRelease.cpp - urInit.cpp - urTearDown.cpp) + urAdapterRelease.cpp) diff --git a/test/conformance/runtime/fixtures.h b/test/conformance/adapter/fixtures.h similarity index 89% rename from test/conformance/runtime/fixtures.h rename to test/conformance/adapter/fixtures.h index 04f72617dd..31b2a2265d 100644 --- a/test/conformance/runtime/fixtures.h +++ b/test/conformance/adapter/fixtures.h @@ -14,15 +14,14 @@ struct urTest : ::testing::Test { ASSERT_SUCCESS(urLoaderConfigCreate(&loader_config)); ASSERT_SUCCESS(urLoaderConfigEnableLayer(loader_config, "UR_LAYER_FULL_VALIDATION")); - ASSERT_SUCCESS(urInit(device_flags, loader_config)); + ASSERT_SUCCESS(urLoaderInit(device_flags, loader_config)); } void TearDown() override { if (loader_config) { ASSERT_SUCCESS(urLoaderConfigRelease(loader_config)); } - ur_tear_down_params_t tear_down_params{}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); + ASSERT_SUCCESS(urLoaderTearDown()); } ur_loader_config_handle_t loader_config = nullptr; @@ -35,6 +34,7 @@ struct urAdapterTest : urTest { uint32_t adapter_count; ASSERT_SUCCESS(urAdapterGet(0, nullptr, &adapter_count)); + ASSERT_GT(adapter_count, 0); adapters.resize(adapter_count); ASSERT_SUCCESS(urAdapterGet(adapter_count, adapters.data(), nullptr)); } diff --git a/test/conformance/runtime/urAdapterGet.cpp 
b/test/conformance/adapter/urAdapterGet.cpp similarity index 100% rename from test/conformance/runtime/urAdapterGet.cpp rename to test/conformance/adapter/urAdapterGet.cpp diff --git a/test/conformance/runtime/urAdapterGetInfo.cpp b/test/conformance/adapter/urAdapterGetInfo.cpp similarity index 100% rename from test/conformance/runtime/urAdapterGetInfo.cpp rename to test/conformance/adapter/urAdapterGetInfo.cpp diff --git a/test/conformance/runtime/urAdapterGetLastError.cpp b/test/conformance/adapter/urAdapterGetLastError.cpp similarity index 100% rename from test/conformance/runtime/urAdapterGetLastError.cpp rename to test/conformance/adapter/urAdapterGetLastError.cpp diff --git a/test/conformance/runtime/urAdapterRelease.cpp b/test/conformance/adapter/urAdapterRelease.cpp similarity index 100% rename from test/conformance/runtime/urAdapterRelease.cpp rename to test/conformance/adapter/urAdapterRelease.cpp diff --git a/test/conformance/runtime/urAdapterRetain.cpp b/test/conformance/adapter/urAdapterRetain.cpp similarity index 100% rename from test/conformance/runtime/urAdapterRetain.cpp rename to test/conformance/adapter/urAdapterRetain.cpp diff --git a/test/conformance/context/urContextCreateWithNativeHandle.cpp b/test/conformance/context/urContextCreateWithNativeHandle.cpp index 04ef93c0f0..69771362b4 100644 --- a/test/conformance/context/urContextCreateWithNativeHandle.cpp +++ b/test/conformance/context/urContextCreateWithNativeHandle.cpp @@ -20,7 +20,7 @@ TEST_P(urContextCreateWithNativeHandleTest, Success) { // and perform some query on it to verify that it works. 
ur_context_handle_t ctx = nullptr; ur_context_native_properties_t props{}; - ASSERT_SUCCESS(urContextCreateWithNativeHandle(native_context, 0, nullptr, + ASSERT_SUCCESS(urContextCreateWithNativeHandle(native_context, 1, &device, &props, &ctx)); ASSERT_NE(ctx, nullptr); diff --git a/test/conformance/device/urDeviceGet.cpp b/test/conformance/device/urDeviceGet.cpp index 85a4818d09..e8aa356a58 100644 --- a/test/conformance/device/urDeviceGet.cpp +++ b/test/conformance/device/urDeviceGet.cpp @@ -49,7 +49,7 @@ TEST_F(urDeviceGetTest, InvalidEnumerationDevicesType) { urDeviceGet(platform, UR_DEVICE_TYPE_FORCE_UINT32, 0, nullptr, &count)); } -TEST_F(urDeviceGetTest, InvalidValueNumEntries) { +TEST_F(urDeviceGetTest, InvalidSizeNumEntries) { uint32_t count = 0; ASSERT_SUCCESS( urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, &count)); @@ -59,3 +59,13 @@ TEST_F(urDeviceGetTest, InvalidValueNumEntries) { UR_RESULT_ERROR_INVALID_SIZE, urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, devices.data(), nullptr)); } + +TEST_F(urDeviceGetTest, InvalidNullPointerDevices) { + uint32_t count = 0; + ASSERT_SUCCESS( + urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, &count)); + ASSERT_NE(count, 0); + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urDeviceGet(platform, UR_DEVICE_TYPE_ALL, count, nullptr, nullptr)); +} diff --git a/test/conformance/device/urDeviceGetInfo.cpp b/test/conformance/device/urDeviceGetInfo.cpp index e5e9f7c310..757e09b6fa 100644 --- a/test/conformance/device/urDeviceGetInfo.cpp +++ b/test/conformance/device/urDeviceGetInfo.cpp @@ -240,6 +240,14 @@ INSTANTIATE_TEST_SUITE_P( return ss.str(); }); +bool doesReturnArray(ur_device_info_t info_type) { + if (info_type == UR_DEVICE_INFO_SUPPORTED_PARTITIONS || + info_type == UR_DEVICE_INFO_PARTITION_TYPE) { + return true; + } + return false; +} + TEST_P(urDeviceGetInfoTest, Success) { ur_device_info_t info_type = GetParam(); for (auto device : devices) { @@ -248,7 +256,11 @@ TEST_P(urDeviceGetInfoTest, 
Success) { urDeviceGetInfo(device, info_type, 0, nullptr, &size); if (result == UR_RESULT_SUCCESS) { + if (doesReturnArray(info_type) && size == 0) { + return; + } ASSERT_NE(size, 0); + if (const auto expected_size = device_info_size_map.find(info_type); expected_size != device_info_size_map.end()) { ASSERT_EQ(expected_size->second, size); diff --git a/test/conformance/enqueue/urEnqueueEventsWait.cpp b/test/conformance/enqueue/urEnqueueEventsWait.cpp index a80c884e87..0b7db213dc 100644 --- a/test/conformance/enqueue/urEnqueueEventsWait.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWait.cpp @@ -80,4 +80,8 @@ TEST_P(urEnqueueEventsWaitTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT(urEnqueueEventsWait(queue1, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueEventsWait(queue1, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp index ca465e937e..a107ed7b9c 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp @@ -89,4 +89,9 @@ TEST_P(urEnqueueEventsWaitWithBarrierTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT( urEnqueueEventsWaitWithBarrier(queue1, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueEventsWaitWithBarrier(queue1, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 0bca070da9..d9cb79e372 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -56,6 +56,12 @@ TEST_P(urEnqueueKernelLaunchTest, InvalidNullPtrEventWaitList) { 
&global_offset, &global_size, nullptr, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueKernelLaunch(queue, kernel, n_dimensions, + &global_offset, &global_size, + nullptr, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueKernelLaunchTest, InvalidWorkDimension) { @@ -210,3 +216,49 @@ TEST_P(urEnqueueKernelLaunchWithVirtualMemory, Success) { ASSERT_EQ(fill_val, data.at(i)); } } + +struct urEnqueueKernelLaunchMultiDeviceTest : public urEnqueueKernelLaunchTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urEnqueueKernelLaunchTest::SetUp()); + queues.reserve(uur::DevicesEnvironment::instance->devices.size()); + for (const auto &device : uur::DevicesEnvironment::instance->devices) { + ur_queue_handle_t queue = nullptr; + ASSERT_SUCCESS(urQueueCreate(this->context, device, 0, &queue)); + queues.push_back(queue); + } + } + + void TearDown() override { + for (const auto &queue : queues) { + EXPECT_SUCCESS(urQueueRelease(queue)); + } + UUR_RETURN_ON_FATAL_FAILURE(urEnqueueKernelLaunchTest::TearDown()); + } + + std::vector queues; +}; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchMultiDeviceTest); + +TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) { + ur_mem_handle_t buffer = nullptr; + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + AddPodArg(val); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernel, n_dimensions, + &global_offset, &global_size, nullptr, + 0, nullptr, nullptr)); + + // Wait for the queue to finish executing. + EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + + // Then the remaining queues do blocking reads from the buffer. Since the + // queues target different devices this checks that any devices memory has + // been synchronized. 
+ for (unsigned i = 1; i < queues.size(); ++i) { + const auto queue = queues[i]; + uint32_t output = 0; + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, buffer, true, 0, + sizeof(output), &output, 0, + nullptr, nullptr)); + ASSERT_EQ(val, output) << "Result on queue " << i << " did not match!"; + } +} diff --git a/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp b/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp index 3eb2308702..f226e7597a 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp @@ -74,6 +74,11 @@ TEST_P(urEnqueueMemBufferCopyTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT(urEnqueueMemBufferCopy(queue, src_buffer, dst_buffer, 0, 0, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferCopy(queue, src_buffer, dst_buffer, 0, 0, + size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferCopyTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp index f330503211..873c4953a7 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp @@ -219,6 +219,13 @@ TEST_P(urEnqueueMemBufferCopyRectTest, InvalidNullPtrEventWaitList) { src_region, size, size, size, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferCopyRect(queue, src_buffer, dst_buffer, + src_origin, dst_origin, + src_region, size, size, size, + size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } using urEnqueueMemBufferCopyRectMultiDeviceTest = diff --git a/test/conformance/enqueue/urEnqueueMemBufferFill.cpp b/test/conformance/enqueue/urEnqueueMemBufferFill.cpp index cbeae5e85c..d1002960fb 100644 --- 
a/test/conformance/enqueue/urEnqueueMemBufferFill.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferFill.cpp @@ -183,6 +183,12 @@ TEST_P(urEnqueueMemBufferFillNegativeTest, InvalidNullPtrEventWaitList) { sizeof(uint32_t), 0, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferFill(queue, buffer, &pattern, + sizeof(uint32_t), 0, size, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferFillNegativeTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp index 5ed576d6f3..fc44360c22 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp @@ -187,6 +187,12 @@ TEST_P(urEnqueueMemBufferMapTest, InvalidNullPtrEventWaitList) { 0, size, 0, &validEvent, nullptr, &map), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferMap(queue, buffer, true, + UR_MAP_FLAG_READ | UR_MAP_FLAG_WRITE, + 0, size, 1, &inv_evt, nullptr, &map), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferMapTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferRead.cpp b/test/conformance/enqueue/urEnqueueMemBufferRead.cpp index 0192333783..6410d6feed 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferRead.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferRead.cpp @@ -49,6 +49,12 @@ TEST_P(urEnqueueMemBufferReadTest, InvalidNullPtrEventWaitList) { output.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferRead(queue, buffer, true, 0, size, + output.data(), 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferReadTest, InvalidSize) { diff --git 
a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp index ae0cc05332..7068985dfb 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp @@ -176,12 +176,20 @@ TEST_P(urEnqueueMemBufferReadRectTest, InvalidNullPtrEventWaitList) { host_offset, region, size, size, size, size, dst.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueMemBufferReadRect(queue, buffer, true, buffer_offset, + host_offset, region, size, size, size, size, + dst.data(), 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } using urEnqueueMemBufferReadRectMultiDeviceTest = uur::urMultiDeviceMemBufferQueueTest; -TEST_F(urEnqueueMemBufferReadRectMultiDeviceTest, WriteReadDifferentQueues) { +TEST_F(urEnqueueMemBufferReadRectMultiDeviceTest, + WriteRectReadDifferentQueues) { // First queue does a blocking write of 42 into the buffer. // Then a rectangular write the buffer as 1024x1x1 1D. 
std::vector input(count, 42); diff --git a/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp b/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp index 913d583058..aea6b8face 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferWrite.cpp @@ -61,6 +61,12 @@ TEST_P(urEnqueueMemBufferWriteTest, InvalidNullPtrEventWaitList) { input.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemBufferWrite(queue, buffer, true, 0, size, + input.data(), 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferWriteTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp index e41991e727..d3c7e5c7a3 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp @@ -183,6 +183,13 @@ TEST_P(urEnqueueMemBufferWriteRectTest, InvalidNullPtrEventWaitList) { host_offset, region, size, size, size, size, src.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueMemBufferWriteRect(queue, buffer, true, buffer_offset, + host_offset, region, size, size, size, size, + src.data(), 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemBufferWriteRectTest, InvalidSize) { diff --git a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp index d3cb5b566e..a22b4baa37 100644 --- a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp @@ -62,7 +62,7 @@ struct urEnqueueMemImageCopyTest void TearDown() override { if (srcImage) { - EXPECT_SUCCESS(urMemRelease(dstImage)); + EXPECT_SUCCESS(urMemRelease(srcImage)); } if 
(dstImage) { EXPECT_SUCCESS(urMemRelease(dstImage)); @@ -233,6 +233,12 @@ TEST_P(urEnqueueMemImageCopyTest, InvalidNullPtrEventWaitList) { {0, 0, 0}, size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemImageCopy(queue, srcImage, dstImage, {0, 0, 0}, + {0, 0, 0}, size, 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemImageCopyTest, InvalidSize) { @@ -245,3 +251,63 @@ TEST_P(urEnqueueMemImageCopyTest, InvalidSize) { {1, 0, 0}, size, 0, nullptr, nullptr)); } + +using urEnqueueMemImageCopyMultiDeviceTest = + uur::urMultiDeviceMemImageWriteTest; + +TEST_F(urEnqueueMemImageCopyMultiDeviceTest, CopyReadDifferentQueues) { + ur_mem_handle_t dstImage1D = nullptr; + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format, + &desc1D, nullptr, &dstImage1D)); + ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image1D, dstImage1D, origin, + origin, region1D, 0, nullptr, + nullptr)); + + ur_mem_handle_t dstImage2D = nullptr; + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format, + &desc2D, nullptr, &dstImage2D)); + ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image2D, dstImage2D, origin, + origin, region2D, 0, nullptr, + nullptr)); + + ur_mem_handle_t dstImage3D = nullptr; + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, &format, + &desc3D, nullptr, &dstImage3D)); + ASSERT_SUCCESS(urEnqueueMemImageCopy(queues[0], image3D, dstImage3D, origin, + origin, region3D, 0, nullptr, + nullptr)); + + // Wait for the queue to finish executing. + EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + + // The remaining queues do blocking reads from the image1D/2D/3D. Since the + // queues target different devices this checks that any devices memory has + // been synchronized. 
+ for (unsigned i = 1; i < queues.size(); ++i) { + const auto queue = queues[i]; + + std::vector output1D(width * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image1D, true, origin, + region1D, 0, 0, output1D.data(), 0, + nullptr, nullptr)); + + std::vector output2D(width * height * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image2D, true, origin, + region2D, 0, 0, output2D.data(), 0, + nullptr, nullptr)); + + std::vector output3D(width * height * depth * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image3D, true, origin, + region3D, 0, 0, output3D.data(), 0, + nullptr, nullptr)); + + ASSERT_EQ(input1D, output1D) + << "Result on queue " << i << " for 1D image did not match!"; + + ASSERT_EQ(input2D, output2D) + << "Result on queue " << i << " for 2D image did not match!"; + + ASSERT_EQ(input3D, output3D) + << "Result on queue " << i << " for 3D image did not match!"; + } +} diff --git a/test/conformance/enqueue/urEnqueueMemImageRead.cpp b/test/conformance/enqueue/urEnqueueMemImageRead.cpp index d40625c3e1..d4cf322958 100644 --- a/test/conformance/enqueue/urEnqueueMemImageRead.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageRead.cpp @@ -69,6 +69,12 @@ TEST_P(urEnqueueMemImageReadTest, InvalidNullPtrEventWaitList) { region1D, 0, 0, output.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemImageRead(queue, image1D, true, origin, + region1D, 0, 0, output.data(), 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemImageReadTest, InvalidOrigin1D) { @@ -124,3 +130,39 @@ TEST_P(urEnqueueMemImageReadTest, InvalidRegion3D) { bad_region, 0, 0, output.data(), 0, nullptr, nullptr)); } + +using urEnqueueMemImageReadMultiDeviceTest = + uur::urMultiDeviceMemImageWriteTest; + +TEST_F(urEnqueueMemImageReadMultiDeviceTest, WriteReadDifferentQueues) { + // The remaining queues do blocking reads from the image1D/2D/3D. 
Since the + // queues target different devices this checks that any devices memory has + // been synchronized. + for (unsigned i = 1; i < queues.size(); ++i) { + const auto queue = queues[i]; + + std::vector output1D(width * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image1D, true, origin, + region1D, 0, 0, output1D.data(), 0, + nullptr, nullptr)); + + std::vector output2D(width * height * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image2D, true, origin, + region2D, 0, 0, output2D.data(), 0, + nullptr, nullptr)); + + std::vector output3D(width * height * depth * 4, 42); + ASSERT_SUCCESS(urEnqueueMemImageRead(queue, image3D, true, origin, + region3D, 0, 0, output3D.data(), 0, + nullptr, nullptr)); + + ASSERT_EQ(input1D, output1D) + << "Result on queue " << i << " for 1D image did not match!"; + + ASSERT_EQ(input2D, output2D) + << "Result on queue " << i << " for 2D image did not match!"; + + ASSERT_EQ(input3D, output3D) + << "Result on queue " << i << " for 3D image did not match!"; + } +} diff --git a/test/conformance/enqueue/urEnqueueMemImageWrite.cpp b/test/conformance/enqueue/urEnqueueMemImageWrite.cpp index 7f7968bdff..76b5f0b4dd 100644 --- a/test/conformance/enqueue/urEnqueueMemImageWrite.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageWrite.cpp @@ -66,6 +66,12 @@ TEST_P(urEnqueueMemImageWriteTest, InvalidNullPtrEventWaitList) { region1D, 0, 0, input.data(), 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueMemImageWrite(queue, image1D, true, origin, + region1D, 0, 0, input.data(), 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } TEST_P(urEnqueueMemImageWriteTest, InvalidOrigin1D) { diff --git a/test/conformance/enqueue/urEnqueueMemUnmap.cpp b/test/conformance/enqueue/urEnqueueMemUnmap.cpp index a205ded3b9..046d3088d9 100644 --- a/test/conformance/enqueue/urEnqueueMemUnmap.cpp +++ 
b/test/conformance/enqueue/urEnqueueMemUnmap.cpp @@ -50,4 +50,9 @@ TEST_P(urEnqueueMemUnmapTest, InvalidNullPtrEventWaitList) { ASSERT_EQ_RESULT( urEnqueueMemUnmap(queue, buffer, map, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT( + urEnqueueMemUnmap(queue, buffer, map, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueReadHostPipe.cpp b/test/conformance/enqueue/urEnqueueReadHostPipe.cpp index 93e82b6531..379ee23f9d 100644 --- a/test/conformance/enqueue/urEnqueueReadHostPipe.cpp +++ b/test/conformance/enqueue/urEnqueueReadHostPipe.cpp @@ -76,4 +76,10 @@ TEST_P(urEnqueueReadHostPipeTest, InvalidEventWaitList) { urEnqueueReadHostPipe(queue, program, pipe_symbol, /*blocking*/ true, &buffer, size, 0, &validEvent, nullptr)); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueReadHostPipe(queue, program, pipe_symbol, + /*blocking*/ true, &buffer, size, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueUSMFill.cpp b/test/conformance/enqueue/urEnqueueUSMFill.cpp index e595056035..24a6a240fb 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill.cpp @@ -203,4 +203,9 @@ TEST_P(urEnqueueUSMFillNegativeTest, InvalidEventWaitList) { ASSERT_EQ_RESULT(urEnqueueUSMFill(queue, ptr, pattern_size, pattern.data(), size, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMFill(queue, ptr, pattern_size, pattern.data(), + size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp index 9cd5bc7591..d43a758827 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp +++ 
b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp @@ -273,4 +273,10 @@ TEST_P(urEnqueueUSMFill2DNegativeTest, InvalidNullPtrEventWaitList) { pattern.data(), width, 1, 0, &validEvent, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMFill2D(queue, ptr, pitch, pattern_size, + pattern.data(), width, 1, 1, &inv_evt, + nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp index e1af65896c..a2c14a5a92 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp @@ -158,6 +158,11 @@ TEST_P(urEnqueueUSMMemcpyTest, InvalidNullPtrEventWaitList) { allocation_size, 0, &memset_event, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMMemcpy(queue, true, device_dst, device_src, + allocation_size, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueUSMMemcpyTest); diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp index e56d2a02c9..d64b499966 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp @@ -183,5 +183,10 @@ TEST_P(urEnqueueUSMMemcpy2DNegativeTest, InvalidEventWaitList) { urEnqueueUSMMemcpy2D(queue, true, pDst, pitch, pSrc, pitch, width, height, 0, &event, nullptr)); + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMMemcpy2D(queue, true, pDst, pitch, pSrc, pitch, + width, height, 1, &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); + ASSERT_SUCCESS(urEventRelease(event)); } diff --git a/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp b/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp index 70b93a55b7..cd7b087876 100644 --- 
a/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp +++ b/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp @@ -122,4 +122,10 @@ TEST_P(urEnqueueUSMPrefetchTest, InvalidEventWaitList) { urEnqueueUSMPrefetch(queue, ptr, allocation_size, UR_USM_MIGRATION_FLAG_DEFAULT, 0, &validEvent, nullptr)); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueUSMPrefetch(queue, ptr, allocation_size, + UR_USM_MIGRATION_FLAG_DEFAULT, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp b/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp index 20ef1da38b..86e3b99fe3 100644 --- a/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp +++ b/test/conformance/enqueue/urEnqueueWriteHostPipe.cpp @@ -55,18 +55,6 @@ TEST_P(urEnqueueWriteHostPipeTest, InvalidNullPointerBuffer) { &phEventWaitList, phEvent)); } -TEST_P(urEnqueueWriteHostPipeTest, InvalidNullPointerEvent) { - uint32_t numEventsInWaitList = 0; - ur_event_handle_t phEventWaitList; - ur_event_handle_t *phEvent = nullptr; - - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEnqueueWriteHostPipe(queue, program, pipe_symbol, - /*blocking*/ true, &buffer, size, - numEventsInWaitList, - &phEventWaitList, phEvent)); -} - TEST_P(urEnqueueWriteHostPipeTest, InvalidEventWaitList) { ur_event_handle_t phEventWaitList; ur_event_handle_t *phEvent = nullptr; @@ -88,4 +76,10 @@ TEST_P(urEnqueueWriteHostPipeTest, InvalidEventWaitList) { urEnqueueWriteHostPipe(queue, program, pipe_symbol, /*blocking*/ true, &buffer, size, 0, &validEvent, nullptr)); + + ur_event_handle_t inv_evt = nullptr; + ASSERT_EQ_RESULT(urEnqueueWriteHostPipe(queue, program, pipe_symbol, + /*blocking*/ true, &buffer, size, 1, + &inv_evt, nullptr), + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); } diff --git a/test/conformance/event/fixtures.h b/test/conformance/event/fixtures.h index c1d5f7a7da..ee16f2152d 100644 --- a/test/conformance/event/fixtures.h +++ 
b/test/conformance/event/fixtures.h @@ -65,6 +65,7 @@ struct urEventReferenceTest : uur::urProfilingQueueTest { input.assign(count, 42); ASSERT_SUCCESS(urEnqueueMemBufferWrite( queue, buffer, false, 0, size, input.data(), 0, nullptr, &event)); + ASSERT_SUCCESS(urEventWait(1, &event)); } void TearDown() override { diff --git a/test/conformance/event/urEventSetCallback.cpp b/test/conformance/event/urEventSetCallback.cpp index 18eb8e00db..ac6f988e71 100644 --- a/test/conformance/event/urEventSetCallback.cpp +++ b/test/conformance/event/urEventSetCallback.cpp @@ -14,8 +14,9 @@ using urEventSetCallbackTest = uur::event::urEventReferenceTest; TEST_P(urEventSetCallbackTest, Success) { struct Callback { - static void callback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, void *pUserData) { + static void callback([[maybe_unused]] ur_event_handle_t hEvent, + [[maybe_unused]] ur_execution_info_t execStatus, + void *pUserData) { auto status = reinterpret_cast(pUserData); *status = true; @@ -24,7 +25,7 @@ TEST_P(urEventSetCallbackTest, Success) { bool didRun = false; ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &didRun)); ASSERT_SUCCESS(urEventWait(1, &event)); @@ -55,13 +56,13 @@ TEST_P(urEventSetCallbackTest, ValidateParameters) { CallbackParameters parameters{}; ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &parameters)); ASSERT_SUCCESS(urEventWait(1, &event)); ASSERT_SUCCESS(urEventRelease(event)); ASSERT_EQ(event, parameters.event); - ASSERT_EQ(ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + ASSERT_EQ(ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, parameters.execStatus); } @@ -71,34 +72,26 @@ TEST_P(urEventSetCallbackTest, ValidateParameters) { 
TEST_P(urEventSetCallbackTest, AllStates) { struct CallbackStatus { - bool queued = false; bool submitted = false; bool running = false; bool complete = false; }; struct Callback { - static void callback(ur_event_handle_t hEvent, + static void callback([[maybe_unused]] ur_event_handle_t hEvent, ur_execution_info_t execStatus, void *pUserData) { auto status = reinterpret_cast(pUserData); switch (execStatus) { - case ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED: { - status->queued = true; - break; - } - case ur_execution_info_t:: - UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: { + case ur_execution_info_t::UR_EXECUTION_INFO_SUBMITTED: { status->submitted = true; break; } - case ur_execution_info_t:: - UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: { + case ur_execution_info_t::UR_EXECUTION_INFO_RUNNING: { status->running = true; break; } - case ur_execution_info_t:: - UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: { + case ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE: { status->complete = true; break; } @@ -112,22 +105,18 @@ TEST_P(urEventSetCallbackTest, AllStates) { CallbackStatus status{}; ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED, - Callback::callback, &status)); - ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED, + event, ur_execution_info_t::UR_EXECUTION_INFO_SUBMITTED, Callback::callback, &status)); ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING, + event, ur_execution_info_t::UR_EXECUTION_INFO_RUNNING, Callback::callback, &status)); ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &status)); ASSERT_SUCCESS(urEventWait(1, &event)); ASSERT_SUCCESS(urEventRelease(event)); - ASSERT_TRUE(status.queued); ASSERT_TRUE(status.submitted); 
ASSERT_TRUE(status.running); ASSERT_TRUE(status.complete); @@ -142,8 +131,9 @@ TEST_P(urEventSetCallbackTest, EventAlreadyCompleted) { ASSERT_SUCCESS(urEventWait(1, &event)); struct Callback { - static void callback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, void *pUserData) { + static void callback([[maybe_unused]] ur_event_handle_t hEvent, + [[maybe_unused]] ur_execution_info_t execStatus, + void *pUserData) { auto status = reinterpret_cast(pUserData); *status = true; @@ -153,7 +143,7 @@ TEST_P(urEventSetCallbackTest, EventAlreadyCompleted) { bool didRun = false; ASSERT_SUCCESS(urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE, + event, ur_execution_info_t::UR_EXECUTION_INFO_COMPLETE, Callback::callback, &didRun)); ASSERT_SUCCESS(urEventRelease(event)); @@ -165,23 +155,20 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventSetCallbackTest); /* Negative tests */ using urEventSetCallbackNegativeTest = uur::event::urEventTest; -void emptyCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, - void *pUserData) {} +void emptyCallback(ur_event_handle_t, ur_execution_info_t, void *) {} -TEST_P(urEventSetCallbackNegativeTest, InvalidNullHandle) { - - ASSERT_EQ_RESULT( - urEventSetCallback( - nullptr, - ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED, - emptyCallback, nullptr), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); +TEST_P(urEventSetCallbackNegativeTest, InvalidNullHandleEvent) { + ASSERT_EQ_RESULT(urEventSetCallback( + nullptr, ur_execution_info_t::UR_EXECUTION_INFO_QUEUED, + emptyCallback, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} +TEST_P(urEventSetCallbackNegativeTest, InvalidNullPointerCallback) { ASSERT_EQ_RESULT( - urEventSetCallback( - event, ur_execution_info_t::UR_EXECUTION_INFO_EXECUTION_INFO_QUEUED, - nullptr, nullptr), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); + urEventSetCallback(event, ur_execution_info_t::UR_EXECUTION_INFO_QUEUED, + nullptr, nullptr), + 
UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urEventSetCallbackNegativeTest, InvalidEnumeration) { diff --git a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp index 7575fb309f..ab01069c82 100644 --- a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp +++ b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp @@ -25,7 +25,7 @@ struct urKernelCreateWithNativeHandleTest : uur::urKernelTest { ur_kernel_native_properties_t properties = { UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES, /*sType*/ nullptr, /*pNext*/ - true /*isNativeHandleOwned*/ + false /*isNativeHandleOwned*/ }; }; UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelCreateWithNativeHandleTest); @@ -56,15 +56,8 @@ TEST_P(urKernelCreateWithNativeHandleTest, InvalidNullHandleProgram) { &properties, &native_kernel)); } -TEST_P(urKernelCreateWithNativeHandleTest, InvalidNullPointerProperties) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urKernelCreateWithNativeHandle(native_kernel_handle, - context, program, nullptr, - &native_kernel)); -} - TEST_P(urKernelCreateWithNativeHandleTest, InvalidNullPointerNativeKernel) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urKernelCreateWithNativeHandle(native_kernel_handle, context, program, &properties, nullptr)); diff --git a/test/conformance/kernel/urKernelSetArgPointer.cpp b/test/conformance/kernel/urKernelSetArgPointer.cpp index 4cb4cb2c8f..50396eb2ed 100644 --- a/test/conformance/kernel/urKernelSetArgPointer.cpp +++ b/test/conformance/kernel/urKernelSetArgPointer.cpp @@ -129,9 +129,9 @@ struct urKernelSetArgPointerNegativeTest : urKernelSetArgPointerTest { } void SetUp() { + UUR_RETURN_ON_FATAL_FAILURE(urKernelSetArgPointerTest::SetUp()); SetUpAllocation(); ASSERT_NE(allocation, nullptr); - UUR_RETURN_ON_FATAL_FAILURE(urKernelSetArgPointerTest::SetUp()); } }; 
UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelSetArgPointerNegativeTest); diff --git a/test/conformance/memory/urMemGetInfo.cpp b/test/conformance/memory/urMemGetInfo.cpp index 355c2c009d..18ec119681 100644 --- a/test/conformance/memory/urMemGetInfo.cpp +++ b/test/conformance/memory/urMemGetInfo.cpp @@ -63,11 +63,11 @@ TEST_P(urMemGetInfoTest, InvalidNullPointerParamValue) { size_t mem_size = 0; ASSERT_EQ_RESULT(urMemGetInfo(buffer, UR_MEM_INFO_SIZE, sizeof(mem_size), nullptr, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urMemGetInfoTest, InvalidNullPointerPropSizeRet) { ASSERT_EQ_RESULT( urMemGetInfo(buffer, UR_MEM_INFO_SIZE, 0, nullptr, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + UR_RESULT_ERROR_INVALID_NULL_POINTER); } diff --git a/test/conformance/platform/fixtures.h b/test/conformance/platform/fixtures.h index 5b532fb433..b294e7031a 100644 --- a/test/conformance/platform/fixtures.h +++ b/test/conformance/platform/fixtures.h @@ -17,7 +17,7 @@ struct urTest : ::testing::Test { ASSERT_SUCCESS(urLoaderConfigCreate(&loader_config)); ASSERT_SUCCESS(urLoaderConfigEnableLayer(loader_config, "UR_LAYER_FULL_VALIDATION")); - ASSERT_SUCCESS(urInit(device_flags, loader_config)); + ASSERT_SUCCESS(urLoaderInit(device_flags, loader_config)); uint32_t adapter_count; ASSERT_SUCCESS(urAdapterGet(0, nullptr, &adapter_count)); @@ -32,8 +32,7 @@ struct urTest : ::testing::Test { if (loader_config) { ASSERT_SUCCESS(urLoaderConfigRelease(loader_config)); } - ur_tear_down_params_t tear_down_params{}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); + ASSERT_SUCCESS(urLoaderTearDown()); } ur_loader_config_handle_t loader_config = nullptr; diff --git a/test/conformance/program/urProgramCreateWithNativeHandle.cpp b/test/conformance/program/urProgramCreateWithNativeHandle.cpp index 7e0400d294..e121c61de8 100644 --- a/test/conformance/program/urProgramCreateWithNativeHandle.cpp +++ b/test/conformance/program/urProgramCreateWithNativeHandle.cpp 
@@ -41,12 +41,12 @@ TEST_P(urProgramCreateWithNativeHandleTest, Success) { TEST_P(urProgramCreateWithNativeHandleTest, InvalidNullHandleContext) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urProgramCreateWithNativeHandle(native_program_handle, - context, nullptr, + nullptr, nullptr, &native_program)); } TEST_P(urProgramCreateWithNativeHandleTest, InvalidNullPointerProgram) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urProgramCreateWithNativeHandle( native_program_handle, context, nullptr, nullptr)); } diff --git a/test/conformance/program/urProgramGetBuildInfo.cpp b/test/conformance/program/urProgramGetBuildInfo.cpp index 59eccd4a65..ddc0ff998c 100644 --- a/test/conformance/program/urProgramGetBuildInfo.cpp +++ b/test/conformance/program/urProgramGetBuildInfo.cpp @@ -36,17 +36,21 @@ TEST_P(urProgramGetBuildInfoTest, Success) { TEST_P(urProgramGetBuildInfoTest, InvalidNullHandleProgram) { ur_program_build_status_t programBuildStatus = UR_PROGRAM_BUILD_STATUS_ERROR; - ASSERT_SUCCESS(urProgramGetBuildInfo( - nullptr, device, UR_PROGRAM_BUILD_INFO_STATUS, - sizeof(programBuildStatus), &programBuildStatus, nullptr)); + ASSERT_EQ_RESULT(urProgramGetBuildInfo(nullptr, device, + UR_PROGRAM_BUILD_INFO_STATUS, + sizeof(programBuildStatus), + &programBuildStatus, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urProgramGetBuildInfoTest, InvalidNullHandleDevice) { ur_program_build_status_t programBuildStatus = UR_PROGRAM_BUILD_STATUS_ERROR; - ASSERT_SUCCESS(urProgramGetBuildInfo( - program, nullptr, UR_PROGRAM_BUILD_INFO_STATUS, - sizeof(programBuildStatus), &programBuildStatus, nullptr)); + ASSERT_EQ_RESULT(urProgramGetBuildInfo(program, nullptr, + UR_PROGRAM_BUILD_INFO_STATUS, + sizeof(programBuildStatus), + &programBuildStatus, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urProgramGetBuildInfoTest, InvalidEnumeration) { diff --git 
a/test/conformance/program/urProgramGetInfo.cpp b/test/conformance/program/urProgramGetInfo.cpp index 8e18dc7b87..80d00072e7 100644 --- a/test/conformance/program/urProgramGetInfo.cpp +++ b/test/conformance/program/urProgramGetInfo.cpp @@ -5,7 +5,14 @@ #include -using urProgramGetInfoTest = uur::urProgramTestWithParam; +struct urProgramGetInfoTest : uur::urProgramTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urProgramTestWithParam::SetUp()); + // Some queries need the program to be built. + ASSERT_SUCCESS(urProgramBuild(this->context, program, nullptr)); + } +}; UUR_TEST_SUITE_P( urProgramGetInfoTest, @@ -29,8 +36,9 @@ TEST_P(urProgramGetInfoTest, Success) { TEST_P(urProgramGetInfoTest, InvalidNullHandleProgram) { uint32_t ref_count = 0; - ASSERT_SUCCESS(urProgramGetInfo(nullptr, UR_PROGRAM_INFO_REFERENCE_COUNT, - sizeof(ref_count), &ref_count, nullptr)); + ASSERT_EQ_RESULT(urProgramGetInfo(nullptr, UR_PROGRAM_INFO_REFERENCE_COUNT, + sizeof(ref_count), &ref_count, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urProgramGetInfoTest, InvalidEnumeration) { diff --git a/test/conformance/queue/urQueueCreate.cpp b/test/conformance/queue/urQueueCreate.cpp index 0f99009abd..da7995a1f4 100644 --- a/test/conformance/queue/urQueueCreate.cpp +++ b/test/conformance/queue/urQueueCreate.cpp @@ -72,7 +72,7 @@ TEST_P(urQueueCreateTest, InvalidValueProperties) { /*.pNext =*/nullptr, /*.flags =*/UR_QUEUE_FLAG_FORCE_UINT32, }; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_VALUE, + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, urQueueCreate(context, device, &props, &queue)); } diff --git a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp index 01e7ca16d5..9f7588601f 100644 --- a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp +++ b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp @@ -23,9 +23,9 @@ TEST_P(urQueueCreateWithNativeHandleTest, Success) { 
&properties, &q)); ASSERT_NE(q, nullptr); - uint32_t q_size = 0; - ASSERT_SUCCESS(urQueueGetInfo(q, UR_QUEUE_INFO_SIZE, sizeof(uint32_t), - &q_size, nullptr)); - + ur_context_handle_t q_context = nullptr; + ASSERT_SUCCESS(urQueueGetInfo(q, UR_QUEUE_INFO_CONTEXT, sizeof(q_context), + &q_context, nullptr)); + ASSERT_EQ(q_context, context); ASSERT_SUCCESS(urQueueRelease(q)); } diff --git a/test/conformance/queue/urQueueFinish.cpp b/test/conformance/queue/urQueueFinish.cpp index fd557c21b2..069f8b5d67 100644 --- a/test/conformance/queue/urQueueFinish.cpp +++ b/test/conformance/queue/urQueueFinish.cpp @@ -25,7 +25,7 @@ TEST_P(urQueueFinishTest, Success) { ur_event_status_t exec_status; ASSERT_SUCCESS(urEventGetInfo(event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(exec_status), &exec_status, nullptr)); - ASSERT_EQ(exec_status, UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE); + ASSERT_EQ(exec_status, UR_EXECUTION_INFO_COMPLETE); } TEST_P(urQueueFinishTest, InvalidNullHandleQueue) { diff --git a/test/conformance/queue/urQueueGetInfo.cpp b/test/conformance/queue/urQueueGetInfo.cpp index 5f8100b612..9269e4de30 100644 --- a/test/conformance/queue/urQueueGetInfo.cpp +++ b/test/conformance/queue/urQueueGetInfo.cpp @@ -29,17 +29,22 @@ UUR_TEST_SUITE_P(urQueueGetInfoTestWithInfoParam, TEST_P(urQueueGetInfoTestWithInfoParam, Success) { ur_queue_info_t info_type = getParam(); size_t size = 0; - ASSERT_SUCCESS(urQueueGetInfo(queue, info_type, 0, nullptr, &size)); - ASSERT_NE(size, 0); + auto result = urQueueGetInfo(queue, info_type, 0, nullptr, &size); - if (const auto expected_size = queue_info_size_map.find(info_type); - expected_size != queue_info_size_map.end()) { - ASSERT_EQ(expected_size->second, size); - } + if (result == UR_RESULT_SUCCESS) { + ASSERT_NE(size, 0); + + if (const auto expected_size = queue_info_size_map.find(info_type); + expected_size != queue_info_size_map.end()) { + ASSERT_EQ(expected_size->second, size); + } - std::vector data(size); - ASSERT_SUCCESS( - 
urQueueGetInfo(queue, info_type, size, data.data(), nullptr)); + std::vector data(size); + ASSERT_SUCCESS( + urQueueGetInfo(queue, info_type, size, data.data(), nullptr)); + } else { + ASSERT_EQ_RESULT(result, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); + } } using urQueueGetInfoTest = uur::urQueueTest; diff --git a/test/conformance/runtime/runtime_adapter_cuda.match b/test/conformance/runtime/runtime_adapter_cuda.match deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/conformance/runtime/runtime_adapter_hip.match b/test/conformance/runtime/runtime_adapter_hip.match deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/conformance/runtime/runtime_adapter_level_zero.match b/test/conformance/runtime/runtime_adapter_level_zero.match deleted file mode 100644 index 10e5183dbe..0000000000 --- a/test/conformance/runtime/runtime_adapter_level_zero.match +++ /dev/null @@ -1 +0,0 @@ -urAdapterGetLastErrorTest.Success diff --git a/test/conformance/runtime/urTearDown.cpp b/test/conformance/runtime/urTearDown.cpp deleted file mode 100644 index 3639515f82..0000000000 --- a/test/conformance/runtime/urTearDown.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2022-2023 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include - -struct urTearDownTest : testing::Test { - void SetUp() override { - ur_device_init_flags_t device_flags = 0; - ASSERT_SUCCESS(urInit(device_flags, nullptr)); - } -}; - -TEST_F(urTearDownTest, Success) { - ur_tear_down_params_t tear_down_params{}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); -} - -TEST_F(urTearDownTest, InvalidNullPointerParams) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urTearDown(nullptr)); -} diff --git a/test/conformance/source/environment.cpp b/test/conformance/source/environment.cpp index 287310f679..296bc73bb1 100644 --- a/test/conformance/source/environment.cpp +++ b/test/conformance/source/environment.cpp @@ -57,7 +57,7 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) } ur_device_init_flags_t device_flags = 0; - auto initResult = urInit(device_flags, config); + auto initResult = urLoaderInit(device_flags, config); auto configReleaseResult = urLoaderConfigRelease(config); switch (initResult) { case UR_RESULT_SUCCESS: @@ -66,7 +66,7 @@ uur::PlatformEnvironment::PlatformEnvironment(int argc, char **argv) error = ERROR_NO_ADAPTER; return; default: - error = "urInit() failed"; + error = "urLoaderInit() failed"; return; } @@ -159,9 +159,8 @@ void uur::PlatformEnvironment::TearDown() { for (auto adapter : adapters) { urAdapterRelease(adapter); } - ur_tear_down_params_t tear_down_params{}; - if (urTearDown(&tear_down_params)) { - FAIL() << "urTearDown() failed"; + if (urLoaderTearDown()) { + FAIL() << "urLoaderTearDown() failed"; } } @@ -357,8 +356,8 @@ void KernelsEnvironment::LoadSource( binary_out = binary_ptr; } -std::vector -KernelsEnvironment::GetEntryPointNames(std::string program_name) { +std::vector KernelsEnvironment::GetEntryPointNames( + [[maybe_unused]] std::string program_name) { std::vector entry_points; #ifdef KERNELS_ENVIRONMENT entry_points = uur::device_binaries::program_kernel_map[program_name]; 
diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index fbb8a48fb1..46b6c24ad6 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -601,6 +601,128 @@ struct urMemImageQueueTest : urQueueTest { 0}; // num samples }; +struct urMultiDeviceMemImageTest : urMultiDeviceContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::SetUp()); + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &format, &desc1D, nullptr, &image1D)); + + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &format, &desc2D, nullptr, &image2D)); + + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &format, &desc3D, nullptr, &image3D)); + } + + void TearDown() override { + if (image1D) { + EXPECT_SUCCESS(urMemRelease(image1D)); + } + if (image2D) { + EXPECT_SUCCESS(urMemRelease(image2D)); + } + if (image3D) { + EXPECT_SUCCESS(urMemRelease(image3D)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceContextTest::TearDown()); + } + + const size_t width = 1024; + const size_t height = 8; + const size_t depth = 2; + ur_mem_handle_t image1D = nullptr; + ur_mem_handle_t image2D = nullptr; + ur_mem_handle_t image3D = nullptr; + ur_rect_region_t region1D{width, 1, 1}; + ur_rect_region_t region2D{width, height, 1}; + ur_rect_region_t region3D{width, height, depth}; + ur_rect_offset_t origin{0, 0, 0}; + ur_image_format_t format = {UR_IMAGE_CHANNEL_ORDER_RGBA, + UR_IMAGE_CHANNEL_TYPE_FLOAT}; + ur_image_desc_t desc1D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype + nullptr, // pNext + UR_MEM_TYPE_IMAGE1D, // mem object type + width, // image width + 1, // image height + 1, // image depth + 1, // array size + 0, // row pitch + 0, // slice pitch + 0, // mip levels + 0}; // num samples + + ur_image_desc_t desc2D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype + nullptr, // pNext + UR_MEM_TYPE_IMAGE2D, // mem object 
type + width, // image width + height, // image height + 1, // image depth + 1, // array size + 0, // row pitch + 0, // slice pitch + 0, // mip levels + 0}; // num samples + + ur_image_desc_t desc3D = {UR_STRUCTURE_TYPE_IMAGE_DESC, // stype + nullptr, // pNext + UR_MEM_TYPE_IMAGE3D, // mem object type + width, // image width + height, // image height + depth, // image depth + 1, // array size + 0, // row pitch + 0, // slice pitch + 0, // mip levels + 0}; // num samples +}; + +struct urMultiDeviceMemImageQueueTest : urMultiDeviceMemImageTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageTest::SetUp()); + queues.reserve(DevicesEnvironment::instance->devices.size()); + for (const auto &device : DevicesEnvironment::instance->devices) { + ur_queue_handle_t queue = nullptr; + ASSERT_SUCCESS(urQueueCreate(context, device, 0, &queue)); + queues.push_back(queue); + } + } + + void TearDown() override { + for (const auto &queue : queues) { + EXPECT_SUCCESS(urQueueRelease(queue)); + } + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageTest::TearDown()); + } + + std::vector queues; +}; + +struct urMultiDeviceMemImageWriteTest : urMultiDeviceMemImageQueueTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageQueueTest::SetUp()); + + ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image1D, true, origin, + region1D, 0, 0, input1D.data(), 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image2D, true, origin, + region2D, 0, 0, input2D.data(), 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemImageWrite(queues[0], image3D, true, origin, + region3D, 0, 0, input3D.data(), 0, + nullptr, nullptr)); + } + + void TearDown() override { + UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceMemImageQueueTest::TearDown()); + } + + std::vector input1D = std::vector(width * 4, 42); + std::vector input2D = + std::vector(width * height * 4, 42); + std::vector input3D = + std::vector(width * height * depth * 4, 42); 
+}; + struct urUSMDeviceAllocTest : urQueueTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); @@ -810,7 +932,9 @@ struct urVirtualMemMappedTest : urVirtualMemTest { } void TearDown() override { - EXPECT_SUCCESS(urVirtualMemUnmap(context, virtual_ptr, size)); + if (virtual_ptr) { + EXPECT_SUCCESS(urVirtualMemUnmap(context, virtual_ptr, size)); + } UUR_RETURN_ON_FATAL_FAILURE(urVirtualMemTest::TearDown()); } }; @@ -826,8 +950,10 @@ struct urVirtualMemMappedTestWithParam : urVirtualMemTestWithParam { } void TearDown() override { - EXPECT_SUCCESS( - urVirtualMemUnmap(this->context, this->virtual_ptr, this->size)); + if (this->virtual_ptr) { + EXPECT_SUCCESS(urVirtualMemUnmap(this->context, this->virtual_ptr, + this->size)); + } UUR_RETURN_ON_FATAL_FAILURE(urVirtualMemTestWithParam::TearDown()); } }; @@ -861,7 +987,9 @@ struct urUSMDeviceAllocTestWithParam : urQueueTestWithParam { } void TearDown() override { - ASSERT_SUCCESS(urUSMFree(this->context, ptr)); + if (ptr) { + ASSERT_SUCCESS(urUSMFree(this->context, ptr)); + } if (pool) { ASSERT_TRUE(use_pool); ASSERT_SUCCESS(urUSMPoolRelease(pool)); diff --git a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp index d4feccd6dc..c3331f1b5b 100644 --- a/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp +++ b/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp @@ -4,8 +4,20 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -using urVirtualMemGranularityGetInfoTest = - uur::urContextTestWithParam; +struct urVirtualMemGranularityGetInfoTest + : uur::urContextTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urContextTestWithParam::SetUp()); + ur_bool_t virtual_memory_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + this->device, UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, + sizeof(ur_bool_t), &virtual_memory_support, nullptr)); + if 
(!virtual_memory_support) { + GTEST_SKIP() << "Virtual memory is not supported."; + } + } +}; UUR_TEST_SUITE_P( urVirtualMemGranularityGetInfoTest, @@ -42,7 +54,19 @@ TEST_P(urVirtualMemGranularityGetInfoTest, Success) { } } -using urVirtualMemGranularityGetInfoNegativeTest = uur::urContextTest; +struct urVirtualMemGranularityGetInfoNegativeTest : uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urContextTest::SetUp()); + + ur_bool_t virtual_memory_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT, sizeof(ur_bool_t), + &virtual_memory_support, nullptr)); + if (!virtual_memory_support) { + GTEST_SKIP() << "Virtual memory is not supported."; + } + } +}; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urVirtualMemGranularityGetInfoNegativeTest); TEST_P(urVirtualMemGranularityGetInfoNegativeTest, InvalidNullHandleContext) { diff --git a/test/layers/tracing/CMakeLists.txt b/test/layers/tracing/CMakeLists.txt index db4b9da590..2ccb4f69b0 100644 --- a/test/layers/tracing/CMakeLists.txt +++ b/test/layers/tracing/CMakeLists.txt @@ -3,9 +3,36 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -set(TEST_NAME example-collected-hello-world) +add_ur_library(test_collector SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/test_collector.cpp +) + +target_include_directories(test_collector PRIVATE + ${CMAKE_SOURCE_DIR}/include +) + +target_link_libraries(test_collector PRIVATE ${TARGET_XPTI}) +target_include_directories(test_collector PRIVATE ${xpti_SOURCE_DIR}/include) + +if(MSVC) + target_compile_definitions(test_collector PRIVATE + XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) +endif() -add_test(NAME ${TEST_NAME} +function(set_tracing_test_props target_name collector_name) + set_tests_properties(${target_name} PROPERTIES + LABELS "tracing" + ) + + set_property(TEST ${target_name} PROPERTY ENVIRONMENT + "XPTI_TRACE_ENABLE=1" + "XPTI_FRAMEWORK_DISPATCHER=$" + "XPTI_SUBSCRIBERS=$" + 
"UR_ADAPTERS_FORCE_LOAD=\"$\"" + "UR_ENABLE_LAYERS=UR_LAYER_TRACING") +endfunction() + +add_test(NAME example-collected-hello-world COMMAND ${CMAKE_COMMAND} -D MODE=stdout -D TEST_FILE=$ @@ -14,13 +41,28 @@ add_test(NAME ${TEST_NAME} DEPENDS collector hello_world ) -set_tests_properties(${TEST_NAME} PROPERTIES - LABELS "tracing" -) +set_tracing_test_props(example-collected-hello-world collector) + +function(add_tracing_test name) + set(TEST_TARGET_NAME tracing-test-${name}) + add_ur_executable(${TEST_TARGET_NAME} + ${ARGN}) + target_link_libraries(${TEST_TARGET_NAME} + PRIVATE + ${PROJECT_NAME}::loader + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::testing + GTest::gtest_main) + add_test(NAME ${name} + COMMAND ${CMAKE_COMMAND} + -D MODE=stderr + -D TEST_FILE=$ + -D MATCH_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${name}.out.match + -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake + DEPENDS test_collector + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tracing_test_props(${name} test_collector) +endfunction() -set_property(TEST ${TEST_NAME} PROPERTY ENVIRONMENT - "XPTI_TRACE_ENABLE=1" - "XPTI_FRAMEWORK_DISPATCHER=$" - "XPTI_SUBSCRIBERS=$" - "UR_ADAPTERS_FORCE_LOAD=\"$\"" - "UR_ENABLE_LAYERS=UR_LAYER_TRACING") +add_tracing_test(codeloc codeloc.cpp) diff --git a/test/layers/tracing/codeloc.cpp b/test/layers/tracing/codeloc.cpp new file mode 100644 index 0000000000..e0f1f91df1 --- /dev/null +++ b/test/layers/tracing/codeloc.cpp @@ -0,0 +1,53 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file codeloc.cpp + * + */ + +#include +#include + +struct ur_code_location_t test_callback(void *userdata) { + (void)userdata; + + ur_code_location_t codeloc; + codeloc.columnNumber = 1; + codeloc.lineNumber = 2; + codeloc.functionName = "fname"; + codeloc.sourceFile = "sfile"; + + return codeloc; +} + +TEST(LoaderCodeloc, NullCallback) { + ur_loader_config_handle_t loader_config; + ASSERT_EQ(urLoaderConfigCreate(&loader_config), UR_RESULT_SUCCESS); + ASSERT_EQ( + urLoaderConfigSetCodeLocationCallback(loader_config, nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); + urLoaderConfigRelease(loader_config); +} + +TEST(LoaderCodeloc, NullHandle) { + ASSERT_EQ( + urLoaderConfigSetCodeLocationCallback(nullptr, test_callback, nullptr), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} + +TEST(LoaderCodeloc, Success) { + ur_loader_config_handle_t loader_config; + ASSERT_EQ(urLoaderConfigCreate(&loader_config), UR_RESULT_SUCCESS); + ASSERT_EQ(urLoaderConfigSetCodeLocationCallback(loader_config, + test_callback, nullptr), + UR_RESULT_SUCCESS); + urLoaderInit(0, loader_config); + uint32_t nadapters; + urAdapterGet(0, nullptr, &nadapters); + urLoaderConfigRelease(loader_config); +} diff --git a/test/layers/tracing/codeloc.out.match b/test/layers/tracing/codeloc.out.match new file mode 100644 index 0000000000..dc0c2e1335 --- /dev/null +++ b/test/layers/tracing/codeloc.out.match @@ -0,0 +1,2 @@ +begin urAdapterGet 178 fname sfile 2 1 +end urAdapterGet 178 fname sfile 2 1 diff --git a/test/layers/tracing/hello_world.out.match b/test/layers/tracing/hello_world.out.match index 7658650d04..cef17b8fdf 100644 --- a/test/layers/tracing/hello_world.out.match +++ b/test/layers/tracing/hello_world.out.match @@ -1,27 +1,23 @@ -function_with_args_begin(1) - urInit(.device_flags = 0); -function_with_args_end(1) - urInit(...) -> ur_result_t(0); Platform initialized. 
-function_with_args_begin(2) - urAdapterGet(unimplemented); +function_with_args_begin(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {{.*}}, .pNumAdapters = {{.*}}); +function_with_args_end(1) - urAdapterGet(...) -> ur_result_t(0); +function_with_args_begin(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{.*}}, .pNumAdapters = {{.*}}); function_with_args_end(2) - urAdapterGet(...) -> ur_result_t(0); -function_with_args_begin(3) - urAdapterGet(unimplemented); -function_with_args_end(3) - urAdapterGet(...) -> ur_result_t(0); +function_with_args_begin(3) - urPlatformGet(unimplemented); +function_with_args_end(3) - urPlatformGet(...) -> ur_result_t(0); function_with_args_begin(4) - urPlatformGet(unimplemented); function_with_args_end(4) - urPlatformGet(...) -> ur_result_t(0); -function_with_args_begin(5) - urPlatformGet(unimplemented); -function_with_args_end(5) - urPlatformGet(...) -> ur_result_t(0); -function_with_args_begin(6) - urPlatformGetApiVersion(unimplemented); -function_with_args_end(6) - urPlatformGetApiVersion(...) -> ur_result_t(0); +function_with_args_begin(5) - urPlatformGetApiVersion(unimplemented); +function_with_args_end(5) - urPlatformGetApiVersion(...) -> ur_result_t(0); API version: {{0\.[0-9]+}} +function_with_args_begin(6) - urDeviceGet(unimplemented); +function_with_args_end(6) - urDeviceGet(...) -> ur_result_t(0); function_with_args_begin(7) - urDeviceGet(unimplemented); function_with_args_end(7) - urDeviceGet(...) -> ur_result_t(0); -function_with_args_begin(8) - urDeviceGet(unimplemented); -function_with_args_end(8) - urDeviceGet(...) -> ur_result_t(0); +function_with_args_begin(8) - urDeviceGetInfo(unimplemented); +function_with_args_end(8) - urDeviceGetInfo(...) -> ur_result_t(0); function_with_args_begin(9) - urDeviceGetInfo(unimplemented); function_with_args_end(9) - urDeviceGetInfo(...) -> ur_result_t(0); -function_with_args_begin(10) - urDeviceGetInfo(unimplemented); -function_with_args_end(10) - urDeviceGetInfo(...) 
-> ur_result_t(0); Found a Null Device gpu. -function_with_args_begin(11) - urAdapterRelease(unimplemented); -function_with_args_end(11) - urAdapterRelease(...) -> ur_result_t(0); -function_with_args_begin(12) - urTearDown(unimplemented); -function_with_args_end(12) - urTearDown(...) -> ur_result_t(0); +function_with_args_begin(10) - urAdapterRelease(unimplemented); +function_with_args_end(10) - urAdapterRelease(...) -> ur_result_t(0); diff --git a/test/layers/tracing/test_collector.cpp b/test/layers/tracing/test_collector.cpp new file mode 100644 index 0000000000..6c942c63ec --- /dev/null +++ b/test/layers/tracing/test_collector.cpp @@ -0,0 +1,74 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file test_collector.cpp + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ur_api.h" +#include "xpti/xpti_trace_framework.h" + +constexpr uint16_t TRACE_FN_BEGIN = + static_cast(xpti::trace_point_type_t::function_with_args_begin); +constexpr uint16_t TRACE_FN_END = + static_cast(xpti::trace_point_type_t::function_with_args_end); +constexpr std::string_view UR_STREAM_NAME = "ur"; + +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, xpti::trace_event_data_t *, + xpti::trace_event_data_t *child, uint64_t, + const void *user_data) { + auto *args = static_cast(user_data); + auto *payload = xptiQueryPayload(child); + std::cerr << (trace_type == TRACE_FN_BEGIN ? 
"begin" : "end"); + std::cerr << " " << args->function_name << " " << args->function_id; + if (payload) { + std::cerr << " " << payload->name << " " << payload->source_file << " " + << payload->line_no << " " << payload->column_no; + } + std::cerr << std::endl; +} + +XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, + unsigned int minor_version, const char *, + const char *stream_name) { + if (stream_name == nullptr) { + std::cout << "Stream name not provided. Aborting." << std::endl; + return; + } + if (std::string_view(stream_name) != UR_STREAM_NAME) { + std::cout << "Invalid stream name: " << stream_name << ". Expected " + << UR_STREAM_NAME << ". Aborting." << std::endl; + return; + } + + if (UR_MAKE_VERSION(major_version, minor_version) != + UR_API_VERSION_CURRENT) { + std::cout << "Invalid stream version: " << major_version << "." + << minor_version << ". Expected " + << UR_MAJOR_VERSION(UR_API_VERSION_CURRENT) << "." + << UR_MINOR_VERSION(UR_API_VERSION_CURRENT) << ". Aborting." 
+ << std::endl; + return; + } + + uint8_t stream_id = xptiRegisterStream(stream_name); + + xptiRegisterCallback(stream_id, TRACE_FN_BEGIN, trace_cb); + xptiRegisterCallback(stream_id, TRACE_FN_END, trace_cb); +} + +XPTI_CALLBACK_API void xptiTraceFinish(const char *) { /* noop */ +} diff --git a/test/layers/validation/fixtures.hpp b/test/layers/validation/fixtures.hpp index a41e48b3a4..ab92ba1e01 100644 --- a/test/layers/validation/fixtures.hpp +++ b/test/layers/validation/fixtures.hpp @@ -17,15 +17,14 @@ struct urTest : ::testing::Test { "UR_LAYER_FULL_VALIDATION"), UR_RESULT_SUCCESS); ur_device_init_flags_t device_flags = 0; - ASSERT_EQ(urInit(device_flags, loader_config), UR_RESULT_SUCCESS); + ASSERT_EQ(urLoaderInit(device_flags, loader_config), UR_RESULT_SUCCESS); } void TearDown() override { if (loader_config) { ASSERT_EQ(urLoaderConfigRelease(loader_config), UR_RESULT_SUCCESS); } - ur_tear_down_params_t tear_down_params{}; - ASSERT_EQ(urTearDown(&tear_down_params), UR_RESULT_SUCCESS); + ASSERT_EQ(urLoaderTearDown(), UR_RESULT_SUCCESS); } ur_loader_config_handle_t loader_config = nullptr; @@ -53,7 +52,12 @@ struct valPlatformsTest : urTest { UR_RESULT_SUCCESS); } - void TearDown() override { urTest::TearDown(); } + void TearDown() override { + for (auto &adapter : adapters) { + ASSERT_EQ(urAdapterRelease(adapter), UR_RESULT_SUCCESS); + } + urTest::TearDown(); + } std::vector adapters; std::vector platforms; diff --git a/test/layers/validation/leaks.cpp b/test/layers/validation/leaks.cpp index b0df81207e..e32aeafc89 100644 --- a/test/layers/validation/leaks.cpp +++ b/test/layers/validation/leaks.cpp @@ -5,6 +5,12 @@ #include "fixtures.hpp" +TEST_F(urTest, testUrAdapterGetLeak) { + ur_adapter_handle_t adapter = nullptr; + ASSERT_EQ(urAdapterGet(1, &adapter, nullptr), UR_RESULT_SUCCESS); + ASSERT_NE(nullptr, adapter); +} + TEST_F(valDeviceTest, testUrContextCreateLeak) { ur_context_handle_t context = nullptr; ASSERT_EQ(urContextCreate(1, &device, nullptr, 
&context), diff --git a/test/layers/validation/leaks.out.match b/test/layers/validation/leaks.out.match index aadba2252c..9fac722527 100644 --- a/test/layers/validation/leaks.out.match +++ b/test/layers/validation/leaks.out.match @@ -3,28 +3,45 @@ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 +\[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ +\[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: +(.*) +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 2 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[ERROR\]: Attempting to retain nonexistent handle [0-9xa-fA-F]+ (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Attempting 
to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) diff --git a/test/layers/validation/leaks_mt.out.match b/test/layers/validation/leaks_mt.out.match index 7d5a0bedd8..86de1e1d76 100644 --- a/test/layers/validation/leaks_mt.out.match +++ b/test/layers/validation/leaks_mt.out.match @@ -1,10 +1,13 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 3 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 2 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 3 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 4 @@ -13,17 +16,21 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 7 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 8 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 9 +\[DEBUG\]: Reference count for handle 
[0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 9 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -1 @@ -39,18 +46,22 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -6 \[ERROR\]: Attempting to release nonexistent handle [0-9xa-fA-F]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to -7 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained -7 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was 
recorded for first time here: (.*) \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ @@ -67,6 +78,7 @@ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to [1-9]+ \[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 1 +\[DEBUG\]: Reference count for handle [0-9xa-fA-F]+ changed to 0 \[ERROR\]: Retained 1 reference\(s\) to handle [0-9xa-fA-F]+ \[ERROR\]: Handle [0-9xa-fA-F]+ was recorded for first time here: (.*) diff --git a/test/layers/validation/parameters.cpp b/test/layers/validation/parameters.cpp index ee679363dc..c02afd63d6 100644 --- a/test/layers/validation/parameters.cpp +++ b/test/layers/validation/parameters.cpp @@ -5,18 +5,6 @@ #include "fixtures.hpp" -TEST(valTest, urInit) { - ur_loader_config_handle_t config; - urLoaderConfigCreate(&config); - urLoaderConfigEnableLayer(config, "UR_PARAMETER_VALIDATION_LAYER"); - - const ur_device_init_flags_t device_flags = - UR_DEVICE_INIT_FLAG_FORCE_UINT32; - ASSERT_EQ(urInit(device_flags, config), - UR_RESULT_ERROR_INVALID_ENUMERATION); - ASSERT_EQ(urLoaderConfigRelease(config), UR_RESULT_SUCCESS); -} - TEST_F(valPlatformsTest, testUrPlatformGetApiVersion) { ur_api_version_t api_version = {}; diff --git a/test/loader/CMakeLists.txt b/test/loader/CMakeLists.txt index 0dbf999c45..d36f922098 100644 --- a/test/loader/CMakeLists.txt +++ b/test/loader/CMakeLists.txt @@ -10,4 +10,5 @@ set_tests_properties(example-hello-world PROPERTIES LABELS "loader" add_subdirectory(adapter_registry) add_subdirectory(loader_config) +add_subdirectory(loader_lifetime) add_subdirectory(platforms) diff --git a/test/loader/loader_config/CMakeLists.txt 
b/test/loader/loader_config/CMakeLists.txt index b2c2ffc4ec..db07bec990 100644 --- a/test/loader/loader_config/CMakeLists.txt +++ b/test/loader/loader_config/CMakeLists.txt @@ -3,7 +3,7 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -add_executable(test-loader-config +add_ur_executable(test-loader-config urLoaderConfigCreate.cpp urLoaderConfigGetInfo.cpp urLoaderConfigEnableLayer.cpp diff --git a/test/loader/loader_lifetime/CMakeLists.txt b/test/loader/loader_lifetime/CMakeLists.txt new file mode 100644 index 0000000000..c76ff87d0b --- /dev/null +++ b/test/loader/loader_lifetime/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (C) 2023 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +add_executable(test-loader-lifetime + urLoaderInit.cpp + urLoaderTearDown.cpp +) + +target_link_libraries(test-loader-lifetime + PRIVATE + ${PROJECT_NAME}::common + ${PROJECT_NAME}::headers + ${PROJECT_NAME}::loader + gmock + GTest::gtest_main +) + +add_test(NAME loader-lifetime + COMMAND test-loader-lifetime + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} +) diff --git a/test/loader/loader_lifetime/fixtures.hpp b/test/loader/loader_lifetime/fixtures.hpp new file mode 100644 index 0000000000..b1eb3766c5 --- /dev/null +++ b/test/loader/loader_lifetime/fixtures.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UR_LOADER_CONFIG_TEST_FIXTURES_H +#define UR_LOADER_CONFIG_TEST_FIXTURES_H + +#include "ur_api.h" +#include +#include + +#ifndef ASSERT_SUCCESS +#define ASSERT_SUCCESS(ACTUAL) ASSERT_EQ(UR_RESULT_SUCCESS, ACTUAL) +#endif + +/// @brief Make a string a valid identifier for gtest. +/// @param str The string to sanitize. 
+inline std::string GTestSanitizeString(const std::string &str) { + auto str_cpy = str; + std::replace_if( + str_cpy.begin(), str_cpy.end(), [](char c) { return !std::isalnum(c); }, + '_'); + return str_cpy; +} + +#endif diff --git a/test/conformance/runtime/urInit.cpp b/test/loader/loader_lifetime/urLoaderInit.cpp similarity index 67% rename from test/conformance/runtime/urInit.cpp rename to test/loader/loader_lifetime/urLoaderInit.cpp index 1de30ff471..48060240b8 100644 --- a/test/conformance/runtime/urInit.cpp +++ b/test/loader/loader_lifetime/urLoaderInit.cpp @@ -2,11 +2,13 @@ // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include +#include "fixtures.hpp" +#include -using urInitTestWithParam = ::testing::TestWithParam; +using urLoaderInitTestWithParam = + ::testing::TestWithParam; INSTANTIATE_TEST_SUITE_P( - , urInitTestWithParam, + , urLoaderInitTestWithParam, ::testing::Values(UR_DEVICE_INIT_FLAG_GPU, UR_DEVICE_INIT_FLAG_CPU, UR_DEVICE_INIT_FLAG_FPGA, UR_DEVICE_INIT_FLAG_MCA, UR_DEVICE_INIT_FLAG_VPU, @@ -16,24 +18,23 @@ INSTANTIATE_TEST_SUITE_P( [](const ::testing::TestParamInfo &info) { std::stringstream ss; ur_params::serializeFlag(ss, info.param); - return uur::GTestSanitizeString(ss.str()); + return GTestSanitizeString(ss.str()); }); -TEST_P(urInitTestWithParam, Success) { +TEST_P(urLoaderInitTestWithParam, Success) { ur_loader_config_handle_t config = nullptr; urLoaderConfigCreate(&config); urLoaderConfigEnableLayer(config, "UR_LAYER_FULL_VALIDATION"); ur_device_init_flags_t device_flags = GetParam(); - ASSERT_SUCCESS(urInit(device_flags, config)); + ASSERT_SUCCESS(urLoaderInit(device_flags, config)); - ur_tear_down_params_t tear_down_params{nullptr}; - ASSERT_SUCCESS(urTearDown(&tear_down_params)); + ASSERT_SUCCESS(urLoaderTearDown()); } -TEST(urInitTest, ErrorInvalidEnumerationDeviceFlags) { +TEST(urLoaderInitTest, 
ErrorInvalidEnumerationDeviceFlags) { const ur_device_init_flags_t device_flags = UR_DEVICE_INIT_FLAG_FORCE_UINT32; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, - urInit(device_flags, nullptr)); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_ENUMERATION, + urLoaderInit(device_flags, nullptr)); } diff --git a/test/loader/loader_lifetime/urLoaderTearDown.cpp b/test/loader/loader_lifetime/urLoaderTearDown.cpp new file mode 100644 index 0000000000..a4c3dc83fb --- /dev/null +++ b/test/loader/loader_lifetime/urLoaderTearDown.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2022-2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "fixtures.hpp" + +struct urLoaderTearDownTest : testing::Test { + void SetUp() override { + ur_device_init_flags_t device_flags = 0; + ASSERT_SUCCESS(urLoaderInit(device_flags, nullptr)); + } +}; + +TEST_F(urLoaderTearDownTest, Success) { ASSERT_SUCCESS(urLoaderTearDown()); } diff --git a/test/loader/platforms/no_platforms.match b/test/loader/platforms/no_platforms.match index da17800c0c..b695672e4d 100644 --- a/test/loader/platforms/no_platforms.match +++ b/test/loader/platforms/no_platforms.match @@ -1,2 +1,2 @@ -[INFO]: urInit succeeded. +[INFO]: urLoaderInit succeeded. [INFO]: urPlatformGet found 0 platforms diff --git a/test/loader/platforms/null_platform.match b/test/loader/platforms/null_platform.match index 6c7d8a97f4..29cadc78b5 100644 --- a/test/loader/platforms/null_platform.match +++ b/test/loader/platforms/null_platform.match @@ -1,3 +1,3 @@ -[INFO]: urInit succeeded. +[INFO]: urLoaderInit succeeded. 
[INFO]: urPlatformGet found 1 platforms -[INFO]: Found UR_PLATFORM_NULL \ No newline at end of file +[INFO]: Found UR_PLATFORM_NULL diff --git a/test/loader/platforms/platforms.cpp b/test/loader/platforms/platforms.cpp index bb4f8fb79d..4bb487b12b 100644 --- a/test/loader/platforms/platforms.cpp +++ b/test/loader/platforms/platforms.cpp @@ -24,25 +24,25 @@ int main(int argc, char *argv[]) { ur_result_t status; // Initialize the platform - status = urInit(0, nullptr); + status = urLoaderInit(0, nullptr); if (status != UR_RESULT_SUCCESS) { - out.error("urInit failed with return code: {}", status); + out.error("urLoaderInit failed with return code: {}", status); return 1; } - out.info("urInit succeeded."); + out.info("urLoaderInit succeeded."); uint32_t adapterCount = 0; std::vector adapters; status = urAdapterGet(0, nullptr, &adapterCount); if (status != UR_RESULT_SUCCESS) { - error("urAdapterGet failed with return code: {}", status); + out.error("urAdapterGet failed with return code: {}", status); return 1; } adapters.resize(adapterCount); status = urAdapterGet(adapterCount, adapters.data(), nullptr); if (status != UR_RESULT_SUCCESS) { - error("urAdapterGet failed with return code: {}", status); + out.error("urAdapterGet failed with return code: {}", status); return 1; } @@ -89,6 +89,6 @@ int main(int argc, char *argv[]) { free(name); } out: - urTearDown(nullptr); + urLoaderTearDown(); return status == UR_RESULT_SUCCESS ? 0 : 1; } diff --git a/test/tools/urtrace/null_hello.match b/test/tools/urtrace/null_hello.match index b58a4d8d96..54c6efb9cb 100644 --- a/test/tools/urtrace/null_hello.match +++ b/test/tools/urtrace/null_hello.match @@ -1,4 +1,3 @@ -urInit(.device_flags = 0, .hLoaderConfig = nullptr) -> UR_RESULT_SUCCESS; Platform initialized. 
urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; @@ -12,4 +11,3 @@ urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; Found a Null Device gpu. urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; -urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_begin.match b/test/tools/urtrace/null_hello_begin.match index 81c15da60f..bf2d85145a 100644 --- a/test/tools/urtrace/null_hello_begin.match +++ b/test/tools/urtrace/null_hello_begin.match @@ -1,27 +1,23 @@ -begin(1) - urInit(.device_flags = 0, .hLoaderConfig = nullptr); -end(1) - urInit(.device_flags = 0, .hLoaderConfig = nullptr) -> UR_RESULT_SUCCESS; Platform initialized. 
-begin(2) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (0)); -end(2) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(3) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr); -end(3) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; -begin(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (0)); -end(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(5) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {nullptr}, .pNumPlatforms = nullptr); -end(5) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; -begin(6) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (0.0)); -end(6) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@)) -> UR_RESULT_SUCCESS; +begin(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (0)); +end(1) - urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr); +end(2) - urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; +begin(3) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (0)); +end(3) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {}, .pNumPlatforms = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = 
{nullptr}, .pNumPlatforms = nullptr); +end(4) - urPlatformGet(.phAdapters = {{{.*}}}, .NumAdapters = 1, .NumEntries = 1, .phPlatforms = {{{.*}}}, .pNumPlatforms = nullptr) -> UR_RESULT_SUCCESS; +begin(5) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (0.0)); +end(5) - urPlatformGetApiVersion(.hPlatform = {{.*}}, .pVersion = {{.*}} (@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@)) -> UR_RESULT_SUCCESS; API version: {{.*}} -begin(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (0)); -end(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; -begin(8) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {nullptr}, .pNumDevices = nullptr); -end(8) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; -begin(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); -end(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; -begin(10) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); -end(10) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +begin(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (0)); +end(6) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 0, .phDevices = {}, .pNumDevices = {{.*}} (1)) -> UR_RESULT_SUCCESS; +begin(7) - 
urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {nullptr}, .pNumDevices = nullptr); +end(7) - urDeviceGet(.hPlatform = {{.*}}, .DeviceType = UR_DEVICE_TYPE_GPU, .NumEntries = 1, .phDevices = {{{.*}}}, .pNumDevices = nullptr) -> UR_RESULT_SUCCESS; +begin(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); +end(8) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = 4, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; +begin(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr); +end(9) - urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; Found a Null Device gpu. -begin(11) - urAdapterRelease(.hAdapter = {{.*}}); -end(11) - urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; -begin(12) - urTearDown(.pParams = nullptr); -end(12) - urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; +begin(10) - urAdapterRelease(.hAdapter = {{.*}}); +end(10) - urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_json.match b/test/tools/urtrace/null_hello_json.match index 18c5fbac78..5b9377e8d6 100644 --- a/test/tools/urtrace/null_hello_json.match +++ b/test/tools/urtrace/null_hello_json.match @@ -1,6 +1,5 @@ { "traceEvents": [ -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urInit", "args": "(.device_flags = 0, .hLoaderConfig = nullptr)" }, Platform initialized. 
{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1))" }, { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterGet", "args": "(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr)" }, @@ -14,7 +13,6 @@ API version: @PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@ { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urDeviceGetInfo", "args": "(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = 1023, .pPropValue = {{.*}} (Null Device), .pPropSizeRet = nullptr)" }, Found a Null Device gpu. { "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urAdapterRelease", "args": "(.hAdapter = {{.*}})" }, -{ "cat": "UR", "ph": "X", "pid": {{.*}}, "tid": {{.*}}, "ts": {{.*}}, "dur": {{.*}}, "name": "urTearDown", "args": "(.pParams = nullptr)" }, {"name": "", "cat": "", "ph": "", "pid": "", "tid": "", "ts": ""} ] } diff --git a/test/tools/urtrace/null_hello_no_args.match b/test/tools/urtrace/null_hello_no_args.match index e0afcd2868..6462f41d02 100644 --- a/test/tools/urtrace/null_hello_no_args.match +++ b/test/tools/urtrace/null_hello_no_args.match @@ -1,4 +1,3 @@ -urInit(...) -> UR_RESULT_SUCCESS; Platform initialized. urAdapterGet(...) -> UR_RESULT_SUCCESS; urAdapterGet(...) -> UR_RESULT_SUCCESS; @@ -12,4 +11,3 @@ urDeviceGetInfo(...) -> UR_RESULT_SUCCESS; urDeviceGetInfo(...) -> UR_RESULT_SUCCESS; Found a Null Device gpu. urAdapterRelease(...) -> UR_RESULT_SUCCESS; -urTearDown(...) 
-> UR_RESULT_SUCCESS; diff --git a/test/tools/urtrace/null_hello_profiling.match b/test/tools/urtrace/null_hello_profiling.match index 635c3c8784..7bd3bd53c1 100644 --- a/test/tools/urtrace/null_hello_profiling.match +++ b/test/tools/urtrace/null_hello_profiling.match @@ -1,4 +1,3 @@ -urInit(.device_flags = 0, .hLoaderConfig = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) Platform initialized. urAdapterGet(.NumEntries = 0, .phAdapters = {}, .pNumAdapters = {{.*}} (1)) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) urAdapterGet(.NumEntries = 1, .phAdapters = {{{.*}}}, .pNumAdapters = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) @@ -12,4 +11,3 @@ urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_TYPE, .propSize = urDeviceGetInfo(.hDevice = {{.*}}, .propName = UR_DEVICE_INFO_NAME, .propSize = {{.*}}, .pPropValue = {{.*}}, .pPropSizeRet = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) Found a Null Device gpu. urAdapterRelease(.hAdapter = {{.*}}) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) -urTearDown(.pParams = nullptr) -> UR_RESULT_SUCCESS; ({{[0-9]+}}ns) diff --git a/test/unified_malloc_framework/common/pool.hpp b/test/unified_malloc_framework/common/pool.hpp index 7a7b650e11..f31acf8d22 100644 --- a/test/unified_malloc_framework/common/pool.hpp +++ b/test/unified_malloc_framework/common/pool.hpp @@ -23,6 +23,7 @@ #include #include "base.hpp" +#include "provider.hpp" #include "umf_helpers.hpp" namespace umf_test { @@ -31,6 +32,17 @@ auto wrapPoolUnique(umf_memory_pool_handle_t hPool) { return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); } +template +auto makePoolWithOOMProvider(int allocNum, Args &&...args) { + auto [ret, provider] = + umf::memoryProviderMakeUnique(allocNum); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + auto [retp, pool] = umf::poolMakeUnique( + {std::move(provider)}, std::forward(args)...); + EXPECT_EQ(retp, UMF_RESULT_SUCCESS); + return std::move(pool); +} + bool isReallocSupported(umf_memory_pool_handle_t hPool) { static constexpr size_t allocSize = 
8; bool supported; @@ -76,7 +88,7 @@ struct pool_base { umf_result_t initialize(umf_memory_provider_handle_t *, size_t) noexcept { return UMF_RESULT_SUCCESS; }; - void *malloc(size_t size) noexcept { return nullptr; } + void *malloc([[maybe_unused]] size_t size) noexcept { return nullptr; } void *calloc(size_t, size_t) noexcept { return nullptr; } void *realloc(void *, size_t) noexcept { return nullptr; } void *aligned_malloc(size_t, size_t) noexcept { return nullptr; } @@ -120,7 +132,7 @@ struct malloc_pool : public pool_base { struct proxy_pool : public pool_base { umf_result_t initialize(umf_memory_provider_handle_t *providers, - size_t numProviders) noexcept { + [[maybe_unused]] size_t numProviders) noexcept { this->provider = providers[0]; return UMF_RESULT_SUCCESS; } @@ -128,15 +140,17 @@ struct proxy_pool : public pool_base { void *calloc(size_t num, size_t size) noexcept { void *ptr; auto ret = umfMemoryProviderAlloc(provider, num * size, 0, &ptr); + umf::getPoolLastStatusRef() = ret; - memset(ptr, 0, num * size); - - if (ptr) { - EXPECT_EQ_NOEXCEPT(ret, UMF_RESULT_SUCCESS); + if (!ptr) { + return ptr; } + + memset(ptr, 0, num * size); return ptr; } - void *realloc(void *ptr, size_t size) noexcept { + void *realloc([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t size) noexcept { // TODO: not supported umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; @@ -145,18 +159,15 @@ struct proxy_pool : public pool_base { void *aligned_malloc(size_t size, size_t alignment) noexcept { void *ptr; auto ret = umfMemoryProviderAlloc(provider, size, alignment, &ptr); - if (ptr) { - EXPECT_EQ_NOEXCEPT(ret, UMF_RESULT_SUCCESS); - } + umf::getPoolLastStatusRef() = ret; return ptr; } - size_t malloc_usable_size(void *ptr) noexcept { + size_t malloc_usable_size([[maybe_unused]] void *ptr) noexcept { // TODO: not supported return 0; } enum umf_result_t free(void *ptr) noexcept { auto ret = umfMemoryProviderFree(provider, ptr, 0); - EXPECT_EQ_NOEXCEPT(ret, 
UMF_RESULT_SUCCESS); return ret; } enum umf_result_t get_last_allocation_error() { diff --git a/test/unified_malloc_framework/common/provider.c b/test/unified_malloc_framework/common/provider.c index 8f9e946bfc..303d8aea8d 100644 --- a/test/unified_malloc_framework/common/provider.c +++ b/test/unified_malloc_framework/common/provider.c @@ -23,7 +23,7 @@ static enum umf_result_t nullAlloc(void *provider, size_t size, (void)provider; (void)size; (void)alignment; - (void)ptr; + *ptr = NULL; return UMF_RESULT_SUCCESS; } diff --git a/test/unified_malloc_framework/common/provider.hpp b/test/unified_malloc_framework/common/provider.hpp index 518b2b0528..6b121e39f1 100644 --- a/test/unified_malloc_framework/common/provider.hpp +++ b/test/unified_malloc_framework/common/provider.hpp @@ -30,21 +30,27 @@ struct provider_base { enum umf_result_t alloc(size_t, size_t, void **) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t free(void *ptr, size_t size) noexcept { + enum umf_result_t free([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t size) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } void get_last_native_error(const char **, int32_t *) noexcept {} - enum umf_result_t get_recommended_page_size(size_t size, - size_t *pageSize) noexcept { + enum umf_result_t + get_recommended_page_size([[maybe_unused]] size_t size, + [[maybe_unused]] size_t *pageSize) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t get_min_page_size(void *ptr, size_t *pageSize) noexcept { + enum umf_result_t + get_min_page_size([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t *pageSize) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t purge_lazy(void *ptr, size_t size) noexcept { + enum umf_result_t purge_lazy([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t size) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } - enum umf_result_t purge_force(void *ptr, size_t size) noexcept { + enum umf_result_t purge_force([[maybe_unused]] void *ptr, + 
[[maybe_unused]] size_t size) noexcept { return UMF_RESULT_ERROR_UNKNOWN; } const char *get_name() noexcept { return "base"; } @@ -76,6 +82,28 @@ struct provider_malloc : public provider_base { const char *get_name() noexcept { return "malloc"; } }; +struct provider_mock_out_of_mem : public provider_base { + provider_malloc helper_prov; + int allocNum = 0; + umf_result_t initialize(int allocNum) noexcept { + this->allocNum = allocNum; + return UMF_RESULT_SUCCESS; + } + enum umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { + if (allocNum <= 0) { + *ptr = nullptr; + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + allocNum--; + + return helper_prov.alloc(size, align, ptr); + } + enum umf_result_t free(void *ptr, size_t size) noexcept { + return helper_prov.free(ptr, size); + } + const char *get_name() noexcept { return "mock_out_of_mem"; } +}; + } // namespace umf_test #endif /* UMF_TEST_PROVIDER_HPP */ diff --git a/test/unified_malloc_framework/memoryPool.hpp b/test/unified_malloc_framework/memoryPool.hpp index fde5954cf8..ab923932fb 100644 --- a/test/unified_malloc_framework/memoryPool.hpp +++ b/test/unified_malloc_framework/memoryPool.hpp @@ -3,7 +3,9 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "disjoint_pool.hpp" #include "pool.hpp" +#include "provider.hpp" #include #include @@ -53,6 +55,29 @@ struct umfMultiPoolTest : umfPoolTest { std::vector pools; }; +struct umfMemTest + : umf_test::test, + ::testing::WithParamInterface< + std::tuple, int>> { + umfMemTest() : pool(nullptr, nullptr), expectedRecycledPoolAllocs(0) {} + void SetUp() override { + test::SetUp(); + initialize(); + } + + void TearDown() override { test::TearDown(); } + + void initialize() { + auto [pool_fun, expectedRecycledPoolAllocs] = this->GetParam(); + EXPECT_NE(pool_fun(), nullptr); + this->pool = pool_fun(); + this->expectedRecycledPoolAllocs = expectedRecycledPoolAllocs; + } + + umf::pool_unique_handle_t pool; + int 
expectedRecycledPoolAllocs; +}; + TEST_P(umfPoolTest, allocFree) { static constexpr size_t allocSize = 64; auto *ptr = umfPoolMalloc(pool.get(), allocSize); @@ -251,6 +276,46 @@ TEST_P(umfPoolTest, multiThreadedMallocFreeRandomSizes) { } } +TEST_P(umfMemTest, outOfMem) { + static constexpr size_t allocSize = 4096; + auto hPool = pool.get(); + + std::vector allocations; + + while (true) { + allocations.emplace_back(umfPoolMalloc(hPool, allocSize)); + if (allocations.back() == nullptr && + umfPoolGetLastAllocationError(hPool) == + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { + break; + } + ASSERT_NE(allocations.back(), nullptr); + } + + // next part of the test- freeing some memory to allocate it again (as the memory + // should be acquired from the pool itself now, not from the provider), + // is done only for the disjoint pool for now + + // remove last nullptr from the allocations vector + ASSERT_EQ(allocations.back(), nullptr); + allocations.pop_back(); + + ASSERT_NE(allocations.back(), nullptr); + for (int i = 0; i < expectedRecycledPoolAllocs; i++) { + umfPoolFree(hPool, allocations.back()); + allocations.pop_back(); + } + + for (int i = 0; i < expectedRecycledPoolAllocs; i++) { + allocations.emplace_back(umfPoolMalloc(hPool, allocSize)); + ASSERT_NE(allocations.back(), nullptr); + } + + for (auto allocation : allocations) { + umfPoolFree(hPool, allocation); + } +} + #ifdef UMF_ENABLE_POOL_TRACKING_TESTS // TODO: add similar tests for realloc/aligned_alloc, etc. 
// TODO: add multithreaded tests diff --git a/test/unified_malloc_framework/memoryPoolAPI.cpp b/test/unified_malloc_framework/memoryPoolAPI.cpp index d40254fbf0..82d3768611 100644 --- a/test/unified_malloc_framework/memoryPoolAPI.cpp +++ b/test/unified_malloc_framework/memoryPoolAPI.cpp @@ -82,7 +82,7 @@ TEST_F(test, memoryPoolTrace) { ASSERT_EQ(providerCalls.size(), provider_call_count); ret = umfPoolGetLastAllocationError(tracingPool.get()); - ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(poolCalls["get_last_native_error"], 1); ASSERT_EQ(poolCalls.size(), ++pool_call_count); @@ -157,6 +157,14 @@ INSTANTIATE_TEST_SUITE_P( .second; })); +INSTANTIATE_TEST_SUITE_P( + proxyPoolOOMTest, umfMemTest, + ::testing::Values(std::tuple( + [] { + return umf_test::makePoolWithOOMProvider(10); + }, + 0))); + ////////////////// Negative test cases ///////////////// TEST_F(test, memoryPoolInvalidProvidersNullptr) { @@ -187,9 +195,10 @@ TEST_P(poolInitializeTest, errorPropagation) { umf_memory_provider_handle_t providers[] = {nullProvider.get()}; struct pool : public umf_test::pool_base { - umf_result_t initialize(umf_memory_provider_handle_t *providers, - size_t numProviders, - umf_result_t errorToReturn) noexcept { + umf_result_t + initialize([[maybe_unused]] umf_memory_provider_handle_t *providers, + [[maybe_unused]] size_t numProviders, + umf_result_t errorToReturn) noexcept { return errorToReturn; } }; @@ -232,7 +241,8 @@ TEST_F(test, getLastFailedMemoryProvider) { return allocResult; } - enum umf_result_t free(void *ptr, size_t size) noexcept { + enum umf_result_t free(void *ptr, + [[maybe_unused]] size_t size) noexcept { ::free(ptr); return UMF_RESULT_SUCCESS; } @@ -254,10 +264,8 @@ TEST_F(test, getLastFailedMemoryProvider) { auto [ret, pool] = umf::poolMakeUnique(&hProvider, 1); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ASSERT_EQ(umfGetLastFailedMemoryProvider(), nullptr); auto ptr = umfPoolMalloc(pool.get(), allocSize); 
ASSERT_NE(ptr, nullptr); - ASSERT_EQ(umfGetLastFailedMemoryProvider(), nullptr); umfPoolFree(pool.get(), ptr); // make provider return an error during allocation diff --git a/test/unified_malloc_framework/memoryProviderAPI.cpp b/test/unified_malloc_framework/memoryProviderAPI.cpp index fa02f9eb99..02a7fa357d 100644 --- a/test/unified_malloc_framework/memoryProviderAPI.cpp +++ b/test/unified_malloc_framework/memoryProviderAPI.cpp @@ -23,7 +23,8 @@ TEST_F(test, memoryProviderTrace) { size_t call_count = 0; - auto ret = umfMemoryProviderAlloc(tracingProvider.get(), 0, 0, nullptr); + void *ptr; + auto ret = umfMemoryProviderAlloc(tracingProvider.get(), 0, 0, &ptr); ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_EQ(calls["alloc"], 1); ASSERT_EQ(calls.size(), ++call_count); diff --git a/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp b/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp index 9e4d4f7ee6..0e81342bef 100644 --- a/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp +++ b/test/unified_malloc_framework/umf_pools/disjoint_pool.cpp @@ -11,6 +11,7 @@ #include "disjoint_pool.hpp" #include "memoryPool.hpp" +#include "pool.hpp" #include "provider.h" #include "provider.hpp" @@ -42,7 +43,8 @@ TEST_F(test, freeErrorPropagation) { *ptr = malloc(size); return UMF_RESULT_SUCCESS; } - enum umf_result_t free(void *ptr, size_t size) noexcept { + enum umf_result_t free(void *ptr, + [[maybe_unused]] size_t size) noexcept { ::free(ptr); return freeReturn; } @@ -72,6 +74,15 @@ TEST_F(test, freeErrorPropagation) { INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfPoolTest, ::testing::Values(makePool)); +INSTANTIATE_TEST_SUITE_P( + disjointPoolTests, umfMemTest, + ::testing::Values(std::make_tuple( + [] { + return umf_test::makePoolWithOOMProvider( + static_cast(poolConfig().Capacity), poolConfig()); + }, + static_cast(poolConfig().Capacity) / 2))); + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfMultiPoolTest); INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, 
umfMultiPoolTest, ::testing::Values(makePool)); diff --git a/test/unit/utils/params.cpp b/test/unit/utils/params.cpp index 964d117e49..d6310f8bbd 100644 --- a/test/unit/utils/params.cpp +++ b/test/unit/utils/params.cpp @@ -17,30 +17,30 @@ template class ParamsTest : public testing::Test { T params; }; -struct UrInitParams { - ur_init_params_t params; +struct UrLoaderInitParams { + ur_loader_init_params_t params; ur_device_init_flags_t flags; ur_loader_config_handle_t config; - UrInitParams(ur_device_init_flags_t _flags) + UrLoaderInitParams(ur_device_init_flags_t _flags) : flags(_flags), config(nullptr) { params.pdevice_flags = &flags; params.phLoaderConfig = &config; } - ur_init_params_t *get_struct() { return ¶ms; } + ur_loader_init_params_t *get_struct() { return ¶ms; } }; -struct UrInitParamsNoFlags : UrInitParams { - UrInitParamsNoFlags() : UrInitParams(0) {} +struct UrLoaderInitParamsNoFlags : UrLoaderInitParams { + UrLoaderInitParamsNoFlags() : UrLoaderInitParams(0) {} const char *get_expected() { return ".device_flags = 0, .hLoaderConfig = nullptr"; }; }; -struct UrInitParamsInvalidFlags : UrInitParams { - UrInitParamsInvalidFlags() - : UrInitParams(UR_DEVICE_INIT_FLAG_GPU | UR_DEVICE_INIT_FLAG_MCA | - UR_BIT(25) | UR_BIT(30) | UR_BIT(31)) {} +struct UrLoaderInitParamsInvalidFlags : UrLoaderInitParams { + UrLoaderInitParamsInvalidFlags() + : UrLoaderInitParams(UR_DEVICE_INIT_FLAG_GPU | UR_DEVICE_INIT_FLAG_MCA | + UR_BIT(25) | UR_BIT(30) | UR_BIT(31)) {} const char *get_expected() { return ".device_flags = UR_DEVICE_INIT_FLAG_GPU \\| " "UR_DEVICE_INIT_FLAG_MCA \\| unknown bit flags " @@ -367,15 +367,41 @@ struct UrDevicePartitionPropertyTest { ur_device_partition_property_t prop; }; +struct UrSamplerAddressModesTest { + UrSamplerAddressModesTest() { + prop.addrModes[0] = UR_SAMPLER_ADDRESSING_MODE_CLAMP; + prop.addrModes[1] = UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + prop.addrModes[2] = UR_SAMPLER_ADDRESSING_MODE_REPEAT; + prop.pNext = nullptr; + 
prop.stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES; + } + ur_exp_sampler_addr_modes_t &get_struct() { return prop; } + const char *get_expected() { + return "\\(struct ur_exp_sampler_addr_modes_t\\)" + "\\{" + ".stype = UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES, " + ".pNext = nullptr, " + ".addrModes = \\{" + "UR_SAMPLER_ADDRESSING_MODE_CLAMP, " + "UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, " + "UR_SAMPLER_ADDRESSING_MODE_REPEAT" + "\\}" + "\\}"; + } + + ur_exp_sampler_addr_modes_t prop; +}; + using testing::Types; -typedef Types< - UrInitParamsNoFlags, UrInitParamsInvalidFlags, UrUsmHostAllocParamsEmpty, - UrPlatformGetEmptyArray, UrPlatformGetTwoPlatforms, - UrUsmHostAllocParamsUsmDesc, UrUsmHostAllocParamsHostDesc, - UrDeviceGetInfoParamsEmpty, UrDeviceGetInfoParamsName, - UrDeviceGetInfoParamsQueueFlag, UrDeviceGetInfoParamsPartitionArray, - UrContextGetInfoParamsDevicesArray, UrDeviceGetInfoParamsInvalidSize, - UrProgramMetadataTest, UrDevicePartitionPropertyTest> +typedef Types Implementations; using ::testing::MatchesRegex; diff --git a/test/usm/CMakeLists.txt b/test/usm/CMakeLists.txt index b673b6d1b9..fa5454d4db 100644 --- a/test/usm/CMakeLists.txt +++ b/test/usm/CMakeLists.txt @@ -10,6 +10,8 @@ function(add_usm_test name) add_ur_executable(${TEST_TARGET_NAME} ${UR_USM_TEST_DIR}/../conformance/source/environment.cpp ${UR_USM_TEST_DIR}/../conformance/source/main.cpp + ${UR_USM_TEST_DIR}/../unified_malloc_framework/common/provider.c + ${UR_USM_TEST_DIR}/../unified_malloc_framework/common/pool.c ${ARGN}) target_link_libraries(${TEST_TARGET_NAME} PRIVATE @@ -17,10 +19,12 @@ function(add_usm_test name) ${PROJECT_NAME}::loader ur_testing GTest::gtest_main) - add_test(NAME usm-${name} + add_test(NAME usm-${name} COMMAND ${TEST_TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_tests_properties(usm-${name} PROPERTIES LABELS "usm") + set_tests_properties(usm-${name} PROPERTIES + LABELS "usm" + ENVIRONMENT "UR_ADAPTERS_FORCE_LOAD=\"$\"") 
target_compile_definitions("usm_test-${name}" PRIVATE DEVICES_ENVIRONMENT) endfunction() diff --git a/test/usm/usmPoolManager.cpp b/test/usm/usmPoolManager.cpp index eaf44e119d..fe07d5ebe8 100644 --- a/test/usm/usmPoolManager.cpp +++ b/test/usm/usmPoolManager.cpp @@ -3,19 +3,18 @@ // See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "../unified_malloc_framework/common/pool.hpp" -#include "../unified_malloc_framework/common/provider.hpp" #include "ur_pool_manager.hpp" -#include +#include "../unified_malloc_framework/common/pool.h" +#include "../unified_malloc_framework/common/provider.h" -#include +#include -struct urUsmPoolManagerTest +struct urUsmPoolDescriptorTest : public uur::urMultiDeviceContextTest, ::testing::WithParamInterface {}; -TEST_P(urUsmPoolManagerTest, poolIsPerContextTypeAndDevice) { +TEST_P(urUsmPoolDescriptorTest, poolIsPerContextTypeAndDevice) { auto &devices = uur::DevicesEnvironment::instance->devices; auto poolHandle = this->GetParam(); @@ -49,7 +48,71 @@ TEST_P(urUsmPoolManagerTest, poolIsPerContextTypeAndDevice) { ASSERT_EQ(sharedPools, devices.size() * 2); } -INSTANTIATE_TEST_SUITE_P(urUsmPoolManagerTest, urUsmPoolManagerTest, +INSTANTIATE_TEST_SUITE_P(urUsmPoolDescriptorTest, urUsmPoolDescriptorTest, ::testing::Values(nullptr)); // TODO: add test with sub-devices + +struct urUsmPoolManagerTest : public uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urContextTest::SetUp()); + auto [ret, descs] = usm::pool_descriptor::create(nullptr, context); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + poolDescriptors = std::move(descs); + } + + std::vector poolDescriptors; +}; + +TEST_P(urUsmPoolManagerTest, poolManagerPopulate) { + auto [ret, manager] = usm::pool_manager::create(); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + for (auto &desc : poolDescriptors) { + // Populate the pool manager + auto pool = nullPoolCreate(); + ASSERT_NE(pool, nullptr); + auto poolUnique = 
umf::pool_unique_handle_t(pool, umfPoolDestroy); + ASSERT_NE(poolUnique, nullptr); + ret = manager.addPool(desc, poolUnique); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + } + + for (auto &desc : poolDescriptors) { + // Confirm that there is a pool for each descriptor + auto hPoolOpt = manager.getPool(desc); + ASSERT_TRUE(hPoolOpt.has_value()); + ASSERT_NE(hPoolOpt.value(), nullptr); + } +} + +TEST_P(urUsmPoolManagerTest, poolManagerInsertExisting) { + auto [ret, manager] = usm::pool_manager::create(); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + auto desc = poolDescriptors[0]; + + auto pool = nullPoolCreate(); + ASSERT_NE(pool, nullptr); + auto poolUnique = umf::pool_unique_handle_t(pool, umfPoolDestroy); + ASSERT_NE(poolUnique, nullptr); + + ret = manager.addPool(desc, poolUnique); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + // Inserting an existing key should return an error + ret = manager.addPool(desc, poolUnique); + ASSERT_EQ(ret, UR_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(urUsmPoolManagerTest, poolManagerGetNonexistant) { + auto [ret, manager] = usm::pool_manager::create(); + ASSERT_EQ(ret, UR_RESULT_SUCCESS); + + for (auto &desc : poolDescriptors) { + auto hPool = manager.getPool(desc); + ASSERT_FALSE(hPool.has_value()); + } +} + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urUsmPoolManagerTest); diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 3628039ede..9aff32b1a4 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -12,7 +12,7 @@ exhale==0.3.0 idna==2.8 imagesize==1.1.0 Jinja2==2.11.3 -lxml==4.9.1 +lxml==4.9.3 Mako==1.1.0 MarkupSafe==1.1.1 packaging==19.2 diff --git a/tools/urinfo/urinfo.cpp b/tools/urinfo/urinfo.cpp index 8002cf186b..7e33c15a93 100644 --- a/tools/urinfo/urinfo.cpp +++ b/tools/urinfo/urinfo.cpp @@ -26,7 +26,6 @@ struct app { UR_CHECK(urLoaderConfigCreate(&loaderConfig)); UR_CHECK(urLoaderConfigEnableLayer(loaderConfig, "UR_LAYER_FULL_VALIDATION")); - UR_CHECK(urInit(0, loaderConfig)); 
enumerateDevices(); } @@ -174,7 +173,6 @@ devices which are currently visible in the local execution environment. ~app() { urLoaderConfigRelease(loaderConfig); - urTearDown(nullptr); } }; } // namespace urinfo diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp index b502f0d802..2d454afc37 100644 --- a/tools/urtrace/collector.cpp +++ b/tools/urtrace/collector.cpp @@ -245,10 +245,10 @@ class JsonWriter : public TraceWriter { "\"tid\": \"\", \"ts\": \"\"}}"); out.info("]\n}}"); } - void begin(uint64_t id, const char *fname, std::string args) override {} + void begin(uint64_t, const char *, std::string) override {} - void end(uint64_t id, const char *fname, std::string args, Timepoint tp, - Timepoint start_tp, const ur_result_t *resultp) override { + void end(uint64_t, const char *fname, std::string args, Timepoint tp, + Timepoint start_tp, const ur_result_t *) override { auto dur = tp - start_tp; auto ts_us = std::chrono::duration_cast( tp.time_since_epoch()) @@ -314,10 +314,9 @@ std::optional pop_instance_data(uint64_t instance) { return data; } -XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, - xpti::trace_event_data_t *parent, - xpti::trace_event_data_t *event, - uint64_t instance, const void *user_data) { +XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, xpti::trace_event_data_t *, + xpti::trace_event_data_t *, uint64_t instance, + const void *user_data) { // stop the the clock as the very first thing, only used for TRACE_FN_END auto time_for_end = Clock::now(); auto *args = static_cast(user_data); @@ -366,8 +365,7 @@ XPTI_CALLBACK_API void trace_cb(uint16_t trace_type, * Called for every stream. */ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, - unsigned int minor_version, - const char *version_str, + unsigned int minor_version, const char *, const char *stream_name) { if (stream_name == nullptr) { out.debug("Found stream with null name. Skipping...");