diff --git a/.github/workflows/e2e_core.yml b/.github/workflows/e2e_core.yml index e4c59956dc..e2f374ee1a 100644 --- a/.github/workflows/e2e_core.yml +++ b/.github/workflows/e2e_core.yml @@ -66,10 +66,6 @@ jobs: ls -la ./ rm -rf ./* || true - - uses: xt0rted/pull-request-comment-branch@d97294d304604fa98a2600a6e2f916a84b596dc7 # v2.0.0 - id: comment-branch - if: ${{ always() && inputs.trigger != 'schedule' }} - - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.trigger != 'schedule' }} @@ -90,7 +86,18 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: path: ur-repo - ref: ${{ steps.comment-branch.outputs.head_ref }} + + # On issue_comment trigger (for PRs) we need to fetch special ref for + # proper PR's merge commit. Note, this ref may be absent if the PR is already merged. + - name: Fetch PR's merge commit + if: ${{ inputs.trigger != 'schedule' }} + working-directory: ${{github.workspace}}/ur-repo + env: + PR_NO: ${{github.event.issue.number}} + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* + git checkout origin/pr/${PR_NO}/merge + git rev-parse origin/pr/${PR_NO}/merge - name: Checkout SYCL uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -191,8 +198,9 @@ jobs: script: | const adapter = '${{ matrix.adapter.name }}'; const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - const status = '${{ steps.tests.outcome }}'; - const body = `E2E ${adapter} build: \n${url}\n Status: ${status}`; + const test_status = '${{ steps.tests.outcome }}'; + const job_status = '${{ job.status }}'; + const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`; github.rest.issues.createComment({ issue_number: context.issue.number, diff --git a/CMakeLists.txt b/CMakeLists.txt index c09b5f0a3c..9e0e430c66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,9 @@ set(UR_CONFORMANCE_TARGET_TRIPLES "" CACHE STRING "List of sycl targets to build CTS device binaries for") set(UR_CONFORMANCE_AMD_ARCH "" CACHE STRING "AMD device target ID to build CTS binaries for") +# There's little reason not to generate the compile_commands.json +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + include(Assertions) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) diff --git a/scripts/ctest_parser.py b/scripts/ctest_parser.py old mode 100644 new mode 100755 index 1f9c4f6cfe..bd1a84f3ee --- a/scripts/ctest_parser.py +++ b/scripts/ctest_parser.py @@ -33,8 +33,8 @@ def summarize_results(results): total_failed = len(results['Failed']) total_crashed = total - (total_passed + total_skipped + total_failed) - pass_rate_incl_skipped = percent(total_passed, total) - pass_rate_excl_skipped = percent(total_passed, total - total_skipped) + pass_rate_incl_skipped = percent(total_passed + total_skipped, total) + pass_rate_excl_skipped = percent(total_passed, total) skipped_rate = percent(total_skipped, total) failed_rate = percent(total_failed, total) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3f7970df53..d3f270c701 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -66,17 +66,20 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cuGraphDestroy(CudaGraph); // Release the memory allocated to the CudaGraphExec - cuGraphExecDestroy(CudaGraphExec); + if (CudaGraphExec) { + cuGraphExecDestroy(CudaGraphExec); + } } ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node{std::move(Node)}, + Params(Params), WorkDim(WorkDim), RefCountInternal(1), + RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); const size_t CopySize = sizeof(size_t) * WorkDim; @@ -365,7 +368,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( try { // Set node param structure with the kernel related data auto &ArgIndices = hKernel->getArgIndices(); - CUDA_KERNEL_NODE_PARAMS NodeParams; + CUDA_KERNEL_NODE_PARAMS NodeParams = {}; NodeParams.func = CuFunc; NodeParams.gridDimX = BlocksPerGrid[0]; NodeParams.gridDimY = BlocksPerGrid[1]; @@ -375,7 +378,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( NodeParams.blockDimZ = ThreadsPerBlock[2]; NodeParams.sharedMemBytes = LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); - NodeParams.extra = nullptr; // Create and add an new kernel node to the Cuda graph UR_CHECK_ERROR(cuGraphAddKernelNode(&GraphNode, hCommandBuffer->CudaGraph, @@ -392,8 +394,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, NodeSP, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + hCommandBuffer, hKernel, std::move(NodeSP), NodeParams, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; NewCommand->incrementInternalReferenceCount(); hCommandBuffer->CommandHandles.push_back(NewCommand); diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index e2b09059bf..60e03dc655 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -183,7 +183,7 @@ static inline const char *getUrResultString(ur_result_t Result) { struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr); diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index dc08af248a..8d2610626e 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -1006,17 +1006,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( ArrayDesc.Format = format; CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapDesc = {}; - mipmapDesc.numLevels = 1; + mipmapDesc.numLevels = pImageDesc->numMipLevel; mipmapDesc.arrayDesc = ArrayDesc; + // External memory is mapped to a CUmipmappedArray + // If desired, a CUarray is retrieved from the mipmaps 0th level CUmipmappedArray memMipMap; UR_CHECK_ERROR(cuExternalMemoryGetMappedMipmappedArray( &memMipMap, (CUexternalMemory)hInteropMem, &mipmapDesc)); - CUarray memArray; - UR_CHECK_ERROR(cuMipmappedArrayGetLevel(&memArray, memMipMap, 0)); + if (pImageDesc->numMipLevel > 1) { + *phImageMem = (ur_exp_image_mem_handle_t)memMipMap; + } else { + CUarray memArray; + UR_CHECK_ERROR(cuMipmappedArrayGetLevel(&memArray, memMipMap, 0)); - *phImageMem = (ur_exp_image_mem_handle_t)memArray; + *phImageMem = (ur_exp_image_mem_handle_t)memArray; + } } catch (ur_result_t Err) { return Err; diff --git a/source/adapters/cuda/tracing.cpp b/source/adapters/cuda/tracing.cpp index e3acf03165..1552e5e236 100644 --- a/source/adapters/cuda/tracing.cpp +++ b/source/adapters/cuda/tracing.cpp @@ -27,20 +27,20 @@ using tracing_event_t = xpti_td *; using subscriber_handle_t = CUpti_SubscriberHandle; -using cuptiSubscribe_fn = CUPTIAPI -CUptiResult (*)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, - void *userdata); +using cuptiSubscribe_fn = + CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle *subscriber, + CUpti_CallbackFunc callback, void *userdata); -using cuptiUnsubscribe_fn = CUPTIAPI -CUptiResult (*)(CUpti_SubscriberHandle subscriber); +using cuptiUnsubscribe_fn = + CUptiResult(CUPTIAPI *)(CUpti_SubscriberHandle subscriber); -using cuptiEnableDomain_fn = CUPTIAPI -CUptiResult (*)(uint32_t enable, CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain); +using cuptiEnableDomain_fn = + CUptiResult(CUPTIAPI *)(uint32_t enable, CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain); -using cuptiEnableCallback_fn = CUPTIAPI -CUptiResult (*)(uint32_t enable, CUpti_SubscriberHandle subscriber, - CUpti_CallbackDomain domain, CUpti_CallbackId cbid); +using cuptiEnableCallback_fn = + CUptiResult(CUPTIAPI *)(uint32_t enable, CUpti_SubscriberHandle subscriber, + CUpti_CallbackDomain domain, CUpti_CallbackId cbid); #define LOAD_CUPTI_SYM(p, lib, x) \ p.x = (cupti##x##_fn)ur_loader::LibLoader::getFunctionPtr(lib.get(), \ diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 9fd3a06927..180c90d064 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -50,7 +50,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, - RefCountInternal{1}, RefCountExternal{1} { + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -65,11 +65,11 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { UR_TRACE(urDeviceRelease(Device)); // Release the memory allocated to the HIPGraph - UR_CHECK_ERROR(hipGraphDestroy(HIPGraph)); + (void)hipGraphDestroy(HIPGraph); // Release the memory allocated to the HIPGraphExec if (HIPGraphExec) { - UR_CHECK_ERROR(hipGraphExecDestroy(HIPGraphExec)); + (void)hipGraphExecDestroy(HIPGraphExec); } } diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index f1b3e32bfb..53b648b5cd 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -254,8 +254,8 @@ struct ur_exp_command_buffer_handle_t_ { ~ur_exp_command_buffer_handle_t_(); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, - std::shared_ptr HIPNode) { - SyncPoints[SyncPoint] = HIPNode; + std::shared_ptr &&HIPNode) { + SyncPoints[SyncPoint] = std::move(HIPNode); NextSyncPoint++; } @@ -269,7 +269,7 @@ struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_sync_point_t addSyncPoint(std::shared_ptr HIPNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - registerSyncPoint(SyncPoint, HIPNode); + registerSyncPoint(SyncPoint, std::move(HIPNode)); return SyncPoint; } uint32_t incrementInternalReferenceCount() noexcept { diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 62ba260dea..d26d0aeb26 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -122,6 +122,13 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) +if(NOT WIN32) + target_sources(ur_adapter_level_zero + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp + ) +endif() + # TODO: fix level_zero adapter conversion warnings target_compile_options(${TARGET_NAME} PRIVATE $<$:/wd4805 /wd4244> diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 77c0edfb45..39a0934b4f 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -11,6 +11,14 @@ #include "adapter.hpp" #include "ur_level_zero.hpp" +// Due to multiple DLLMain definitions with SYCL, Global Adapter is init at +// variable creation. +#if defined(_WIN32) +ur_adapter_handle_t_ *GlobalAdapter = new ur_adapter_handle_t_(); +#else +ur_adapter_handle_t_ *GlobalAdapter; +#endif + ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { uint32_t ZeDriverCount = 0; ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr)); @@ -37,8 +45,7 @@ ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { ur_result_t adapterStateInit() { return UR_RESULT_SUCCESS; } ur_adapter_handle_t_::ur_adapter_handle_t_() { - - Adapter.PlatformCache.Compute = [](Result &result) { + PlatformCache.Compute = [](Result &result) { static std::once_flag ZeCallCountInitialized; try { std::call_once(ZeCallCountInitialized, []() { @@ -52,7 +59,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { } // initialize level zero only once. - if (Adapter.ZeResult == std::nullopt) { + if (GlobalAdapter->ZeResult == std::nullopt) { // Setting these environment variables before running zeInit will enable // the validation layer in the Level Zero loader. if (UrL0Debug & UR_L0_DEBUG_VALIDATION) { @@ -71,20 +78,21 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { // We must only initialize the driver once, even if urPlatformGet() is // called multiple times. Declaring the return value as "static" ensures // it's only called once. - Adapter.ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY)); + GlobalAdapter->ZeResult = + ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY)); } - assert(Adapter.ZeResult != + assert(GlobalAdapter->ZeResult != std::nullopt); // verify that level-zero is initialized PlatformVec platforms; // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. - if (*Adapter.ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { + if (*GlobalAdapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { result = std::move(platforms); return; } - if (*Adapter.ZeResult != ZE_RESULT_SUCCESS) { + if (*GlobalAdapter->ZeResult != ZE_RESULT_SUCCESS) { urPrint("zeInit: Level Zero initialization failure\n"); - result = ze2urResult(*Adapter.ZeResult); + result = ze2urResult(*GlobalAdapter->ZeResult); return; } @@ -97,7 +105,11 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { }; } -ur_adapter_handle_t_ Adapter{}; +void globalAdapterOnDemandCleanup() { + if (GlobalAdapter) { + delete GlobalAdapter; + } +} ur_result_t adapterStateTeardown() { bool LeakFound = false; @@ -184,6 +196,11 @@ ur_result_t adapterStateTeardown() { } if (LeakFound) return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + // Due to multiple DLLMain definitions with SYCL, register to cleanup the + // Global Adapter after refcnt is 0 +#if defined(_WIN32) + std::atexit(globalAdapterOnDemandCleanup); +#endif return UR_RESULT_SUCCESS; } @@ -203,11 +220,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( ///< adapters available. ) { if (NumEntries > 0 && Adapters) { - std::lock_guard Lock{Adapter.Mutex}; - if (Adapter.RefCount++ == 0) { - adapterStateInit(); + if (GlobalAdapter) { + std::lock_guard Lock{GlobalAdapter->Mutex}; + if (GlobalAdapter->RefCount++ == 0) { + adapterStateInit(); + } + } else { + // If the GetAdapter is called after the Library began or was torndown, + // then temporarily create a new Adapter handle and register a new + // cleanup. + GlobalAdapter = new ur_adapter_handle_t_(); + std::lock_guard Lock{GlobalAdapter->Mutex}; + if (GlobalAdapter->RefCount++ == 0) { + adapterStateInit(); + } + std::atexit(globalAdapterOnDemandCleanup); } - *Adapters = &Adapter; + *Adapters = GlobalAdapter; } if (NumAdapters) { @@ -218,17 +247,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - std::lock_guard Lock{Adapter.Mutex}; - if (--Adapter.RefCount == 0) { - return adapterStateTeardown(); + // Check first if the Adapter pointer is valid + if (GlobalAdapter) { + std::lock_guard Lock{GlobalAdapter->Mutex}; + if (--GlobalAdapter->RefCount == 0) { + return adapterStateTeardown(); + } } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - std::lock_guard Lock{Adapter.Mutex}; - Adapter.RefCount++; + if (GlobalAdapter) { + std::lock_guard Lock{GlobalAdapter->Mutex}; + GlobalAdapter->RefCount++; + } return UR_RESULT_SUCCESS; } @@ -257,7 +291,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, case UR_ADAPTER_INFO_BACKEND: return ReturnValue(UR_ADAPTER_BACKEND_LEVEL_ZERO); case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(Adapter.RefCount.load()); + return ReturnValue(GlobalAdapter->RefCount.load()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/level_zero/adapter.hpp b/source/adapters/level_zero/adapter.hpp index 0942db852a..1fdf3a9294 100644 --- a/source/adapters/level_zero/adapter.hpp +++ b/source/adapters/level_zero/adapter.hpp @@ -25,4 +25,4 @@ struct ur_adapter_handle_t_ { ZeCache> PlatformCache; }; -extern ur_adapter_handle_t_ Adapter; +extern ur_adapter_handle_t_ *GlobalAdapter; diff --git a/source/adapters/level_zero/adapter_lib_init_linux.cpp b/source/adapters/level_zero/adapter_lib_init_linux.cpp new file mode 100644 index 0000000000..bb6f1d4e6d --- /dev/null +++ b/source/adapters/level_zero/adapter_lib_init_linux.cpp @@ -0,0 +1,25 @@ +//===--------- adapter_lib_init_linux.cpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "adapter.hpp" +#include "ur_level_zero.hpp" + +void __attribute__((constructor)) createAdapterHandle() { + if (!GlobalAdapter) { + GlobalAdapter = new ur_adapter_handle_t_(); + } +} + +void __attribute__((destructor)) deleteAdapterHandle() { + if (GlobalAdapter) { + delete GlobalAdapter; + GlobalAdapter = nullptr; + } +} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 2f6b3a91ff..9fae1ed4af 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1054,6 +1054,19 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() { return EnableRelaxedAllocationLimits; } +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + static const bool UseDriverInOrderLists = [] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + if (!UrRet) + return false; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. @@ -1442,7 +1455,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( // a valid Level Zero device. ur_device_handle_t Dev = nullptr; - if (const auto *platforms = Adapter.PlatformCache->get_value()) { + if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { for (const auto &p : *platforms) { Dev = p->getDeviceFromNativeHandle(ZeDevice); if (Dev) { @@ -1453,7 +1466,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( } } } else { - return Adapter.PlatformCache->get_error(); + return GlobalAdapter->PlatformCache->get_error(); } if (Dev == nullptr) diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 94480336c5..a57a97d38d 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object { // Read env settings to select immediate commandlist mode. ImmCmdlistMode useImmediateCommandLists(); + // Whether Adapter uses driver's implementation of in-order lists or not + bool useDriverInOrderLists(); + // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 57b839a714..c9d1c7d6b4 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -43,6 +43,20 @@ static const bool UseMultipleCmdlistBarriers = [] { return std::atoi(UseMultipleCmdlistBarriersFlag) > 0; }(); +bool WaitListEmptyOrAllEventsFromSameQueue( + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList) { + if (!NumEventsInWaitList) + return true; + + for (uint32_t i = 0; i < NumEventsInWaitList; ++i) { + if (Queue != EventWaitList[i]->UrQueue) + return false; + } + + return true; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -206,21 +220,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( bool IsInternal = OutEvent == nullptr; ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; - auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList, - EventWaitList]() { - if (!NumEventsInWaitList) - return true; - - for (uint32_t I = 0; I < NumEventsInWaitList; ++I) - if (Queue != EventWaitList[I]->UrQueue) - return false; - - return true; - }; - // For in-order queue and wait-list which is empty or has events from // the same queue just use the last command event as the barrier event. - if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue() && + if (Queue->isInOrderQueue() && + WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, + EventWaitList) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { UR_CALL(urEventRetain(Queue->LastCommandEvent)); *Event = Queue->LastCommandEvent; @@ -1189,6 +1193,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded) IncludeLastCommandEvent = false; + // If we are using L0 native implementation for handling in-order queues, + // then we don't need to add the last enqueued event into the waitlist, as + // the native driver implementation will already ensure in-order semantics. + // The only exception is when a different immediate command was last used on + // the same UR Queue. + if (CurQueue->Device->useDriverInOrderLists() && CurQueue->isInOrderQueue() && + CurQueue->UsingImmCmdLists) { + auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine); + uint32_t QueueGroupOrdinal, QueueIndex; + auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex, + /*QueryOnly */ true); + auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex]; + IncludeLastCommandEvent &= + CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() && + NextImmCmdList != CurQueue->LastUsedCommandList; + } + try { uint32_t TmpListLength = 0; @@ -1205,6 +1226,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( this->UrEventList = new ur_event_handle_t[EventListLength]; } + // For in-order queue and wait-list which is empty or has events only from + // the same queue then we don't need to wait on any other additional events + if (CurQueue->Device->useDriverInOrderLists() && + CurQueue->isInOrderQueue() && + WaitListEmptyOrAllEventsFromSameQueue(CurQueue, EventListLength, + EventList)) { + this->Length = TmpListLength; + return UR_RESULT_SUCCESS; + } + if (EventListLength > 0) { for (uint32_t I = 0; I < EventListLength; I++) { { diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index 0d986de0ad..582121d91a 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -738,13 +738,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( hContext, hDevice, hImageMem, pImageFormat, pImageDesc, phMem, phImage)); struct combined_sampled_image_handle { - uint64_t raw_image_handle; - uint64_t raw_sampler_handle; + uint64_t RawImageHandle; + uint64_t RawSamplerHandle; }; - combined_sampled_image_handle *sampledImageHandle = + auto *SampledImageHandle = reinterpret_cast(phImage); - sampledImageHandle->raw_image_handle = reinterpret_cast(*phImage); - sampledImageHandle->raw_sampler_handle = + SampledImageHandle->RawSamplerHandle = reinterpret_cast(hSampler->ZeSampler); return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 0e5ce3215a..c40e4ef0e3 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // the code can do a urKernelRelease on this kernel. (*Event)->CommandData = (void *)Kernel; - // Increment the reference count of the Kernel and indicate that the Kernel is - // in use. Once the event has been signalled, the code in + // Increment the reference count of the Kernel and indicate that the Kernel + // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. UR_CALL(urKernelRetain(Kernel)); diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index cca3b2173d..ab577247bd 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -29,7 +29,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( ) { // Platform handles are cached for reuse. This is to ensure consistent // handle pointers across invocations and to improve retrieval performance. - if (const auto *cached_platforms = Adapter.PlatformCache->get_value(); + if (const auto *cached_platforms = GlobalAdapter->PlatformCache->get_value(); cached_platforms) { uint32_t nplatforms = (uint32_t)cached_platforms->size(); if (NumPlatforms) { @@ -41,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( } } } else { - return Adapter.PlatformCache->get_error(); + return GlobalAdapter->PlatformCache->get_error(); } return UR_RESULT_SUCCESS; @@ -133,7 +133,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( auto ZeDriver = ur_cast(NativePlatform); uint32_t NumPlatforms = 0; - ur_adapter_handle_t AdapterHandle = &Adapter; + ur_adapter_handle_t AdapterHandle = GlobalAdapter; UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); if (NumPlatforms) { diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 2009c3c6f5..187f4f75f9 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -569,7 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( // Maybe this is not completely correct. uint32_t NumEntries = 1; ur_platform_handle_t Platform{}; - ur_adapter_handle_t AdapterHandle = &Adapter; + ur_adapter_handle_t AdapterHandle = GlobalAdapter; UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr)); ur_device_handle_t UrDevice = Device; @@ -1870,6 +1870,10 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + if (Device->useDriverInOrderLists() && isInOrderQueue()) { + ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER; + } + ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice, &ZeCommandListDesc, &ZeCommandList)); @@ -1985,7 +1989,11 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { // Evaluate performance of explicit usage for "0" index. if (QueueIndex != 0) { - ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; + ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; + } + + if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER; } // Check if context's command list cache has an immediate command list with diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 8ae1e77755..4c1bb6bca1 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -15,18 +15,36 @@ struct ur_adapter_handle_t_ { std::mutex Mutex; }; -ur_adapter_handle_t_ adapter{}; +static ur_adapter_handle_t_ *adapter = nullptr; + +static void globalAdapterShutdown() { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + if (adapter) { + delete adapter; + adapter = nullptr; + } +} UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { - std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = std::make_unique(); + // Sometimes urAdaterGet may be called after the library already been torn + // down, we also need to create a temporary handle for it. + if (!adapter) { + adapter = new ur_adapter_handle_t_(); + atexit(globalAdapterShutdown); } - *phAdapters = &adapter; + std::lock_guard Lock{adapter->Mutex}; + if (adapter->RefCount++ == 0) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); + } + + *phAdapters = adapter; } if (pNumAdapters) { @@ -37,14 +55,20 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - ++adapter.RefCount; + ++adapter->RefCount; return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - cl_ext::ExtFuncPtrCache.reset(); + // Check first if the adapter is valid pointer + if (adapter) { + std::lock_guard Lock{adapter->Mutex}; + if (--adapter->RefCount == 0) { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + } } return UR_RESULT_SUCCESS; } @@ -68,7 +92,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, case UR_ADAPTER_INFO_BACKEND: return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(adapter.RefCount.load()); + return ReturnValue(adapter->RefCount.load()); default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0667cd3d17..43d1c12b1e 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -349,7 +349,7 @@ struct ExtFuncPtrCacheT { // piTeardown to avoid issues with static destruction order (a user application // might have static objects that indirectly access this cache in their // destructor). -inline std::unique_ptr ExtFuncPtrCache; +inline ExtFuncPtrCacheT *ExtFuncPtrCache; // USM helper function to get an extension function pointer template diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index 0e350ef3fa..d2ed4853a8 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -224,10 +224,6 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, if (!hPlatform) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - // NumEntries is max number of devices wanted by the caller (max usable length of phDevices) - if (NumEntries < 0) { - return UR_RESULT_ERROR_INVALID_SIZE; - } if (NumEntries > 0 && !phDevices) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -426,8 +422,10 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, for (auto &termPair : mapODS) { std::string backend = termPair.first; - if (backend - .empty()) { // FIXME: never true because getenv_to_map rejects this case + // TODO: Figure out how to process all ODS errors rather than returning + // on the first error. + if (backend.empty()) { + // FIXME: never true because getenv_to_map rejects this case // malformed term: missing backend -- output ERROR, then continue logger::error("ERROR: missing backend, format of filter = " "'[!]backend:filterStrings'"); @@ -459,20 +457,19 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, std::tolower(static_cast(b)); })) { // irrelevant term for current request: different backend -- silently ignore - logger::warning( - "WARNING: ignoring term with irrelevant backend '{}'", backend); - continue; + logger::error("unrecognised backend '{}'", backend); + return UR_RESULT_ERROR_INVALID_VALUE; } if (termPair.second.size() == 0) { - // malformed term: missing filterStrings -- output ERROR, then continue - logger::error("ERROR missing filterStrings, format of filter = " + // malformed term: missing filterStrings -- output ERROR + logger::error("missing filterStrings, format of filter = " "'[!]backend:filterStrings'"); - continue; + return UR_RESULT_ERROR_INVALID_VALUE; } if (std::find_if(termPair.second.cbegin(), termPair.second.cend(), [](const auto &s) { return s.empty(); }) != - termPair.second - .cend()) { // FIXME: never true because getenv_to_map rejects this case + termPair.second.cend()) { + // FIXME: never true because getenv_to_map rejects this case // malformed term: missing filterString -- output warning, then continue logger::warning( "WARNING: empty filterString, format of filterStrings " @@ -483,10 +480,10 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, [](const auto &s) { return std::count(s.cbegin(), s.cend(), '.') > 2; }) != termPair.second.cend()) { - // malformed term: too many dots in filterString -- output warning, then continue - logger::warning("WARNING: too many dots in filterString, format of " - "filterString = 'root[.sub[.subsub]]'"); - continue; + // malformed term: too many dots in filterString + logger::error("too many dots in filterString, format of " + "filterString = 'root[.sub[.subsub]]'"); + return UR_RESULT_ERROR_INVALID_VALUE; } if (std::find_if( termPair.second.cbegin(), termPair.second.cend(), @@ -504,10 +501,9 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, } return false; // no BAD things, so must be okay }) != termPair.second.cend()) { - // malformed term: star dot no-star in filterString -- output warning, then continue - logger::warning( - "WARNING: invalid wildcard in filterString, '*.' => '*.*'"); - continue; + // malformed term: star dot no-star in filterString + logger::error("invalid wildcard in filterString, '*.' => '*.*'"); + return UR_RESULT_ERROR_INVALID_VALUE; } // TODO -- use regex validation_pattern to catch all other syntax errors in the ODS string @@ -552,7 +548,7 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, if (acceptDeviceList.size() == 0 && discardDeviceList.size() == 0) { // nothing in env var was understood as a valid term - return UR_RESULT_ERROR_INVALID_VALUE; + return UR_RESULT_SUCCESS; } else if (acceptDeviceList.size() == 0) { // no accept terms were understood, but at least one discard term was // we are magnanimous to the user when there were bad/ignored accept terms diff --git a/test/conformance/device/urDeviceGet.cpp b/test/conformance/device/urDeviceGet.cpp index e8aa356a58..5ce4c45906 100644 --- a/test/conformance/device/urDeviceGet.cpp +++ b/test/conformance/device/urDeviceGet.cpp @@ -35,6 +35,42 @@ TEST_F(urDeviceGetTest, SuccessSubsetOfDevices) { } } +struct urDeviceGetTestWithDeviceTypeParam + : uur::urAllDevicesTest, + ::testing::WithParamInterface { + + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urAllDevicesTest::SetUp()); + } +}; + +INSTANTIATE_TEST_SUITE_P( + , urDeviceGetTestWithDeviceTypeParam, + ::testing::Values(UR_DEVICE_TYPE_DEFAULT, UR_DEVICE_TYPE_GPU, + UR_DEVICE_TYPE_CPU, UR_DEVICE_TYPE_FPGA, + UR_DEVICE_TYPE_MCA, UR_DEVICE_TYPE_VPU), + [](const ::testing::TestParamInfo &info) { + std::stringstream ss; + ss << info.param; + return ss.str(); + }); + +TEST_P(urDeviceGetTestWithDeviceTypeParam, Success) { + ur_device_type_t device_type = GetParam(); + uint32_t count = 0; + ASSERT_SUCCESS(urDeviceGet(platform, device_type, 0, nullptr, &count)); + ASSERT_GE(devices.size(), count); + + if (count > 0) { + std::vector devices(count); + ASSERT_SUCCESS( + urDeviceGet(platform, device_type, count, devices.data(), nullptr)); + for (auto device : devices) { + ASSERT_NE(nullptr, device); + } + } +} + TEST_F(urDeviceGetTest, InvalidNullHandlePlatform) { uint32_t count; ASSERT_EQ_RESULT( diff --git a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp index 247c9bc3df..4e9af10592 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferMap.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferMap.cpp @@ -21,14 +21,21 @@ TEST_P(urEnqueueMemBufferMapTest, SuccessRead) { } } -TEST_P(urEnqueueMemBufferMapTest, SuccessWrite) { +using urEnqueueMemBufferMapTestWithWriteFlagParam = + uur::urMemBufferQueueTestWithParam; +UUR_TEST_SUITE_P(urEnqueueMemBufferMapTestWithWriteFlagParam, + ::testing::Values(UR_MAP_FLAG_WRITE, + UR_MAP_FLAG_WRITE_INVALIDATE_REGION), + uur::deviceTestWithParamPrinter); + +TEST_P(urEnqueueMemBufferMapTestWithWriteFlagParam, SuccessWrite) { const std::vector input(count, 0); ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, true, 0, size, input.data(), 0, nullptr, nullptr)); uint32_t *map = nullptr; - ASSERT_SUCCESS(urEnqueueMemBufferMap(queue, buffer, true, UR_MAP_FLAG_WRITE, - 0, size, 0, nullptr, nullptr, + ASSERT_SUCCESS(urEnqueueMemBufferMap(queue, buffer, true, getParam(), 0, + size, 0, nullptr, nullptr, (void **)&map)); for (unsigned i = 0; i < count; ++i) { map[i] = 42; diff --git a/test/conformance/memory/urMemBufferCreate.cpp b/test/conformance/memory/urMemBufferCreate.cpp index a86ebd326e..2e9b46114d 100644 --- a/test/conformance/memory/urMemBufferCreate.cpp +++ b/test/conformance/memory/urMemBufferCreate.cpp @@ -48,6 +48,18 @@ UUR_TEST_SUITE_P(urMemBufferCreateWithHostPtrFlagsTest, UR_MEM_FLAG_USE_HOST_POINTER), uur::deviceTestWithParamPrinter); +TEST_P(urMemBufferCreateWithHostPtrFlagsTest, SUCCESS) { + uur::raii::Mem host_ptr_buffer = nullptr; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_ALLOC_HOST_POINTER, + 4096, nullptr, host_ptr_buffer.ptr())); + + ur_buffer_properties_t properties{UR_STRUCTURE_TYPE_BUFFER_PROPERTIES, + nullptr, host_ptr_buffer.ptr()}; + uur::raii::Mem buffer = nullptr; + ASSERT_SUCCESS(urMemBufferCreate(context, getParam(), 4096, &properties, + buffer.ptr())); +} + TEST_P(urMemBufferCreateWithHostPtrFlagsTest, InvalidHostPtr) { uur::raii::Mem buffer = nullptr; ASSERT_EQ_RESULT( diff --git a/test/conformance/platform/urPlatformGetInfo.cpp b/test/conformance/platform/urPlatformGetInfo.cpp index fa95662ec6..1dc92b26d7 100644 --- a/test/conformance/platform/urPlatformGetInfo.cpp +++ b/test/conformance/platform/urPlatformGetInfo.cpp @@ -19,7 +19,7 @@ INSTANTIATE_TEST_SUITE_P( urPlatformGetInfo, urPlatformGetInfoTest, ::testing::Values(UR_PLATFORM_INFO_NAME, UR_PLATFORM_INFO_VENDOR_NAME, UR_PLATFORM_INFO_VERSION, UR_PLATFORM_INFO_EXTENSIONS, - UR_PLATFORM_INFO_PROFILE), + UR_PLATFORM_INFO_PROFILE, UR_PLATFORM_INFO_BACKEND), [](const ::testing::TestParamInfo &info) { std::stringstream ss; ss << info.param; @@ -30,11 +30,17 @@ TEST_P(urPlatformGetInfoTest, Success) { size_t size = 0; ur_platform_info_t info_type = GetParam(); ASSERT_SUCCESS(urPlatformGetInfo(platform, info_type, 0, nullptr, &size)); - ASSERT_NE(size, 0); + if (info_type == UR_PLATFORM_INFO_BACKEND) { + ASSERT_EQ(size, sizeof(ur_platform_backend_t)); + } else { + ASSERT_NE(size, 0); + } std::vector name(size); ASSERT_SUCCESS( urPlatformGetInfo(platform, info_type, size, name.data(), nullptr)); - ASSERT_EQ(size, std::strlen(name.data()) + 1); + if (info_type != UR_PLATFORM_INFO_BACKEND) { + ASSERT_EQ(size, std::strlen(name.data()) + 1); + } } TEST_P(urPlatformGetInfoTest, InvalidNullHandlePlatform) { diff --git a/test/conformance/program/urProgramBuild.cpp b/test/conformance/program/urProgramBuild.cpp index ec65074c78..97e4db77e3 100644 --- a/test/conformance/program/urProgramBuild.cpp +++ b/test/conformance/program/urProgramBuild.cpp @@ -12,6 +12,11 @@ TEST_P(urProgramBuildTest, Success) { ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); } +TEST_P(urProgramBuildTest, SuccessWithOptions) { + const char *pOptions = ""; + ASSERT_SUCCESS(urProgramBuild(context, program, pOptions)); +} + TEST_P(urProgramBuildTest, InvalidNullHandleContext) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urProgramBuild(nullptr, program, nullptr)); diff --git a/test/conformance/program/urProgramCreateWithIL.cpp b/test/conformance/program/urProgramCreateWithIL.cpp index b439850bac..00f41da6ef 100644 --- a/test/conformance/program/urProgramCreateWithIL.cpp +++ b/test/conformance/program/urProgramCreateWithIL.cpp @@ -36,6 +36,15 @@ TEST_P(urProgramCreateWithILTest, Success) { ASSERT_SUCCESS(urProgramRelease(program)); } +TEST_P(urProgramCreateWithILTest, SuccessWithProperties) { + ur_program_properties_t properties{UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES}; + ur_program_handle_t program = nullptr; + ASSERT_SUCCESS(urProgramCreateWithIL( + context, il_binary->data(), il_binary->size(), &properties, &program)); + ASSERT_NE(nullptr, program); + ASSERT_SUCCESS(urProgramRelease(program)); +} + TEST_P(urProgramCreateWithILTest, InvalidNullHandle) { ur_program_handle_t program = nullptr; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, diff --git a/test/conformance/queue/queue_adapter_native_cpu.match b/test/conformance/queue/queue_adapter_native_cpu.match index 98c622a472..c2887b1063 100644 --- a/test/conformance/queue/queue_adapter_native_cpu.match +++ b/test/conformance/queue/queue_adapter_native_cpu.match @@ -1,5 +1,14 @@ urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_PROFILING_ENABLE +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_ON_DEVICE +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_ON_DEVICE_DEFAULT +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_DISCARD_EVENTS +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_PRIORITY_LOW +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_PRIORITY_HIGH +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_SUBMISSION_BATCHED +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_USE_DEFAULT_STREAM +urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM urQueueFinishTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urQueueFlushTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_ urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___UR_QUEUE_INFO_CONTEXT diff --git a/test/conformance/queue/urQueueCreate.cpp b/test/conformance/queue/urQueueCreate.cpp index 1c5ca6e614..03cda76d50 100644 --- a/test/conformance/queue/urQueueCreate.cpp +++ b/test/conformance/queue/urQueueCreate.cpp @@ -17,7 +17,16 @@ TEST_P(urQueueCreateTest, Success) { using urQueueCreateWithParamTest = uur::urContextTestWithParam; UUR_TEST_SUITE_P(urQueueCreateWithParamTest, testing::Values(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, - UR_QUEUE_FLAG_PROFILING_ENABLE), + UR_QUEUE_FLAG_PROFILING_ENABLE, + UR_QUEUE_FLAG_ON_DEVICE, + UR_QUEUE_FLAG_ON_DEVICE_DEFAULT, + UR_QUEUE_FLAG_DISCARD_EVENTS, + UR_QUEUE_FLAG_PRIORITY_LOW, + UR_QUEUE_FLAG_PRIORITY_HIGH, + UR_QUEUE_FLAG_SUBMISSION_BATCHED, + UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE, + UR_QUEUE_FLAG_USE_DEFAULT_STREAM, + UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM), uur::deviceTestWithParamPrinter); TEST_P(urQueueCreateWithParamTest, SuccessWithProperties) { diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index cf01015eb4..8e0c86f9b3 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -526,6 +526,26 @@ struct urMemBufferQueueTest : urQueueTest { ur_mem_handle_t buffer = nullptr; }; +template +struct urMemBufferQueueTestWithParam : urQueueTestWithParam { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::SetUp()); + ASSERT_SUCCESS(urMemBufferCreate(this->context, UR_MEM_FLAG_READ_WRITE, + size, nullptr, &buffer)); + } + + void TearDown() override { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam::TearDown()); + } + + const size_t count = 8; + const size_t size = sizeof(uint32_t) * count; + ur_mem_handle_t buffer = nullptr; +}; + struct urMemImageQueueTest : urQueueTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urQueueTest::SetUp()); diff --git a/tools/urinfo/urinfo.cpp b/tools/urinfo/urinfo.cpp index d1463ea4fa..bf7173234b 100644 --- a/tools/urinfo/urinfo.cpp +++ b/tools/urinfo/urinfo.cpp @@ -14,6 +14,8 @@ namespace urinfo { struct app { bool verbose = false; + bool linear_ids = true; + bool ignore_device_selector = false; ur_loader_config_handle_t loaderConfig = nullptr; std::vector adapters; std::unordered_map> @@ -23,6 +25,16 @@ struct app { app(int argc, const char **argv) { parseArgs(argc, argv); + if (!ignore_device_selector) { + if (auto device_selector = std::getenv("ONEAPI_DEVICE_SELECTOR")) { + std::fprintf(stderr, + "info: Output filtered by ONEAPI_DEVICE_SELECTOR " + "environment variable, which is set to \"%s\".\n" + "To see all devices, use the " + "--ignore-device-selector CLI option.\n\n", + device_selector); + } + } UR_CHECK(urLoaderConfigCreate(&loaderConfig)); UR_CHECK(urLoaderConfigEnableLayer(loaderConfig, "UR_LAYER_FULL_VALIDATION")); @@ -40,6 +52,10 @@ devices which are currently visible in the local execution environment. -h, --help show this help message and exit --version show version number and exit -v, --verbose print additional information + --no-linear-ids do not show linear device ids + --ignore-device-selector + do not use ONEAPI_DEVICE_SELECTOR to filter list of + devices )"; for (int argi = 1; argi < argc; argi++) { std::string_view arg{argv[argi]}; @@ -51,6 +67,10 @@ devices which are currently visible in the local execution environment. std::exit(0); } else if (arg == "-v" || arg == "--verbose") { verbose = true; + } else if (arg == "--no-linear-ids") { + linear_ids = false; + } else if (arg == "--ignore-device-selector") { + ignore_device_selector = true; } else { std::fprintf(stderr, "error: invalid argument: %s\n", argv[argi]); @@ -65,12 +85,14 @@ devices which are currently visible in the local execution environment. uint32_t numAdapters = 0; UR_CHECK(urAdapterGet(0, nullptr, &numAdapters)); if (numAdapters == 0) { - std::cout << "No adapters found.\n"; std::exit(0); } adapters.resize(numAdapters); UR_CHECK(urAdapterGet(numAdapters, adapters.data(), nullptr)); + auto urDeviceGetFn = + ignore_device_selector ? urDeviceGet : urDeviceGetSelected; + for (size_t adapterIndex = 0; adapterIndex < adapters.size(); adapterIndex++) { auto adapter = adapters[adapterIndex]; @@ -78,8 +100,6 @@ devices which are currently visible in the local execution environment. uint32_t numPlatforms = 0; UR_CHECK(urPlatformGet(&adapter, 1, 0, nullptr, &numPlatforms)); if (numPlatforms == 0) { - std::cout << "No platforms found in adapter " << adapterIndex - << ".\n"; continue; } adapterPlatformsMap[adapter].resize(numPlatforms); @@ -92,17 +112,15 @@ devices which are currently visible in the local execution environment. auto platform = adapterPlatformsMap[adapter][platformIndex]; // Enumerate devices uint32_t numDevices = 0; - UR_CHECK(urDeviceGet(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, - &numDevices)); + UR_CHECK(urDeviceGetFn(platform, UR_DEVICE_TYPE_ALL, 0, nullptr, + &numDevices)); if (numDevices == 0) { - std::cout << "No devices found platform " << platformIndex - << ".\n"; continue; } platformDevicesMap[platform].resize(numDevices); - UR_CHECK(urDeviceGet(platform, UR_DEVICE_TYPE_ALL, numDevices, - platformDevicesMap[platform].data(), - nullptr)); + UR_CHECK(urDeviceGetFn(platform, UR_DEVICE_TYPE_ALL, numDevices, + platformDevicesMap[platform].data(), + nullptr)); } } } @@ -112,6 +130,8 @@ devices which are currently visible in the local execution environment. adapterIndex++) { auto adapter = adapters[adapterIndex]; auto &platforms = adapterPlatformsMap[adapter]; + size_t adapter_device_id = 0; + std::string adapter_backend = urinfo::getAdapterBackend(adapter); for (size_t platformIndex = 0; platformIndex < platforms.size(); platformIndex++) { auto platform = platforms[platformIndex]; @@ -119,16 +139,28 @@ devices which are currently visible in the local execution environment. for (size_t deviceIndex = 0; deviceIndex < devices.size(); deviceIndex++) { auto device = devices[deviceIndex]; - std::cout << "[adapter(" << adapterIndex << "," - << urinfo::getAdapterBackend(adapter) << "):" - << "platform(" << platformIndex << "):" - << "device(" << deviceIndex << "," - << urinfo::getDeviceType(device) << ")] " - << urinfo::getPlatformName(platform) << ", " - << urinfo::getDeviceName(device) << " " + auto device_type = urinfo::getDeviceType(device); + + if (linear_ids) { + std::cout << "[" << adapter_backend << ":" + << device_type << "]"; + std::cout << "[" << adapter_backend << ":" + << adapter_device_id << "]"; + } else { + std::cout << "[adapter(" << adapterIndex << "," + << adapter_backend << "):" + << "platform(" << platformIndex << "):" + << "device(" << deviceIndex << "," + << device_type << ")]"; + } + + std::cout << " " << urinfo::getPlatformName(platform) + << ", " << urinfo::getDeviceName(device) << " " << urinfo::getDeviceVersion(device) << " " << "[" << urinfo::getDeviceDriverVersion(device) << "]\n"; + + adapter_device_id++; } } }