From f2cd2a80e7277fc62d8802673ce6ab2fac6fcbd0 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 13 Jun 2024 04:31:13 -0700 Subject: [PATCH] [SYCL] Disable in-order queue barrier optimization while profiling (#14123) Current implementation of profiling info for NOP barriers is inconsistent with other events from the same queue (e.g., if the previous event started after the barrier was submitted). To make them consistent while keeping the optimization, we would need to duplicate the event on our side and make the duplicate check and potentially use profiling info of its previous event. Instead, as the first step, disable the NOP optimization during profiling since profiling is known to incur a performance hit anyway. The proper duplicate event approach can be implemented as a follow up if this causes issues for users. Partially reverts https://github.com/intel/llvm/pull/12949 --- sycl/source/detail/event_impl.cpp | 22 ++-------- sycl/source/detail/event_impl.hpp | 5 --- sycl/source/queue.cpp | 22 ++-------- .../Regression/in_order_barrier_profiling.cpp | 42 +++++++++++++++++++ .../Regression/nop_event_profiling.cpp | 38 ----------------- 5 files changed, 50 insertions(+), 79 deletions(-) create mode 100644 sycl/test-e2e/Regression/in_order_barrier_profiling.cpp delete mode 100644 sycl/test-e2e/Regression/nop_event_profiling.cpp diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 19558851798ad..c7d245e5e91c0 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -167,15 +167,11 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, } } -event_impl::event_impl(const QueueImplPtr &Queue) { +event_impl::event_impl(const QueueImplPtr &Queue) + : MQueue{Queue}, + MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled}, + MFallbackProfiling{MIsProfilingEnabled && Queue->isProfilingFallback()} { this->setContextImpl(Queue->getContextImplPtr()); - this->associateWithQueue(Queue); -} - -void event_impl::associateWithQueue(const QueueImplPtr &Queue) { - MQueue = Queue; - MIsProfilingEnabled = Queue->is_host() || Queue->MIsProfilingEnabled; - MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback(); if (Queue->is_host()) { MState.store(HES_NotComplete); if (Queue->has_property()) { @@ -337,11 +333,6 @@ template <> uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); - - // For nop command start time is equal to submission time. - if (isNOP() && MSubmitTime) - return MSubmitTime; - if (!MHostEvent) { if (MEvent) { auto StartTime = @@ -369,11 +360,6 @@ event_impl::get_profiling_info() { template <> uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); - - // For nop command end time is equal to submission time. - if (isNOP() && MSubmitTime) - return MSubmitTime; - if (!MHostEvent) { if (MEvent) { auto EndTime = diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 56827e3373249..91bef738450d3 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -244,11 +244,6 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; - /// Associate event with provided queue. - /// - /// @return - void associateWithQueue(const QueueImplPtr &Queue); - /// Indicates if this event is not associated with any command and doesn't /// have native handle. /// diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 15d7f11fcb42d..db3ce2f5cb1b3 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -214,22 +214,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { assert(!QueueImpl->getCommandGraph() && "Should not be called in on graph recording."); - auto LastEvent = QueueImpl->getLastEvent(); - if (QueueImpl->MDiscardEvents) { - std::cout << "Discard event enabled" << std::endl; - return LastEvent; - } - - auto LastEventImpl = detail::getSyclObjImpl(LastEvent); - // If last event is default constructed event then we want to associate it - // with the queue and record submission time if profiling is enabled. Such - // event corresponds to NOP and its submit time is same as start time and - // end time. - if (!LastEventImpl->isContextInitialized()) { - LastEventImpl->associateWithQueue(QueueImpl); - LastEventImpl->setSubmissionTime(); - } - return detail::createSyclObjFromImpl(LastEventImpl); + return QueueImpl->getLastEvent(); } /// Prevents any commands submitted afterward to this queue from executing @@ -240,7 +225,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { - if (is_in_order() && !impl->getCommandGraph()) + if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled) return getBarrierEventForInorderQueueHelper(impl); return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); @@ -262,7 +247,8 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, auto EventImpl = detail::getSyclObjImpl(Event); return !EventImpl->isContextInitialized() || EventImpl->isNOP(); }); - if (is_in_order() && !impl->getCommandGraph() && AllEventsEmptyOrNop) + if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled && + AllEventsEmptyOrNop) return getBarrierEventForInorderQueueHelper(impl); return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, diff --git a/sycl/test-e2e/Regression/in_order_barrier_profiling.cpp b/sycl/test-e2e/Regression/in_order_barrier_profiling.cpp new file mode 100644 index 0000000000000..88ee69098177a --- /dev/null +++ b/sycl/test-e2e/Regression/in_order_barrier_profiling.cpp @@ -0,0 +1,42 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +//==----------------- in_order_barrier_profiling.cpp -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Level Zero adapter has a similar in-order queue barrier optimization that +// leads to incorrect profiling values. +// UNSUPPORTED: level_zero +#include + +#include + +using namespace sycl; + +// Checks that the barrier profiling info is consistent with the previous +// command, despite the fact that the latter started after the barrier was +// submitted. +int main() { + queue Q({property::queue::in_order(), property::queue::enable_profiling()}); + + buffer Buf(range<1>(1)); + event KernelEvent; + event BarrierEvent; + { + auto HostAcc = Buf.get_access(); + KernelEvent = Q.submit([&](handler &cgh) { + auto Acc = Buf.get_access(cgh); + cgh.single_task([=]() {}); + }); + BarrierEvent = Q.ext_oneapi_submit_barrier(); + } + uint64_t KernelEnd = + KernelEvent.get_profiling_info(); + uint64_t BarrierStart = + BarrierEvent.get_profiling_info(); + assert(KernelEnd <= BarrierStart); +} diff --git a/sycl/test-e2e/Regression/nop_event_profiling.cpp b/sycl/test-e2e/Regression/nop_event_profiling.cpp deleted file mode 100644 index 65f0f065e5f83..0000000000000 --- a/sycl/test-e2e/Regression/nop_event_profiling.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// Test to check that it is possible to get profiling info from the event -// returned by barrier which turns into NOP. - -#include - -#include - -int main() { - sycl::event start; - sycl::event stop; - sycl::queue q{sycl::property_list(sycl::property::queue::in_order(), - sycl::property::queue::enable_profiling())}; - float elapsed = 0; - - start = q.ext_oneapi_submit_barrier(); - std::cout << "before parallel_for" << std::endl; - q.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 16) * sycl::range<3>(1, 1, 16), - sycl::range<3>(1, 1, 16)), - [=](sycl::nd_item<3> item_ct1) { - int d = 123; - for (int i = 0; i < 10000; i++) { - d = d * i; - } - }); - std::cout << "after parallel_for" << std::endl; - stop = q.ext_oneapi_submit_barrier(); - stop.wait_and_throw(); - elapsed = - (stop.get_profiling_info() - - start.get_profiling_info()) / - 1000000.0f; - std::cout << "elapsed:" << elapsed << std::endl; - return 0; -}