Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Implement sycl_ext_oneapi_profiling_tag extension #12838

Merged
merged 22 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
85f47a3
[SYCL] Implement profiling tag extension
steffenlarsen Jan 22, 2024
19926ab
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen Mar 5, 2024
cf7cfaa
Require queue_profiling aspect
steffenlarsen Mar 6, 2024
530a527
Revert extension changes
steffenlarsen Mar 7, 2024
17950e5
Add checks for start
steffenlarsen Mar 7, 2024
78ff547
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen Mar 12, 2024
f162ae0
Bump PI minor version
steffenlarsen Mar 12, 2024
c90de05
Finish comment
steffenlarsen Mar 12, 2024
1f9ab61
Fix missed merge conflicts
steffenlarsen Mar 12, 2024
2a647d9
Merge branch 'sycl' into steffen/event_with_enqueue
steffenlarsen Mar 13, 2024
88829f1
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen Apr 16, 2024
e09a0a1
Fix trailing whitespaces
steffenlarsen Apr 16, 2024
00c586e
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen Apr 16, 2024
0bcb356
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen Apr 30, 2024
e20123b
Make profiling_tag.hpp self-contained
steffenlarsen Apr 30, 2024
76ab830
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen May 8, 2024
b21ec7c
Bump tag again
steffenlarsen May 8, 2024
c478047
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen May 8, 2024
4b3f410
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen May 13, 2024
0e77ef7
Address comments
steffenlarsen May 13, 2024
2e516f1
Merge remote-tracking branch 'intel/sycl' into steffen/event_with_enq…
steffenlarsen May 14, 2024
0a09aef
Merge branch 'sycl' into steffen/event_with_enqueue
steffenlarsen May 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def AspectExt_oneapi_graph : Aspect<"ext_oneapi_graph">;
def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">;
def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">;
def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">;
def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">;
// Deprecated aspects
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">;
Expand Down Expand Up @@ -138,7 +139,7 @@ def : TargetInfo<"__TestAspectList",
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph,
AspectExt_oneapi_private_alloca],
AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag],
[]>;
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
// match.
Expand Down
7 changes: 7 additions & 0 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class CG {
CopyImage = 23,
SemaphoreWait = 24,
SemaphoreSignal = 25,
ProfilingTag = 26,
};

struct StorageInitHelper {
Expand Down Expand Up @@ -363,6 +364,12 @@ class CGBarrier : public CG {
MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {}
};

class CGProfilingTag : public CG {
public:
CGProfilingTag(CG::StorageInitHelper CGData, detail::code_location loc = {})
: CG(CG::ProfilingTag, std::move(CGData), std::move(loc)) {}
};

/// "Copy 2D USM" command group class.
class CGCopy2DUSM : public CG {
void *MSrc;
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ _PI_API(piEventRetain)
_PI_API(piEventRelease)
_PI_API(piextEventGetNativeHandle)
_PI_API(piextEventCreateWithNativeHandle)
_PI_API(piEnqueueTimestampRecordingExp)
// Sampler
_PI_API(piSamplerCreate)
_PI_API(piSamplerGetInfo)
Expand Down
10 changes: 9 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,11 @@
// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D
// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM
// - PI_EXT_ONEAPI_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D
// 15.51 Added piEnqueueTimestampRecordingExp and
// PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT.

#define _PI_H_VERSION_MAJOR 15
#define _PI_H_VERSION_MINOR 50
#define _PI_H_VERSION_MINOR 51

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -456,6 +458,8 @@ typedef enum {
PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT = 0x2010E,
PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT = 0x2010F,

PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x20110,
steffenlarsen marked this conversation as resolved.
Show resolved Hide resolved

PI_EXT_ONEAPI_DEVICE_INFO_MATRIX_COMBINATIONS = 0x20110,

// Composite device
Expand Down Expand Up @@ -1767,6 +1771,10 @@ __SYCL_EXPORT pi_result piEventRetain(pi_event event);

__SYCL_EXPORT pi_result piEventRelease(pi_event event);

__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp(
steffenlarsen marked this conversation as resolved.
Show resolved Hide resolved
pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

/// Gets the native handle of a PI event object.
///
/// \param event is the PI event to get the native handle of.
Expand Down
10 changes: 10 additions & 0 deletions sycl/include/sycl/device_aspect_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
#endif

#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__
// __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73)
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0
#endif

#ifndef __SYCL_ANY_DEVICE_HAS_host__
// __SYCL_ASPECT(host, 0)
#define __SYCL_ANY_DEVICE_HAS_host__ 0
Expand Down Expand Up @@ -740,3 +745,8 @@
//__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72)
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_bindless_sampled_image_fetch_3d__ 0
#endif

#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__
// __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 67)
steffenlarsen marked this conversation as resolved.
Show resolved Hide resolved
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0
#endif
44 changes: 44 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//==--------- profiling_tag.hpp --- SYCL profiling tag extension -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/aspects.hpp>
#include <sycl/event.hpp>
#include <sycl/handler.hpp>
#include <sycl/properties/queue_properties.hpp>
#include <sycl/queue.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

inline event submit_profiling_tag(queue &Queue,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
if (Queue.get_device().has(aspect::ext_oneapi_queue_profiling_tag)) {
return Queue.submit(
[=](handler &CGH) {
sycl::detail::HandlerAccess::internalProfilingTagImpl(CGH);
},
CodeLoc);
}

// If it is not supported natively on the device, we use another path if
// profiling is enabled.
if (!Queue.has_property<sycl::property::queue::enable_profiling>())
ldrumm marked this conversation as resolved.
Show resolved Hide resolved
throw sycl::exception(
make_error_code(errc::invalid),
"Device must either have aspect::ext_oneapi_queue_profiling_tag or the "
"queue must have profiling enabled.");
return Queue.ext_oneapi_submit_barrier();
}

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
18 changes: 18 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ class stream_impl;
template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder>
class image_accessor;
class HandlerAccess;
template <typename RetType, typename Func, typename Arg>
static Arg member_ptr_helper(RetType (Func::*)(Arg) const);

Expand Down Expand Up @@ -3653,11 +3654,28 @@ class __SYCL_EXPORT handler {
// Set that an ND Range was used during a call to parallel_for
void setNDRangeUsed(bool Value);

inline void internalProfilingTagImpl() {
throwIfActionIsCreated();
setType(detail::CG::ProfilingTag);
}

friend class detail::HandlerAccess;

protected:
/// Registers event dependencies in this command group.
void depends_on(const detail::EventImplPtr &Event);
/// Registers event dependencies in this command group.
void depends_on(const std::vector<detail::EventImplPtr> &Events);
};

namespace detail {
class HandlerAccess {
public:
static void internalProfilingTagImpl(handler &Handler) {
Handler.internalProfilingTagImpl();
}
};
} // namespace detail

} // namespace _V1
} // namespace sycl
1 change: 1 addition & 0 deletions sycl/include/sycl/info/aspects.def
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d_usm, 69)
__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70)
__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71)
__SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72)
__SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73)
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
#include <sycl/ext/oneapi/experimental/group_load_store.hpp>
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
#include <sycl/ext/oneapi/experimental/profiling_tag.hpp>
#include <sycl/ext/oneapi/experimental/root_group.hpp>
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
#include <sycl/ext/oneapi/filter_selector.hpp>
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

pi_result piSamplerCreate(pi_context Context,
const pi_sampler_properties *SamplerProperties,
pi_sampler *RetSampler) {
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

pi_result piSamplerCreate(pi_context Context,
const pi_sampler_properties *SamplerProperties,
pi_sampler *RetSampler) {
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

//
// Sampler
//
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/native_cpu/pi_native_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

//
// Sampler
//
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

pi_result piSamplerCreate(pi_context Context,
const pi_sampler_properties *SamplerProperties,
pi_sampler *RetSampler) {
Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 8438b8a1de1ddc6e05e3bc012fb6be74d2e830e4
# Merge: 6076b0622452 847b3ace28b8
# Author: Fábio <fabio.mestre@codeplay.com>
# Date: Mon May 6 18:00:46 2024 +0100
# Merge branch 'main' into fabio/command-buffer-refactor
# [EXP][Command-Buffer] Remove duplicated code from headers
set(UNIFIED_RUNTIME_TAG 8438b8a1de1ddc6e05e3bc012fb6be74d2e830e4)
# commit 6d5d84c2ad706e32c7ae01d63a08cbb03410e580
# Merge: 7ce68e09 cb3d9454
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Wed May 8 16:02:54 2024 +0100
# Merge pull request #1588 from steffenlarsen/steffen/fix_timestamp_merge_mistake
# [CUDA] Add back device info enums after merge mistake
set(UNIFIED_RUNTIME_TAG 6d5d84c2ad706e32c7ae01d63a08cbb03410e580)

fetch_adapter_source(level_zero
${UNIFIED_RUNTIME_REPO}
Expand Down
20 changes: 20 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,9 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
PI_TO_UR_MAP_DEVICE_INFO(
PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT,
UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP)
PI_TO_UR_MAP_DEVICE_INFO(
PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT,
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP)
PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT,
UR_DEVICE_INFO_ESIMD_SUPPORT)
PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES,
Expand Down Expand Up @@ -4362,6 +4365,23 @@ inline pi_result piEventRelease(pi_event Event) {
return PI_SUCCESS;
}

inline pi_result piEnqueueTimestampRecordingExp(pi_queue Queue,
pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {

ur_queue_handle_t UrQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
const ur_event_handle_t *UrEventWaitList =
reinterpret_cast<const ur_event_handle_t *>(EventWaitList);
ur_event_handle_t *UREvent = reinterpret_cast<ur_event_handle_t *>(Event);

HANDLE_ERRORS(urEnqueueTimestampRecordingExp(
UrQueue, Blocking, NumEventsInWaitList, UrEventWaitList, UREvent));

return PI_SUCCESS;
}

// Events
///////////////////////////////////////////////////////////////////////////////

Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/unified_runtime/pi_unified_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,13 @@ __SYCL_EXPORT pi_result piextEventCreateWithNativeHandle(
OwnNativeHandle, Event);
}

__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp(
pi_queue Queue, pi_bool Blocking, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

__SYCL_EXPORT pi_result piEnqueueMemImageFill(
pi_queue Queue, pi_mem Image, const void *FillColor, const size_t *Origin,
const size_t *Region, pi_uint32 NumEventsInWaitList,
Expand Down Expand Up @@ -1512,6 +1519,7 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_API(piextEventGetNativeHandle)
_PI_API(piEventGetProfilingInfo)
_PI_API(piEventCreate)
_PI_API(piEnqueueTimestampRecordingExp)

_PI_API(piSamplerCreate)
_PI_API(piSamplerGetInfo)
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,14 @@ bool device_impl::has(aspect Aspect) const {
return be == sycl::backend::ext_oneapi_level_zero ||
be == sycl::backend::opencl;
}
case aspect::ext_oneapi_queue_profiling_tag: {
pi_bool support = PI_FALSE;
bool call_successful =
getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
MDevice, PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT,
sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
return call_successful && support;
}
}
throw runtime_error("This device aspect has not been implemented yet.",
PI_ERROR_INVALID_DEVICE);
Expand Down
10 changes: 8 additions & 2 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ void event_impl::checkProfilingPreconditions() const {
"Profiling information is unavailable as the event "
"has no associated queue.");
}
if (!MIsProfilingEnabled) {
if (!MIsProfilingEnabled && !MProfilingTagEvent) {
throw sycl::exception(
make_error_code(sycl::errc::invalid),
"Profiling information is unavailable as the queue associated with "
Expand All @@ -302,6 +302,12 @@ template <>
uint64_t
event_impl::get_profiling_info<info::event_profiling::command_submit>() {
checkProfilingPreconditions();
if (isProfilingTagEvent()) {
// For profiling tag events we rely on the submission time reported as
// the start time has undefined behavior.
return get_event_profiling_info<info::event_profiling::command_submit>(
this->getHandleRef(), this->getPlugin());
}

// The delay between the submission and the actual start of a CommandBuffer
// can be short. Consequently, the submission time, which is based on
Expand Down Expand Up @@ -558,7 +564,7 @@ void event_impl::cleanDepEventsThroughOneLevel() {
}

void event_impl::setSubmissionTime() {
if (!MIsProfilingEnabled)
if (!MIsProfilingEnabled && !MProfilingTagEvent)
return;
if (!MFallbackProfiling) {
if (QueueImplPtr Queue = MQueue.lock()) {
Expand Down
Loading
Loading