Skip to content

Commit

Permalink
[SYCL] Implement profiling tag extension
Browse files Browse the repository at this point in the history
This commit adds the implementation of the sycl_ext_oneapi_profiling_tag
extension. Moving the extension to experimental will happen in a
follow-up patch.

Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
  • Loading branch information
steffenlarsen committed Mar 5, 2024
1 parent 7a21f07 commit 85f47a3
Show file tree
Hide file tree
Showing 36 changed files with 397 additions and 27 deletions.
4 changes: 3 additions & 1 deletion llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def AspectExt_oneapi_tangle_group : Aspect<"ext_oneapi_tangle_group">;
def AspectExt_intel_matrix : Aspect<"ext_intel_matrix">;
def AspectExt_oneapi_is_composite : Aspect<"ext_oneapi_is_composite">;
def AspectExt_oneapi_is_component : Aspect<"ext_oneapi_is_component">;
def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">;
// Deprecated aspects
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">;
Expand Down Expand Up @@ -119,7 +120,8 @@ def : TargetInfo<"__TestAspectList",
AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_interop_semaphore_export,
AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd,
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component],
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
AspectExt_oneapi_queue_profiling_tag],
[]>;
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
// match.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ int main() {
q.wait();

uint64_t elapsed =
end.get_profiling_info<sycl::info::event_profiling::command_start>() -
end.get_profiling_info<sycl::info::event_profiling::command_end>() -
start.get_profiling_info<sycl::info::event_profiling::command_end>();
std::cout << "Execution time: " << elapsed << " (nanoseconds)\n";
}
Expand Down
7 changes: 7 additions & 0 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class CG {
CopyImage = 23,
SemaphoreWait = 24,
SemaphoreSignal = 25,
ProfilingTag = 26,
};

struct StorageInitHelper {
Expand Down Expand Up @@ -363,6 +364,12 @@ class CGBarrier : public CG {
MEventsWaitWithBarrier(std::move(EventsWaitWithBarrier)) {}
};

class CGProfilingTag : public CG {
public:
CGProfilingTag(CG::StorageInitHelper CGData, detail::code_location loc = {})
: CG(CG::ProfilingTag, std::move(CGData), std::move(loc)) {}
};

/// "Copy 2D USM" command group class.
class CGCopy2DUSM : public CG {
void *MSrc;
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ _PI_API(piEventRetain)
_PI_API(piEventRelease)
_PI_API(piextEventGetNativeHandle)
_PI_API(piextEventCreateWithNativeHandle)
_PI_API(piEnqueueTimestampRecordingExp)
// Sampler
_PI_API(piSamplerCreate)
_PI_API(piSamplerGetInfo)
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ typedef enum {
PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT = 0x2010E,
PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT = 0x2010F,

PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x20110,

PI_EXT_ONEAPI_DEVICE_INFO_MATRIX_COMBINATIONS = 0x20110,

// Composite device
Expand Down Expand Up @@ -1721,6 +1723,10 @@ __SYCL_EXPORT pi_result piEventRetain(pi_event event);

__SYCL_EXPORT pi_result piEventRelease(pi_event event);

__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp(
pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

/// Gets the native handle of a PI event object.
///
/// \param event is the PI event to get the native handle of.
Expand Down
10 changes: 10 additions & 0 deletions sycl/include/sycl/device_aspect_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,11 @@
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_is_component__ 0
#endif

#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__
// __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 61)
#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0
#endif

#ifndef __SYCL_ANY_DEVICE_HAS_host__
// __SYCL_ASPECT(host, 0)
#define __SYCL_ANY_DEVICE_HAS_host__ 0
Expand Down Expand Up @@ -617,3 +622,8 @@
// __SYCL_ASPECT(ext_oneapi_is_component, 60)
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_is_component__ 0
#endif

#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__
// __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 61)
#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0
#endif
42 changes: 42 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/profiling_tag.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//==--------- profiling_tag.hpp --- SYCL profiling tag extension -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/event.hpp>
#include <sycl/properties/queue_properties.hpp>
#include <sycl/queue.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

inline event submit_profiling_tag(queue &Queue,
const sycl::detail::code_location &CodeLoc =
sycl::detail::code_location::current()) {
if (Queue.get_device().has(aspect::ext_oneapi_queue_profiling_tag)) {
return Queue.submit(
[=](handler &CGH) {
sycl::detail::HandlerAccess::internalProfilingTagImpl(CGH);
},
CodeLoc);
}

// If it is not supported natively on the device, we use another path if
// profiling is enabled.
if (!Queue.has_property<sycl::property::queue::enable_profiling>())
throw sycl::exception(
make_error_code(errc::invalid),
"Device must either have aspect::ext_oneapi_queue_profiling_tag or the "
"queue must have profiling enabled.");
return Queue.ext_oneapi_submit_barrier();
}

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
18 changes: 18 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ class stream_impl;
template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder>
class image_accessor;
class HandlerAccess;
template <typename RetType, typename Func, typename Arg>
static Arg member_ptr_helper(RetType (Func::*)(Arg) const);

Expand Down Expand Up @@ -3677,6 +3678,23 @@ class __SYCL_EXPORT handler {
"for use with the SYCL Graph extension.");
}
}

inline void internalProfilingTagImpl() {
throwIfActionIsCreated();
setType(detail::CG::ProfilingTag);
}

friend class detail::HandlerAccess;
};

namespace detail {
class HandlerAccess {
public:
static void internalProfilingTagImpl(handler &Handler) {
Handler.internalProfilingTagImpl();
}
};
} // namespace detail

} // namespace _V1
} // namespace sycl
1 change: 1 addition & 0 deletions sycl/include/sycl/info/aspects.def
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ __SYCL_ASPECT(ext_oneapi_tangle_group, 57)
__SYCL_ASPECT(ext_intel_matrix, 58)
__SYCL_ASPECT(ext_oneapi_is_composite, 59)
__SYCL_ASPECT(ext_oneapi_is_component, 60)
__SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 61)
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
#include <sycl/ext/oneapi/experimental/profiling_tag.hpp>
#include <sycl/ext/oneapi/experimental/root_group.hpp>
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
#include <sycl/ext/oneapi/filter_selector.hpp>
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

pi_result piSamplerCreate(pi_context Context,
const pi_sampler_properties *SamplerProperties,
pi_sampler *RetSampler) {
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

pi_result piSamplerCreate(pi_context Context,
const pi_sampler_properties *SamplerProperties,
pi_sampler *RetSampler) {
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

//
// Sampler
//
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/native_cpu/pi_native_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

//
// Sampler
//
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,14 @@ pi_result piextEventCreateWithNativeHandle(pi_native_handle NativeHandle,
OwnNativeHandle, Event);
}

pi_result piEnqueueTimestampRecordingExp(pi_queue Queue, pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

pi_result piSamplerCreate(pi_context Context,
const pi_sampler_properties *SamplerProperties,
pi_sampler *RetSampler) {
Expand Down
11 changes: 2 additions & 9 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,8 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")

# commit a2757b2931daa2f8d7c9dd51b0fc846be1fd49a7
# Merge: 9b936b5 + f78d369
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Tue Feb 27 11:34:58 2024 +0000
# Merge pull request #1254 from Bensuo/cmdbuf-support-hip
# [EXP][CMDBUF] HIP adapter support for command buffers
set(UNIFIED_RUNTIME_TAG a2757b2931daa2f8d7c9dd51b0fc846be1fd49a7 )
set(UNIFIED_RUNTIME_REPO "https://github.com/steffenlarsen/unified-runtime")
set(UNIFIED_RUNTIME_TAG steffen/record_event)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
20 changes: 20 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1272,6 +1272,9 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
PI_TO_UR_MAP_DEVICE_INFO(
PI_EXT_ONEAPI_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT,
UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP)
PI_TO_UR_MAP_DEVICE_INFO(
PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT,
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP)
PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT,
UR_DEVICE_INFO_ESIMD_SUPPORT)
PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES,
Expand Down Expand Up @@ -4312,6 +4315,23 @@ inline pi_result piEventRelease(pi_event Event) {
return PI_SUCCESS;
}

inline pi_result piEnqueueTimestampRecordingExp(pi_queue Queue,
pi_bool Blocking,
pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList,
pi_event *Event) {

ur_queue_handle_t UrQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
const ur_event_handle_t *UrEventWaitList =
reinterpret_cast<const ur_event_handle_t *>(EventWaitList);
ur_event_handle_t *UREvent = reinterpret_cast<ur_event_handle_t *>(Event);

HANDLE_ERRORS(urEnqueueTimestampRecordingExp(
UrQueue, Blocking, NumEventsInWaitList, UrEventWaitList, UREvent));

return PI_SUCCESS;
}

// Events
///////////////////////////////////////////////////////////////////////////////

Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/unified_runtime/pi_unified_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,13 @@ __SYCL_EXPORT pi_result piextEventCreateWithNativeHandle(
OwnNativeHandle, Event);
}

__SYCL_EXPORT pi_result piEnqueueTimestampRecordingExp(
pi_queue Queue, pi_bool Blocking, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *Event) {
return pi2ur::piEnqueueTimestampRecordingExp(
Queue, Blocking, NumEventsInWaitList, EventWaitList, Event);
}

__SYCL_EXPORT pi_result piEnqueueMemImageFill(
pi_queue Queue, pi_mem Image, const void *FillColor, const size_t *Origin,
const size_t *Region, pi_uint32 NumEventsInWaitList,
Expand Down Expand Up @@ -1480,6 +1487,7 @@ __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_API(piextEventGetNativeHandle)
_PI_API(piEventGetProfilingInfo)
_PI_API(piEventCreate)
_PI_API(piEnqueueTimestampRecordingExp)

_PI_API(piSamplerCreate)
_PI_API(piSamplerGetInfo)
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,14 @@ bool device_impl::has(aspect Aspect) const {

return Result != nullptr;
}
case aspect::ext_oneapi_queue_profiling_tag: {
pi_bool support = PI_FALSE;
bool call_successful =
getPlugin()->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
MDevice, PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT,
sizeof(pi_bool), &support, nullptr) == PI_SUCCESS;
return call_successful && support;
}
}
throw runtime_error("This device aspect has not been implemented yet.",
PI_ERROR_INVALID_DEVICE);
Expand Down
Loading

0 comments on commit 85f47a3

Please sign in to comment.