Skip to content

Commit

Permalink
Merge pull request #1400 from steffenlarsen/steffen/record_event
Browse files Browse the repository at this point in the history
[UR][L0][CUDA][HIP] Add enqueue timestamp recording extension
  • Loading branch information
kbenzie authored May 8, 2024
2 parents 5a23b18 + 84bad6c commit 7ce68e0
Show file tree
Hide file tree
Showing 41 changed files with 1,027 additions and 39 deletions.
57 changes: 56 additions & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 220, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -1641,6 +1642,7 @@ typedef enum ur_device_info_t {
///< backed 3D sampled image data.
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2017, ///< [::ur_bool_t] returns true if the device is capable of fetching
///< non-USM backed 3D sampled image data.
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP = 0x2018, ///< [::ur_bool_t] returns true if the device supports timestamp recording
/// @cond
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -1666,7 +1668,7 @@ typedef enum ur_device_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName`
/// + `::UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -5618,6 +5620,7 @@ typedef enum ur_command_t {
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP = 0x1000, ///< Event created by ::urCommandBufferEnqueueExp
UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP = 0x2000, ///< Event created by ::urBindlessImagesWaitExternalSemaphoreExp
UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp
UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp
/// @cond
UR_COMMAND_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -8890,6 +8893,46 @@ urKernelSuggestMaxCooperativeGroupCountExp(
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental APIs for enqueuing timestamp recordings
#if !defined(__GNUC__)
#pragma region enqueue timestamp recording(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Enqueue a command for recording the device timestamp
///
/// @details
/// - Profiling information can be queried from the returned event as if
/// `hQueue` had profiling enabled.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hQueue`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == phEvent`
/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
UR_APIEXPORT ur_result_t UR_APICALL
urEnqueueTimestampRecordingExp(
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
bool blocking, ///< [in] indicates whether the call to this function should block
///< until the device timestamp recording command has executed on the
///< device.
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
///< events that must be complete before the timestamp recording command
///< is executed. If nullptr, the numEventsInWaitList must be 0,
///< indicating no wait events.
ur_event_handle_t *phEvent ///< [in,out] return an event object that identifies this particular
///< timestamp recording command instance. Profiling information can be
///< queried from this event as if `hQueue` had profiling enabled. Querying
///< `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
///< reports the timestamp at the time of the call to this function.
///< Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
///< reports the timestamp recorded when the command is executed on the device.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -10600,6 +10643,18 @@ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
ur_event_handle_t **pphEvent;
} ur_enqueue_cooperative_kernel_launch_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urEnqueueTimestampRecordingExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value
typedef struct ur_enqueue_timestamp_recording_exp_params_t {
ur_queue_handle_t *phQueue; ///< pointer to the `hQueue` argument
bool *pblocking; ///< pointer to the `blocking` argument
uint32_t *pnumEventsInWaitList; ///< pointer to the `numEventsInWaitList` argument
const ur_event_handle_t **pphEventWaitList; ///< pointer to the `phEventWaitList` argument
ur_event_handle_t **pphEvent; ///< pointer to the `phEvent` argument
} ur_enqueue_timestamp_recording_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
10 changes: 10 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1448,10 +1448,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
const ur_event_handle_t *,
ur_event_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urEnqueueTimestampRecordingExp
/// @details The unnamed parameters follow the order of the
/// urEnqueueTimestampRecordingExp declaration.
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
ur_queue_handle_t, // hQueue
bool, // blocking
uint32_t, // numEventsInWaitList
const ur_event_handle_t *, // phEventWaitList
ur_event_handle_t *); // phEvent

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of EnqueueExp function pointers
typedef struct ur_enqueue_exp_dditable_t {
ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; // cooperative kernel launch entry (presumably urEnqueueCooperativeKernelLaunchExp)
ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp; // entry of the function-pointer type for urEnqueueTimestampRecordingExp
} ur_enqueue_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCooperativeKernelLaunchExpParams(const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
/// @note NOTE(review): `buffer`/`buff_size` presumably describe the
/// caller-provided output buffer and `out_size` presumably receives the
/// size needed for the printed text - confirm against the implementation.
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(const struct ur_enqueue_timestamp_recording_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct
/// @returns
Expand Down
66 changes: 66 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
break;
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -2571,6 +2574,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
break;
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
os << "UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4280,6 +4286,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -8788,6 +8806,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) {
case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP:
os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP";
break;
case UR_COMMAND_TIMESTAMP_RECORDING_EXP:
os << "UR_COMMAND_TIMESTAMP_RECORDING_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -14104,6 +14125,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t type
/// @details Writes every argument as a `.name = value` pair, separated by
/// ", ". Each field of `params` is dereferenced, so all of them must point
/// to valid storage.
/// @returns
/// std::ostream &
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) {
    // Queue handle.
    os << ".hQueue = ";
    ur::details::printPtr(os, *(params->phQueue));

    // Plain scalar arguments.
    os << ", " << ".blocking = " << *(params->pblocking);
    os << ", " << ".numEventsInWaitList = " << *(params->pnumEventsInWaitList);

    // Wait list: nothing is printed between the braces when the list pointer
    // is NULL, regardless of the advertised element count.
    os << ", " << ".phEventWaitList = {";
    for (size_t idx = 0;; ++idx) {
        if (*(params->pphEventWaitList) == NULL || idx >= *params->pnumEventsInWaitList) {
            break;
        }
        if (idx > 0) {
            os << ", ";
        }
        ur::details::printPtr(os, (*(params->pphEventWaitList))[idx]);
    }
    os << "}";

    // Output event pointer.
    os << ", " << ".phEvent = ";
    ur::details::printPtr(os, *(params->pphEvent));

    return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type
/// @returns
Expand Down Expand Up @@ -17126,6 +17189,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params;
} break;
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
} break;
case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: {
os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params;
} break;
Expand Down
70 changes: 70 additions & 0 deletions scripts/core/EXP-ENQUEUE-TIMESTAMP-RECORDING.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<%
OneApi=tags['$OneApi']
x=tags['$x']
X=x.upper()
%>

.. _experimental-enqueue-timestamp-recording:

================================================================================
Enqueue Timestamp Recording
================================================================================

.. warning::

Experimental features:

* May be replaced, updated, or removed at any time.
* Do not require maintaining API/ABI stability of their own additions over
time.
* Do not require conformance testing of their own additions.


Motivation
--------------------------------------------------------------------------------
Currently, the only way to get timestamp information is through enabling
profiling on a queue and retrieving the information from events coming from
commands submitted to it. However, not all systems give full control of the
queue construction to the programmer wanting the profiling information. To amend
this, this extension adds the ability to enqueue a timestamp recording on any
queue, with or without profiling enabled. This event can in turn be queried for
the usual profiling information.


API
--------------------------------------------------------------------------------

Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ${x}_device_info_t
* ${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP

* ${x}_command_t
* ${X}_COMMAND_TIMESTAMP_RECORDING_EXP

Functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* ${x}EnqueueTimestampRecordingExp

Changelog
--------------------------------------------------------------------------------

+-----------+------------------------+
| Revision | Changes |
+===========+========================+
| 1.0 | Initial Draft |
+-----------+------------------------+


Support
--------------------------------------------------------------------------------

Adapters which support this experimental feature *must* return true for the new
`${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` device info query.


Contributors
--------------------------------------------------------------------------------

* Steffen Larsen `steffen.larsen@intel.com <steffen.larsen@intel.com>`_
66 changes: 66 additions & 0 deletions scripts/core/exp-enqueue-timestamp-recording.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#
# Copyright (C) 2024 Intel Corporation
#
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# See YaML.md for syntax definition
#
--- #--------------------------------------------------------------------------
type: header
desc: "Intel $OneApi Unified Runtime Experimental APIs for enqueuing timestamp recordings"
ordinal: "99"
--- #--------------------------------------------------------------------------
type: enum
extend: true
typed_etors: true
desc: "Extension enums to $x_device_info_t to support timestamp recordings."
name: $x_device_info_t
etors:
- name: TIMESTAMP_RECORDING_SUPPORT_EXP
value: "0x2018"
desc: "[$x_bool_t] returns true if the device supports timestamp recording"
--- #--------------------------------------------------------------------------
type: enum
extend: true
desc: "Command Type experimental enumerations."
name: $x_command_t
etors:
- name: TIMESTAMP_RECORDING_EXP
value: "0x2002"
desc: Event created by $xEnqueueTimestampRecordingExp
--- #--------------------------------------------------------------------------
type: function
desc: "Enqueue a command for recording the device timestamp"
class: $xEnqueue
name: TimestampRecordingExp
params:
- type: $x_queue_handle_t
name: hQueue
desc: "[in] handle of the queue object"
- type: bool
name: blocking
desc: |
[in] indicates whether the call to this function should block
until the device timestamp recording command has executed on the
device.
- type: uint32_t
name: numEventsInWaitList
desc: "[in] size of the event wait list"
- type: "const $x_event_handle_t*"
name: phEventWaitList
desc: |
[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the timestamp recording command is executed.
If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
- type: $x_event_handle_t*
name: phEvent
desc: |
[in,out] return an event object that identifies this particular timestamp recording command instance. Profiling information can be queried
from this event as if `hQueue` had profiling enabled. Querying `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
reports the timestamp at the time of the call to this function. Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
reports the timestamp recorded when the command is executed on the device.
returns:
- $X_RESULT_ERROR_INVALID_NULL_HANDLE
- $X_RESULT_ERROR_INVALID_NULL_POINTER
- $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
3 changes: 3 additions & 0 deletions scripts/core/registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,9 @@ etors:
- name: COMMAND_BUFFER_COMMAND_GET_INFO_EXP
desc: Enumerator for $xCommandBufferCommandGetInfoExp
value: '222'
- name: ENQUEUE_TIMESTAMP_RECORDING_EXP
desc: Enumerator for $xEnqueueTimestampRecordingExp
value: '223'
---
type: enum
desc: Defines structure types
Expand Down
Loading

0 comments on commit 7ce68e0

Please sign in to comment.