Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[UR][L0][CUDA][HIP] Add enqueue timestamp recording extension #1400

Merged
merged 30 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
9aceab4
Add enqueue timestamp recording extension
steffenlarsen Jan 17, 2024
578f347
Address comments
steffenlarsen Feb 29, 2024
b7b0a6f
Revert CUDA change
steffenlarsen Feb 29, 2024
e08bf52
Correctly enable recordings in HIP and CUDA for timestamp events
steffenlarsen Mar 1, 2024
bf5ea14
Fix use of event handle
steffenlarsen Mar 1, 2024
206c4b1
Fix faulty disjunction
steffenlarsen Mar 1, 2024
b690920
Fix faulty disjunction 2
steffenlarsen Mar 1, 2024
5db5ba8
Allow event creation to record timing for events
steffenlarsen Mar 1, 2024
2f0e050
Address comments
steffenlarsen Mar 5, 2024
32abd74
Merge branch 'main' into steffen/record_event
steffenlarsen Mar 5, 2024
4261d04
Merge branch 'main' into steffen/record_event
steffenlarsen Mar 8, 2024
5caceaf
Amend comments
steffenlarsen Mar 12, 2024
0a958b3
Merge branch 'main' into steffen/record_event
steffenlarsen Mar 13, 2024
0dd1815
Merge remote-tracking branch 'intel/main' into steffen/record_event
steffenlarsen Apr 8, 2024
d5aaba0
Fix merge mistake
steffenlarsen Apr 8, 2024
5cdc724
Merge branch 'main' into steffen/record_event
steffenlarsen Apr 12, 2024
e7f496d
Merge branch 'main' into steffen/record_event
steffenlarsen Apr 16, 2024
5fd441e
Move timestamp query to after commandlist get
steffenlarsen Apr 16, 2024
2404fe6
Stop making new heap allocations for each recording
steffenlarsen Apr 16, 2024
be01218
Merge remote-tracking branch 'intel/main' into steffen/record_event
steffenlarsen Apr 17, 2024
73de142
Fix diff
steffenlarsen Apr 17, 2024
4a855ca
Merge remote-tracking branch 'intel/main' into steffen/record_event
steffenlarsen Apr 18, 2024
4f0bf8c
Fix diff
steffenlarsen Apr 18, 2024
f7fe03e
Merge branch 'main' into steffen/record_event
steffenlarsen Apr 24, 2024
ecb6a82
Remove old use of urPrint
steffenlarsen Apr 24, 2024
cf13442
Merge branch 'main' into steffen/record_event
steffenlarsen Apr 30, 2024
c804856
Merge remote-tracking branch 'intel/main' into steffen/record_event
steffenlarsen May 2, 2024
308f1dd
Merge remote-tracking branch 'intel/main' into steffen/record_event
steffenlarsen May 6, 2024
06432bf
Remove trailing ws
steffenlarsen May 7, 2024
84bad6c
Add wait-list to get-command-list
steffenlarsen May 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 220, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -1641,6 +1642,7 @@ typedef enum ur_device_info_t {
///< backed 3D sampled image data.
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2017, ///< [::ur_bool_t] returns true if the device is capable of fetching
///< non-USM backed 3D sampled image data.
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP = 0x2018, ///< [::ur_bool_t] returns true if the device supports timestamp recording
/// @cond
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -1666,7 +1668,7 @@ typedef enum ur_device_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName`
/// + `::UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -5618,6 +5620,7 @@ typedef enum ur_command_t {
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP = 0x1000, ///< Event created by ::urCommandBufferEnqueueExp
UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP = 0x2000, ///< Event created by ::urBindlessImagesWaitExternalSemaphoreExp
UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp
UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp
/// @cond
UR_COMMAND_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -8890,6 +8893,46 @@ urKernelSuggestMaxCooperativeGroupCountExp(
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental APIs for enqueuing timestamp recordings
#if !defined(__GNUC__)
#pragma region enqueue timestamp recording(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Enqueue a command for recording the device timestamp
///
/// @details
/// - The event returned through `phEvent` can be queried for profiling
/// information as if `hQueue` had profiling enabled.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hQueue`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == phEvent`
/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
UR_APIEXPORT ur_result_t UR_APICALL
urEnqueueTimestampRecordingExp(
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
bool blocking, ///< [in] indicates whether the call to this function should block
///< until the device timestamp recording command has executed on the
///< device.
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
///< events that must be complete before the timestamp recording
///< command is executed.
///< If nullptr, the numEventsInWaitList must be 0, indicating no wait
///< events.
ur_event_handle_t *phEvent ///< [in,out] return an event object that identifies this particular
///< timestamp recording command. Profiling information can be queried
///< from this event as if `hQueue` had profiling enabled. Querying
///< `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
///< reports the timestamp at the time of the call to this function.
///< Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
///< reports the timestamp recorded when the command is executed on the device.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -10600,6 +10643,18 @@ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
ur_event_handle_t **pphEvent;
} ur_enqueue_cooperative_kernel_launch_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urEnqueueTimestampRecordingExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value
typedef struct ur_enqueue_timestamp_recording_exp_params_t {
ur_queue_handle_t *phQueue; ///< points to the `hQueue` argument
bool *pblocking; ///< points to the `blocking` argument
uint32_t *pnumEventsInWaitList; ///< points to the `numEventsInWaitList` argument
const ur_event_handle_t **pphEventWaitList; ///< points to the `phEventWaitList` argument
ur_event_handle_t **pphEvent; ///< points to the `phEvent` argument
} ur_enqueue_timestamp_recording_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
10 changes: 10 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1448,10 +1448,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
const ur_event_handle_t *,
ur_event_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urEnqueueTimestampRecordingExp
/// @details Parameter order mirrors the urEnqueueTimestampRecordingExp
/// prototype; the names are given in the trailing comments below.
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
ur_queue_handle_t, // hQueue
bool, // blocking
uint32_t, // numEventsInWaitList
const ur_event_handle_t *, // phEventWaitList
ur_event_handle_t *); // phEvent

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of EnqueueExp function pointers
typedef struct ur_enqueue_exp_dditable_t {
ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; ///< entry point of type ur_pfnEnqueueCooperativeKernelLaunchExp_t
ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp; ///< entry point of type ur_pfnEnqueueTimestampRecordingExp_t
} ur_enqueue_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCooperativeKernelLaunchExpParams(const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
/// @details
/// - NOTE(review): only the prototype is visible here. Presumably `buffer`
/// receives the printed text, `buff_size` is its capacity and `out_size`
/// reports the size needed for the output - confirm against the
/// implementation.
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(const struct ur_enqueue_timestamp_recording_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct
/// @returns
Expand Down
66 changes: 66 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
break;
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -2571,6 +2574,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
break;
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
os << "UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4280,6 +4286,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -8788,6 +8806,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) {
case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP:
os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP";
break;
case UR_COMMAND_TIMESTAMP_RECORDING_EXP:
os << "UR_COMMAND_TIMESTAMP_RECORDING_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -14104,6 +14125,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t type
/// @returns
/// std::ostream &
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) {

os << ".hQueue = ";

ur::details::printPtr(os,
*(params->phQueue));

os << ", ";
os << ".blocking = ";

os << *(params->pblocking);

os << ", ";
os << ".numEventsInWaitList = ";

os << *(params->pnumEventsInWaitList);

os << ", ";
os << ".phEventWaitList = {";
for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) {
if (i != 0) {
os << ", ";
}

ur::details::printPtr(os,
(*(params->pphEventWaitList))[i]);
}
os << "}";

os << ", ";
os << ".phEvent = ";

ur::details::printPtr(os,
*(params->pphEvent));

return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type
/// @returns
Expand Down Expand Up @@ -17126,6 +17189,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params;
} break;
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
} break;
case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: {
os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params;
} break;
Expand Down
70 changes: 70 additions & 0 deletions scripts/core/EXP-ENQUEUE-TIMESTAMP-RECORDING.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<%
OneApi=tags['$OneApi']
x=tags['$x']
X=x.upper()
%>

.. _experimental-enqueue-timestamp-recording:

================================================================================
Enqueue Timestamp Recording
================================================================================

.. warning::

Experimental features:

* May be replaced, updated, or removed at any time.
* Do not require maintaining API/ABI stability of their own additions over
time.
* Do not require conformance testing of their own additions.


Motivation
--------------------------------------------------------------------------------
Currently, the only way to get timestamp information is through enabling
profiling on a queue and retrieving the information from events coming from
commands submitted to it. However, not all systems give full control of the
queue construction to the programmer wanting the profiling information. To amend
this, this extension adds the ability to enqueue a timestamp recording on any
queue, with or without profiling enabled. The event returned by that command
can in turn be queried for the usual profiling information.


API
--------------------------------------------------------------------------------

Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ${x}_device_info_t
* ${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP

* ${x}_command_t
* ${X}_COMMAND_TIMESTAMP_RECORDING_EXP

Functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* ${x}EnqueueTimestampRecordingExp

Changelog
--------------------------------------------------------------------------------

+-----------+------------------------+
| Revision | Changes |
+===========+========================+
| 1.0 | Initial Draft |
+-----------+------------------------+


Support
--------------------------------------------------------------------------------

Adapters which support this experimental feature *must* return true for the new
`${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` device info query.


Contributors
--------------------------------------------------------------------------------

* Steffen Larsen `steffen.larsen@intel.com <steffen.larsen@intel.com>`_
66 changes: 66 additions & 0 deletions scripts/core/exp-enqueue-timestamp-recording.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#
# Copyright (C) 2024 Intel Corporation
#
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# See YaML.md for syntax definition
#
--- #--------------------------------------------------------------------------
type: header
desc: "Intel $OneApi Unified Runtime Experimental APIs for enqueuing timestamp recordings"
ordinal: "99"
--- #--------------------------------------------------------------------------
type: enum
extend: true
typed_etors: true
desc: "Extension enums to $x_device_info_t to support timestamp recordings."
name: $x_device_info_t
etors:
- name: TIMESTAMP_RECORDING_SUPPORT_EXP
value: "0x2018"
desc: "[$x_bool_t] returns true if the device supports timestamp recording"
--- #--------------------------------------------------------------------------
type: enum
extend: true
desc: "Command Type experimental enumerations."
name: $x_command_t
etors:
- name: TIMESTAMP_RECORDING_EXP
value: "0x2002"
desc: Event created by $xEnqueueTimestampRecordingExp
--- #--------------------------------------------------------------------------
type: function
desc: "Enqueue a command for recording the device timestamp"
class: $xEnqueue
name: TimestampRecordingExp
params:
- type: $x_queue_handle_t
name: hQueue
desc: "[in] handle of the queue object"
- type: bool
name: blocking
desc: |
[in] indicates whether the call to this function should block
until the device timestamp recording command has executed on the
device.
- type: uint32_t
name: numEventsInWaitList
desc: "[in] size of the event wait list"
- type: "const $x_event_handle_t*"
name: phEventWaitList
desc: |
[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the timestamp recording command is executed.
If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
- type: $x_event_handle_t*
name: phEvent
desc: |
[in,out] return an event object that identifies this particular timestamp recording command. Profiling information can be queried
from this event as if `hQueue` had profiling enabled. Querying `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
reports the timestamp at the time of the call to this function. Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
reports the timestamp recorded when the command is executed on the device.
returns:
- $X_RESULT_ERROR_INVALID_NULL_HANDLE
- $X_RESULT_ERROR_INVALID_NULL_POINTER
- $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
3 changes: 3 additions & 0 deletions scripts/core/registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,9 @@ etors:
- name: COMMAND_BUFFER_COMMAND_GET_INFO_EXP
desc: Enumerator for $xCommandBufferCommandGetInfoExp
value: '222'
- name: ENQUEUE_TIMESTAMP_RECORDING_EXP
desc: Enumerator for $xEnqueueTimestampRecordingExp
value: '223'
---
type: enum
desc: Defines structure types
Expand Down
Loading