Skip to content

Commit

Permalink
Merge branch 'main' into fix_usm_allocation
Browse files Browse the repository at this point in the history
  • Loading branch information
lbushi25 authored May 13, 2024
2 parents 74e18f1 + 4c69624 commit 2d02c21
Show file tree
Hide file tree
Showing 90 changed files with 2,434 additions and 691 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/get_system_info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function system_info {
echo "**********/proc/meminfo**********"
cat /proc/meminfo
echo "**********build/bin/urinfo**********"
$(dirname "$(readlink -f "$0")")/../../build/bin/urinfo || true
$(dirname "$(readlink -f "$0")")/../../build/bin/urinfo --no-linear-ids --verbose || true
echo "******OpenCL*******"
# The driver version of OpenCL Graphics is the compute-runtime version
clinfo || echo "OpenCL not installed"
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
cmake_minimum_required(VERSION 3.20.0 FATAL_ERROR)
project(unified-runtime VERSION 0.9.0)

include(GNUInstallDirs)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
## Table of contents

- [Unified Runtime](#unified-runtime)
- [Adapters](#adapters)
- [Table of contents](#table-of-contents)
- [Contents of the repo](#contents-of-the-repo)
- [Integration](#integration)
Expand All @@ -29,7 +28,7 @@
- [Adapter naming convention](#adapter-naming-convention)
- [Source code generation](#source-code-generation)
- [Documentation](#documentation)
6. [Release Process](#release-process)
- [Release Process](#release-process)

## Contents of the repo

Expand Down Expand Up @@ -88,7 +87,7 @@ for more detailed instructions on the correct setup.

Required packages:
- C++ compiler with C++17 support
- [CMake](https://cmake.org/) >= 3.14.0
- [CMake](https://cmake.org/) >= 3.20.0
- Python v3.6.6 or later

### Windows
Expand Down Expand Up @@ -141,6 +140,7 @@ List of options provided by CMake:
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
Expand Down
57 changes: 56 additions & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 220, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -1641,6 +1642,7 @@ typedef enum ur_device_info_t {
///< backed 3D sampled image data.
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2017, ///< [::ur_bool_t] returns true if the device is capable of fetching
///< non-USM backed 3D sampled image data.
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP = 0x2018, ///< [::ur_bool_t] returns true if the device supports timestamp recording
/// @cond
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -1666,7 +1668,7 @@ typedef enum ur_device_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName`
/// + `::UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -5618,6 +5620,7 @@ typedef enum ur_command_t {
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP = 0x1000, ///< Event created by ::urCommandBufferEnqueueExp
UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP = 0x2000, ///< Event created by ::urBindlessImagesWaitExternalSemaphoreExp
UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp
UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp
/// @cond
UR_COMMAND_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -8890,6 +8893,46 @@ urKernelSuggestMaxCooperativeGroupCountExp(
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental APIs for enqueuing timestamp recordings
#if !defined(__GNUC__)
#pragma region enqueue timestamp recording(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Enqueue a command for recording the device timestamp
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hQueue`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == phEvent`
/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
UR_APIEXPORT ur_result_t UR_APICALL
urEnqueueTimestampRecordingExp(
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
bool blocking, ///< [in] indicates whether the call to this function should block
///< until the device timestamp recording command has executed on the
///< device.
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
///< events that must be complete before the timestamp recording
///< command executes.
///< If nullptr, the numEventsInWaitList must be 0, indicating no wait
///< events.
ur_event_handle_t *phEvent ///< [in,out] return an event object that identifies this particular
///< timestamp recording command. Profiling information can be queried
///< from this event as if `hQueue` had profiling enabled. Querying
///< `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
///< reports the timestamp at the time of the call to this function.
///< Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
///< reports the timestamp recorded when the command is executed on the device.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -10600,6 +10643,18 @@ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
ur_event_handle_t **pphEvent;
} ur_enqueue_cooperative_kernel_launch_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urEnqueueTimestampRecordingExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value
typedef struct ur_enqueue_timestamp_recording_exp_params_t {
ur_queue_handle_t *phQueue; ///< pointer to the `hQueue` argument
bool *pblocking; ///< pointer to the `blocking` argument
uint32_t *pnumEventsInWaitList; ///< pointer to the `numEventsInWaitList` argument
const ur_event_handle_t **pphEventWaitList; ///< pointer to the `phEventWaitList` argument (the pointed-to list pointer may be NULL)
ur_event_handle_t **pphEvent; ///< pointer to the `phEvent` argument
} ur_enqueue_timestamp_recording_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
10 changes: 10 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1448,10 +1448,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
const ur_event_handle_t *,
ur_event_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urEnqueueTimestampRecordingExp
/// @details The parameter list mirrors the urEnqueueTimestampRecordingExp
/// declaration; the names in the trailing comments are taken from it.
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
ur_queue_handle_t, // hQueue
bool, // blocking
uint32_t, // numEventsInWaitList
const ur_event_handle_t *, // phEventWaitList
ur_event_handle_t *); // phEvent

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of EnqueueExp functions pointers
/// @details One function-pointer slot per experimental enqueue entry point.
/// NOTE(review): member order is presumably part of the DDI table
/// layout shared with adapters - confirm before reordering.
typedef struct ur_enqueue_exp_dditable_t {
ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; ///< slot typed as ur_pfnEnqueueCooperativeKernelLaunchExp_t
ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp; ///< slot for urEnqueueTimestampRecordingExp
} ur_enqueue_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCooperativeKernelLaunchExpParams(const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
/// @details NOTE(review): `buffer`/`buff_size` are presumably the caller-provided
/// destination and its capacity, and `out_size` presumably receives the
/// size required for the printed text - confirm against the implementation.
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(const struct ur_enqueue_timestamp_recording_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct
/// @returns
Expand Down
66 changes: 66 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
break;
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -2571,6 +2574,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
break;
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
os << "UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4280,6 +4286,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -8788,6 +8806,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) {
case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP:
os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP";
break;
case UR_COMMAND_TIMESTAMP_RECORDING_EXP:
os << "UR_COMMAND_TIMESTAMP_RECORDING_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -14104,6 +14125,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t type
/// @details Writes the captured arguments as a comma-separated list of
///     `.name = value` pairs. Every member of the params struct is
///     dereferenced once to reach the argument it points at.
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) {
    os << ".hQueue = ";
    ur::details::printPtr(os, *(params->phQueue));

    os << ", " << ".blocking = " << *(params->pblocking);

    os << ", " << ".numEventsInWaitList = " << *(params->pnumEventsInWaitList);

    os << ", " << ".phEventWaitList = {";
    // A NULL wait list prints as an empty brace pair, whatever the count says.
    if (const auto *waitList = *(params->pphEventWaitList)) {
        const size_t waitCount = *(params->pnumEventsInWaitList);
        for (size_t idx = 0; idx < waitCount; ++idx) {
            // Separator goes before every element except the first.
            if (idx != 0) {
                os << ", ";
            }
            ur::details::printPtr(os, waitList[idx]);
        }
    }
    os << "}";

    os << ", " << ".phEvent = ";
    ur::details::printPtr(os, *(params->pphEvent));

    return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type
/// @returns
Expand Down Expand Up @@ -17126,6 +17189,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params;
} break;
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
} break;
case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: {
os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params;
} break;
Expand Down
70 changes: 70 additions & 0 deletions scripts/core/EXP-ENQUEUE-TIMESTAMP-RECORDING.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<%
OneApi=tags['$OneApi']
x=tags['$x']
X=x.upper()
%>

.. _experimental-enqueue-timestamp-recording:

================================================================================
Enqueue Timestamp Recording
================================================================================

.. warning::

Experimental features:

* May be replaced, updated, or removed at any time.
* Do not require maintaining API/ABI stability of their own additions over
time.
* Do not require conformance testing of their own additions.


Motivation
--------------------------------------------------------------------------------
Currently, the only way to get timestamp information is to enable
profiling on a queue and retrieve the information from the events returned by
commands submitted to it. However, not all systems give the programmer who
wants the profiling information full control over queue construction. To
address this, this extension adds the ability to enqueue a timestamp recording
on any queue, with or without profiling enabled. The resulting event can in
turn be queried for the usual profiling information.


API
--------------------------------------------------------------------------------

Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ${x}_device_info_t
* ${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP

* ${x}_command_t
* ${X}_COMMAND_TIMESTAMP_RECORDING_EXP

Functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* ${x}EnqueueTimestampRecordingExp

Changelog
--------------------------------------------------------------------------------

+-----------+------------------------+
| Revision | Changes |
+===========+========================+
| 1.0 | Initial Draft |
+-----------+------------------------+


Support
--------------------------------------------------------------------------------

Adapters which support this experimental feature *must* return true for the new
`${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` device info query.


Contributors
--------------------------------------------------------------------------------

* Steffen Larsen `steffen.larsen@intel.com <steffen.larsen@intel.com>`_
Loading

0 comments on commit 2d02c21

Please sign in to comment.