From 39e42df7e567230642a452dfbc5e51d46fe8fa87 Mon Sep 17 00:00:00 2001 From: Brandon Yates Date: Wed, 30 Aug 2023 07:44:15 -0400 Subject: [PATCH] Update to L0 Spec 1.7.8 Signed-off-by: Brandon Yates --- CHANGELOG.md | 4 + CMakeLists.txt | 2 +- include/ze.py | 2 +- include/ze_api.h | 5 +- include/ze_ddi.h | 2 +- include/zes.py | 163 +++- include/zes_api.h | 304 ++++-- include/zes_ddi.h | 48 +- include/zet.py | 11 +- include/zet_api.h | 13 +- include/zet_ddi.h | 2 +- scripts/input.json | 897 +++++++++++++++--- source/drivers/null/ze_nullddi.cpp | 3 +- source/drivers/null/zes_nullddi.cpp | 165 +++- source/layers/tracing/ze_trcddi.cpp | 3 +- .../validation/common/zes_entry_points.h | 4 +- .../ze_handle_lifetime.cpp | 3 +- .../zes_handle_lifetime.cpp | 88 +- .../zes_handle_lifetime.h | 4 +- .../ze_parameter_validation.cpp | 3 +- .../zes_parameter_validation.cpp | 104 +- .../zes_parameter_validation.h | 4 +- source/layers/validation/ze_valddi.cpp | 3 +- source/layers/validation/zes_valddi.cpp | 213 ++++- source/lib/ze_libapi.cpp | 3 +- source/lib/zes_libapi.cpp | 206 ++-- source/lib/zes_libddi.cpp | 12 + source/lib/zet_libapi.cpp | 2 +- source/loader/ze_ldrddi.cpp | 3 +- source/loader/zes_ldrddi.cpp | 207 +++- 30 files changed, 1970 insertions(+), 513 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df278a5..c7e2867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ # Level zero loader changelog + +## v1.14.0 +* Update to spec 1.7.8 + ## v1.13.5 * Updated code generating scripts for updated spec and init functionality * Change zeInit to only init on the first call in a process diff --git a/CMakeLists.txt b/CMakeLists.txt index 3815e1b..642ed98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ endif() # This project follows semantic versioning (https://semver.org/). Only set the # major and minor version here - patch version is determined dynamically. 
-project(level-zero VERSION 1.13) +project(level-zero VERSION 1.14) # Patch version corresponds to # of commits on master since last version # major/minor tag (e.g., v1.0). If not building in a git repository, then get diff --git a/include/ze.py b/include/ze.py index d2ee497..617b4ab 100644 --- a/include/ze.py +++ b/include/ze.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: MIT @file ze.py - @version v1.7-r1.7.0 + @version v1.7-r1.7.9 """ import platform diff --git a/include/ze_api.h b/include/ze_api.h index 5b87c56..9a2f875 100644 --- a/include/ze_api.h +++ b/include/ze_api.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: MIT * * @file ze_api.h - * @version v1.7-r1.7.0 + * @version v1.7-r1.7.9 * */ #ifndef _ZE_API_H @@ -9206,7 +9206,8 @@ zeEventQueryKernelTimestampsExt( ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. 
diff --git a/include/ze_ddi.h b/include/ze_ddi.h index 29188bb..363b38e 100644 --- a/include/ze_ddi.h +++ b/include/ze_ddi.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: MIT * * @file ze_ddi.h - * @version v1.7-r1.7.0 + * @version v1.7-r1.7.9 * */ #ifndef _ZE_DDI_H diff --git a/include/zes.py b/include/zes.py index 2267768..885707d 100644 --- a/include/zes.py +++ b/include/zes.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: MIT @file zes.py - @version v1.7-r1.7.0 + @version v1.7-r1.7.9 """ import platform @@ -342,23 +342,25 @@ class zes_device_properties_t(Structure): ## device properties ("numSubdevices", c_ulong), ## [out] Number of sub-devices. A value of 0 indicates that this device ## doesn't have sub-devices. - ("serialNumber", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Manufacturing serial number (NULL terminated string value). Will - ## be set to the string "unkown" if this cannot be determined for the - ## device. - ("boardNumber", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Manufacturing board number (NULL terminated string value). Will - ## be set to the string "unkown" if this cannot be determined for the - ## device. + ("serialNumber", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Manufacturing serial number (NULL terminated string value). This + ## value is intended to reflect the Part ID/SoC ID assigned by + ## manufacturer that is unique for a SoC. Will be set to the string + ## "unknown" if this cannot be determined for the device. + ("boardNumber", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Manufacturing board number (NULL terminated string value). + ## Alternatively "boardSerialNumber", this value is intended to reflect + ## the string printed on board label by manufacturer. Will be set to the + ## string "unknown" if this cannot be determined for the device. ("brandName", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Brand name of the device (NULL terminated string value). 
Will be - ## set to the string "unkown" if this cannot be determined for the + ## set to the string "unknown" if this cannot be determined for the ## device. ("modelName", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Model name of the device (NULL terminated string value). Will be - ## set to the string "unkown" if this cannot be determined for the + ## set to the string "unknown" if this cannot be determined for the ## device. ("vendorName", c_char * ZES_STRING_PROPERTY_SIZE), ## [out] Vendor name of the device (NULL terminated string value). Will - ## be set to the string "unkown" if this cannot be determined for the + ## be set to the string "unknown" if this cannot be determined for the ## device. ("driverVersion", c_char * ZES_STRING_PROPERTY_SIZE) ## [out] Installed driver version (NULL terminated string value). Will be - ## set to the string "unkown" if this cannot be determined for the + ## set to the string "unknown" if this cannot be determined for the ## device. ] @@ -914,26 +916,6 @@ class zes_engine_properties_t(Structure): ("subdeviceId", c_ulong) ## [out] If onSubdevice is true, this gives the ID of the sub-device ] -############################################################################### -## @brief Extension properties related to Engine Groups -## -## @details -## - This structure may be returned from ::zesEngineGetProperties via the -## `pNext` member of ::zes_engine_properties_t. -## - Used for SRIOV per Virtual Function device utilization by -## ::zes_engine_group_t -class zes_engine_ext_properties_t(Structure): - _fields_ = [ - ("stype", zes_structure_type_t), ## [in] type of this structure - ("pNext", c_void_p), ## [in,out][optional] must be null or a pointer to an extension-specific - ## structure (i.e. contains stype and pNext). 
- ("countOfVirtualFunctionInstance", c_ulong) ## [out] Number of Virtual Function(VF) instances associated with engine - ## to monitor the global utilization of hardware across all Virtual - ## Function from a Physical Function (PF) instance. These global and - ## VF-by-VF views should provide engine group and individual engine level - ## granularity. - ] - ############################################################################### ## @brief Engine activity counters ## @@ -2189,6 +2171,90 @@ class zes_power_ext_properties_t(Structure): ("defaultLimit", POINTER(zes_power_limit_ext_desc_t)) ## [out] the factory default limit of the part. ] +############################################################################### +## @brief Engine Activity Extension Name +ZES_ENGINE_ACTIVITY_EXT_NAME = "ZES_extension_engine_activity" + +############################################################################### +## @brief Engine Activity Extension Version(s) +class zes_engine_activity_ext_version_v(IntEnum): + _1_0 = ZE_MAKE_VERSION( 1, 0 ) ## version 1.0 + CURRENT = ZE_MAKE_VERSION( 1, 0 ) ## latest known version + +class zes_engine_activity_ext_version_t(c_int): + def __str__(self): + return str(zes_engine_activity_ext_version_v(self.value)) + + +############################################################################### +## @brief Extension properties related to Engine Groups +## +## @details +## - This structure may be passed to ::zesEngineGetProperties by having the +## pNext member of ::zes_engine_properties_t point at this struct. +## - Used for SRIOV per Virtual Function device utilization by +## ::zes_engine_group_t +class zes_engine_ext_properties_t(Structure): + _fields_ = [ + ("stype", zes_structure_type_t), ## [in] type of this structure + ("pNext", c_void_p), ## [in,out][optional] must be null or a pointer to an extension-specific + ## structure (i.e. contains stype and pNext). 
+ ("countOfVirtualFunctionInstance", c_ulong) ## [out] Number of Virtual Function(VF) instances associated with engine + ## to monitor the utilization of hardware across all Virtual Function + ## from a Physical Function (PF) instance. + ## These VF-by-VF views should provide engine group and individual engine + ## level granularity. + ## This count represents the number of VF instances that are actively + ## using the resource represented by the engine handle. + ] + +############################################################################### +## @brief RAS Get State Extension Name +ZES_RAS_GET_STATE_EXP_NAME = "ZES_extension_ras_state" + +############################################################################### +## @brief RAS Get State Extension Version(s) +class zes_ras_state_exp_version_v(IntEnum): + _1_0 = ZE_MAKE_VERSION( 1, 0 ) ## version 1.0 + CURRENT = ZE_MAKE_VERSION( 1, 0 ) ## latest known version + +class zes_ras_state_exp_version_t(c_int): + def __str__(self): + return str(zes_ras_state_exp_version_v(self.value)) + + +############################################################################### +## @brief RAS error categories +class zes_ras_error_category_exp_v(IntEnum): + RESET = 0 ## The number of accelerator engine resets attempted by the driver + PROGRAMMING_ERRORS = 1 ## The number of hardware exceptions generated by the way workloads have + ## programmed the hardware + DRIVER_ERRORS = 2 ## The number of low level driver communication errors that have occurred + COMPUTE_ERRORS = 3 ## The number of errors that have occurred in the compute accelerator + ## hardware + NON_COMPUTE_ERRORS = 4 ## The number of errors that have occurred in the fixed-function + ## accelerator hardware + CACHE_ERRORS = 5 ## The number of errors that have occurred in caches (L1/L3/register + ## file/shared local memory/sampler) + DISPLAY_ERRORS = 6 ## The number of errors that have occurred in the display + MEMORY_ERRORS = 7 ## The number of errors that have occurred 
in Memory + SCALE_ERRORS = 8 ## The number of errors that have occurred in Scale Fabric + L3FABRIC_ERRORS = 9 ## The number of errors that have occurred in L3 Fabric + +class zes_ras_error_category_exp_t(c_int): + def __str__(self): + return str(zes_ras_error_category_exp_v(self.value)) + + +############################################################################### +## @brief Extension structure for providing RAS error counters for different +## error sets +class zes_ras_state_exp_t(Structure): + _fields_ = [ + ("category", zes_ras_error_category_exp_t), ## [out] category for which error counter is provided. + ("errorCounter", c_ulonglong) ## [out] Current value of RAS counter for specific error category. + ] + ############################################################################### __use_win_types = "Windows" == platform.uname()[0] @@ -3318,6 +3384,29 @@ class _zes_ras_dditable_t(Structure): ("pfnGetState", c_void_p) ## _zesRasGetState_t ] +############################################################################### +## @brief Function-pointer for zesRasGetStateExp +if __use_win_types: + _zesRasGetStateExp_t = WINFUNCTYPE( ze_result_t, zes_ras_handle_t, POINTER(c_ulong), POINTER(zes_ras_state_exp_t) ) +else: + _zesRasGetStateExp_t = CFUNCTYPE( ze_result_t, zes_ras_handle_t, POINTER(c_ulong), POINTER(zes_ras_state_exp_t) ) + +############################################################################### +## @brief Function-pointer for zesRasClearStateExp +if __use_win_types: + _zesRasClearStateExp_t = WINFUNCTYPE( ze_result_t, zes_ras_handle_t, zes_ras_error_category_exp_t ) +else: + _zesRasClearStateExp_t = CFUNCTYPE( ze_result_t, zes_ras_handle_t, zes_ras_error_category_exp_t ) + + +############################################################################### +## @brief Table of RasExp functions pointers +class _zes_ras_exp_dditable_t(Structure): + _fields_ = [ + ("pfnGetStateExp", c_void_p), ## _zesRasGetStateExp_t + ("pfnClearStateExp", 
c_void_p) ## _zesRasClearStateExp_t + ] + ############################################################################### ## @brief Function-pointer for zesDiagnosticsGetProperties if __use_win_types: @@ -3370,6 +3459,7 @@ class _zes_dditable_t(Structure): ("Fan", _zes_fan_dditable_t), ("Led", _zes_led_dditable_t), ("Ras", _zes_ras_dditable_t), + ("RasExp", _zes_ras_exp_dditable_t), ("Diagnostics", _zes_diagnostics_dditable_t) ] @@ -3673,6 +3763,17 @@ def __init__(self, version : ze_api_version_t): self.zesRasSetConfig = _zesRasSetConfig_t(self.__dditable.Ras.pfnSetConfig) self.zesRasGetState = _zesRasGetState_t(self.__dditable.Ras.pfnGetState) + # call driver to get function pointers + _RasExp = _zes_ras_exp_dditable_t() + r = ze_result_v(self.__dll.zesGetRasExpProcAddrTable(version, byref(_RasExp))) + if r != ze_result_v.SUCCESS: + raise Exception(r) + self.__dditable.RasExp = _RasExp + + # attach function interface to function address + self.zesRasGetStateExp = _zesRasGetStateExp_t(self.__dditable.RasExp.pfnGetStateExp) + self.zesRasClearStateExp = _zesRasClearStateExp_t(self.__dditable.RasExp.pfnClearStateExp) + # call driver to get function pointers _Diagnostics = _zes_diagnostics_dditable_t() r = ze_result_v(self.__dll.zesGetDiagnosticsProcAddrTable(version, byref(_Diagnostics))) diff --git a/include/zes_api.h b/include/zes_api.h index 4fc1ef7..991b26d 100644 --- a/include/zes_api.h +++ b/include/zes_api.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: MIT * * @file zes_api.h - * @version v1.7-r1.7.0 + * @version v1.7-r1.7.9 * */ #ifndef _ZES_API_H @@ -305,10 +305,6 @@ typedef struct _zes_device_ecc_properties_t zes_device_ecc_properties_t; /// @brief Forward-declare zes_engine_properties_t typedef struct _zes_engine_properties_t zes_engine_properties_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Forward-declare zes_engine_ext_properties_t -typedef struct _zes_engine_ext_properties_t zes_engine_ext_properties_t; - 
/////////////////////////////////////////////////////////////////////////////// /// @brief Forward-declare zes_engine_stats_t typedef struct _zes_engine_stats_t zes_engine_stats_t; @@ -501,6 +497,14 @@ typedef struct _zes_power_limit_ext_desc_t zes_power_limit_ext_desc_t; /// @brief Forward-declare zes_power_ext_properties_t typedef struct _zes_power_ext_properties_t zes_power_ext_properties_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare zes_engine_ext_properties_t +typedef struct _zes_engine_ext_properties_t zes_engine_ext_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Forward-declare zes_ras_state_exp_t +typedef struct _zes_ras_state_exp_t zes_ras_state_exp_t; + #if !defined(__GNUC__) #pragma endregion @@ -762,23 +766,25 @@ typedef struct _zes_device_properties_t ///< device properties uint32_t numSubdevices; ///< [out] Number of sub-devices. A value of 0 indicates that this device ///< doesn't have sub-devices. - char serialNumber[ZES_STRING_PROPERTY_SIZE]; ///< [out] Manufacturing serial number (NULL terminated string value). Will - ///< be set to the string "unkown" if this cannot be determined for the - ///< device. - char boardNumber[ZES_STRING_PROPERTY_SIZE]; ///< [out] Manufacturing board number (NULL terminated string value). Will - ///< be set to the string "unkown" if this cannot be determined for the - ///< device. + char serialNumber[ZES_STRING_PROPERTY_SIZE]; ///< [out] Manufacturing serial number (NULL terminated string value). This + ///< value is intended to reflect the Part ID/SoC ID assigned by + ///< manufacturer that is unique for a SoC. Will be set to the string + ///< "unknown" if this cannot be determined for the device. + char boardNumber[ZES_STRING_PROPERTY_SIZE]; ///< [out] Manufacturing board number (NULL terminated string value). 
+ ///< Alternatively "boardSerialNumber", this value is intended to reflect + ///< the string printed on board label by manufacturer. Will be set to the + ///< string "unknown" if this cannot be determined for the device. char brandName[ZES_STRING_PROPERTY_SIZE]; ///< [out] Brand name of the device (NULL terminated string value). Will be - ///< set to the string "unkown" if this cannot be determined for the + ///< set to the string "unknown" if this cannot be determined for the ///< device. char modelName[ZES_STRING_PROPERTY_SIZE]; ///< [out] Model name of the device (NULL terminated string value). Will be - ///< set to the string "unkown" if this cannot be determined for the + ///< set to the string "unknown" if this cannot be determined for the ///< device. char vendorName[ZES_STRING_PROPERTY_SIZE]; ///< [out] Vendor name of the device (NULL terminated string value). Will - ///< be set to the string "unkown" if this cannot be determined for the + ///< be set to the string "unknown" if this cannot be determined for the ///< device. char driverVersion[ZES_STRING_PROPERTY_SIZE]; ///< [out] Installed driver version (NULL terminated string value). Will be - ///< set to the string "unkown" if this cannot be determined for the + ///< set to the string "unknown" if this cannot be determined for the ///< device. } zes_device_properties_t; @@ -2290,27 +2296,6 @@ typedef struct _zes_engine_properties_t } zes_engine_properties_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Extension properties related to Engine Groups -/// -/// @details -/// - This structure may be returned from ::zesEngineGetProperties via the -/// `pNext` member of ::zes_engine_properties_t. 
-/// - Used for SRIOV per Virtual Function device utilization by -/// ::zes_engine_group_t -typedef struct _zes_engine_ext_properties_t -{ - zes_structure_type_t stype; ///< [in] type of this structure - void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific - ///< structure (i.e. contains stype and pNext). - uint32_t countOfVirtualFunctionInstance; ///< [out] Number of Virtual Function(VF) instances associated with engine - ///< to monitor the global utilization of hardware across all Virtual - ///< Function from a Physical Function (PF) instance. These global and - ///< VF-by-VF views should provide engine group and individual engine level - ///< granularity. - -} zes_engine_ext_properties_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Engine activity counters /// @@ -2415,40 +2400,6 @@ zesEngineGetActivity( ///< counters. ); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Get the activity stats for each Virtual Function (VF) associated with -/// engine group. This function is used from a Physical Function (PF) -/// interface when GPU is virtualized (SRIOV) into Virtual Function and -/// Physical Function devices -/// -/// @details -/// - The application may call this function from simultaneous threads. -/// - The implementation of this function should be lock-free. -/// -/// @returns -/// - ::ZE_RESULT_SUCCESS -/// - ::ZE_RESULT_ERROR_UNINITIALIZED -/// - ::ZE_RESULT_ERROR_DEVICE_LOST -/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY -/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `nullptr == hEngine` -/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER -/// + `nullptr == pCount` -ZE_APIEXPORT ze_result_t ZE_APICALL -zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. - uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. 
- ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. - ); - #if !defined(__GNUC__) #pragma endregion #endif @@ -3030,14 +2981,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. - ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. + zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ); @@ -6406,6 +6352,208 @@ zesPowerSetLimitsExt( zes_power_limit_ext_desc_t* pSustained ///< [in][optional][range(0, *pCount)] Array of power limit descriptors. 
); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Level-Zero Sysman Extension APIs for Engine Activity +#if !defined(__GNUC__) +#pragma region engineActivity +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef ZES_ENGINE_ACTIVITY_EXT_NAME +/// @brief Engine Activity Extension Name +#define ZES_ENGINE_ACTIVITY_EXT_NAME "ZES_extension_engine_activity" +#endif // ZES_ENGINE_ACTIVITY_EXT_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Engine Activity Extension Version(s) +typedef enum _zes_engine_activity_ext_version_t +{ + ZES_ENGINE_ACTIVITY_EXT_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 + ZES_ENGINE_ACTIVITY_EXT_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version + ZES_ENGINE_ACTIVITY_EXT_VERSION_FORCE_UINT32 = 0x7fffffff + +} zes_engine_activity_ext_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extension properties related to Engine Groups +/// +/// @details +/// - This structure may be passed to ::zesEngineGetProperties by having the +/// pNext member of ::zes_engine_properties_t point at this struct. +/// - Used for SRIOV per Virtual Function device utilization by +/// ::zes_engine_group_t +typedef struct _zes_engine_ext_properties_t +{ + zes_structure_type_t stype; ///< [in] type of this structure + void* pNext; ///< [in,out][optional] must be null or a pointer to an extension-specific + ///< structure (i.e. contains stype and pNext). + uint32_t countOfVirtualFunctionInstance; ///< [out] Number of Virtual Function(VF) instances associated with engine + ///< to monitor the utilization of hardware across all Virtual Function + ///< from a Physical Function (PF) instance. + ///< These VF-by-VF views should provide engine group and individual engine + ///< level granularity. 
+ ///< This count represents the number of VF instances that are actively + ///< using the resource represented by the engine handle. + +} zes_engine_ext_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get activity stats for Physical Function (PF) and each Virtual +/// Function (VF) associated with engine group. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hEngine` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pCount` +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE - "Engine activity extension is not supported in the environment." +ZE_APIEXPORT ze_result_t ZE_APICALL +zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. + ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. 
+ ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. + ); + +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Level-Zero Sysman Extension APIs for RAS Get State and Clear State +#if !defined(__GNUC__) +#pragma region rasState +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef ZES_RAS_GET_STATE_EXP_NAME +/// @brief RAS Get State Extension Name +#define ZES_RAS_GET_STATE_EXP_NAME "ZES_extension_ras_state" +#endif // ZES_RAS_GET_STATE_EXP_NAME + +/////////////////////////////////////////////////////////////////////////////// +/// @brief RAS Get State Extension Version(s) +typedef enum _zes_ras_state_exp_version_t +{ + ZES_RAS_STATE_EXP_VERSION_1_0 = ZE_MAKE_VERSION( 1, 0 ), ///< version 1.0 + ZES_RAS_STATE_EXP_VERSION_CURRENT = ZE_MAKE_VERSION( 1, 0 ), ///< latest known version + ZES_RAS_STATE_EXP_VERSION_FORCE_UINT32 = 0x7fffffff + +} zes_ras_state_exp_version_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief RAS error categories +typedef enum _zes_ras_error_category_exp_t +{ + ZES_RAS_ERROR_CATEGORY_EXP_RESET = 0, ///< The number of accelerator engine resets attempted by the driver + ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS = 1, ///< The number of hardware exceptions generated by the way workloads have + ///< programmed the hardware + ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS = 2, ///< The number of low level driver communication errors have occurred + ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS = 3, ///< The number of errors that have occurred in the compute accelerator + ///< hardware + ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS = 4, ///< The number of errors that have occurred in the fixed-function + ///< accelerator hardware + ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS = 5, ///< The number of errors that have occurred in 
caches (L1/L3/register + ///< file/shared local memory/sampler) + ZES_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS = 6, ///< The number of errors that have occurred in the display + ZES_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS = 7, ///< The number of errors that have occurred in Memory + ZES_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS = 8, ///< The number of errors that have occurred in Scale Fabric + ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS = 9, ///< The number of errors that have occurred in L3 Fabric + ZES_RAS_ERROR_CATEGORY_EXP_FORCE_UINT32 = 0x7fffffff + +} zes_ras_error_category_exp_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extension structure for providing RAS error counters for different +/// error sets +typedef struct _zes_ras_state_exp_t +{ + zes_ras_error_category_exp_t category; ///< [out] category for which error counter is provided. + uint64_t errorCounter; ///< [out] Current value of RAS counter for specific error category. + +} zes_ras_state_exp_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Ras Get State +/// +/// @details +/// - This function retrieves error counters for different RAS error +/// categories. +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hRas` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pCount` +ZE_APIEXPORT ze_result_t ZE_APICALL +zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. 
+ ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. + ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. + ); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Ras Clear State +/// +/// @details +/// - This function clears error counters for a RAS error category. +/// - Clearing errors will affect other threads/applications - the counter +/// values will start from zero. +/// - Clearing errors requires write permissions. +/// - The application should not call this function from simultaneous +/// threads. +/// - The implementation of this function should be lock-free. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hRas` +/// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION +/// + `::ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS < category` +/// - ::ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS +/// + Don't have permissions to clear error counters. +ZE_APIEXPORT ze_result_t ZE_APICALL +zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. 
+ ); + #if !defined(__GNUC__) #pragma endregion #endif diff --git a/include/zes_ddi.h b/include/zes_ddi.h index 6614cc1..b849b98 100644 --- a/include/zes_ddi.h +++ b/include/zes_ddi.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: MIT * * @file zes_ddi.h - * @version v1.7-r1.7.0 + * @version v1.7-r1.7.9 * */ #ifndef _ZES_DDI_H @@ -1584,6 +1584,51 @@ typedef ze_result_t (ZE_APICALL *zes_pfnGetRasProcAddrTable_t)( zes_ras_dditable_t* ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for zesRasGetStateExp +typedef ze_result_t (ZE_APICALL *zes_pfnRasGetStateExp_t)( + zes_ras_handle_t, + uint32_t*, + zes_ras_state_exp_t* + ); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for zesRasClearStateExp +typedef ze_result_t (ZE_APICALL *zes_pfnRasClearStateExp_t)( + zes_ras_handle_t, + zes_ras_error_category_exp_t + ); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of RasExp functions pointers +typedef struct _zes_ras_exp_dditable_t +{ + zes_pfnRasGetStateExp_t pfnGetStateExp; + zes_pfnRasClearStateExp_t pfnClearStateExp; +} zes_ras_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's RasExp table +/// with current process' addresses +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_VERSION +ZE_DLLEXPORT ze_result_t ZE_APICALL +zesGetRasExpProcAddrTable( + ze_api_version_t version, ///< [in] API version requested + zes_ras_exp_dditable_t* pDdiTable ///< [in,out] pointer to table of DDI function pointers + ); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for zesGetRasExpProcAddrTable +typedef ze_result_t (ZE_APICALL 
*zes_pfnGetRasExpProcAddrTable_t)( + ze_api_version_t, + zes_ras_exp_dditable_t* + ); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for zesDiagnosticsGetProperties typedef ze_result_t (ZE_APICALL *zes_pfnDiagnosticsGetProperties_t)( @@ -1661,6 +1706,7 @@ typedef struct _zes_dditable_t zes_fan_dditable_t Fan; zes_led_dditable_t Led; zes_ras_dditable_t Ras; + zes_ras_exp_dditable_t RasExp; zes_diagnostics_dditable_t Diagnostics; } zes_dditable_t; diff --git a/include/zet.py b/include/zet.py index 12ee311..a5c2743 100644 --- a/include/zet.py +++ b/include/zet.py @@ -4,7 +4,7 @@ SPDX-License-Identifier: MIT @file zet.py - @version v1.7-r1.7.0 + @version v1.7-r1.7.9 """ import platform @@ -469,8 +469,13 @@ class zet_metric_streamer_desc_t(Structure): ("pNext", c_void_p), ## [in][optional] must be null or a pointer to an extension-specific ## structure (i.e. contains stype and pNext). ("notifyEveryNReports", c_ulong), ## [in,out] number of collected reports after which notification event - ## will be signalled - ("samplingPeriod", c_ulong) ## [in,out] streamer sampling period in nanoseconds + ## will be signaled. If the requested value is not supported exactly, + ## then the driver may use a value that is the closest supported + ## approximation and shall update this member during ::zetMetricStreamerOpen. + ("samplingPeriod", c_ulong) ## [in,out] streamer sampling period in nanoseconds. If the requested + ## value is not supported exactly, then the driver may use a value that + ## is the closest supported approximation and shall update this member + ## during ::zetMetricStreamerOpen. 
] ############################################################################### diff --git a/include/zet_api.h b/include/zet_api.h index 05b1c14..0e8765c 100644 --- a/include/zet_api.h +++ b/include/zet_api.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: MIT * * @file zet_api.h - * @version v1.7-r1.7.0 + * @version v1.7-r1.7.9 * */ #ifndef _ZET_API_H @@ -1157,8 +1157,13 @@ typedef struct _zet_metric_streamer_desc_t const void* pNext; ///< [in][optional] must be null or a pointer to an extension-specific ///< structure (i.e. contains stype and pNext). uint32_t notifyEveryNReports; ///< [in,out] number of collected reports after which notification event - ///< will be signalled - uint32_t samplingPeriod; ///< [in,out] streamer sampling period in nanoseconds + ///< will be signaled. If the requested value is not supported exactly, + ///< then the driver may use a value that is the closest supported + ///< approximation and shall update this member during ::zetMetricStreamerOpen. + uint32_t samplingPeriod; ///< [in,out] streamer sampling period in nanoseconds. If the requested + ///< value is not supported exactly, then the driver may use a value that + ///< is the closest supported approximation and shall update this member + ///< during ::zetMetricStreamerOpen. } zet_metric_streamer_desc_t; @@ -1410,7 +1415,7 @@ zetMetricQueryDestroy( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Resets a metric query object back to inital state. +/// @brief Resets a metric query object back to initial state. 
/// /// @details /// - The application must ensure the device is not currently referencing diff --git a/include/zet_ddi.h b/include/zet_ddi.h index be21172..f56a727 100644 --- a/include/zet_ddi.h +++ b/include/zet_ddi.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: MIT * * @file zet_ddi.h - * @version v1.7-r1.7.0 + * @version v1.7-r1.7.9 * */ #ifndef _ZET_DDI_H diff --git a/scripts/input.json b/scripts/input.json index 6d5f6ad..dbb3082 100644 --- a/scripts/input.json +++ b/scripts/input.json @@ -201,8 +201,8 @@ "owner": "$sDevice", "struct": [ "$s_engine_properties_t", - "$s_engine_ext_properties_t", - "$s_engine_stats_t" + "$s_engine_stats_t", + "$s_engine_ext_properties_t" ] }, "$sFabricPort": { @@ -496,13 +496,16 @@ "$sRas": { "enum": [ "$s_ras_error_type_t", - "$s_ras_error_cat_t" + "$s_ras_error_cat_t", + "$s_ras_error_category_exp_t" ], "function": [ "GetProperties", "GetConfig", "SetConfig", - "GetState" + "GetState", + "GetStateExp", + "ClearStateExp" ], "handle": [ "$s_ras_handle_t" @@ -516,7 +519,8 @@ "struct": [ "$s_ras_properties_t", "$s_ras_state_t", - "$s_ras_config_t" + "$s_ras_config_t", + "$s_ras_state_exp_t" ] }, "$sScheduler": { @@ -1621,6 +1625,14 @@ ], "max": "$S_DIAG_RESULT_REBOOT_FOR_REPAIR" }, + "$s_engine_activity_ext_version_t": { + "class": "", + "etors": [ + "$S_ENGINE_ACTIVITY_EXT_VERSION_1_0", + "$S_ENGINE_ACTIVITY_EXT_VERSION_CURRENT" + ], + "max": "$S_ENGINE_ACTIVITY_EXT_VERSION_CURRENT" + }, "$s_engine_group_t": { "class": "$sEngine", "etors": [ @@ -1961,6 +1973,22 @@ ], "max": "$S_RAS_ERROR_CAT_DISPLAY_ERRORS" }, + "$s_ras_error_category_exp_t": { + "class": "$sRas", + "etors": [ + "$S_RAS_ERROR_CATEGORY_EXP_RESET", + "$S_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS", + 
"$S_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS", + "$S_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS" + ], + "max": "$S_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS" + }, "$s_ras_error_type_t": { "class": "$sRas", "etors": [ @@ -1969,6 +1997,14 @@ ], "max": "$S_RAS_ERROR_TYPE_UNCORRECTABLE" }, + "$s_ras_state_exp_version_t": { + "class": "", + "etors": [ + "$S_RAS_STATE_EXP_VERSION_1_0", + "$S_RAS_STATE_EXP_VERSION_CURRENT" + ], + "max": "$S_RAS_STATE_EXP_VERSION_CURRENT" + }, "$s_repair_status_t": { "class": "$sDevice", "etors": [ @@ -2293,6 +2329,7 @@ "$X_API_VERSION_1_4", "$X_API_VERSION_1_5", "$X_API_VERSION_1_6", + "$X_API_VERSION_1_7", "$X_API_VERSION_CURRENT" ], "max": "$X_API_VERSION_CURRENT" @@ -4776,6 +4813,17 @@ } ] }, + "$sRasClearStateExp": { + "class": "$sRas", + "params": [ + { + "type": "$s_ras_handle_t" + }, + { + "type": "$s_ras_error_category_exp_t" + } + ] + }, "$sRasGetConfig": { "class": "$sRas", "params": [ @@ -4812,6 +4860,20 @@ } ] }, + "$sRasGetStateExp": { + "class": "$sRas", + "params": [ + { + "type": "$s_ras_handle_t" + }, + { + "type": "uint32_t*" + }, + { + "type": "$s_ras_state_exp_t*" + } + ] + }, "$sRasSetConfig": { "class": "$sRas", "params": [ @@ -8256,6 +8318,12 @@ "0xFFFFFFFF" ] }, + "$S_ENGINE_ACTIVITY_EXT_NAME": { + "class": "", + "values": [ + "\"$XS_extension_engine_activity\"" + ] + }, "$S_FAN_TEMP_SPEED_PAIR_COUNT": { "class": "", "values": [ @@ -8292,6 +8360,12 @@ "\"$XS_extension_power_limits\"" ] }, + "$S_RAS_GET_STATE_EXP_NAME": { + "class": "", + "values": [ + "\"$XS_extension_ras_state\"" + ] + }, "$S_SCHED_WATCHDOG_DISABLE": { "class": "", "values": [ @@ -8892,37 +8966,37 @@ "type": "uint32_t" }, { - "desc": "[out] Manufacturing serial number (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Manufacturing serial number (NULL terminated string value). 
This value is intended to reflect the Part ID/SoC ID assigned by manufacturer that is unique for a SoC. Will be set to the string \"unknown\" if this cannot be determined for the device.", "init": null, "name": "serialNumber[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Manufacturing board number (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Manufacturing board number (NULL terminated string value). Alternatively \"boardSerialNumber\", this value is intended to reflect the string printed on board label by manufacturer. Will be set to the string \"unknown\" if this cannot be determined for the device.", "init": null, "name": "boardNumber[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Brand name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Brand name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "init": null, "name": "brandName[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Model name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Model name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "init": null, "name": "modelName[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Vendor name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Vendor name of the device (NULL terminated string value). 
Will be set to the string \"unknown\" if this cannot be determined for the device.", "init": null, "name": "vendorName[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Installed driver version (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Installed driver version (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "init": null, "name": "driverVersion[$S_STRING_PROPERTY_SIZE]", "type": "char" @@ -9019,7 +9093,7 @@ "class": "$sEngine", "members": [ { - "desc": "[out] Number of Virtual Function(VF) instances associated with engine to monitor the global utilization of hardware across all Virtual Function from a Physical Function (PF) instance. These global and VF-by-VF views should provide engine group and individual engine level granularity.", + "desc": "[out] Number of Virtual Function(VF) instances associated with engine to monitor the utilization of hardware across all Virtual Function from a Physical Function (PF) instance.\nThese VF-by-VF views should provide engine group and individual engine level granularity.\nThis count represents the number of VF instances that are actively using the resource represented by the engine handle.\n", "init": null, "name": "countOfVirtualFunctionInstance", "type": "uint32_t" @@ -10411,6 +10485,23 @@ } ] }, + "$s_ras_state_exp_t": { + "class": "$sRas", + "members": [ + { + "desc": "[out] category for which error counter is provided.", + "init": null, + "name": "category", + "type": "$s_ras_error_category_exp_t" + }, + { + "desc": "[out] Current value of RAS counter for specific error category.", + "init": null, + "name": "errorCounter", + "type": "uint64_t" + } + ] + }, "$s_ras_state_t": { "class": "$sRas", "members": [ @@ -11018,13 +11109,13 @@ "class": "$tMetricStreamer", "members": [ { - "desc": "[in,out] number of collected reports after which notification event 
will be signalled", + "desc": "[in,out] number of collected reports after which notification event will be signaled. If the requested value is not supported exactly, then the driver may use a value that is the closest supported approximation and shall update this member during $tMetricStreamerOpen.\n", "init": null, "name": "notifyEveryNReports", "type": "uint32_t" }, { - "desc": "[in,out] streamer sampling period in nanoseconds", + "desc": "[in,out] streamer sampling period in nanoseconds. If the requested value is not supported exactly, then the driver may use a value that is the closest supported approximation and shall update this member during $tMetricStreamerOpen.\n", "init": null, "name": "samplingPeriod", "type": "uint32_t" @@ -14904,10 +14995,16 @@ "value": "ZE_MAKE_VERSION( 1, 6 )", "version": "1.6" }, + { + "desc": "version 1.7", + "name": "ZE_API_VERSION_1_7", + "value": "ZE_MAKE_VERSION( 1, 7 )", + "version": "1.7" + }, { "desc": "latest known version", "name": "ZE_API_VERSION_CURRENT", - "value": "ZE_MAKE_VERSION( 1, 6 )" + "value": "ZE_MAKE_VERSION( 1, 7 )" } ], "name": "ze_api_version_t", @@ -18316,6 +18413,24 @@ "name": "zes_diag_result_t", "type": "enum" }, + "zes_engine_activity_ext_version_t": { + "desc": "Engine Activity Extension Version(s)", + "etors": [ + { + "desc": "version 1.0", + "name": "ZES_ENGINE_ACTIVITY_EXT_VERSION_1_0", + "value": "ZE_MAKE_VERSION( 1, 0 )" + }, + { + "desc": "latest known version", + "name": "ZES_ENGINE_ACTIVITY_EXT_VERSION_CURRENT", + "value": "ZE_MAKE_VERSION( 1, 0 )" + } + ], + "name": "zes_engine_activity_ext_version_t", + "type": "enum", + "version": "1.7" + }, "zes_engine_group_t": { "class": "zesEngine", "desc": "Accelerator engine groups", @@ -19366,6 +19481,75 @@ "name": "zes_ras_error_cat_t", "type": "enum" }, + "zes_ras_error_category_exp_t": { + "class": "zesRas", + "desc": "RAS error categories", + "etors": [ + { + "desc": "The number of accelerator engine resets attempted by the driver", + "name": 
"ZES_RAS_ERROR_CATEGORY_EXP_RESET", + "value": "0", + "version": "1.7" + }, + { + "desc": "The number of hardware exceptions generated by the way workloads have programmed the hardware", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS", + "value": "1", + "version": "1.7" + }, + { + "desc": "The number of low level driver communication errors have occurred", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS", + "value": "2", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in the compute accelerator hardware", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS", + "value": "3", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in the fixed-function accelerator hardware", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS", + "value": "4", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in caches (L1/L3/register file/shared local memory/sampler)", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS", + "value": "5", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in the display", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS", + "value": "6", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in Memory", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS", + "value": "7", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in Scale Fabric", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS", + "value": "8", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in L3 Fabric", + "name": "ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS", + "value": "9", + "version": "1.7" + } + ], + "name": "zes_ras_error_category_exp_t", + "type": "enum", + "version": "1.7" + }, "zes_ras_error_type_t": { "class": "zesRas", "desc": "RAS error type", @@ -19384,6 +19568,24 @@ "name": "zes_ras_error_type_t", "type": "enum" }, + "zes_ras_state_exp_version_t": { + "desc": 
"RAS Get State Extension Version(s)", + "etors": [ + { + "desc": "version 1.0", + "name": "ZES_RAS_STATE_EXP_VERSION_1_0", + "value": "ZE_MAKE_VERSION( 1, 0 )" + }, + { + "desc": "latest known version", + "name": "ZES_RAS_STATE_EXP_VERSION_CURRENT", + "value": "ZE_MAKE_VERSION( 1, 0 )" + } + ], + "name": "zes_ras_state_exp_version_t", + "type": "enum", + "version": "1.7" + }, "zes_repair_status_t": { "class": "zesDevice", "desc": "Device repair status", @@ -23493,6 +23695,64 @@ ], "type": "function" }, + "ClearStateExp": { + "class": "zesRas", + "desc": "Ras Clear State", + "details": [ + "This function clears error counters for a RAS error category.", + "Clearing errors will affect other threads/applications - the counter values will start from zero.", + "Clearing errors requires write permissions.", + "The application should not call this function from simultaneous threads.", + "The implementation of this function should be lock-free." + ], + "hash": "94c31ad19460e9ec5a14b5d704aa570c4b03bf09d0a0c48838190b0a3e50f362", + "name": "ClearStateExp", + "params": [ + { + "desc": "[in] Handle for the component.", + "name": "hRas", + "type": "zes_ras_handle_t" + }, + { + "desc": "[in] category for which error counter is to be cleared.", + "name": "category", + "type": "zes_ras_error_category_exp_t" + } + ], + "returns": [ + { + "ZE_RESULT_SUCCESS": [] + }, + { + "ZE_RESULT_ERROR_UNINITIALIZED": [] + }, + { + "ZE_RESULT_ERROR_DEVICE_LOST": [] + }, + { + "ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY": [] + }, + { + "ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY": [] + }, + { + "ZE_RESULT_ERROR_INVALID_NULL_HANDLE": [ + "`nullptr == hRas`" + ] + }, + { + "ZE_RESULT_ERROR_INVALID_ENUMERATION": [ + "`ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS < category`" + ] + }, + { + "ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS": [ + "Don't have permissions to clear error counters." 
+ ] + } + ], + "type": "function" + }, "Close": { "class": "zetMetricStreamer", "desc": "Closes metric streamer.", @@ -25834,7 +26094,7 @@ }, "GetActivityExt": { "class": "zesEngine", - "desc": "Get the activity stats for each Virtual Function (VF) associated with engine group. This function is used from a Physical Function (PF) interface when GPU is virtualized (SRIOV) into Virtual Function and Physical Function devices", + "desc": "Get activity stats for Physical Function (PF) and each Virtual Function (VF) associated with engine group.", "details": [ "The application may call this function from simultaneous threads.", "The implementation of this function should be lock-free." @@ -25848,12 +26108,12 @@ "type": "zes_engine_handle_t" }, { - "desc": "[in,out] Pointer to the number of engine stats descriptors.\n - if count is zero, the driver shall update the value with the total number of components of this type.\n - if count is greater than the total number of components available, the driver shall update the value with the correct number of components available.\n", + "desc": "[in,out] Pointer to the number of VF engine stats descriptors.\n - if count is zero, the driver shall update the value with the total number of engine stats available.\n - if count is greater than the total number of engine stats available, the driver shall update the value with the correct number of engine stats available.\n - The count returned is the sum of number of VF instances currently available and the PF instance.\n", "name": "pCount", "type": "uint32_t*" }, { - "desc": "[in,out][optional][range(0, *pCount)] array of engine group activity counters.\n - if count is less than the total number of components available, the driver shall only retrieve that number of components.\n", + "desc": "[in,out][optional][range(0, *pCount)] array of engine group activity counters.\n - if count is less than the total number of engine stats available, then driver shall only retrieve that number of 
stats.\n - the implementation shall populate the vector with engine stat for PF at index 0 of the vector followed by user provided pCount-1 number of VF engine stats.\n", "name": "pStats", "type": "zes_engine_stats_t*" } @@ -25883,6 +26143,9 @@ "ZE_RESULT_ERROR_INVALID_NULL_POINTER": [ "`nullptr == pCount`" ] + }, + { + "ZE_RESULT_ERROR_UNSUPPORTED_FEATURE - \"Engine activity extension is not supported in the environment.\"": [] } ], "type": "function", @@ -29034,12 +29297,12 @@ "type": "uint32_t" }, { - "desc": "[in][range(0, numPorts)] array of handle of components of this type.\nif numPorts is less than the number of components of this type that are available, then the driver shall only retrieve that number of component handles.\nif numPorts is greater than the number of components of this type that are available, then the driver shall only retrieve up to correct number of available ports enumerated in zesDeviceEnumFabricPorts.\n", + "desc": "[in][range(0, numPorts)] array of fabric port handles provided by user to gather throughput values. \n", "name": "phPort", "type": "zes_fabric_port_handle_t*" }, { - "desc": "[out][range(0, numPorts)] array of Fabric port throughput counters from multiple ports of type zes_fabric_port_throughput_t.\n", + "desc": "[out][range(0, numPorts)] array of fabric port throughput counters from multiple ports of type zes_fabric_port_throughput_t.\n", "name": "pThroughput", "type": "zes_fabric_port_throughput_t**" } @@ -29790,6 +30053,62 @@ ], "type": "function" }, + "GetStateExp": { + "class": "zesRas", + "desc": "Ras Get State", + "details": [ + "This function retrieves error counters for different RAS error categories.", + "The application may call this function from simultaneous threads.", + "The implementation of this function should be lock-free." 
+ ], + "hash": "9e9e646c541c50da30308738da88832993b6df10556583499dff169f46e4b908", + "name": "GetStateExp", + "params": [ + { + "desc": "[in] Handle for the component.", + "name": "hRas", + "type": "zes_ras_handle_t" + }, + { + "desc": "[in,out] pointer to the number of RAS state structures that can be retrieved.\nif count is zero, then the driver shall update the value with the total number of error categories for which state can be retrieved.\nif count is greater than the number of RAS states available, then the driver shall update the value with the correct number of RAS states available.\n", + "name": "pCount", + "type": "uint32_t*" + }, + { + "desc": "[in,out][optional][range(0, *pCount)] array of query results for RAS error states for different categories.\nif count is less than the number of RAS states available, then driver shall only retrieve that number of RAS states.\n", + "name": "pState", + "type": "zes_ras_state_exp_t*" + } + ], + "returns": [ + { + "ZE_RESULT_SUCCESS": [] + }, + { + "ZE_RESULT_ERROR_UNINITIALIZED": [] + }, + { + "ZE_RESULT_ERROR_DEVICE_LOST": [] + }, + { + "ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY": [] + }, + { + "ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY": [] + }, + { + "ZE_RESULT_ERROR_INVALID_NULL_HANDLE": [ + "`nullptr == hRas`" + ] + }, + { + "ZE_RESULT_ERROR_INVALID_NULL_POINTER": [ + "`nullptr == pCount`" + ] + } + ], + "type": "function" + }, "GetStatus": { "class": "zeContext", "desc": "Returns current status of the context.", @@ -32384,7 +32703,7 @@ "type": "uint32_t*" }, { - "desc": "[in][optional] pointer to event query properties structure(s).\n - This parameter may be null when `*pCount` is zero.\n - if `*pCount` is less than the number of event packets available, the driver may only update `*pCount` elements, starting at element zero.\n - if `*pCount` is greater than the number of event packets available, the driver may only update the valid elements.\n", + "desc": "[in,out][optional][range(0, *pCount)] pointer to event query 
properties structure(s).\n - This parameter may be null when `*pCount` is zero.\n - if `*pCount` is less than the number of event packets available, the driver may only update `*pCount` elements, starting at element zero.\n - if `*pCount` is greater than the number of event packets available, the driver may only update the valid elements.\n", "name": "pResults", "type": "ze_event_query_kernel_timestamps_results_ext_properties_t*" } @@ -36262,6 +36581,13 @@ "type": "macro", "value": "0xFFFFFFFF" }, + "ZES_ENGINE_ACTIVITY_EXT_NAME": { + "desc": "Engine Activity Extension Name", + "name": "ZES_ENGINE_ACTIVITY_EXT_NAME", + "type": "macro", + "value": "\"ZES_extension_engine_activity\"", + "version": "1.7" + }, "ZES_FAN_TEMP_SPEED_PAIR_COUNT": { "desc": "Maximum number of fan temperature/speed pairs in the fan speed table.", "name": "ZES_FAN_TEMP_SPEED_PAIR_COUNT", @@ -36299,6 +36625,13 @@ "value": "\"ZES_extension_power_limits\"", "version": "1.4" }, + "ZES_RAS_GET_STATE_EXP_NAME": { + "desc": "RAS Get State Extension Name", + "name": "ZES_RAS_GET_STATE_EXP_NAME", + "type": "macro", + "value": "\"ZES_extension_ras_state\"", + "version": "1.7" + }, "ZES_SCHED_WATCHDOG_DISABLE": { "desc": "Disable forward progress guard timeout.", "name": "ZES_SCHED_WATCHDOG_DISABLE", @@ -40914,32 +41247,32 @@ "type": "uint32_t" }, { - "desc": "[out] Manufacturing serial number (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Manufacturing serial number (NULL terminated string value). This value is intended to reflect the Part ID/SoC ID assigned by manufacturer that is unique for a SoC. Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "serialNumber[ZES_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Manufacturing board number (NULL terminated string value). 
Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Manufacturing board number (NULL terminated string value). Alternatively \"boardSerialNumber\", this value is intended to reflect the string printed on board label by manufacturer. Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "boardNumber[ZES_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Brand name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Brand name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "brandName[ZES_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Model name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Model name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "modelName[ZES_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Vendor name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Vendor name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "vendorName[ZES_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Installed driver version (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Installed driver version (NULL terminated string value). 
Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "driverVersion[ZES_STRING_PROPERTY_SIZE]", "type": "char" } @@ -41068,7 +41401,7 @@ "class": "zesEngine", "desc": "Extension properties related to Engine Groups", "details": [ - "This structure may be returned from zesEngineGetProperties via the `pNext` member of zes_engine_properties_t.", + "This structure may be passed to zesEngineGetProperties by having the pNext member of zes_engine_properties_t point at this struct.", "Used for SRIOV per Virtual Function device utilization by zes_engine_group_t" ], "members": [ @@ -41085,7 +41418,7 @@ "type": "void*" }, { - "desc": "[out] Number of Virtual Function(VF) instances associated with engine to monitor the global utilization of hardware across all Virtual Function from a Physical Function (PF) instance. These global and VF-by-VF views should provide engine group and individual engine level granularity.", + "desc": "[out] Number of Virtual Function(VF) instances associated with engine to monitor the utilization of hardware across all Virtual Function from a Physical Function (PF) instance.\nThese VF-by-VF views should provide engine group and individual engine level granularity.\nThis count represents the number of VF instances that are actively using the resource represented by the engine handle.\n", "name": "countOfVirtualFunctionInstance", "type": "uint32_t" } @@ -42885,6 +43218,25 @@ "name": "zes_ras_properties_t", "type": "struct" }, + "zes_ras_state_exp_t": { + "class": "zesRas", + "desc": "Extension structure for providing RAS error counters for different error sets", + "members": [ + { + "desc": "[out] category for which error counter is provided.", + "name": "category", + "type": "zes_ras_error_category_exp_t" + }, + { + "desc": "[out] Current value of RAS counter for specific error category.", + "name": "errorCounter", + "type": "uint64_t" + } + ], + "name": "zes_ras_state_exp_t", + "type": "struct", + "version": 
"1.7" + }, "zes_ras_state_t": { "base": "zes_base_state_t", "class": "zesRas", @@ -43731,12 +44083,12 @@ "type": "const void*" }, { - "desc": "[in,out] number of collected reports after which notification event will be signalled", + "desc": "[in,out] number of collected reports after which notification event will be signaled. If the requested value is not supported exactly, then the driver may use a value that is the closest supported approximation and shall update this member during zetMetricStreamerOpen.\n", "name": "notifyEveryNReports", "type": "uint32_t" }, { - "desc": "[in,out] streamer sampling period in nanoseconds", + "desc": "[in,out] streamer sampling period in nanoseconds. If the requested value is not supported exactly, then the driver may use a value that is the closest supported approximation and shall update this member during zetMetricStreamerOpen.\n", "name": "samplingPeriod", "type": "uint32_t" } @@ -45234,10 +45586,16 @@ "value": "$X_MAKE_VERSION( 1, 6 )", "version": "1.6" }, + { + "desc": "version 1.7", + "name": "$X_API_VERSION_1_7", + "value": "$X_MAKE_VERSION( 1, 7 )", + "version": "1.7" + }, { "desc": "latest known version", "name": "$X_API_VERSION_CURRENT", - "value": "$X_MAKE_VERSION( 1, 6 )" + "value": "$X_MAKE_VERSION( 1, 7 )" } ], "name": "$x_api_version_t", @@ -61311,7 +61669,7 @@ "type": "uint32_t*" }, { - "desc": "[in][optional] pointer to event query properties structure(s).\n - This parameter may be null when `*pCount` is zero.\n - if `*pCount` is less than the number of event packets available, the driver may only update `*pCount` elements, starting at element zero.\n - if `*pCount` is greater than the number of event packets available, the driver may only update the valid elements.\n", + "desc": "[in,out][optional][range(0, *pCount)] pointer to event query properties structure(s).\n - This parameter may be null when `*pCount` is zero.\n - if `*pCount` is less than the number of event packets available, the driver may only update 
`*pCount` elements, starting at element zero.\n - if `*pCount` is greater than the number of event packets available, the driver may only update the valid elements.\n", "name": "pResults", "type": "$x_event_query_kernel_timestamps_results_ext_properties_t*" } @@ -65342,12 +65700,12 @@ "type": "const void*" }, { - "desc": "[in,out] number of collected reports after which notification event will be signalled", + "desc": "[in,out] number of collected reports after which notification event will be signaled. If the requested value is not supported exactly, then the driver may use a value that is the closest supported approximation and shall update this member during $tMetricStreamerOpen.\n", "name": "notifyEveryNReports", "type": "uint32_t" }, { - "desc": "[in,out] streamer sampling period in nanoseconds", + "desc": "[in,out] streamer sampling period in nanoseconds. If the requested value is not supported exactly, then the driver may use a value that is the closest supported approximation and shall update this member during $tMetricStreamerOpen.\n", "name": "samplingPeriod", "type": "uint32_t" } @@ -65862,7 +66220,7 @@ }, { "class": "$tMetricQuery", - "desc": "Resets a metric query object back to inital state.", + "desc": "Resets a metric query object back to initial state.", "details": [ "The application must ensure the device is not currently referencing the query before it is reset", "The application must **not** call this function from simultaneous threads with the same query handle." @@ -68177,32 +68535,32 @@ "type": "uint32_t" }, { - "desc": "[out] Manufacturing serial number (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Manufacturing serial number (NULL terminated string value). This value is intended to reflect the Part ID/SoC ID assigned by manufacturer that is unique for a SoC. 
Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "serialNumber[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Manufacturing board number (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Manufacturing board number (NULL terminated string value). Alternatively \"boardSerialNumber\", this value is intended to reflect the string printed on board label by manufacturer. Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "boardNumber[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Brand name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Brand name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "brandName[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Model name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Model name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "modelName[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Vendor name of the device (NULL terminated string value). Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Vendor name of the device (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "vendorName[$S_STRING_PROPERTY_SIZE]", "type": "char" }, { - "desc": "[out] Installed driver version (NULL terminated string value). 
Will be set to the string \"unkown\" if this cannot be determined for the device.", + "desc": "[out] Installed driver version (NULL terminated string value). Will be set to the string \"unknown\" if this cannot be determined for the device.", "name": "driverVersion[$S_STRING_PROPERTY_SIZE]", "type": "char" } @@ -71371,37 +71729,6 @@ "name": "$s_engine_properties_t", "type": "struct" }, - { - "base": "$s_base_properties_t", - "class": "$sEngine", - "desc": "Extension properties related to Engine Groups", - "details": [ - "This structure may be returned from $sEngineGetProperties via the `pNext` member of $s_engine_properties_t.", - "Used for SRIOV per Virtual Function device utilization by $s_engine_group_t" - ], - "members": [ - { - "desc": "[in] type of this structure", - "init": "$S_STRUCTURE_TYPE_ENGINE_EXT_PROPERTIES", - "name": "stype", - "type": "$s_structure_type_t" - }, - { - "desc": "[in,out][optional] must be null or a pointer to an extension-specific structure (i.e. contains stype and pNext).", - "init": "nullptr", - "name": "pNext", - "type": "void*" - }, - { - "desc": "[out] Number of Virtual Function(VF) instances associated with engine to monitor the global utilization of hardware across all Virtual Function from a Physical Function (PF) instance. These global and VF-by-VF views should provide engine group and individual engine level granularity.", - "name": "countOfVirtualFunctionInstance", - "type": "uint32_t" - } - ], - "name": "$s_engine_ext_properties_t", - "type": "struct", - "version": "1.7" - }, { "class": "$sEngine", "desc": "Engine activity counters", @@ -71580,62 +71907,6 @@ "type": "function", "version": "1.7" }, - { - "class": "$sEngine", - "desc": "Get the activity stats for each Virtual Function (VF) associated with engine group. 
This function is used from a Physical Function (PF) interface when GPU is virtualized (SRIOV) into Virtual Function and Physical Function devices", - "details": [ - "The application may call this function from simultaneous threads.", - "The implementation of this function should be lock-free." - ], - "hash": "7c7eb1843ce9221ba257b856861d9929463cc144d7c4bbad5350c04491413ccd", - "name": "GetActivityExt", - "params": [ - { - "desc": "[in] Handle for the component.", - "name": "hEngine", - "type": "$s_engine_handle_t" - }, - { - "desc": "[in,out] Pointer to the number of engine stats descriptors.\n - if count is zero, the driver shall update the value with the total number of components of this type.\n - if count is greater than the total number of components available, the driver shall update the value with the correct number of components available.\n", - "name": "pCount", - "type": "uint32_t*" - }, - { - "desc": "[in,out][optional][range(0, *pCount)] array of engine group activity counters.\n - if count is less than the total number of components available, the driver shall only retrieve that number of components.\n", - "name": "pStats", - "type": "$s_engine_stats_t*" - } - ], - "returns": [ - { - "$X_RESULT_SUCCESS": [] - }, - { - "$X_RESULT_ERROR_UNINITIALIZED": [] - }, - { - "$X_RESULT_ERROR_DEVICE_LOST": [] - }, - { - "$X_RESULT_ERROR_OUT_OF_HOST_MEMORY": [] - }, - { - "$X_RESULT_ERROR_OUT_OF_DEVICE_MEMORY": [] - }, - { - "$X_RESULT_ERROR_INVALID_NULL_HANDLE": [ - "`nullptr == hEngine`" - ] - }, - { - "$X_RESULT_ERROR_INVALID_NULL_POINTER": [ - "`nullptr == pCount`" - ] - } - ], - "type": "function", - "version": "1.7" - }, { "desc": "C++ wrapper for a Sysman device engine group", "members": [ @@ -72760,12 +73031,12 @@ "type": "uint32_t" }, { - "desc": "[in][range(0, numPorts)] array of handle of components of this type.\nif numPorts is less than the number of components of this type that are available, then the driver shall only retrieve that number of 
component handles.\nif numPorts is greater than the number of components of this type that are available, then the driver shall only retrieve up to correct number of available ports enumerated in $sDeviceEnumFabricPorts.\n", + "desc": "[in][range(0, numPorts)] array of fabric port handles provided by user to gather throughput values. \n", "name": "phPort", "type": "$s_fabric_port_handle_t*" }, { - "desc": "[out][range(0, numPorts)] array of Fabric port throughput counters from multiple ports of type $s_fabric_port_throughput_t.\n", + "desc": "[out][range(0, numPorts)] array of fabric port throughput counters from multiple ports of type $s_fabric_port_throughput_t.\n", "name": "pThroughput", "type": "$s_fabric_port_throughput_t**" } @@ -79598,6 +79869,370 @@ "type": "function" } ] + }, + { + "header": { + "desc": "Intel $OneApi Level-Zero Sysman Extension APIs for Engine Activity", + "ordinal": 1700, + "type": "header", + "version": "1.7" + }, + "name": "engineActivity", + "objects": [ + { + "desc": "Engine Activity Extension Name", + "name": "$S_ENGINE_ACTIVITY_EXT_NAME", + "type": "macro", + "value": "\"$XS_extension_engine_activity\"", + "version": "1.7" + }, + { + "desc": "Engine Activity Extension Version(s)", + "etors": [ + { + "desc": "version 1.0", + "name": "$S_ENGINE_ACTIVITY_EXT_VERSION_1_0", + "value": "$X_MAKE_VERSION( 1, 0 )" + }, + { + "desc": "latest known version", + "name": "$S_ENGINE_ACTIVITY_EXT_VERSION_CURRENT", + "value": "$X_MAKE_VERSION( 1, 0 )" + } + ], + "name": "$s_engine_activity_ext_version_t", + "type": "enum", + "version": "1.7" + }, + { + "base": "$s_base_properties_t", + "class": "$sEngine", + "desc": "Extension properties related to Engine Groups", + "details": [ + "This structure may be passed to $sEngineGetProperties by having the pNext member of $s_engine_properties_t point at this struct.", + "Used for SRIOV per Virtual Function device utilization by $s_engine_group_t" + ], + "members": [ + { + "desc": "[in] type of this 
structure", + "init": "$S_STRUCTURE_TYPE_ENGINE_EXT_PROPERTIES", + "name": "stype", + "type": "$s_structure_type_t" + }, + { + "desc": "[in,out][optional] must be null or a pointer to an extension-specific structure (i.e. contains stype and pNext).", + "init": "nullptr", + "name": "pNext", + "type": "void*" + }, + { + "desc": "[out] Number of Virtual Function(VF) instances associated with engine to monitor the utilization of hardware across all Virtual Function from a Physical Function (PF) instance.\nThese VF-by-VF views should provide engine group and individual engine level granularity.\nThis count represents the number of VF instances that are actively using the resource represented by the engine handle.\n", + "name": "countOfVirtualFunctionInstance", + "type": "uint32_t" + } + ], + "name": "$s_engine_ext_properties_t", + "type": "struct", + "version": "1.7" + }, + { + "class": "$sEngine", + "desc": "Get activity stats for Physical Function (PF) and each Virtual Function (VF) associated with engine group.", + "details": [ + "The application may call this function from simultaneous threads.", + "The implementation of this function should be lock-free." 
+ ], + "hash": "7c7eb1843ce9221ba257b856861d9929463cc144d7c4bbad5350c04491413ccd", + "name": "GetActivityExt", + "params": [ + { + "desc": "[in] Handle for the component.", + "name": "hEngine", + "type": "$s_engine_handle_t" + }, + { + "desc": "[in,out] Pointer to the number of VF engine stats descriptors.\n - if count is zero, the driver shall update the value with the total number of engine stats available.\n - if count is greater than the total number of engine stats available, the driver shall update the value with the correct number of engine stats available.\n - The count returned is the sum of number of VF instances currently available and the PF instance.\n", + "name": "pCount", + "type": "uint32_t*" + }, + { + "desc": "[in,out][optional][range(0, *pCount)] array of engine group activity counters.\n - if count is less than the total number of engine stats available, then driver shall only retrieve that number of stats.\n - the implementation shall populate the vector with engine stat for PF at index 0 of the vector followed by user provided pCount-1 number of VF engine stats.\n", + "name": "pStats", + "type": "$s_engine_stats_t*" + } + ], + "returns": [ + { + "$X_RESULT_SUCCESS": [] + }, + { + "$X_RESULT_ERROR_UNINITIALIZED": [] + }, + { + "$X_RESULT_ERROR_DEVICE_LOST": [] + }, + { + "$X_RESULT_ERROR_OUT_OF_HOST_MEMORY": [] + }, + { + "$X_RESULT_ERROR_OUT_OF_DEVICE_MEMORY": [] + }, + { + "$X_RESULT_ERROR_INVALID_NULL_HANDLE": [ + "`nullptr == hEngine`" + ] + }, + { + "$X_RESULT_ERROR_INVALID_NULL_POINTER": [ + "`nullptr == pCount`" + ] + }, + { + "$X_RESULT_ERROR_UNSUPPORTED_FEATURE - \"Engine activity extension is not supported in the environment.\"": [] + } + ], + "type": "function", + "version": "1.7" + } + ] + }, + { + "header": { + "desc": "Intel $OneApi Level-Zero Sysman Extension APIs for RAS Get State and Clear State", + "ordinal": 1700, + "type": "header", + "version": "1.7" + }, + "name": "rasState", + "objects": [ + { + "desc": "RAS Get State 
Extension Name", + "name": "$S_RAS_GET_STATE_EXP_NAME", + "type": "macro", + "value": "\"$XS_extension_ras_state\"", + "version": "1.7" + }, + { + "desc": "RAS Get State Extension Version(s)", + "etors": [ + { + "desc": "version 1.0", + "name": "$S_RAS_STATE_EXP_VERSION_1_0", + "value": "$X_MAKE_VERSION( 1, 0 )" + }, + { + "desc": "latest known version", + "name": "$S_RAS_STATE_EXP_VERSION_CURRENT", + "value": "$X_MAKE_VERSION( 1, 0 )" + } + ], + "name": "$s_ras_state_exp_version_t", + "type": "enum", + "version": "1.7" + }, + { + "class": "$sRas", + "desc": "RAS error categories", + "etors": [ + { + "desc": "The number of accelerator engine resets attempted by the driver", + "name": "$S_RAS_ERROR_CATEGORY_EXP_RESET", + "value": "0", + "version": "1.7" + }, + { + "desc": "The number of hardware exceptions generated by the way workloads have programmed the hardware", + "name": "$S_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS", + "value": "1", + "version": "1.7" + }, + { + "desc": "The number of low level driver communication errors that have occurred", + "name": "$S_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS", + "value": "2", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in the compute accelerator hardware", + "name": "$S_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS", + "value": "3", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in the fixed-function accelerator hardware", + "name": "$S_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS", + "value": "4", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in caches (L1/L3/register file/shared local memory/sampler)", + "name": "$S_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS", + "value": "5", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in the display", + "name": "$S_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS", + "value": "6", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in Memory", + "name": 
"$S_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS", + "value": "7", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in Scale Fabric", + "name": "$S_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS", + "value": "8", + "version": "1.7" + }, + { + "desc": "The number of errors that have occurred in L3 Fabric", + "name": "$S_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS", + "value": "9", + "version": "1.7" + } + ], + "name": "$s_ras_error_category_exp_t", + "type": "enum", + "version": "1.7" + }, + { + "class": "$sRas", + "desc": "Extension structure for providing RAS error counters for different error sets", + "members": [ + { + "desc": "[out] category for which error counter is provided.", + "name": "category", + "type": "$s_ras_error_category_exp_t" + }, + { + "desc": "[out] Current value of RAS counter for specific error category.", + "name": "errorCounter", + "type": "uint64_t" + } + ], + "name": "$s_ras_state_exp_t", + "type": "struct", + "version": "1.7" + }, + { + "class": "$sRas", + "desc": "Ras Get State", + "details": [ + "This function retrieves error counters for different RAS error categories.", + "The application may call this function from simultaneous threads.", + "The implementation of this function should be lock-free." 
+ ], + "hash": "9e9e646c541c50da30308738da88832993b6df10556583499dff169f46e4b908", + "name": "GetStateExp", + "params": [ + { + "desc": "[in] Handle for the component.", + "name": "hRas", + "type": "$s_ras_handle_t" + }, + { + "desc": "[in,out] pointer to the number of RAS state structures that can be retrieved.\nif count is zero, then the driver shall update the value with the total number of error categories for which state can be retrieved.\nif count is greater than the number of RAS states available, then the driver shall update the value with the correct number of RAS states available.\n", + "name": "pCount", + "type": "uint32_t*" + }, + { + "desc": "[in,out][optional][range(0, *pCount)] array of query results for RAS error states for different categories.\nif count is less than the number of RAS states available, then driver shall only retrieve that number of RAS states.\n", + "name": "pState", + "type": "$s_ras_state_exp_t*" + } + ], + "returns": [ + { + "$X_RESULT_SUCCESS": [] + }, + { + "$X_RESULT_ERROR_UNINITIALIZED": [] + }, + { + "$X_RESULT_ERROR_DEVICE_LOST": [] + }, + { + "$X_RESULT_ERROR_OUT_OF_HOST_MEMORY": [] + }, + { + "$X_RESULT_ERROR_OUT_OF_DEVICE_MEMORY": [] + }, + { + "$X_RESULT_ERROR_INVALID_NULL_HANDLE": [ + "`nullptr == hRas`" + ] + }, + { + "$X_RESULT_ERROR_INVALID_NULL_POINTER": [ + "`nullptr == pCount`" + ] + } + ], + "type": "function" + }, + { + "class": "$sRas", + "desc": "Ras Clear State", + "details": [ + "This function clears error counters for a RAS error category.", + "Clearing errors will affect other threads/applications - the counter values will start from zero.", + "Clearing errors requires write permissions.", + "The application should not call this function from simultaneous threads.", + "The implementation of this function should be lock-free." 
+ ], + "hash": "94c31ad19460e9ec5a14b5d704aa570c4b03bf09d0a0c48838190b0a3e50f362", + "name": "ClearStateExp", + "params": [ + { + "desc": "[in] Handle for the component.", + "name": "hRas", + "type": "$s_ras_handle_t" + }, + { + "desc": "[in] category for which error counter is to be cleared.", + "name": "category", + "type": "$s_ras_error_category_exp_t" + } + ], + "returns": [ + { + "$X_RESULT_SUCCESS": [] + }, + { + "$X_RESULT_ERROR_UNINITIALIZED": [] + }, + { + "$X_RESULT_ERROR_DEVICE_LOST": [] + }, + { + "$X_RESULT_ERROR_OUT_OF_HOST_MEMORY": [] + }, + { + "$X_RESULT_ERROR_OUT_OF_DEVICE_MEMORY": [] + }, + { + "$X_RESULT_ERROR_INVALID_NULL_HANDLE": [ + "`nullptr == hRas`" + ] + }, + { + "$X_RESULT_ERROR_INVALID_ENUMERATION": [ + "`$S_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS < category`" + ] + }, + { + "$X_RESULT_ERROR_INSUFFICIENT_PERMISSIONS": [ + "Don't have permissions to clear error counters." + ] + } + ], + "type": "function" + } + ] } ] ] diff --git a/source/drivers/null/ze_nullddi.cpp b/source/drivers/null/ze_nullddi.cpp index 13008a4..0b1a307 100644 --- a/source/drivers/null/ze_nullddi.cpp +++ b/source/drivers/null/ze_nullddi.cpp @@ -4218,7 +4218,8 @@ namespace driver ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. 
diff --git a/source/drivers/null/zes_nullddi.cpp b/source/drivers/null/zes_nullddi.cpp index c73c91b..90f2a90 100644 --- a/source/drivers/null/zes_nullddi.cpp +++ b/source/drivers/null/zes_nullddi.cpp @@ -1042,38 +1042,6 @@ namespace driver return result; } - /////////////////////////////////////////////////////////////////////////////// - /// @brief Intercept function for zesEngineGetActivityExt - __zedlllocal ze_result_t ZE_APICALL - zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. - uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. - ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. - ) - { - ze_result_t result = ZE_RESULT_SUCCESS; - - // if the driver has created a custom function, then call it instead of using the generic path - auto pfnGetActivityExt = context.zesDdiTable.Engine.pfnGetActivityExt; - if( nullptr != pfnGetActivityExt ) - { - result = pfnGetActivityExt( hEngine, pCount, pStats ); - } - else - { - // generic implementation - } - - return result; - } - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for zesDeviceEventRegister __zedlllocal ze_result_t ZE_APICALL @@ -1390,14 +1358,9 @@ namespace driver zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. 
uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. - ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. + zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ) { @@ -3493,6 +3456,99 @@ namespace driver return result; } + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesEngineGetActivityExt + __zedlllocal ze_result_t ZE_APICALL + zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. 
+ ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. + ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. + ) + { + ze_result_t result = ZE_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnGetActivityExt = context.zesDdiTable.Engine.pfnGetActivityExt; + if( nullptr != pfnGetActivityExt ) + { + result = pfnGetActivityExt( hEngine, pCount, pStats ); + } + else + { + // generic implementation + } + + return result; + } + + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesRasGetStateExp + __zedlllocal ze_result_t ZE_APICALL + zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. + ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. + ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. 
+ ) + { + ze_result_t result = ZE_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnGetStateExp = context.zesDdiTable.RasExp.pfnGetStateExp; + if( nullptr != pfnGetStateExp ) + { + result = pfnGetStateExp( hRas, pCount, pState ); + } + else + { + // generic implementation + } + + return result; + } + + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesRasClearStateExp + __zedlllocal ze_result_t ZE_APICALL + zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. + ) + { + ze_result_t result = ZE_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnClearStateExp = context.zesDdiTable.RasExp.pfnClearStateExp; + if( nullptr != pfnClearStateExp ) + { + result = pfnClearStateExp( hRas, category ); + } + else + { + // generic implementation + } + + return result; + } + } // namespace driver #if defined(__cplusplus) @@ -4125,6 +4181,35 @@ zesGetRasProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's RasExp table +/// with current process' addresses +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_VERSION +ZE_DLLEXPORT ze_result_t ZE_APICALL +zesGetRasExpProcAddrTable( + ze_api_version_t version, ///< [in] API version requested + zes_ras_exp_dditable_t* pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) +{ + if( nullptr == pDdiTable ) + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + + if( driver::context.version < version ) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; + + ze_result_t result = ZE_RESULT_SUCCESS; + + 
pDdiTable->pfnGetStateExp = driver::zesRasGetStateExp; + + pDdiTable->pfnClearStateExp = driver::zesRasClearStateExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Scheduler table /// with current process' addresses diff --git a/source/layers/tracing/ze_trcddi.cpp b/source/layers/tracing/ze_trcddi.cpp index f712936..ddd2bdf 100644 --- a/source/layers/tracing/ze_trcddi.cpp +++ b/source/layers/tracing/ze_trcddi.cpp @@ -6245,7 +6245,8 @@ namespace tracing_layer ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. 
diff --git a/source/layers/validation/common/zes_entry_points.h b/source/layers/validation/common/zes_entry_points.h index 3d56fbb..92d7dab 100644 --- a/source/layers/validation/common/zes_entry_points.h +++ b/source/layers/validation/common/zes_entry_points.h @@ -55,7 +55,6 @@ class ZESValidationEntryPoints { virtual ze_result_t zesDeviceEnumEngineGroups( zes_device_handle_t hDevice, uint32_t* pCount, zes_engine_handle_t* phEngine ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesEngineGetProperties( zes_engine_handle_t hEngine, zes_engine_properties_t* pProperties ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesEngineGetActivity( zes_engine_handle_t hEngine, zes_engine_stats_t* pStats ) {return ZE_RESULT_SUCCESS;} - virtual ze_result_t zesEngineGetActivityExt( zes_engine_handle_t hEngine, uint32_t* pCount, zes_engine_stats_t* pStats ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesDeviceEventRegister( zes_device_handle_t hDevice, zes_event_type_flags_t events ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesDriverEventListen( ze_driver_handle_t hDriver, uint32_t timeout, uint32_t count, zes_device_handle_t* phDevices, uint32_t* pNumDeviceEvents, zes_event_type_flags_t* pEvents ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesDriverEventListenEx( ze_driver_handle_t hDriver, uint64_t timeout, uint32_t count, zes_device_handle_t* phDevices, uint32_t* pNumDeviceEvents, zes_event_type_flags_t* pEvents ) {return ZE_RESULT_SUCCESS;} @@ -145,6 +144,9 @@ class ZESValidationEntryPoints { virtual ze_result_t zesTemperatureGetState( zes_temp_handle_t hTemperature, double* pTemperature ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesPowerGetLimitsExt( zes_pwr_handle_t hPower, uint32_t* pCount, zes_power_limit_ext_desc_t* pSustained ) {return ZE_RESULT_SUCCESS;} virtual ze_result_t zesPowerSetLimitsExt( zes_pwr_handle_t hPower, uint32_t* pCount, zes_power_limit_ext_desc_t* pSustained ) {return ZE_RESULT_SUCCESS;} + virtual ze_result_t 
zesEngineGetActivityExt( zes_engine_handle_t hEngine, uint32_t* pCount, zes_engine_stats_t* pStats ) {return ZE_RESULT_SUCCESS;} + virtual ze_result_t zesRasGetStateExp( zes_ras_handle_t hRas, uint32_t* pCount, zes_ras_state_exp_t* pState ) {return ZE_RESULT_SUCCESS;} + virtual ze_result_t zesRasClearStateExp( zes_ras_handle_t hRas, zes_ras_error_category_exp_t category ) {return ZE_RESULT_SUCCESS;} virtual ~ZESValidationEntryPoints() {} }; } \ No newline at end of file diff --git a/source/layers/validation/handle_lifetime_tracking/ze_handle_lifetime.cpp b/source/layers/validation/handle_lifetime_tracking/ze_handle_lifetime.cpp index ed8de49..7b3fc66 100644 --- a/source/layers/validation/handle_lifetime_tracking/ze_handle_lifetime.cpp +++ b/source/layers/validation/handle_lifetime_tracking/ze_handle_lifetime.cpp @@ -2681,7 +2681,8 @@ namespace validation_layer ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. 
diff --git a/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.cpp b/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.cpp index f4b53fa..902aeae 100644 --- a/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.cpp +++ b/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.cpp @@ -544,26 +544,6 @@ namespace validation_layer return ZE_RESULT_SUCCESS; } ze_result_t - ZESHandleLifetimeValidation::zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. - uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. - ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. - ) - { - - if ( !context.handleLifetime->isHandleValid( hEngine )){ - return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; - } - return ZE_RESULT_SUCCESS; - } - ze_result_t ZESHandleLifetimeValidation::zesDeviceEventRegister( zes_device_handle_t hDevice, ///< [in] The device handle. zes_event_type_flags_t events ///< [in] List of events to listen to. @@ -752,14 +732,9 @@ namespace validation_layer ZESHandleLifetimeValidation::zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. 
- ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. + zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ) { @@ -1888,4 +1863,61 @@ namespace validation_layer } return ZE_RESULT_SUCCESS; } + ze_result_t + ZESHandleLifetimeValidation::zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. + ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. + ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. 
+ ) + { + + if ( !context.handleLifetime->isHandleValid( hEngine )){ + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + } + return ZE_RESULT_SUCCESS; + } + ze_result_t + ZESHandleLifetimeValidation::zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. + ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. + ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. + ) + { + + if ( !context.handleLifetime->isHandleValid( hRas )){ + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + } + return ZE_RESULT_SUCCESS; + } + ze_result_t + ZESHandleLifetimeValidation::zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. 
+ ) + { + + if ( !context.handleLifetime->isHandleValid( hRas )){ + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + } + return ZE_RESULT_SUCCESS; + } } \ No newline at end of file diff --git a/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.h b/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.h index bd8e715..81e668d 100644 --- a/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.h +++ b/source/layers/validation/handle_lifetime_tracking/zes_handle_lifetime.h @@ -55,7 +55,6 @@ namespace validation_layer ze_result_t zesDeviceEnumEngineGroups ( zes_device_handle_t hDevice, uint32_t* pCount, zes_engine_handle_t* phEngine ) override; ze_result_t zesEngineGetProperties ( zes_engine_handle_t hEngine, zes_engine_properties_t* pProperties ) override; ze_result_t zesEngineGetActivity ( zes_engine_handle_t hEngine, zes_engine_stats_t* pStats ) override; - ze_result_t zesEngineGetActivityExt ( zes_engine_handle_t hEngine, uint32_t* pCount, zes_engine_stats_t* pStats ) override; ze_result_t zesDeviceEventRegister ( zes_device_handle_t hDevice, zes_event_type_flags_t events ) override; ze_result_t zesDriverEventListen ( ze_driver_handle_t hDriver, uint32_t timeout, uint32_t count, zes_device_handle_t* phDevices, uint32_t* pNumDeviceEvents, zes_event_type_flags_t* pEvents ) override; ze_result_t zesDriverEventListenEx ( ze_driver_handle_t hDriver, uint64_t timeout, uint32_t count, zes_device_handle_t* phDevices, uint32_t* pNumDeviceEvents, zes_event_type_flags_t* pEvents ) override; @@ -145,6 +144,9 @@ namespace validation_layer ze_result_t zesTemperatureGetState ( zes_temp_handle_t hTemperature, double* pTemperature ) override; ze_result_t zesPowerGetLimitsExt ( zes_pwr_handle_t hPower, uint32_t* pCount, zes_power_limit_ext_desc_t* pSustained ) override; ze_result_t zesPowerSetLimitsExt ( zes_pwr_handle_t hPower, uint32_t* pCount, zes_power_limit_ext_desc_t* pSustained ) override; + ze_result_t zesEngineGetActivityExt ( 
zes_engine_handle_t hEngine, uint32_t* pCount, zes_engine_stats_t* pStats ) override; + ze_result_t zesRasGetStateExp ( zes_ras_handle_t hRas, uint32_t* pCount, zes_ras_state_exp_t* pState ) override; + ze_result_t zesRasClearStateExp ( zes_ras_handle_t hRas, zes_ras_error_category_exp_t category ) override; }; } diff --git a/source/layers/validation/parameter_validation/ze_parameter_validation.cpp b/source/layers/validation/parameter_validation/ze_parameter_validation.cpp index 6f4eaf8..74cb38c 100644 --- a/source/layers/validation/parameter_validation/ze_parameter_validation.cpp +++ b/source/layers/validation/parameter_validation/ze_parameter_validation.cpp @@ -3286,7 +3286,8 @@ namespace validation_layer ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. diff --git a/source/layers/validation/parameter_validation/zes_parameter_validation.cpp b/source/layers/validation/parameter_validation/zes_parameter_validation.cpp index 178068e..32802dd 100644 --- a/source/layers/validation/parameter_validation/zes_parameter_validation.cpp +++ b/source/layers/validation/parameter_validation/zes_parameter_validation.cpp @@ -762,30 +762,6 @@ namespace validation_layer } - ze_result_t - ZESParameterValidation::zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. 
- uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. - ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. - ) - { - if( nullptr == hEngine ) - return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; - - if( nullptr == pCount ) - return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - - return ZE_RESULT_SUCCESS; - } - - ze_result_t ZESParameterValidation::zesDeviceEventRegister( zes_device_handle_t hDevice, ///< [in] The device handle. @@ -1021,14 +997,9 @@ namespace validation_layer ZESParameterValidation::zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. - ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. 
+ zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ) { @@ -2448,4 +2419,73 @@ namespace validation_layer return ParameterValidation::validateExtensions(pSustained); } + + ze_result_t + ZESParameterValidation::zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. + ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. + ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. + ) + { + if( nullptr == hEngine ) + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + + if( nullptr == pCount ) + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + + return ZE_RESULT_SUCCESS; + } + + + ze_result_t + ZESParameterValidation::zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. + ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. 
+ ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. + ) + { + if( nullptr == hRas ) + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + + if( nullptr == pCount ) + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + + return ZE_RESULT_SUCCESS; + } + + + ze_result_t + ZESParameterValidation::zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. + ) + { + if( nullptr == hRas ) + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + + if( ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS < category ) + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + + return ZE_RESULT_SUCCESS; + } + } \ No newline at end of file diff --git a/source/layers/validation/parameter_validation/zes_parameter_validation.h b/source/layers/validation/parameter_validation/zes_parameter_validation.h index 7608423..b72a495 100644 --- a/source/layers/validation/parameter_validation/zes_parameter_validation.h +++ b/source/layers/validation/parameter_validation/zes_parameter_validation.h @@ -57,7 +57,6 @@ namespace validation_layer ze_result_t zesDeviceEnumEngineGroups ( zes_device_handle_t hDevice, uint32_t* pCount, zes_engine_handle_t* phEngine ) override; ze_result_t zesEngineGetProperties ( zes_engine_handle_t hEngine, zes_engine_properties_t* pProperties ) override; ze_result_t zesEngineGetActivity ( zes_engine_handle_t hEngine, zes_engine_stats_t* pStats ) override; - ze_result_t zesEngineGetActivityExt ( zes_engine_handle_t hEngine, uint32_t* pCount, zes_engine_stats_t* pStats ) override; ze_result_t 
zesDeviceEventRegister ( zes_device_handle_t hDevice, zes_event_type_flags_t events ) override; ze_result_t zesDriverEventListen ( ze_driver_handle_t hDriver, uint32_t timeout, uint32_t count, zes_device_handle_t* phDevices, uint32_t* pNumDeviceEvents, zes_event_type_flags_t* pEvents ) override; ze_result_t zesDriverEventListenEx ( ze_driver_handle_t hDriver, uint64_t timeout, uint32_t count, zes_device_handle_t* phDevices, uint32_t* pNumDeviceEvents, zes_event_type_flags_t* pEvents ) override; @@ -147,6 +146,9 @@ namespace validation_layer ze_result_t zesTemperatureGetState ( zes_temp_handle_t hTemperature, double* pTemperature ) override; ze_result_t zesPowerGetLimitsExt ( zes_pwr_handle_t hPower, uint32_t* pCount, zes_power_limit_ext_desc_t* pSustained ) override; ze_result_t zesPowerSetLimitsExt ( zes_pwr_handle_t hPower, uint32_t* pCount, zes_power_limit_ext_desc_t* pSustained ) override; + ze_result_t zesEngineGetActivityExt ( zes_engine_handle_t hEngine, uint32_t* pCount, zes_engine_stats_t* pStats ) override; + ze_result_t zesRasGetStateExp ( zes_ras_handle_t hRas, uint32_t* pCount, zes_ras_state_exp_t* pState ) override; + ze_result_t zesRasClearStateExp ( zes_ras_handle_t hRas, zes_ras_error_category_exp_t category ) override; }; } \ No newline at end of file diff --git a/source/layers/validation/ze_valddi.cpp b/source/layers/validation/ze_valddi.cpp index 2954484..f0b6963 100644 --- a/source/layers/validation/ze_valddi.cpp +++ b/source/layers/validation/ze_valddi.cpp @@ -5865,7 +5865,8 @@ namespace validation_layer ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). 
+ ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. diff --git a/source/layers/validation/zes_valddi.cpp b/source/layers/validation/zes_valddi.cpp index 05c3a1f..f8a73d4 100644 --- a/source/layers/validation/zes_valddi.cpp +++ b/source/layers/validation/zes_valddi.cpp @@ -1425,48 +1425,6 @@ namespace validation_layer return result; } - /////////////////////////////////////////////////////////////////////////////// - /// @brief Intercept function for zesEngineGetActivityExt - __zedlllocal ze_result_t ZE_APICALL - zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. - uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. - ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. 
- ) - { - auto pfnGetActivityExt = context.zesDdiTable.Engine.pfnGetActivityExt; - - if( nullptr == pfnGetActivityExt ) - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; - - if( context.enableParameterValidation ) - { - auto result = context.paramValidation->zesParamValidation.zesEngineGetActivityExt( hEngine, pCount, pStats ); - if(result!=ZE_RESULT_SUCCESS) return result; - } - - - if( context.enableThreadingValidation ){ - //Unimplemented - } - - - if(context.enableHandleLifetime ){ - auto result = context.handleLifetime->zesHandleLifetime.zesEngineGetActivityExt( hEngine, pCount, pStats ); - if(result!=ZE_RESULT_SUCCESS) return result; - } - - auto result = pfnGetActivityExt( hEngine, pCount, pStats ); - return result; - } - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for zesDeviceEventRegister __zedlllocal ze_result_t ZE_APICALL @@ -1890,14 +1848,9 @@ namespace validation_layer zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. - ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. 
+ zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ) { @@ -4735,6 +4688,132 @@ namespace validation_layer return result; } + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesEngineGetActivityExt + __zedlllocal ze_result_t ZE_APICALL + zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. + ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. + ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. 
+ ) + { + auto pfnGetActivityExt = context.zesDdiTable.Engine.pfnGetActivityExt; + + if( nullptr == pfnGetActivityExt ) + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + + if( context.enableParameterValidation ) + { + auto result = context.paramValidation->zesParamValidation.zesEngineGetActivityExt( hEngine, pCount, pStats ); + if(result!=ZE_RESULT_SUCCESS) return result; + } + + + if( context.enableThreadingValidation ){ + //Unimplemented + } + + + if(context.enableHandleLifetime ){ + auto result = context.handleLifetime->zesHandleLifetime.zesEngineGetActivityExt( hEngine, pCount, pStats ); + if(result!=ZE_RESULT_SUCCESS) return result; + } + + auto result = pfnGetActivityExt( hEngine, pCount, pStats ); + return result; + } + + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesRasGetStateExp + __zedlllocal ze_result_t ZE_APICALL + zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. + ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. + ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. 
+ ) + { + auto pfnGetStateExp = context.zesDdiTable.RasExp.pfnGetStateExp; + + if( nullptr == pfnGetStateExp ) + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + + if( context.enableParameterValidation ) + { + auto result = context.paramValidation->zesParamValidation.zesRasGetStateExp( hRas, pCount, pState ); + if(result!=ZE_RESULT_SUCCESS) return result; + } + + + if( context.enableThreadingValidation ){ + //Unimplemented + } + + + if(context.enableHandleLifetime ){ + auto result = context.handleLifetime->zesHandleLifetime.zesRasGetStateExp( hRas, pCount, pState ); + if(result!=ZE_RESULT_SUCCESS) return result; + } + + auto result = pfnGetStateExp( hRas, pCount, pState ); + + if( result == ZE_RESULT_SUCCESS && context.enableHandleLifetime ){ + } + return result; + } + + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesRasClearStateExp + __zedlllocal ze_result_t ZE_APICALL + zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. 
+ ) + { + auto pfnClearStateExp = context.zesDdiTable.RasExp.pfnClearStateExp; + + if( nullptr == pfnClearStateExp ) + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + + if( context.enableParameterValidation ) + { + auto result = context.paramValidation->zesParamValidation.zesRasClearStateExp( hRas, category ); + if(result!=ZE_RESULT_SUCCESS) return result; + } + + + if( context.enableThreadingValidation ){ + //Unimplemented + } + + + if(context.enableHandleLifetime ){ + auto result = context.handleLifetime->zesHandleLifetime.zesRasClearStateExp( hRas, category ); + if(result!=ZE_RESULT_SUCCESS) return result; + } + + auto result = pfnClearStateExp( hRas, category ); + return result; + } + } // namespace validation_layer #if defined(__cplusplus) @@ -5528,6 +5607,40 @@ zesGetRasProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's RasExp table +/// with current process' addresses +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_VERSION +ZE_DLLEXPORT ze_result_t ZE_APICALL +zesGetRasExpProcAddrTable( + ze_api_version_t version, ///< [in] API version requested + zes_ras_exp_dditable_t* pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) +{ + auto& dditable = validation_layer::context.zesDdiTable.RasExp; + + if( nullptr == pDdiTable ) + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + + if (ZE_MAJOR_VERSION(validation_layer::context.version) != ZE_MAJOR_VERSION(version) || + ZE_MINOR_VERSION(validation_layer::context.version) > ZE_MINOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; + + ze_result_t result = ZE_RESULT_SUCCESS; + + dditable.pfnGetStateExp = pDdiTable->pfnGetStateExp; + pDdiTable->pfnGetStateExp = validation_layer::zesRasGetStateExp; + + dditable.pfnClearStateExp = pDdiTable->pfnClearStateExp; + pDdiTable->pfnClearStateExp = 
validation_layer::zesRasClearStateExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Scheduler table /// with current process' addresses diff --git a/source/lib/ze_libapi.cpp b/source/lib/ze_libapi.cpp index 07300c8..67ade0a 100644 --- a/source/lib/ze_libapi.cpp +++ b/source/lib/ze_libapi.cpp @@ -7548,7 +7548,8 @@ zeEventQueryKernelTimestampsExt( ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. diff --git a/source/lib/zes_libapi.cpp b/source/lib/zes_libapi.cpp index 26c4929..fb02169 100644 --- a/source/lib/zes_libapi.cpp +++ b/source/lib/zes_libapi.cpp @@ -1698,55 +1698,6 @@ zesEngineGetActivity( return pfnGetActivity( hEngine, pStats ); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Get the activity stats for each Virtual Function (VF) associated with -/// engine group. This function is used from a Physical Function (PF) -/// interface when GPU is virtualized (SRIOV) into Virtual Function and -/// Physical Function devices -/// -/// @details -/// - The application may call this function from simultaneous threads. -/// - The implementation of this function should be lock-free. 
-/// -/// @returns -/// - ::ZE_RESULT_SUCCESS -/// - ::ZE_RESULT_ERROR_UNINITIALIZED -/// - ::ZE_RESULT_ERROR_DEVICE_LOST -/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY -/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `nullptr == hEngine` -/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER -/// + `nullptr == pCount` -ze_result_t ZE_APICALL -zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. - uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. - ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. - ) -{ - if(ze_lib::context->inTeardown) { - return ZE_RESULT_ERROR_UNINITIALIZED; - } - - auto pfnGetActivityExt = ze_lib::context->zesDdiTable.Engine.pfnGetActivityExt; - if( nullptr == pfnGetActivityExt ) { - if(!ze_lib::context->isInitialized) - return ZE_RESULT_ERROR_UNINITIALIZED; - else - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - return pfnGetActivityExt( hEngine, pCount, pStats ); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Specify the list of events to listen to for a given device /// @@ -2252,14 +2203,9 @@ ze_result_t ZE_APICALL zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. 
uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. - ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. + zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ) { @@ -5661,4 +5607,150 @@ zesPowerSetLimitsExt( return pfnSetLimitsExt( hPower, pCount, pSustained ); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get activity stats for Physical Function (PF) and each Virtual +/// Function (VF) associated with engine group. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hEngine` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pCount` +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE - "Engine activity extension is not supported in the environment." 
+ze_result_t ZE_APICALL +zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. + ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. + ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. + ) +{ + if(ze_lib::context->inTeardown) { + return ZE_RESULT_ERROR_UNINITIALIZED; + } + + auto pfnGetActivityExt = ze_lib::context->zesDdiTable.Engine.pfnGetActivityExt; + if( nullptr == pfnGetActivityExt ) { + if(!ze_lib::context->isInitialized) + return ZE_RESULT_ERROR_UNINITIALIZED; + else + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + return pfnGetActivityExt( hEngine, pCount, pStats ); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Ras Get State +/// +/// @details +/// - This function retrieves error counters for different RAS error +/// categories. +/// - The application may call this function from simultaneous threads. +/// - The implementation of this function should be lock-free. 
+/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hRas` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == pCount` +ze_result_t ZE_APICALL +zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. + ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. + ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. + ) +{ + if(ze_lib::context->inTeardown) { + return ZE_RESULT_ERROR_UNINITIALIZED; + } + + auto pfnGetStateExp = ze_lib::context->zesDdiTable.RasExp.pfnGetStateExp; + if( nullptr == pfnGetStateExp ) { + if(!ze_lib::context->isInitialized) + return ZE_RESULT_ERROR_UNINITIALIZED; + else + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + return pfnGetStateExp( hRas, pCount, pState ); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Ras Clear State +/// +/// @details +/// - This function clears error counters for a RAS error category. +/// - Clearing errors will affect other threads/applications - the counter +/// values will start from zero. +/// - Clearing errors requires write permissions. +/// - The application should not call this function from simultaneous +/// threads. 
+/// - The implementation of this function should be lock-free. +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hRas` +/// - ::ZE_RESULT_ERROR_INVALID_ENUMERATION +/// + `::ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS < category` +/// - ::ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS +/// + Don't have permissions to clear error counters. +ze_result_t ZE_APICALL +zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. + ) +{ + if(ze_lib::context->inTeardown) { + return ZE_RESULT_ERROR_UNINITIALIZED; + } + + auto pfnClearStateExp = ze_lib::context->zesDdiTable.RasExp.pfnClearStateExp; + if( nullptr == pfnClearStateExp ) { + if(!ze_lib::context->isInitialized) + return ZE_RESULT_ERROR_UNINITIALIZED; + else + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + return pfnClearStateExp( hRas, category ); +} + } // extern "C" diff --git a/source/lib/zes_libddi.cpp b/source/lib/zes_libddi.cpp index e970d79..c87ee2e 100644 --- a/source/lib/zes_libddi.cpp +++ b/source/lib/zes_libddi.cpp @@ -133,6 +133,13 @@ namespace ze_lib result = getTable( ZE_API_VERSION_CURRENT, &zesDdiTable.Ras ); } + if( ZE_RESULT_SUCCESS == result ) + { + auto getTable = reinterpret_cast<zes_pfnGetRasExpProcAddrTable_t>( + GET_FUNCTION_PTR(loader, "zesGetRasExpProcAddrTable") ); + result = getTable( ZE_API_VERSION_CURRENT, &zesDdiTable.RasExp ); + } + if( ZE_RESULT_SUCCESS == result ) { auto getTable = reinterpret_cast<zes_pfnGetSchedulerProcAddrTable_t>( @@ -241,6 +248,11 @@ namespace ze_lib result = zesGetRasProcAddrTable( ZE_API_VERSION_CURRENT, &zesDdiTable.Ras ); } + if( ZE_RESULT_SUCCESS == result ) + { + result = zesGetRasExpProcAddrTable( ZE_API_VERSION_CURRENT, &zesDdiTable.RasExp ); + } + if( ZE_RESULT_SUCCESS == 
result ) { result = zesGetSchedulerProcAddrTable( ZE_API_VERSION_CURRENT, &zesDdiTable.Scheduler ); diff --git a/source/lib/zet_libapi.cpp b/source/lib/zet_libapi.cpp index bdaa3b8..2afef9f 100644 --- a/source/lib/zet_libapi.cpp +++ b/source/lib/zet_libapi.cpp @@ -1199,7 +1199,7 @@ zetMetricQueryDestroy( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Resets a metric query object back to inital state. +/// @brief Resets a metric query object back to initial state. /// /// @details /// - The application must ensure the device is not currently referencing diff --git a/source/loader/ze_ldrddi.cpp b/source/loader/ze_ldrddi.cpp index f996c95..8f5a7bb 100644 --- a/source/loader/ze_ldrddi.cpp +++ b/source/loader/ze_ldrddi.cpp @@ -5216,7 +5216,8 @@ namespace loader ///< available, the driver shall update the value with the correct value. ///< - Buffer(s) for query results must be sized by the application to ///< accommodate a minimum of `*pCount` elements. - ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in][optional] pointer to event query properties structure(s). + ze_event_query_kernel_timestamps_results_ext_properties_t* pResults ///< [in,out][optional][range(0, *pCount)] pointer to event query + ///< properties structure(s). ///< - This parameter may be null when `*pCount` is zero. ///< - if `*pCount` is less than the number of event packets available, ///< the driver may only update `*pCount` elements, starting at element zero. 
diff --git a/source/loader/zes_ldrddi.cpp b/source/loader/zes_ldrddi.cpp index c84e9d8..4c1cbaf 100644 --- a/source/loader/zes_ldrddi.cpp +++ b/source/loader/zes_ldrddi.cpp @@ -1177,39 +1177,6 @@ namespace loader return result; } - /////////////////////////////////////////////////////////////////////////////// - /// @brief Intercept function for zesEngineGetActivityExt - __zedlllocal ze_result_t ZE_APICALL - zesEngineGetActivityExt( - zes_engine_handle_t hEngine, ///< [in] Handle for the component. - uint32_t* pCount, ///< [in,out] Pointer to the number of engine stats descriptors. - ///< - if count is zero, the driver shall update the value with the total - ///< number of components of this type. - ///< - if count is greater than the total number of components available, - ///< the driver shall update the value with the correct number of - ///< components available. - zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. - ///< - if count is less than the total number of components available, the - ///< driver shall only retrieve that number of components. - ) - { - ze_result_t result = ZE_RESULT_SUCCESS; - - // extract driver's function pointer table - auto dditable = reinterpret_cast<zes_engine_object_t*>( hEngine )->dditable; - auto pfnGetActivityExt = dditable->zes.Engine.pfnGetActivityExt; - if( nullptr == pfnGetActivityExt ) - return ZE_RESULT_ERROR_UNINITIALIZED; - - // convert loader handle to driver handle - hEngine = reinterpret_cast<zes_engine_object_t*>( hEngine )->handle; - - // forward to device-driver - result = pfnGetActivityExt( hEngine, pCount, pStats ); - - return result; - } - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for zesDeviceEventRegister __zedlllocal ze_result_t ZE_APICALL @@ -1561,14 +1528,9 @@ namespace loader zesFabricPortGetMultiPortThroughput( zes_device_handle_t hDevice, ///< [in] Sysman handle of the device. 
uint32_t numPorts, ///< [in] Number of ports enumerated in function ::zesDeviceEnumFabricPorts - zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of handle of components of this type. - ///< if numPorts is less than the number of components of this type that - ///< are available, then the driver shall only retrieve that number of - ///< component handles. - ///< if numPorts is greater than the number of components of this type that - ///< are available, then the driver shall only retrieve up to correct - ///< number of available ports enumerated in ::zesDeviceEnumFabricPorts. - zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of Fabric port throughput counters + zes_fabric_port_handle_t* phPort, ///< [in][range(0, numPorts)] array of fabric port handles provided by user + ///< to gather throughput values. + zes_fabric_port_throughput_t** pThroughput ///< [out][range(0, numPorts)] array of fabric port throughput counters ///< from multiple ports of type ::zes_fabric_port_throughput_t. ) { @@ -3904,6 +3866,102 @@ namespace loader return result; } + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesEngineGetActivityExt + __zedlllocal ze_result_t ZE_APICALL + zesEngineGetActivityExt( + zes_engine_handle_t hEngine, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] Pointer to the number of VF engine stats descriptors. + ///< - if count is zero, the driver shall update the value with the total + ///< number of engine stats available. + ///< - if count is greater than the total number of engine stats + ///< available, the driver shall update the value with the correct number + ///< of engine stats available. + ///< - The count returned is the sum of number of VF instances currently + ///< available and the PF instance. + zes_engine_stats_t* pStats ///< [in,out][optional][range(0, *pCount)] array of engine group activity counters. 
+ ///< - if count is less than the total number of engine stats available, + ///< then driver shall only retrieve that number of stats. + ///< - the implementation shall populate the vector with engine stat for + ///< PF at index 0 of the vector followed by user provided pCount-1 number + ///< of VF engine stats. + ) + { + ze_result_t result = ZE_RESULT_SUCCESS; + + // extract driver's function pointer table + auto dditable = reinterpret_cast<zes_engine_object_t*>( hEngine )->dditable; + auto pfnGetActivityExt = dditable->zes.Engine.pfnGetActivityExt; + if( nullptr == pfnGetActivityExt ) + return ZE_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to driver handle + hEngine = reinterpret_cast<zes_engine_object_t*>( hEngine )->handle; + + // forward to device-driver + result = pfnGetActivityExt( hEngine, pCount, pStats ); + + return result; + } + + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesRasGetStateExp + __zedlllocal ze_result_t ZE_APICALL + zesRasGetStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + uint32_t* pCount, ///< [in,out] pointer to the number of RAS state structures that can be retrieved. + ///< if count is zero, then the driver shall update the value with the + ///< total number of error categories for which state can be retrieved. + ///< if count is greater than the number of RAS states available, then the + ///< driver shall update the value with the correct number of RAS states available. + zes_ras_state_exp_t* pState ///< [in,out][optional][range(0, *pCount)] array of query results for RAS + ///< error states for different categories. + ///< if count is less than the number of RAS states available, then driver + ///< shall only retrieve that number of RAS states. 
+ ) + { + ze_result_t result = ZE_RESULT_SUCCESS; + + // extract driver's function pointer table + auto dditable = reinterpret_cast<zes_ras_object_t*>( hRas )->dditable; + auto pfnGetStateExp = dditable->zes.RasExp.pfnGetStateExp; + if( nullptr == pfnGetStateExp ) + return ZE_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to driver handle + hRas = reinterpret_cast<zes_ras_object_t*>( hRas )->handle; + + // forward to device-driver + result = pfnGetStateExp( hRas, pCount, pState ); + + return result; + } + + /////////////////////////////////////////////////////////////////////////////// + /// @brief Intercept function for zesRasClearStateExp + __zedlllocal ze_result_t ZE_APICALL + zesRasClearStateExp( + zes_ras_handle_t hRas, ///< [in] Handle for the component. + zes_ras_error_category_exp_t category ///< [in] category for which error counter is to be cleared. + ) + { + ze_result_t result = ZE_RESULT_SUCCESS; + + // extract driver's function pointer table + auto dditable = reinterpret_cast<zes_ras_object_t*>( hRas )->dditable; + auto pfnClearStateExp = dditable->zes.RasExp.pfnClearStateExp; + if( nullptr == pfnClearStateExp ) + return ZE_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to driver handle + hRas = reinterpret_cast<zes_ras_object_t*>( hRas )->handle; + + // forward to device-driver + result = pfnClearStateExp( hRas, category ); + + return result; + } + } // namespace loader #if defined(__cplusplus) @@ -5215,6 +5273,73 @@ zesGetRasProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's RasExp table +/// with current process' addresses +/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::ZE_RESULT_ERROR_UNSUPPORTED_VERSION +ZE_DLLEXPORT ze_result_t ZE_APICALL +zesGetRasExpProcAddrTable( + ze_api_version_t version, ///< [in] API version requested + zes_ras_exp_dditable_t* pDdiTable ///< [in,out] pointer to table of 
DDI function pointers + ) +{ + if( loader::context->drivers.size() < 1 ) + return ZE_RESULT_ERROR_UNINITIALIZED; + + if( nullptr == pDdiTable ) + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + + if( loader::context->version < version ) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; + + ze_result_t result = ZE_RESULT_SUCCESS; + + // Load the device-driver DDI tables + for( auto& drv : loader::context->drivers ) + { + if(drv.initStatus != ZE_RESULT_SUCCESS) + continue; + auto getTable = reinterpret_cast<zes_pfnGetRasExpProcAddrTable_t>( + GET_FUNCTION_PTR( drv.handle, "zesGetRasExpProcAddrTable") ); + if(!getTable) + continue; + result = getTable( version, &drv.dditable.zes.RasExp); + } + + + if( ZE_RESULT_SUCCESS == result ) + { + if( ( loader::context->drivers.size() > 1 ) || loader::context->forceIntercept ) + { + // return pointers to loader's DDIs + pDdiTable->pfnGetStateExp = loader::zesRasGetStateExp; + pDdiTable->pfnClearStateExp = loader::zesRasClearStateExp; + } + else + { + // return pointers directly to driver's DDIs + *pDdiTable = loader::context->drivers.front().dditable.zes.RasExp; + } + } + + // If the validation layer is enabled, then intercept the loader's DDIs + if(( ZE_RESULT_SUCCESS == result ) && ( nullptr != loader::context->validationLayer )) + { + auto getTable = reinterpret_cast<zes_pfnGetRasExpProcAddrTable_t>( + GET_FUNCTION_PTR(loader::context->validationLayer, "zesGetRasExpProcAddrTable") ); + if(!getTable) + return ZE_RESULT_ERROR_UNINITIALIZED; + result = getTable( version, pDdiTable ); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Scheduler table /// with current process' addresses