From c583eccc1903dca89d52c19c65e4ff10fb686a2e Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Mon, 2 Oct 2023 22:58:16 +0000 Subject: [PATCH 1/7] [UR][L0] Add support for passing device list to urProgramBuild piProgramBuild receives a list of devices, while urProgramBuild does not. This produces a series of issues when a UR program needs to be created for a specific device. So define a new API, called urProgramBuildExp to pass this list. Authored-by: jaime.a.arteaga.molina@intel.com --- include/ur.py | 27 ++++++ include/ur_api.h | 50 ++++++++++ include/ur_ddi.h | 37 ++++++++ scripts/core/program.yml | 33 +++++++ scripts/core/registry.yml | 3 + source/adapters/adapter.def.in | 1 + source/adapters/adapter.map.in | 1 + source/adapters/null/ur_nullddi.cpp | 57 +++++++++++ source/common/ur_params.hpp | 45 +++++++++ source/loader/layers/tracing/ur_trcddi.cpp | 69 ++++++++++++++ source/loader/layers/validation/ur_valddi.cpp | 76 +++++++++++++++ source/loader/ur_ldrddi.cpp | 94 +++++++++++++++++++ source/loader/ur_libapi.cpp | 47 ++++++++++ source/loader/ur_libddi.cpp | 5 + source/ur_api.cpp | 41 ++++++++ 15 files changed, 586 insertions(+) diff --git a/include/ur.py b/include/ur.py index 2b49088119..f7c3e59c82 100644 --- a/include/ur.py +++ b/include/ur.py @@ -196,6 +196,7 @@ class ur_function_v(IntEnum): ADAPTER_RETAIN = 179 ## Enumerator for ::urAdapterRetain ADAPTER_GET_LAST_ERROR = 180 ## Enumerator for ::urAdapterGetLastError ADAPTER_GET_INFO = 181 ## Enumerator for ::urAdapterGetInfo + PROGRAM_BUILD_EXP = 182 ## Enumerator for ::urProgramBuildExp class ur_function_t(c_int): def __str__(self): @@ -2569,6 +2570,21 @@ class ur_program_dditable_t(Structure): ("pfnCreateWithNativeHandle", c_void_p) ## _urProgramCreateWithNativeHandle_t ] +############################################################################### +## @brief Function-pointer for urProgramBuildExp +if __use_win_types: + _urProgramBuildExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, 
ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) +else: + _urProgramBuildExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) + + +############################################################################### +## @brief Table of ProgramExp functions pointers +class ur_program_exp_dditable_t(Structure): + _fields_ = [ + ("pfnBuildExp", c_void_p) ## _urProgramBuildExp_t + ] + ############################################################################### ## @brief Function-pointer for urKernelCreate if __use_win_types: @@ -3754,6 +3770,7 @@ class ur_dditable_t(Structure): ("Context", ur_context_dditable_t), ("Event", ur_event_dditable_t), ("Program", ur_program_dditable_t), + ("ProgramExp", ur_program_exp_dditable_t), ("Kernel", ur_kernel_dditable_t), ("Sampler", ur_sampler_dditable_t), ("Mem", ur_mem_dditable_t), @@ -3856,6 +3873,16 @@ def __init__(self, version : ur_api_version_t): self.urProgramGetNativeHandle = _urProgramGetNativeHandle_t(self.__dditable.Program.pfnGetNativeHandle) self.urProgramCreateWithNativeHandle = _urProgramCreateWithNativeHandle_t(self.__dditable.Program.pfnCreateWithNativeHandle) + # call driver to get function pointers + ProgramExp = ur_program_exp_dditable_t() + r = ur_result_v(self.__dll.urGetProgramExpProcAddrTable(version, byref(ProgramExp))) + if r != ur_result_v.SUCCESS: + raise Exception(r) + self.__dditable.ProgramExp = ProgramExp + + # attach function interface to function address + self.urProgramBuildExp = _urProgramBuildExp_t(self.__dditable.ProgramExp.pfnBuildExp) + # call driver to get function pointers Kernel = ur_kernel_dditable_t() r = ur_result_v(self.__dll.urGetKernelProcAddrTable(version, byref(Kernel))) diff --git a/include/ur_api.h b/include/ur_api.h index 677c31005f..945d1d61ce 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -205,6 +205,7 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< 
Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_PROGRAM_BUILD_EXP = 182, ///< Enumerator for ::urProgramBuildExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -4000,6 +4001,43 @@ urProgramBuild( const char *pOptions ///< [in][optional] pointer to build options null-terminated string. ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one program, negates need for the +/// linking step. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, the program passed +/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type +/// for each device in `hContext`. +/// +/// @remarks +/// _Analogues_ +/// - **clBuildProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred when building `hProgram`. +UR_APIEXPORT ur_result_t UR_APICALL +urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char *pOptions ///< [in][optional] pointer to build options null-terminated string. 
+); + /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one or more programs. /// @@ -8531,6 +8569,18 @@ typedef struct ur_program_build_params_t { const char **ppOptions; } ur_program_build_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urProgramBuildExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_program_build_exp_params_t { + ur_context_handle_t *phContext; + ur_program_handle_t *phProgram; + uint32_t *pnumDevices; + ur_device_handle_t **pphDevices; + const char **ppOptions; +} ur_program_build_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urProgramCompile /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index a0c2a5012d..d640b6b28a 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -408,6 +408,42 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetProgramProcAddrTable_t)( ur_api_version_t, ur_program_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urProgramBuildExp +typedef ur_result_t(UR_APICALL *ur_pfnProgramBuildExp_t)( + ur_context_handle_t, + ur_program_handle_t, + uint32_t, + ur_device_handle_t *, + const char *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of ProgramExp functions pointers +typedef struct ur_program_exp_dditable_t { + ur_pfnProgramBuildExp_t pfnBuildExp; +} ur_program_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's ProgramExp table +/// with current process' 
addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetProgramExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetProgramExpProcAddrTable_t)( + ur_api_version_t, + ur_program_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urKernelCreate typedef ur_result_t(UR_APICALL *ur_pfnKernelCreate_t)( @@ -2139,6 +2175,7 @@ typedef struct ur_dditable_t { ur_context_dditable_t Context; ur_event_dditable_t Event; ur_program_dditable_t Program; + ur_program_exp_dditable_t ProgramExp; ur_kernel_dditable_t Kernel; ur_sampler_dditable_t Sampler; ur_mem_dditable_t Mem; diff --git a/scripts/core/program.yml b/scripts/core/program.yml index acab24c3bd..4886cb83cc 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -182,6 +182,39 @@ returns: - "If an error occurred when building `hProgram`." --- #-------------------------------------------------------------------------- type: function +desc: "Produces an executable program from one program, negates need for the linking step." +class: $xProgram +name: BuildExp +ordinal: "2" +decl: static +analogue: + - "**clBuildProgram**" +details: + - "The application may call this function from simultaneous threads." + - "Following a successful call to this entry point, the program passed will contain a binary of the $X_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in `hContext`." 
+params: + - type: $x_context_handle_t + name: hContext + desc: "[in] handle of the context instance." + - type: $x_program_handle_t + name: hProgram + desc: "[in] Handle of the program to build." + - type: uint32_t + name: numDevices + desc: "[in] number of devices" + - type: $x_device_handle_t* + name: phDevices + desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: const char* + name: pOptions + desc: "[in][optional] pointer to build options null-terminated string." +returns: + - $X_RESULT_ERROR_INVALID_PROGRAM: + - "If `hProgram` isn't a valid program object." + - $X_RESULT_ERROR_PROGRAM_BUILD_FAILURE: + - "If an error occurred when building `hProgram`." +--- #-------------------------------------------------------------------------- +type: function desc: "Produces an executable program from one or more programs." class: $xProgram name: Compile diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 30596ec14e..7fc6dc3fa0 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -529,6 +529,9 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: PROGRAM_BUILD_EXP + desc: Enumerator for $xProgramBuildExp + value: '182' --- type: enum desc: Defines structure types diff --git a/source/adapters/adapter.def.in b/source/adapters/adapter.def.in index de0b4fa8ee..057c03a93d 100644 --- a/source/adapters/adapter.def.in +++ b/source/adapters/adapter.def.in @@ -11,6 +11,7 @@ EXPORTS urGetPhysicalMemProcAddrTable urGetPlatformProcAddrTable urGetProgramProcAddrTable + urGetProgramExpProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable urGetUSMProcAddrTable diff --git a/source/adapters/adapter.map.in b/source/adapters/adapter.map.in index 4379e1f7de..114c6168bd 100644 --- a/source/adapters/adapter.map.in +++ b/source/adapters/adapter.map.in @@ -11,6 +11,7 @@ urGetPhysicalMemProcAddrTable; urGetPlatformProcAddrTable; urGetProgramProcAddrTable; + 
urGetProgramExpProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; urGetUSMProcAddrTable; diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index f9b8fb4d11..47fed71faf 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -1792,6 +1792,33 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnBuildExp = d_context.urDdiTable.ProgramExp.pfnBuildExp; + if (nullptr != pfnBuildExp) { + result = + pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -5614,6 +5641,36 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's ProgramExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnBuildExp = driver::urProgramBuildExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Queue table /// with current process' addresses diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 4c1c90e993..83455b20eb 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -1141,6 +1141,10 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + + case UR_FUNCTION_PROGRAM_BUILD_EXP: + os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; + break; default: os << "unknown enumerator"; break; @@ -14007,6 +14011,44 @@ operator<<(std::ostream &os, const struct ur_program_build_params_t *params) { return os; } +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_program_build_exp_params_t *params) { + + os << ".hContext = "; + + ur_params::serializePtr(os, *(params->phContext)); + + os << ", "; + os << ".hProgram = "; + + ur_params::serializePtr(os, *(params->phProgram)); + + os << ", "; + os << ".numDevices = "; + + os << *(params->pnumDevices); + + os << ", "; + os << ".phDevices = {"; + for (size_t i = 0; + *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } + + ur_params::serializePtr(os, (*(params->pphDevices))[i]); + } + os << "}"; + + os << ", "; + os << ".pOptions = "; + + ur_params::serializePtr(os, *(params->ppOptions)); + + return os; +} + inline std::ostream & operator<<(std::ostream &os, const struct ur_program_compile_params_t *params) { @@ -15731,6 +15773,9 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_PROGRAM_BUILD: { os << (const struct ur_program_build_params_t *)params; } break; + case UR_FUNCTION_PROGRAM_BUILD_EXP: { + os << (const struct ur_program_build_exp_params_t *)params; + } break; case 
UR_FUNCTION_PROGRAM_COMPILE: { os << (const struct ur_program_compile_params_t *)params; } break; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f30fac3807..eeafc45c97 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -2030,6 +2030,37 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. 
+) { + auto pfnBuildExp = context.urDdiTable.ProgramExp.pfnBuildExp; + + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_program_build_exp_params_t params = {&hContext, &hProgram, &numDevices, + &phDevices, &pOptions}; + uint64_t instance = context.notify_begin(UR_FUNCTION_PROGRAM_BUILD_EXP, + "urProgramBuildExp", &params); + + ur_result_t result = + pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); + + context.notify_end(UR_FUNCTION_PROGRAM_BUILD_EXP, "urProgramBuildExp", + &params, &result, instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -6560,6 +6591,39 @@ __urdlllocal ur_result_t UR_APICALL urGetProgramProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's ProgramExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.ProgramExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnBuildExp = pDdiTable->pfnBuildExp; + pDdiTable->pfnBuildExp = ur_tracing_layer::urProgramBuildExp; + + return result; +}
+/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Queue table /// with current process' addresses /// @@ -6976,6 +7040,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Program); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetProgramExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->ProgramExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetQueueProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Queue); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 46b0eef491..8f5375533e 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -2486,6 +2486,43 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. 
+) { + auto pfnBuildExp = context.urDdiTable.ProgramExp.pfnBuildExp; + + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hContext) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hProgram) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -7934,6 +7971,40 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's ProgramExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.ProgramExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnBuildExp = pDdiTable->pfnBuildExp; + pDdiTable->pfnBuildExp = ur_validation_layer::urProgramBuildExp; + + return result; +} + 
/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Queue table /// with current process' addresses @@ -8371,6 +8442,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Program); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetProgramExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->ProgramExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetQueueProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Queue); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index e192088bbc..00bbb22596 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -2346,6 +2346,46 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast<ur_context_object_t *>(hContext)->dditable; + auto pfnBuildExp = dditable->ur.ProgramExp.pfnBuildExp; + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hContext = reinterpret_cast<ur_context_object_t *>(hContext)->handle; + + // convert loader handle to platform handle + hProgram = reinterpret_cast<ur_program_object_t *>(hProgram)->handle; + + // convert loader handles to platform handles + auto phDevicesLocal = std::vector<ur_device_handle_t>(numDevices); + for (size_t i = 0; i < numDevices; ++i) { + phDevicesLocal[i] = + reinterpret_cast<ur_device_object_t *>(phDevices[i])->handle; + } + + // forward to device-platform + result = pfnBuildExp(hContext, hProgram, numDevices, phDevicesLocal.data(), + pOptions); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -7700,6 +7740,60 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's ProgramExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for
(auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast<ur_pfnGetProgramExpProcAddrTable_t>( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetProgramExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.ProgramExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnBuildExp = ur_loader::urProgramBuildExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.ProgramExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Queue table /// with current process' addresses diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 7a64efd088..5af81fe31c 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -2929,6 +2929,53 @@ ur_result_t UR_APICALL urProgramBuild( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one program, negates need for the +/// linking step. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, the program passed +/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type +/// for each device in `hContext`.
+/// +/// @remarks +/// _Analogues_ +/// - **clBuildProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred when building `hProgram`. +ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. + ) try { + auto pfnBuildExp = ur_lib::context->urDdiTable.ProgramExp.pfnBuildExp; + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one or more programs. 
/// diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index 1328a2b071..2d0095f41d 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -74,6 +74,11 @@ __urdlllocal ur_result_t context_t::urInit() { &urDdiTable.Program); } + if (UR_RESULT_SUCCESS == result) { + result = urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.ProgramExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Queue); diff --git a/source/ur_api.cpp b/source/ur_api.cpp index fac4d47c2d..a9f724718f 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2473,6 +2473,47 @@ ur_result_t UR_APICALL urProgramBuild( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one program, negates need for the +/// linking step. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, the program passed +/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type +/// for each device in `hContext`. +/// +/// @remarks +/// _Analogues_ +/// - **clBuildProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred when building `hProgram`. +ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. 
+ uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one or more programs. /// From 92e608f03fbfc9324928e66fd26110ad0f12b766 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Tue, 3 Oct 2023 12:34:19 +0100 Subject: [PATCH 2/7] [UR][L0] Add multi-device-compile experimental feature Expand upon the introduction of `urProgramBuildExp` and include `urProgramCompileExp` and `urProgramLinkExp` which include a device-list in place of a context. These more closely align with the PI/OpenCL analogues but only to introduce device-lists, not all extant arguments from those entry-points. This patch also moves the `urProgramBuildExp` definition into an experimental feature file and introduces a brief document containing motivation. 
--- include/ur.py | 33 ++- include/ur_api.h | 192 ++++++++++++++---- include/ur_ddi.h | 22 +- scripts/core/EXP-MULTI-DEVICE-COMPILE.rst | 64 ++++++ scripts/core/exp-multi-device-compile.yml | 125 ++++++++++++ scripts/core/program.yml | 33 --- scripts/core/registry.yml | 8 +- source/adapters/null/ur_nullddi.cpp | 114 ++++++++--- source/common/ur_params.hpp | 107 +++++++++- source/loader/layers/tracing/ur_trcddi.cpp | 133 +++++++++--- source/loader/layers/validation/ur_valddi.cpp | 156 ++++++++++---- source/loader/ur_ldrddi.cpp | 174 ++++++++++++---- source/loader/ur_libapi.cpp | 192 +++++++++++++----- source/ur_api.cpp | 167 +++++++++++---- 14 files changed, 1214 insertions(+), 306 deletions(-) create mode 100644 scripts/core/EXP-MULTI-DEVICE-COMPILE.rst create mode 100644 scripts/core/exp-multi-device-compile.yml diff --git a/include/ur.py b/include/ur.py index f7c3e59c82..45ce583f42 100644 --- a/include/ur.py +++ b/include/ur.py @@ -196,7 +196,9 @@ class ur_function_v(IntEnum): ADAPTER_RETAIN = 179 ## Enumerator for ::urAdapterRetain ADAPTER_GET_LAST_ERROR = 180 ## Enumerator for ::urAdapterGetLastError ADAPTER_GET_INFO = 181 ## Enumerator for ::urAdapterGetInfo - PROGRAM_BUILD_EXP = 182 ## Enumerator for ::urProgramBuildExp + PROGRAM_BUILD_EXP = 197 ## Enumerator for ::urProgramBuildExp + PROGRAM_COMPILE_EXP = 198 ## Enumerator for ::urProgramCompileExp + PROGRAM_LINK_EXP = 199 ## Enumerator for ::urProgramLinkExp class ur_function_t(c_int): def __str__(self): @@ -2254,6 +2256,11 @@ class ur_exp_command_buffer_sync_point_t(c_ulong): class ur_exp_command_buffer_handle_t(c_void_p): pass +############################################################################### +## @brief The extension string which defines support for test +## which is returned when querying device extensions. 
+UR_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP = "ur_exp_multi_device_compile" + ############################################################################### ## @brief Supported peer info class ur_exp_peer_info_v(IntEnum): @@ -2573,16 +2580,32 @@ class ur_program_dditable_t(Structure): ############################################################################### ## @brief Function-pointer for urProgramBuildExp if __use_win_types: - _urProgramBuildExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) + _urProgramBuildExp_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) +else: + _urProgramBuildExp_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) + +############################################################################### +## @brief Function-pointer for urProgramCompileExp +if __use_win_types: + _urProgramCompileExp_t = WINFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) +else: + _urProgramCompileExp_t = CFUNCTYPE( ur_result_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) + +############################################################################### +## @brief Function-pointer for urProgramLinkExp +if __use_win_types: + _urProgramLinkExp_t = WINFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_device_handle_t), c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) else: - _urProgramBuildExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, ur_program_handle_t, c_ulong, POINTER(ur_device_handle_t), c_char_p ) + _urProgramLinkExp_t = CFUNCTYPE( ur_result_t, ur_context_handle_t, c_ulong, POINTER(ur_device_handle_t), c_ulong, POINTER(ur_program_handle_t), c_char_p, POINTER(ur_program_handle_t) ) ############################################################################### ## @brief 
Table of ProgramExp functions pointers class ur_program_exp_dditable_t(Structure): _fields_ = [ - ("pfnBuildExp", c_void_p) ## _urProgramBuildExp_t + ("pfnBuildExp", c_void_p), ## _urProgramBuildExp_t + ("pfnCompileExp", c_void_p), ## _urProgramCompileExp_t + ("pfnLinkExp", c_void_p) ## _urProgramLinkExp_t ] ############################################################################### @@ -3882,6 +3905,8 @@ def __init__(self, version : ur_api_version_t): # attach function interface to function address self.urProgramBuildExp = _urProgramBuildExp_t(self.__dditable.ProgramExp.pfnBuildExp) + self.urProgramCompileExp = _urProgramCompileExp_t(self.__dditable.ProgramExp.pfnCompileExp) + self.urProgramLinkExp = _urProgramLinkExp_t(self.__dditable.ProgramExp.pfnLinkExp) # call driver to get function pointers Kernel = ur_kernel_dditable_t() diff --git a/include/ur_api.h b/include/ur_api.h index 945d1d61ce..11a0c697d2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -205,7 +205,9 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo - UR_FUNCTION_PROGRAM_BUILD_EXP = 182, ///< Enumerator for ::urProgramBuildExp + UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp + UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp + UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -4001,43 +4003,6 @@ urProgramBuild( const char *pOptions ///< [in][optional] pointer to build options null-terminated string. ); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Produces an executable program from one program, negates need for the -/// linking step. 
-/// -/// @details -/// - The application may call this function from simultaneous threads. -/// - Following a successful call to this entry point, the program passed -/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type -/// for each device in `hContext`. -/// -/// @remarks -/// _Analogues_ -/// - **clBuildProgram** -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hContext` -/// + `NULL == hProgram` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == phDevices` -/// - ::UR_RESULT_ERROR_INVALID_PROGRAM -/// + If `hProgram` isn't a valid program object. -/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE -/// + If an error occurred when building `hProgram`. -UR_APIEXPORT ur_result_t UR_APICALL -urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char *pOptions ///< [in][optional] pointer to build options null-terminated string. -); - /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one or more programs. /// @@ -8068,6 +8033,131 @@ urCommandBufferEnqueueExp( ///< command-buffer execution instance. 
); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental APIs for multi-device compile +#if !defined(__GNUC__) +#pragma region multi device compile(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef UR_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP +/// @brief The extension string which defines support for test +/// which is returned when querying device extensions. +#define UR_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP "ur_exp_multi_device_compile" +#endif // UR_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one program, negates need for the +/// linking step. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, the program passed +/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type +/// for each device in `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clBuildProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred when building `hProgram`. +UR_APIEXPORT ur_result_t UR_APICALL +urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. 
+ uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char *pOptions ///< [in][optional] pointer to build options null-terminated string. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one or more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point `hProgram` will +/// contain a binary of the ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT type +/// for each device in `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clCompileProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred while compiling `hProgram`. +UR_APIEXPORT ur_result_t UR_APICALL +urProgramCompileExp( + ur_program_handle_t hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char *pOptions ///< [in][optional] pointer to build options null-terminated string. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one or more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. 
+/// - Following a successful call to this entry point the program returned +/// in `phProgram` will contain a binary of the +/// ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in +/// `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clLinkProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// + `NULL == phPrograms` +/// + `NULL == phProgram` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If one of the programs in `phPrograms` isn't a valid program object. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `count == 0` +/// - ::UR_RESULT_ERROR_PROGRAM_LINK_FAILURE +/// + If an error occurred while linking `phPrograms`. +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t *phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char *pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t *phProgram ///< [out] pointer to handle of program object created. 
+); + #if !defined(__GNUC__) #pragma endregion #endif @@ -8574,7 +8664,6 @@ typedef struct ur_program_build_params_t { /// @details Each entry is a pointer to the parameter passed to the function; /// allowing the callback the ability to modify the parameter's value typedef struct ur_program_build_exp_params_t { - ur_context_handle_t *phContext; ur_program_handle_t *phProgram; uint32_t *pnumDevices; ur_device_handle_t **pphDevices; @@ -8591,6 +8680,17 @@ typedef struct ur_program_compile_params_t { const char **ppOptions; } ur_program_compile_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urProgramCompileExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_program_compile_exp_params_t { + ur_program_handle_t *phProgram; + uint32_t *pnumDevices; + ur_device_handle_t **pphDevices; + const char **ppOptions; +} ur_program_compile_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urProgramLink /// @details Each entry is a pointer to the parameter passed to the function; @@ -8603,6 +8703,20 @@ typedef struct ur_program_link_params_t { ur_program_handle_t **pphProgram; } ur_program_link_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urProgramLinkExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_program_link_exp_params_t { + ur_context_handle_t *phContext; + uint32_t *pnumDevices; + ur_device_handle_t **pphDevices; + uint32_t *pcount; + const ur_program_handle_t **pphPrograms; + const char **ppOptions; + ur_program_handle_t **pphProgram; +} ur_program_link_exp_params_t; + 
/////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urProgramRetain /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index d640b6b28a..24d5427191 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -411,16 +411,36 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetProgramProcAddrTable_t)( /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urProgramBuildExp typedef ur_result_t(UR_APICALL *ur_pfnProgramBuildExp_t)( - ur_context_handle_t, ur_program_handle_t, uint32_t, ur_device_handle_t *, const char *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urProgramCompileExp +typedef ur_result_t(UR_APICALL *ur_pfnProgramCompileExp_t)( + ur_program_handle_t, + uint32_t, + ur_device_handle_t *, + const char *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urProgramLinkExp +typedef ur_result_t(UR_APICALL *ur_pfnProgramLinkExp_t)( + ur_context_handle_t, + uint32_t, + ur_device_handle_t *, + uint32_t, + const ur_program_handle_t *, + const char *, + ur_program_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of ProgramExp functions pointers typedef struct ur_program_exp_dditable_t { ur_pfnProgramBuildExp_t pfnBuildExp; + ur_pfnProgramCompileExp_t pfnCompileExp; + ur_pfnProgramLinkExp_t pfnLinkExp; } ur_program_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst b/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst new file mode 100644 index 0000000000..d4c2a6cb7a --- /dev/null +++ b/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst @@ -0,0 +1,64 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() 
+%> + +.. _experimental-multi-device-compile: + +================================================================================ +Multi Device Compile +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + + +Motivation +-------------------------------------------------------------------------------- + +Instead of relying on the list of devices used to create a context, provide +interfaces which instead take a list of devices. This more closely aligns with +PI and OpenCL. Introduced to work around a regression. May be superseded in the +future. + +API +-------------------------------------------------------------------------------- + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}ProgramBuildExp +* ${x}ProgramCompileExp +* ${x}ProgramLinkExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return the valid string +defined in ``${X}_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP`` +as one of the options from ${x}DeviceGetInfo when querying for +${X}_DEVICE_INFO_EXTENSIONS. Conversely, before using any of the +functionality defined in this experimental feature the user *must* use the +device query to determine if the adapter supports this feature. 
+ +Contributors +-------------------------------------------------------------------------------- + +* Kenneth Benzie (Benie) `k.benzie@codeplay.com <k.benzie@codeplay.com>`_ diff --git a/scripts/core/exp-multi-device-compile.yml b/scripts/core/exp-multi-device-compile.yml new file mode 100644 index 0000000000..b51f938f7e --- /dev/null +++ b/scripts/core/exp-multi-device-compile.yml @@ -0,0 +1,125 @@ +# +# Copyright (C) 2023 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental APIs for multi-device compile" +ordinal: "99" + +--- #-------------------------------------------------------------------------- +type: macro +desc: | + The extension string which defines support for multi-device compile + which is returned when querying device extensions. +name: $X_MULTI_DEVICE_COMPILE_EXTENSION_STRING_EXP +value: "\"$x_exp_multi_device_compile\"" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Produces an executable program from one program, negates need for the linking step." +class: $xProgram +name: BuildExp +ordinal: "2" +decl: static +analogue: + - "**clBuildProgram**" +details: + - "The application may call this function from simultaneous threads." + - "Following a successful call to this entry point, the program passed will contain a binary of the $X_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in `phDevices`." +params: + - type: $x_program_handle_t + name: hProgram + desc: "[in] Handle of the program to build." 
+ - type: uint32_t + name: numDevices + desc: "[in] number of devices" + - type: $x_device_handle_t* + name: phDevices + desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: const char* + name: pOptions + desc: "[in][optional] pointer to build options null-terminated string." +returns: + - $X_RESULT_ERROR_INVALID_PROGRAM: + - "If `hProgram` isn't a valid program object." + - $X_RESULT_ERROR_PROGRAM_BUILD_FAILURE: + - "If an error occurred when building `hProgram`." + +--- #-------------------------------------------------------------------------- +type: function +desc: "Produces an executable program from one or more programs." +class: $xProgram +name: CompileExp +decl: static +ordinal: "3" +analogue: + - "**clCompileProgram**" +details: + - "The application may call this function from simultaneous threads." + - "Following a successful call to this entry point `hProgram` will contain a binary of the $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT type for each device in `phDevices`." +params: + - type: $x_program_handle_t + name: hProgram + desc: "[in][out] handle of the program to compile." + - type: uint32_t + name: numDevices + desc: "[in] number of devices" + - type: $x_device_handle_t* + name: phDevices + desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: const char* + name: pOptions + desc: "[in][optional] pointer to build options null-terminated string." +returns: + - $X_RESULT_ERROR_INVALID_PROGRAM: + - "If `hProgram` isn't a valid program object." + - $X_RESULT_ERROR_PROGRAM_BUILD_FAILURE: + - "If an error occurred while compiling `hProgram`." + +--- #-------------------------------------------------------------------------- +type: function +desc: "Produces an executable program from one or more programs." +class: $xProgram +name: LinkExp +decl: static +ordinal: "4" +analogue: + - "**clLinkProgram**" +details: + - "The application may call this function from simultaneous threads." 
+ - "Following a successful call to this entry point the program returned in `phProgram` will contain a binary of the $X_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in `phDevices`." +params: + - type: $x_context_handle_t + name: hContext + desc: "[in] handle of the context instance." + - type: uint32_t + name: numDevices + desc: "[in] number of devices" + - type: $x_device_handle_t* + name: phDevices + desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: uint32_t + name: count + desc: "[in] number of program handles in `phPrograms`." + - type: const $x_program_handle_t* + name: phPrograms + desc: "[in][range(0, count)] pointer to array of program handles." + - type: const char* + name: pOptions + desc: "[in][optional] pointer to linker options null-terminated string." + - type: $x_program_handle_t* + name: phProgram + desc: "[out] pointer to handle of program object created." +returns: + - $X_RESULT_ERROR_INVALID_PROGRAM: + - "If one of the programs in `phPrograms` isn't a valid program object." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`count == 0`" + - $X_RESULT_ERROR_PROGRAM_LINK_FAILURE: + - "If an error occurred while linking `phPrograms`." diff --git a/scripts/core/program.yml b/scripts/core/program.yml index 4886cb83cc..acab24c3bd 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -182,39 +182,6 @@ returns: - "If an error occurred when building `hProgram`." --- #-------------------------------------------------------------------------- type: function -desc: "Produces an executable program from one program, negates need for the linking step." -class: $xProgram -name: BuildExp -ordinal: "2" -decl: static -analogue: - - "**clBuildProgram**" -details: - - "The application may call this function from simultaneous threads." - - "Following a successful call to this entry point, the program passed will contain a binary of the $X_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in `hContext`." 
-params: - - type: $x_context_handle_t - name: hContext - desc: "[in] handle of the context instance." - - type: $x_program_handle_t - name: hProgram - desc: "[in] Handle of the program to build." - - type: uint32_t - name: numDevices - desc: "[in] number of devices" - - type: $x_device_handle_t* - name: phDevices - desc: "[in][range(0, numDevices)] pointer to array of device handles" - - type: const char* - name: pOptions - desc: "[in][optional] pointer to build options null-terminated string." -returns: - - $X_RESULT_ERROR_INVALID_PROGRAM: - - "If `hProgram` isn't a valid program object." - - $X_RESULT_ERROR_PROGRAM_BUILD_FAILURE: - - "If an error occurred when building `hProgram`." ---- #-------------------------------------------------------------------------- -type: function desc: "Produces an executable program from one or more programs." class: $xProgram name: Compile diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 7fc6dc3fa0..2d6ce08500 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -531,7 +531,13 @@ etors: value: '181' - name: PROGRAM_BUILD_EXP desc: Enumerator for $xProgramBuildExp - value: '182' + value: '197' +- name: PROGRAM_COMPILE_EXP + desc: Enumerator for $xProgramCompileExp + value: '198' +- name: PROGRAM_LINK_EXP + desc: Enumerator for $xProgramLinkExp + value: '199' --- type: enum desc: Defines structure types diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 47fed71faf..8e95e26ccf 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -1792,33 +1792,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urProgramBuildExp -__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context 
instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t * - phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char * - pOptions ///< [in][optional] pointer to build options null-terminated string. - ) try { - ur_result_t result = UR_RESULT_SUCCESS; - - // if the driver has created a custom function, then call it instead of using the generic path - auto pfnBuildExp = d_context.urDdiTable.ProgramExp.pfnBuildExp; - if (nullptr != pfnBuildExp) { - result = - pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); - } else { - // generic implementation - } - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -4926,6 +4899,89 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. 
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnBuildExp = d_context.urDdiTable.ProgramExp.pfnBuildExp; + if (nullptr != pfnBuildExp) { + result = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCompileExp +__urdlllocal ur_result_t UR_APICALL urProgramCompileExp( + ur_program_handle_t + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnCompileExp = d_context.urDdiTable.ProgramExp.pfnCompileExp; + if (nullptr != pfnCompileExp) { + result = pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. 
+ const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnLinkExp = d_context.urDdiTable.ProgramExp.pfnLinkExp; + if (nullptr != pfnLinkExp) { + result = pfnLinkExp(hContext, numDevices, phDevices, count, phPrograms, + pOptions, phProgram); + } else { + // generic implementation + *phProgram = reinterpret_cast<ur_program_handle_t>(d_context.get()); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -5666,6 +5722,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = driver::urProgramBuildExp; + pDdiTable->pfnCompileExp = driver::urProgramCompileExp; + + pDdiTable->pfnLinkExp = driver::urProgramLinkExp; + return result; } catch (...) 
{ return exceptionToResult(std::current_exception()); diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 83455b20eb..22b3b3110e 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -1145,6 +1145,14 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_PROGRAM_BUILD_EXP: os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; break; + + case UR_FUNCTION_PROGRAM_COMPILE_EXP: + os << "UR_FUNCTION_PROGRAM_COMPILE_EXP"; + break; + + case UR_FUNCTION_PROGRAM_LINK_EXP: + os << "UR_FUNCTION_PROGRAM_LINK_EXP"; + break; default: os << "unknown enumerator"; break; @@ -14015,11 +14023,6 @@ inline std::ostream & operator<<(std::ostream &os, const struct ur_program_build_exp_params_t *params) { - os << ".hContext = "; - - ur_params::serializePtr(os, *(params->phContext)); - - os << ", "; os << ".hProgram = "; ur_params::serializePtr(os, *(params->phProgram)); @@ -14069,6 +14072,39 @@ operator<<(std::ostream &os, const struct ur_program_compile_params_t *params) { return os; } +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_program_compile_exp_params_t *params) { + + os << ".hProgram = "; + + ur_params::serializePtr(os, *(params->phProgram)); + + os << ", "; + os << ".numDevices = "; + + os << *(params->pnumDevices); + + os << ", "; + os << ".phDevices = {"; + for (size_t i = 0; + *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } + + ur_params::serializePtr(os, (*(params->pphDevices))[i]); + } + os << "}"; + + os << ", "; + os << ".pOptions = "; + + ur_params::serializePtr(os, *(params->ppOptions)); + + return os; +} + inline std::ostream &operator<<(std::ostream &os, const struct ur_program_link_params_t *params) { @@ -14106,6 +14142,61 @@ inline std::ostream &operator<<(std::ostream &os, return os; } +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_program_link_exp_params_t *params) { + + os << ".hContext = 
"; + + ur_params::serializePtr(os, *(params->phContext)); + + os << ", "; + os << ".numDevices = "; + + os << *(params->pnumDevices); + + os << ", "; + os << ".phDevices = {"; + for (size_t i = 0; + *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) { + if (i != 0) { + os << ", "; + } + + ur_params::serializePtr(os, (*(params->pphDevices))[i]); + } + os << "}"; + + os << ", "; + os << ".count = "; + + os << *(params->pcount); + + os << ", "; + os << ".phPrograms = {"; + for (size_t i = 0; *(params->pphPrograms) != NULL && i < *params->pcount; + ++i) { + if (i != 0) { + os << ", "; + } + + ur_params::serializePtr(os, (*(params->pphPrograms))[i]); + } + os << "}"; + + os << ", "; + os << ".pOptions = "; + + ur_params::serializePtr(os, *(params->ppOptions)); + + os << ", "; + os << ".phProgram = "; + + ur_params::serializePtr(os, *(params->pphProgram)); + + return os; +} + inline std::ostream & operator<<(std::ostream &os, const struct ur_program_retain_params_t *params) { @@ -15779,9 +15870,15 @@ inline int serializeFunctionParams(std::ostream &os, uint32_t function, case UR_FUNCTION_PROGRAM_COMPILE: { os << (const struct ur_program_compile_params_t *)params; } break; + case UR_FUNCTION_PROGRAM_COMPILE_EXP: { + os << (const struct ur_program_compile_exp_params_t *)params; + } break; case UR_FUNCTION_PROGRAM_LINK: { os << (const struct ur_program_link_params_t *)params; } break; + case UR_FUNCTION_PROGRAM_LINK_EXP: { + os << (const struct ur_program_link_exp_params_t *)params; + } break; case UR_FUNCTION_PROGRAM_RETAIN: { os << (const struct ur_program_retain_params_t *)params; } break; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index eeafc45c97..beb32b715d 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -2030,37 +2030,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return result; } 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urProgramBuildExp -__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t * - phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char * - pOptions ///< [in][optional] pointer to build options null-terminated string. -) { - auto pfnBuildExp = context.urDdiTable.ProgramExp.pfnBuildExp; - - if (nullptr == pfnBuildExp) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - } - - ur_program_build_exp_params_t params = {&hContext, &hProgram, &numDevices, - &phDevices, &pOptions}; - uint64_t instance = context.notify_begin(UR_FUNCTION_PROGRAM_BUILD_EXP, - "urProgramBuildExp", &params); - - ur_result_t result = - pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); - - context.notify_end(UR_FUNCTION_PROGRAM_BUILD_EXP, "urProgramBuildExp", - &params, &result, instance); - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -5681,6 +5650,102 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. 
+) { + auto pfnBuildExp = context.urDdiTable.ProgramExp.pfnBuildExp; + + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_program_build_exp_params_t params = {&hProgram, &numDevices, &phDevices, + &pOptions}; + uint64_t instance = context.notify_begin(UR_FUNCTION_PROGRAM_BUILD_EXP, + "urProgramBuildExp", &params); + + ur_result_t result = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + + context.notify_end(UR_FUNCTION_PROGRAM_BUILD_EXP, "urProgramBuildExp", + &params, &result, instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCompileExp +__urdlllocal ur_result_t UR_APICALL urProgramCompileExp( + ur_program_handle_t + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + auto pfnCompileExp = context.urDdiTable.ProgramExp.pfnCompileExp; + + if (nullptr == pfnCompileExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_program_compile_exp_params_t params = {&hProgram, &numDevices, + &phDevices, &pOptions}; + uint64_t instance = context.notify_begin(UR_FUNCTION_PROGRAM_COMPILE_EXP, + "urProgramCompileExp", &params); + + ur_result_t result = + pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + + context.notify_end(UR_FUNCTION_PROGRAM_COMPILE_EXP, "urProgramCompileExp", + &params, &result, instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. 
+ uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. +) { + auto pfnLinkExp = context.urDdiTable.ProgramExp.pfnLinkExp; + + if (nullptr == pfnLinkExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_program_link_exp_params_t params = {&hContext, &numDevices, &phDevices, + &count, &phPrograms, &pOptions, + &phProgram}; + uint64_t instance = context.notify_begin(UR_FUNCTION_PROGRAM_LINK_EXP, + "urProgramLinkExp", &params); + + ur_result_t result = pfnLinkExp(hContext, numDevices, phDevices, count, + phPrograms, pOptions, phProgram); + + context.notify_end(UR_FUNCTION_PROGRAM_LINK_EXP, "urProgramLinkExp", + &params, &result, instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -6621,6 +6686,12 @@ __urdlllocal ur_result_t UR_APICALL urGetProgramExpProcAddrTable( dditable.pfnBuildExp = pDdiTable->pfnBuildExp; pDdiTable->pfnBuildExp = ur_tracing_layer::urProgramBuildExp; + dditable.pfnCompileExp = pDdiTable->pfnCompileExp; + pDdiTable->pfnCompileExp = ur_tracing_layer::urProgramCompileExp; + + dditable.pfnLinkExp = pDdiTable->pfnLinkExp; + pDdiTable->pfnLinkExp = ur_tracing_layer::urProgramLinkExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 8f5375533e..ab1708c0b6 100644 --- 
a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -2486,43 +2486,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urProgramBuildExp -__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t * - phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char * - pOptions ///< [in][optional] pointer to build options null-terminated string. -) { - auto pfnBuildExp = context.urDdiTable.ProgramExp.pfnBuildExp; - - if (nullptr == pfnBuildExp) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (context.enableParameterValidation) { - if (NULL == hContext) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - } - - if (NULL == hProgram) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - } - - if (NULL == phDevices) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - } - - ur_result_t result = - pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -7017,6 +6980,119 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. 
+ uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + auto pfnBuildExp = context.urDdiTable.ProgramExp.pfnBuildExp; + + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hProgram) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCompileExp +__urdlllocal ur_result_t UR_APICALL urProgramCompileExp( + ur_program_handle_t + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + auto pfnCompileExp = context.urDdiTable.ProgramExp.pfnCompileExp; + + if (nullptr == pfnCompileExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hProgram) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. 
+ uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. +) { + auto pfnLinkExp = context.urDdiTable.ProgramExp.pfnLinkExp; + + if (nullptr == pfnLinkExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hContext) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == phPrograms) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == phProgram) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (count == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnLinkExp(hContext, numDevices, phDevices, count, + phPrograms, pOptions, phProgram); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -8002,6 +8078,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( dditable.pfnBuildExp = pDdiTable->pfnBuildExp; pDdiTable->pfnBuildExp = ur_validation_layer::urProgramBuildExp; + dditable.pfnCompileExp = pDdiTable->pfnCompileExp; + pDdiTable->pfnCompileExp = ur_validation_layer::urProgramCompileExp; + + dditable.pfnLinkExp = pDdiTable->pfnLinkExp; + pDdiTable->pfnLinkExp = ur_validation_layer::urProgramLinkExp; + return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 00bbb22596..649fc0ad88 100644 --- 
a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -2346,46 +2346,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urProgramBuildExp -__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t * - phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char * - pOptions ///< [in][optional] pointer to build options null-terminated string. -) { - ur_result_t result = UR_RESULT_SUCCESS; - - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBuildExp = dditable->ur.ProgramExp.pfnBuildExp; - if (nullptr == pfnBuildExp) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) { - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - } - - // forward to device-platform - result = pfnBuildExp(hContext, hProgram, numDevices, phDevicesLocal.data(), - pOptions); - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCompile __urdlllocal ur_result_t UR_APICALL urProgramCompile( @@ -6833,6 +6793,138 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for 
urProgramBuildExp +__urdlllocal ur_result_t UR_APICALL urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hProgram)->dditable; + auto pfnBuildExp = dditable->ur.ProgramExp.pfnBuildExp; + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hProgram = reinterpret_cast(hProgram)->handle; + + // convert loader handles to platform handles + auto phDevicesLocal = std::vector(numDevices); + for (size_t i = 0; i < numDevices; ++i) { + phDevicesLocal[i] = + reinterpret_cast(phDevices[i])->handle; + } + + // forward to device-platform + result = pfnBuildExp(hProgram, numDevices, phDevicesLocal.data(), pOptions); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCompileExp +__urdlllocal ur_result_t UR_APICALL urProgramCompileExp( + ur_program_handle_t + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hProgram)->dditable; + auto pfnCompileExp = dditable->ur.ProgramExp.pfnCompileExp; + if (nullptr == pfnCompileExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hProgram = reinterpret_cast(hProgram)->handle; + + // convert loader handles to platform handles + auto phDevicesLocal = std::vector(numDevices); + for (size_t i = 0; i < numDevices; ++i) { + phDevicesLocal[i] = + reinterpret_cast(phDevices[i])->handle; + } + + // forward to device-platform + result = + pfnCompileExp(hProgram, numDevices, phDevicesLocal.data(), pOptions); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hContext)->dditable; + auto pfnLinkExp = dditable->ur.ProgramExp.pfnLinkExp; + if (nullptr == pfnLinkExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hContext = reinterpret_cast(hContext)->handle; + + // convert loader handles to platform handles + auto phDevicesLocal = std::vector(numDevices); + for (size_t i = 0; i < numDevices; ++i) { + phDevicesLocal[i] = + reinterpret_cast(phDevices[i])->handle; + } + + // convert loader handles to platform handles + auto phProgramsLocal = std::vector(count); + for (size_t i = 0; i < count; ++i) { + phProgramsLocal[i] = + reinterpret_cast(phPrograms[i])->handle; + } + + // forward to device-platform + result = pfnLinkExp(hContext, numDevices, phDevicesLocal.data(), count, + phProgramsLocal.data(), pOptions, phProgram); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *phProgram = reinterpret_cast( + ur_program_factory.getInstance(*phProgram, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMImportExp __urdlllocal ur_result_t UR_APICALL urUSMImportExp( @@ -7784,6 +7876,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_loader::context->forceIntercept) { // return pointers to loader's DDIs pDdiTable->pfnBuildExp = ur_loader::urProgramBuildExp; + pDdiTable->pfnCompileExp = ur_loader::urProgramCompileExp; + pDdiTable->pfnLinkExp = ur_loader::urProgramLinkExp; } else { // return pointers directly to platform's DDIs *pDdiTable = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 5af81fe31c..ccf1e1e2cf 100644 --- a/source/loader/ur_libapi.cpp +++ 
b/source/loader/ur_libapi.cpp @@ -2929,53 +2929,6 @@ ur_result_t UR_APICALL urProgramBuild( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Produces an executable program from one program, negates need for the -/// linking step. -/// -/// @details -/// - The application may call this function from simultaneous threads. -/// - Following a successful call to this entry point, the program passed -/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type -/// for each device in `hContext`. -/// -/// @remarks -/// _Analogues_ -/// - **clBuildProgram** -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hContext` -/// + `NULL == hProgram` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == phDevices` -/// - ::UR_RESULT_ERROR_INVALID_PROGRAM -/// + If `hProgram` isn't a valid program object. -/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE -/// + If an error occurred when building `hProgram`. -ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t * - phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char * - pOptions ///< [in][optional] pointer to build options null-terminated string. - ) try { - auto pfnBuildExp = ur_lib::context->urDdiTable.ProgramExp.pfnBuildExp; - if (nullptr == pfnBuildExp) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - return pfnBuildExp(hContext, hProgram, numDevices, phDevices, pOptions); -} catch (...) 
{ - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one or more programs. /// @@ -7564,6 +7517,151 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one program, negates need for the +/// linking step. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, the program passed +/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type +/// for each device in `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clBuildProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred when building `hProgram`. +ur_result_t UR_APICALL urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. + ) try { + auto pfnBuildExp = ur_lib::context->urDdiTable.ProgramExp.pfnBuildExp; + if (nullptr == pfnBuildExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnBuildExp(hProgram, numDevices, phDevices, pOptions); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one or more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point `hProgram` will +/// contain a binary of the ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT type +/// for each device in `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clCompileProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred while compiling `hProgram`. +ur_result_t UR_APICALL urProgramCompileExp( + ur_program_handle_t + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. + ) try { + auto pfnCompileExp = ur_lib::context->urDdiTable.ProgramExp.pfnCompileExp; + if (nullptr == pfnCompileExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnCompileExp(hProgram, numDevices, phDevices, pOptions); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one or more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. 
+/// - Following a successful call to this entry point the program returned +/// in `phProgram` will contain a binary of the +/// ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in +/// `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clLinkProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// + `NULL == phPrograms` +/// + `NULL == phProgram` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If one of the programs in `phPrograms` isn't a valid program object. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `count == 0` +/// - ::UR_RESULT_ERROR_PROGRAM_LINK_FAILURE +/// + If an error occurred while linking `phPrograms`. +ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. + ) try { + auto pfnLinkExp = ur_lib::context->urDdiTable.ProgramExp.pfnLinkExp; + if (nullptr == pfnLinkExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnLinkExp(hContext, numDevices, phDevices, count, phPrograms, + pOptions, phProgram); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Import memory into USM /// diff --git a/source/ur_api.cpp b/source/ur_api.cpp index a9f724718f..bc5b473cf9 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -2473,47 +2473,6 @@ ur_result_t UR_APICALL urProgramBuild( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Produces an executable program from one program, negates need for the -/// linking step. -/// -/// @details -/// - The application may call this function from simultaneous threads. -/// - Following a successful call to this entry point, the program passed -/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type -/// for each device in `hContext`. -/// -/// @remarks -/// _Analogues_ -/// - **clBuildProgram** -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hContext` -/// + `NULL == hProgram` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == phDevices` -/// - ::UR_RESULT_ERROR_INVALID_PROGRAM -/// + If `hProgram` isn't a valid program object. -/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE -/// + If an error occurred when building `hProgram`. -ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ///< [in] number of devices - ur_device_handle_t * - phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles - const char * - pOptions ///< [in][optional] pointer to build options null-terminated string. 
-) { - ur_result_t result = UR_RESULT_SUCCESS; - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one or more programs. /// @@ -6372,6 +6331,132 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one program, negates need for the +/// linking step. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point, the program passed +/// will contain a binary of the ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type +/// for each device in `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clBuildProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred when building `hProgram`. +ur_result_t UR_APICALL urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one or more programs. 
+/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point `hProgram` will +/// contain a binary of the ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT type +/// for each device in `phDevices`. +/// +/// @remarks +/// _Analogues_ +/// - **clCompileProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hProgram` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If `hProgram` isn't a valid program object. +/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE +/// + If an error occurred while compiling `hProgram`. +ur_result_t UR_APICALL urProgramCompileExp( + ur_program_handle_t + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Produces an executable program from one or more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point the program returned +/// in `phProgram` will contain a binary of the +/// ::UR_PROGRAM_BINARY_TYPE_EXECUTABLE type for each device in +/// `phDevices`. 
+/// +/// @remarks +/// _Analogues_ +/// - **clLinkProgram** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phDevices` +/// + `NULL == phPrograms` +/// + `NULL == phProgram` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If one of the programs in `phPrograms` isn't a valid program object. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `count == 0` +/// - ::UR_RESULT_ERROR_PROGRAM_LINK_FAILURE +/// + If an error occurred while linking `phPrograms`. +ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Import memory into USM /// From 1bb0543f3783d98fadb806f3dcca3ec1108afa6e Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 15 Nov 2023 16:31:35 +0000 Subject: [PATCH 3/7] Add exp-multi-device-compiler entry points to adapter interface loaders. 
--- source/adapters/cuda/ur_interface_loader.cpp | 15 +++++++++++++++ source/adapters/hip/ur_interface_loader.cpp | 15 +++++++++++++++ .../adapters/level_zero/ur_interface_loader.cpp | 15 +++++++++++++++ source/adapters/opencl/ur_interface_loader.cpp | 15 +++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index b87934182c..164d7f9581 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -390,6 +390,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnBuildExp = nullptr; + pDdiTable->pfnCompileExp = nullptr; + pDdiTable->pfnLinkExp = nullptr; + return retVal; +} + #if defined(__cplusplus) } // extern "C" #endif diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 26292b9528..56b00ab273 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -345,6 +345,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnBuildExp = nullptr; + pDdiTable->pfnCompileExp = nullptr; + pDdiTable->pfnLinkExp = nullptr; + return retVal; +} + #if 
defined(__cplusplus) } // extern "C" #endif diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 5f6da8fd86..bfad5d2cd4 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -430,3 +430,18 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } + +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnBuildExp = nullptr; + pDdiTable->pfnCompileExp = nullptr; + pDdiTable->pfnLinkExp = nullptr; + return retVal; +} diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 7333385182..eb86951a7d 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -381,6 +381,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnBuildExp = nullptr; + pDdiTable->pfnCompileExp = nullptr; + pDdiTable->pfnLinkExp = nullptr; + return retVal; +} + #if defined(__cplusplus) } // extern "C" #endif From a42a607bac53d198ed838d29db37b987e039f680 Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Fri, 6 Oct 2023 16:50:25 -0700 Subject: [PATCH 4/7] [UR][L0] Add support for passing device list to urProgramBuild/Link/Compile 
piProgramBuild receives a list of devices, while urProgramBuild does not. This produces a series of issues when a UR program needs to be created for a specific device. So define a new API, called urProgramBuildExp to pass this list. Signed-off-by: Spruit, Neil R Co-authored-by: Jaime Arteaga --- source/adapters/cuda/program.cpp | 22 +++++++++++ source/adapters/hip/program.cpp | 22 +++++++++++ source/adapters/level_zero/program.cpp | 51 ++++++++++++++++++++++++-- source/ur/ur.hpp | 38 +++++++++++++++++++ 4 files changed, 129 insertions(+), 4 deletions(-) diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index bee94d00a6..7bd6660292 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -226,6 +226,22 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_context_handle_t, + ur_program_handle_t, + uint32_t, + ur_device_handle_t *, + const char *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_context_handle_t, + ur_program_handle_t, + uint32_t, + ur_device_handle_t *, + const char *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + /// Loads the images from a UR program into a CUmodule that can be /// used later on to extract functions (kernels). /// See \ref ur_program_handle_t for implementation details. @@ -248,6 +264,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext, return Result; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t, uint32_t, const ur_program_handle_t *, uint32_t, + ur_device_handle_t *, const char *, ur_program_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + /// Creates a new UR program object that is the outcome of linking all input /// programs. 
/// \TODO Implement linker options, requires mapping of OpenCL to CUDA diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 10d3080007..2c05cc06b3 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -245,6 +245,22 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, return urProgramBuild(hContext, hProgram, pOptions); } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_context_handle_t, + ur_program_handle_t, + uint32_t, + ur_device_handle_t *, + const char *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_context_handle_t, + ur_program_handle_t, + uint32_t, + ur_device_handle_t *, + const char *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + /// Loads the images from a UR program into a hipModule_t that can be /// used later on to extract functions (kernels). /// See \ref ur_program_handle_t for implementation details. @@ -264,6 +280,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, return Result; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t, uint32_t, const ur_program_handle_t *, uint32_t, + ur_device_handle_t *, const char *, ur_program_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(ur_context_handle_t, uint32_t, const ur_program_handle_t *, const char *, diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 5bf517d55b..547409022f 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -112,6 +112,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( ur_program_handle_t Program, ///< [in] Handle of the program to build. const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. 
+) { + return urProgramBuildExp(Context, Program, 1, Context->Devices.data(), + Options); +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t Context, ///< [in] handle of the context instance. + ur_program_handle_t Program, ///< [in] Handle of the program to build. + uint32_t numDevices, ur_device_handle_t *phDevices, + const char *Options ///< [in][optional] pointer to build options + ///< null-terminated string. ) { // TODO // Check if device belongs to associated context. @@ -142,8 +153,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( ZeModuleDesc.pBuildFlags = Options; ZeModuleDesc.pConstants = Shim.ze(); - ze_device_handle_t ZeDevice = Context->Devices[0]->ZeDevice; + ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; ze_context_handle_t ZeContext = Program->Context->ZeContext; + std::ignore = Context; + std::ignore = numDevices; ze_module_handle_t ZeModule = nullptr; ur_result_t Result = UR_RESULT_SUCCESS; @@ -185,6 +198,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( return Result; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( + ur_context_handle_t Context, ///< [in] handle of the context instance. + ur_program_handle_t + Program, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ur_device_handle_t *phDevices, + const char *Options ///< [in][optional] pointer to build options + ///< null-terminated string. +) { + std::ignore = numDevices; + std::ignore = phDevices; + return urProgramCompile(Context, Program, Options); +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t @@ -225,7 +251,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( ur_program_handle_t *Program ///< [out] pointer to handle of program object created. 
) { - UR_ASSERT(Context->isValidDevice(Context->Devices[0]), + return urProgramLinkExp(Context, Count, Programs, 1, Context->Devices.data(), + Options, Program); +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t Context, ///< [in] handle of the context instance. + uint32_t Count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to + ///< array of program handles. + uint32_t numDevices, ur_device_handle_t *phDevices, + const char *Options, ///< [in][optional] pointer to linker options + ///< null-terminated string. + ur_program_handle_t + *Program ///< [out] pointer to handle of program object created. +) { + std::ignore = numDevices; + + UR_ASSERT(Context->isValidDevice(phDevices[0]), UR_RESULT_ERROR_INVALID_DEVICE); // We do not support any link flags at this time because the Level Zero API @@ -320,7 +363,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( // input module. // // TODO: Remove this workaround when the driver is fixed. - if (!Context->Devices[0]->Platform->ZeDriverModuleProgramExtensionFound || + if (!phDevices[0]->Platform->ZeDriverModuleProgramExtensionFound || (Count == 1)) { if (Count == 1) { ZeModuleDesc.pNext = nullptr; @@ -336,7 +379,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( } // Call the Level Zero API to compile, link, and create the module. 
- ze_device_handle_t ZeDevice = Context->Devices[0]->ZeDevice; + ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; ze_context_handle_t ZeContext = Context->ZeContext; ze_module_handle_t ZeModule = nullptr; ze_module_build_log_handle_t ZeBuildLog = nullptr; diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index 0437d719ba..ce3b8994ae 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -295,3 +295,41 @@ class UrReturnHelper { void *param_value; size_t *param_value_size_ret; }; + +// Needed to have compatibility with piProgramBuild +// when passing a specific list of devices +// See: https://github.com/oneapi-src/unified-runtime/issues/912 +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ur_device_handle_t *phDevices, + const char *pOptions ///< [in][optional] pointer to build options + ///< null-terminated string. +); + +// Needed to have compatibility with piProgramCompile +// when passing a specific list of devices +// See: https://github.com/oneapi-src/unified-runtime/issues/912 +UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( + ur_context_handle_t Context, ///< [in] handle of the context instance. + ur_program_handle_t + Program, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ur_device_handle_t *phDevices, + const char *Options ///< [in][optional] pointer to build options + ///< null-terminated string. +); + +// Needed to have compatibility with piProgramLink +// when passing a specific list of devices +// See: https://github.com/oneapi-src/unified-runtime/issues/912 +UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( + ur_context_handle_t Context, ///< [in] handle of the context instance. + uint32_t Count, ///< [in] number of program handles in `phPrograms`. 
+ const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to + ///< array of program handles. + uint32_t numDevices, ur_device_handle_t *phDevices, + const char *Options, ///< [in][optional] pointer to linker options + ///< null-terminated string. + ur_program_handle_t + *Program ///< [out] pointer to handle of program object created. +); From 26b9829efc247031c2e64e9a495b08c06559ec59 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 16 Nov 2023 17:22:23 +0000 Subject: [PATCH 5/7] [L0] Hook up multi-device compile entry points Add the `urProgramBuildExp`, `urProgramCompileExp`, and `urProgramLinkExp` to the loader `ur_program_exp_dditable_t`. Also add `"ur_exp_multi_device_compile"` to the list of extensions supported by the L0 adapter, enables the SYCL RT to query support. --- source/adapters/level_zero/device.cpp | 2 ++ source/adapters/level_zero/ur_interface_loader.cpp | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 35e48931b2..f5b00d80cc 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -190,6 +190,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( // Return supported for the UR command-buffer experimental feature SupportedExtensions += ("ur_exp_command_buffer "); + // Return supported for the UR multi-device compile experimental feature + SupportedExtensions += ("ur_exp_multi_device_compile "); return ReturnValue(SupportedExtensions.c_str()); } diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index bfad5d2cd4..b508f7277c 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -440,8 +440,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( if (UR_RESULT_SUCCESS != retVal) { return retVal; } - pDdiTable->pfnBuildExp = 
nullptr; - pDdiTable->pfnCompileExp = nullptr; - pDdiTable->pfnLinkExp = nullptr; + pDdiTable->pfnBuildExp = urProgramBuildExp; + pDdiTable->pfnCompileExp = urProgramCompileExp; + pDdiTable->pfnLinkExp = urProgramLinkExp; return retVal; } From 4b5e5590a96cb92c6f4341b20e1bed780ff656bd Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 16 Nov 2023 17:56:13 +0000 Subject: [PATCH 6/7] [L0] Fix urProgramLinkExp argument order Align the `urProgramLinkExp` spec and implementation argument orders to fix Windows link error. --- source/adapters/level_zero/program.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 547409022f..af6d9da8bd 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -256,11 +256,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( } UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t Context, ///< [in] handle of the context instance. + ur_context_handle_t Context, ///< [in] handle of the context instance. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to + ///< array of device handles uint32_t Count, ///< [in] number of program handles in `phPrograms`. const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to ///< array of program handles. - uint32_t numDevices, ur_device_handle_t *phDevices, const char *Options, ///< [in][optional] pointer to linker options ///< null-terminated string. 
ur_program_handle_t From 0790bf8fd34f5d41b5ef4ffc120a6a2acc84b454 Mon Sep 17 00:00:00 2001 From: "Spruit, Neil R" Date: Thu, 16 Nov 2023 15:46:33 -0800 Subject: [PATCH 7/7] Fix urProgramCompileExp, urProgramBuildExp, and urProgramLinkExp definition to match spec Signed-off-by: Spruit, Neil R --- source/adapters/cuda/program.cpp | 6 +- source/adapters/hip/program.cpp | 6 +- source/adapters/level_zero/program.cpp | 123 ++++++++++++------------- source/ur/ur.hpp | 38 -------- 4 files changed, 65 insertions(+), 108 deletions(-) diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 7bd6660292..814d59b132 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -226,16 +226,14 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_context_handle_t, - ur_program_handle_t, +UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_context_handle_t, - ur_program_handle_t, +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, const char *) { diff --git a/source/adapters/hip/program.cpp b/source/adapters/hip/program.cpp index 2c05cc06b3..b33ba5a62f 100644 --- a/source/adapters/hip/program.cpp +++ b/source/adapters/hip/program.cpp @@ -245,16 +245,14 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, return urProgramBuild(hContext, hProgram, pOptions); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_context_handle_t, - ur_program_handle_t, +UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t 
UR_APICALL urProgramBuildExp(ur_context_handle_t, - ur_program_handle_t, +UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, const char *) { diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index af6d9da8bd..92a3c87aea 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -113,16 +113,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { - return urProgramBuildExp(Context, Program, 1, Context->Devices.data(), - Options); + return urProgramBuildExp(Program, 1, Context->Devices.data(), Options); } UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t Context, ///< [in] handle of the context instance. - ur_program_handle_t Program, ///< [in] Handle of the program to build. - uint32_t numDevices, ur_device_handle_t *phDevices, - const char *Options ///< [in][optional] pointer to build options - ///< null-terminated string. + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to + ///< array of device handles + const char *pOptions ///< [in][optional] pointer to build options + ///< null-terminated string. ) { // TODO // Check if device belongs to associated context. @@ -131,43 +131,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( // UR_RESULT_ERROR_INVALID_VALUE); // We should have either IL or native device code. - UR_ASSERT(Program->Code, UR_RESULT_ERROR_INVALID_PROGRAM); + UR_ASSERT(hProgram->Code, UR_RESULT_ERROR_INVALID_PROGRAM); // It is legal to build a program created from either IL or from native // device code. 
- if (Program->State != ur_program_handle_t_::IL && - Program->State != ur_program_handle_t_::Native) { + if (hProgram->State != ur_program_handle_t_::IL && + hProgram->State != ur_program_handle_t_::Native) { return UR_RESULT_ERROR_INVALID_OPERATION; } - std::scoped_lock Guard(Program->Mutex); + std::scoped_lock Guard(hProgram->Mutex); // Ask Level Zero to build and load the native code onto the device. ZeStruct ZeModuleDesc; - ur_program_handle_t_::SpecConstantShim Shim(Program); - ZeModuleDesc.format = (Program->State == ur_program_handle_t_::IL) + ur_program_handle_t_::SpecConstantShim Shim(hProgram); + ZeModuleDesc.format = (hProgram->State == ur_program_handle_t_::IL) ? ZE_MODULE_FORMAT_IL_SPIRV : ZE_MODULE_FORMAT_NATIVE; - ZeModuleDesc.inputSize = Program->CodeLength; - ZeModuleDesc.pInputModule = Program->Code.get(); - ZeModuleDesc.pBuildFlags = Options; + ZeModuleDesc.inputSize = hProgram->CodeLength; + ZeModuleDesc.pInputModule = hProgram->Code.get(); + ZeModuleDesc.pBuildFlags = pOptions; ZeModuleDesc.pConstants = Shim.ze(); ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; - ze_context_handle_t ZeContext = Program->Context->ZeContext; - std::ignore = Context; + ze_context_handle_t ZeContext = hProgram->Context->ZeContext; std::ignore = numDevices; ze_module_handle_t ZeModule = nullptr; ur_result_t Result = UR_RESULT_SUCCESS; - Program->State = ur_program_handle_t_::Exe; + hProgram->State = ur_program_handle_t_::Exe; ze_result_t ZeResult = ZE_CALL_NOCHECK(zeModuleCreate, (ZeContext, ZeDevice, &ZeModuleDesc, - &ZeModule, &Program->ZeBuildLog)); + &ZeModule, &hProgram->ZeBuildLog)); if (ZeResult != ZE_RESULT_SUCCESS) { // We adjust ur_program below to avoid attempting to release zeModule when // RT calls urProgramRelease(). 
- Program->State = ur_program_handle_t_::Invalid; + hProgram->State = ur_program_handle_t_::Invalid; Result = ze2urResult(ZeResult); if (ZeModule) { ZE_CALL_NOCHECK(zeModuleDestroy, (ZeModule)); @@ -179,9 +178,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( // call to zeModuleDynamicLink. However, modules created with // urProgramBuild are supposed to be fully linked and ready to use. // Therefore, do an extra check now for unresolved symbols. - ZeResult = checkUnresolvedSymbols(ZeModule, &Program->ZeBuildLog); + ZeResult = checkUnresolvedSymbols(ZeModule, &hProgram->ZeBuildLog); if (ZeResult != ZE_RESULT_SUCCESS) { - Program->State = ur_program_handle_t_::Invalid; + hProgram->State = ur_program_handle_t_::Invalid; Result = (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) ? UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE : ze2urResult(ZeResult); @@ -193,22 +192,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( } // We no longer need the IL / native code. - Program->Code.reset(); - Program->ZeModule = ZeModule; + hProgram->Code.reset(); + hProgram->ZeModule = ZeModule; return Result; } UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( - ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t - Program, ///< [in][out] handle of the program to compile. - uint32_t numDevices, ur_device_handle_t *phDevices, - const char *Options ///< [in][optional] pointer to build options - ///< null-terminated string. + hProgram, ///< [in][out] handle of the program to compile. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to + ///< array of device handles + const char *pOptions ///< [in][optional] pointer to build options + ///< null-terminated string. 
) { std::ignore = numDevices; std::ignore = phDevices; - return urProgramCompile(Context, Program, Options); + return urProgramCompile(hProgram->Context, hProgram, pOptions); } UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( @@ -251,38 +251,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( ur_program_handle_t *Program ///< [out] pointer to handle of program object created. ) { - return urProgramLinkExp(Context, Count, Programs, 1, Context->Devices.data(), + return urProgramLinkExp(Context, Count, Context->Devices.data(), 1, Programs, Options, Program); } UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t Context, ///< [in] handle of the context instance. + ur_context_handle_t hContext, ///< [in] handle of the context instance. uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to ///< array of device handles - uint32_t Count, ///< [in] number of program handles in `phPrograms`. - const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to - ///< array of program handles. - const char *Options, ///< [in][optional] pointer to linker options - ///< null-terminated string. + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t *phPrograms, ///< [in][range(0, count)] pointer to + ///< array of program handles. + const char *pOptions, ///< [in][optional] pointer to linker options + ///< null-terminated string. ur_program_handle_t - *Program ///< [out] pointer to handle of program object created. + *phProgram ///< [out] pointer to handle of program object created. ) { std::ignore = numDevices; - - UR_ASSERT(Context->isValidDevice(phDevices[0]), + UR_ASSERT(hContext->isValidDevice(phDevices[0]), UR_RESULT_ERROR_INVALID_DEVICE); // We do not support any link flags at this time because the Level Zero API // does not have any way to pass flags that are specific to linking. 
- if (Options && *Options != '\0') { + if (pOptions && *pOptions != '\0') { std::string ErrorMessage( "Level Zero does not support kernel link flags: \""); - ErrorMessage.append(Options); + ErrorMessage.append(pOptions); ErrorMessage.push_back('\"'); ur_program_handle_t_ *UrProgram = new ur_program_handle_t_( - ur_program_handle_t_::Invalid, Context, ErrorMessage); - *Program = reinterpret_cast(UrProgram); + ur_program_handle_t_::Invalid, hContext, ErrorMessage); + *phProgram = reinterpret_cast(UrProgram); return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; } @@ -299,11 +298,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( // potential if there was some other code that holds more than one of these // locks simultaneously with "exclusive" access. However, there is no such // code like that, so this is also not a danger. - std::vector> Guards(Count); - for (uint32_t I = 0; I < Count; I++) { - std::shared_lock Guard(Programs[I]->Mutex); + std::vector> Guards(count); + for (uint32_t I = 0; I < count; I++) { + std::shared_lock Guard(phPrograms[I]->Mutex); Guards[I].swap(Guard); - if (Programs[I]->State != ur_program_handle_t_::Object) { + if (phPrograms[I]->State != ur_program_handle_t_::Object) { return UR_RESULT_ERROR_INVALID_OPERATION; } } @@ -316,15 +315,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( // Construct a ze_module_program_exp_desc_t which contains information about // all of the modules that will be linked together. 
ZeStruct ZeExtModuleDesc; - std::vector CodeSizes(Count); - std::vector CodeBufs(Count); - std::vector BuildFlagPtrs(Count); - std::vector SpecConstPtrs(Count); + std::vector CodeSizes(count); + std::vector CodeBufs(count); + std::vector BuildFlagPtrs(count); + std::vector SpecConstPtrs(count); std::vector SpecConstShims; - SpecConstShims.reserve(Count); + SpecConstShims.reserve(count); - for (uint32_t I = 0; I < Count; I++) { - ur_program_handle_t Program = Programs[I]; + for (uint32_t I = 0; I < count; I++) { + ur_program_handle_t Program = phPrograms[I]; CodeSizes[I] = Program->CodeLength; CodeBufs[I] = Program->Code.get(); BuildFlagPtrs[I] = Program->BuildFlags.c_str(); @@ -332,7 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( SpecConstPtrs[I] = SpecConstShims[I].ze(); } - ZeExtModuleDesc.count = Count; + ZeExtModuleDesc.count = count; ZeExtModuleDesc.inputSizes = CodeSizes.data(); ZeExtModuleDesc.pInputModules = CodeBufs.data(); ZeExtModuleDesc.pBuildFlags = BuildFlagPtrs.data(); @@ -366,8 +365,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( // // TODO: Remove this workaround when the driver is fixed. if (!phDevices[0]->Platform->ZeDriverModuleProgramExtensionFound || - (Count == 1)) { - if (Count == 1) { + (count == 1)) { + if (count == 1) { ZeModuleDesc.pNext = nullptr; ZeModuleDesc.inputSize = ZeExtModuleDesc.inputSizes[0]; ZeModuleDesc.pInputModule = ZeExtModuleDesc.pInputModules[0]; @@ -382,7 +381,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( // Call the Level Zero API to compile, link, and create the module. ze_device_handle_t ZeDevice = phDevices[0]->ZeDevice; - ze_context_handle_t ZeContext = Context->ZeContext; + ze_context_handle_t ZeContext = hContext->ZeContext; ze_module_handle_t ZeModule = nullptr; ze_module_build_log_handle_t ZeBuildLog = nullptr; ze_result_t ZeResult = @@ -420,8 +419,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( ? 
ur_program_handle_t_::Exe : ur_program_handle_t_::Invalid; ur_program_handle_t_ *UrProgram = - new ur_program_handle_t_(State, Context, ZeModule, ZeBuildLog); - *Program = reinterpret_cast(UrProgram); + new ur_program_handle_t_(State, hContext, ZeModule, ZeBuildLog); + *phProgram = reinterpret_cast(UrProgram); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index ce3b8994ae..0437d719ba 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -295,41 +295,3 @@ class UrReturnHelper { void *param_value; size_t *param_value_size_ret; }; - -// Needed to have compatibility with piProgramBuild -// when passing a specific list of devices -// See: https://github.com/oneapi-src/unified-runtime/issues/912 -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( - ur_context_handle_t hContext, ///< [in] handle of the context instance. - ur_program_handle_t hProgram, ///< [in] Handle of the program to build. - uint32_t numDevices, ur_device_handle_t *phDevices, - const char *pOptions ///< [in][optional] pointer to build options - ///< null-terminated string. -); - -// Needed to have compatibility with piProgramCompile -// when passing a specific list of devices -// See: https://github.com/oneapi-src/unified-runtime/issues/912 -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( - ur_context_handle_t Context, ///< [in] handle of the context instance. - ur_program_handle_t - Program, ///< [in][out] handle of the program to compile. - uint32_t numDevices, ur_device_handle_t *phDevices, - const char *Options ///< [in][optional] pointer to build options - ///< null-terminated string. -); - -// Needed to have compatibility with piProgramLink -// when passing a specific list of devices -// See: https://github.com/oneapi-src/unified-runtime/issues/912 -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t Context, ///< [in] handle of the context instance. 
- uint32_t Count, ///< [in] number of program handles in `phPrograms`. - const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to - ///< array of program handles. - uint32_t numDevices, ur_device_handle_t *phDevices, - const char *Options, ///< [in][optional] pointer to linker options - ///< null-terminated string. - ur_program_handle_t - *Program ///< [out] pointer to handle of program object created. -);