diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index c351a60e24..6aef92c220 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -24,6 +24,7 @@ constexpr int kUsmDeviceRedzoneMagic = (char)0x81; constexpr int kUsmHostRedzoneMagic = (char)0x82; constexpr int kUsmSharedRedzoneMagic = (char)0x83; constexpr int kMemBufferRedzoneMagic = (char)0x84; +constexpr int kDeviceGlobalRedZoneMagic = (char)0x85; constexpr auto kSPIR_AsanShadowMemoryGlobalStart = "__AsanShadowMemoryGlobalStart"; @@ -36,6 +37,9 @@ constexpr auto kSPIR_DeviceType = "__DeviceType"; constexpr auto kSPIR_DeviceSanitizerReportMem = "__DeviceSanitizerReportMem"; +constexpr auto kSPIR_AsanDeviceGlobalCount = "__AsanDeviceGlobalCount"; +constexpr auto kSPIR_AsanDeviceGlobalMetadata = "__AsanDeviceGlobalMetadata"; + DeviceSanitizerReport SPIR_DeviceSanitizerReportMem; uptr MemToShadow_CPU(uptr USM_SHADOW_BASE, uptr UPtr) { @@ -78,6 +82,19 @@ ur_program_handle_t getProgram(ur_kernel_handle_t Kernel) { return Program; } +void getProgramDevices(ur_program_handle_t Program, + std::vector &Devices) { + size_t PropSize; + [[maybe_unused]] ur_result_t Result = context.urDdiTable.Program.pfnGetInfo( + Program, UR_PROGRAM_INFO_DEVICES, 0, nullptr, &PropSize); + assert(Result == UR_RESULT_SUCCESS); + + Devices.resize(PropSize / sizeof(ur_device_handle_t)); + Result = context.urDdiTable.Program.pfnGetInfo( + Program, UR_PROGRAM_INFO_DEVICES, PropSize, Devices.data(), nullptr); + assert(Result == UR_RESULT_SUCCESS); +} + size_t getLocalMemorySize(ur_device_handle_t Device) { size_t LocalMemorySize; [[maybe_unused]] auto Result = context.urDdiTable.Device.pfnGetInfo( @@ -124,7 +141,7 @@ SanitizerInterceptor::~SanitizerInterceptor() { ur_result_t SanitizerInterceptor::allocateMemory( ur_context_handle_t Context, ur_device_handle_t Device, const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, - void **ResultPtr, USMMemoryType Type) { + void **ResultPtr, AllocType Type) { auto Alignment = Properties->align; assert(Alignment == 0 || IsPowerOfTwo(Alignment)); @@ -147,13 +164,13 @@ ur_result_t SanitizerInterceptor::allocateMemory( void *Allocated = nullptr; - if (Type == USMMemoryType::DEVICE) { + if (Type == AllocType::DEVICE_USM) { UR_CALL(context.urDdiTable.USM.pfnDeviceAlloc( Context, Device, Properties, Pool, NeededSize, &Allocated)); - } else if (Type == USMMemoryType::HOST) { + } else if (Type == AllocType::HOST_USM) { UR_CALL(context.urDdiTable.USM.pfnHostAlloc(Context, Properties, Pool, NeededSize, &Allocated)); - } else if (Type == USMMemoryType::SHARE) { + } else if (Type == AllocType::SHARED_USM) { UR_CALL(context.urDdiTable.USM.pfnSharedAlloc( Context, Device, Properties, Pool, NeededSize, &Allocated)); } else { @@ -173,31 +190,31 @@ ur_result_t SanitizerInterceptor::allocateMemory( *ResultPtr = reinterpret_cast(UserBegin); - auto AllocInfo = std::make_shared( - USMAllocInfo{AllocBegin, UserBegin, UserEnd, NeededSize, Type}); + auto AI = std::make_shared( + AllocInfo{AllocBegin, UserBegin, UserEnd, NeededSize, Type}); // For updating shadow memory if (DeviceInfo) { // device/shared USM std::scoped_lock Guard(DeviceInfo->Mutex); - DeviceInfo->AllocInfos.emplace_back(AllocInfo); + DeviceInfo->AllocInfos.emplace_back(AI); } else { // host USM's AllocInfo needs to insert into all devices for (auto &pair : ContextInfo->DeviceMap) { auto DeviceInfo = pair.second; std::scoped_lock Guard(DeviceInfo->Mutex); - DeviceInfo->AllocInfos.emplace_back(AllocInfo); + DeviceInfo->AllocInfos.emplace_back(AI); } } // For memory release { std::scoped_lock Guard(ContextInfo->Mutex); - ContextInfo->AllocatedUSMMap[AllocBegin] = std::move(AllocInfo); + ContextInfo->AllocatedUSMMap[AllocBegin] = std::move(AI); } context.logger.info( "AllocInfos(AllocBegin={}, User={}-{}, NeededSize={}, Type={})", (void *)AllocBegin, (void *)UserBegin, (void *)UserEnd, NeededSize, - Type); + ToString(Type)); return UR_RESULT_SUCCESS; } @@ -285,8 +302,8 @@ void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, KernelName = DemangleName(KernelName); context.logger.always("\n====ERROR: DeviceSanitizer: {} on {}", - DeviceSanitizerFormat(AH->ErrorType), - DeviceSanitizerFormat(AH->MemoryType)); + ToString(AH->ErrorType), + ToString(AH->MemoryType)); context.logger.always( "{} of size {} at kernel <{}> LID({}, {}, {}) GID({}, " "{}, {})", @@ -478,7 +495,7 @@ ur_result_t SanitizerInterceptor::enqueueMemSetShadow( /// ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#mapping ur_result_t SanitizerInterceptor::enqueueAllocInfo( ur_context_handle_t Context, ur_device_handle_t Device, - ur_queue_handle_t Queue, std::shared_ptr &AllocInfo, + ur_queue_handle_t Queue, std::shared_ptr &AllocInfo, ur_event_handle_t &LastEvent) { // Init zero UR_CALL(enqueueMemSetShadow(Context, Device, Queue, AllocInfo->AllocBegin, @@ -499,18 +516,21 @@ ur_result_t SanitizerInterceptor::enqueueAllocInfo( int ShadowByte; switch (AllocInfo->Type) { - case USMMemoryType::HOST: + case AllocType::HOST_USM: ShadowByte = kUsmHostRedzoneMagic; break; - case USMMemoryType::DEVICE: + case AllocType::DEVICE_USM: ShadowByte = kUsmDeviceRedzoneMagic; break; - case USMMemoryType::SHARE: + case AllocType::SHARED_USM: ShadowByte = kUsmSharedRedzoneMagic; break; - case USMMemoryType::MEM_BUFFER: + case AllocType::MEM_BUFFER: ShadowByte = kMemBufferRedzoneMagic; break; + case AllocType::DEVICE_GLOBAL: + ShadowByte = kDeviceGlobalRedZoneMagic; + break; default: ShadowByte = 0xff; assert(false && "Unknow AllocInfo Type"); @@ -556,6 +576,62 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) { return UR_RESULT_SUCCESS; } +ur_result_t +SanitizerInterceptor::registerDeviceGlobals(ur_context_handle_t Context, + ur_program_handle_t Program) { + std::vector Devices; + getProgramDevices(Program, Devices); + + for (auto Device : Devices) { + ur_queue_handle_t Queue; + ur_result_t Result = context.urDdiTable.Queue.pfnCreate( + Context, Device, nullptr, &Queue); + if (Result != UR_RESULT_SUCCESS) { + context.logger.error("Failed to create command queue: {}", Result); + return Result; + } + + uint64_t NumOfDeviceGlobal; + Result = context.urDdiTable.Enqueue.pfnDeviceGlobalVariableRead( + Queue, Program, kSPIR_AsanDeviceGlobalCount, true, + sizeof(NumOfDeviceGlobal), 0, &NumOfDeviceGlobal, 0, nullptr, + nullptr); + if (Result == UR_RESULT_ERROR_INVALID_ARGUMENT) { + context.logger.info("No device globals"); + continue; + } else if (Result != UR_RESULT_SUCCESS) { + context.logger.error("Device Global[{}] Read Failed: {}", + kSPIR_AsanDeviceGlobalCount, Result); + return Result; + } + + std::vector GVInfos(NumOfDeviceGlobal); + Result = context.urDdiTable.Enqueue.pfnDeviceGlobalVariableRead( + Queue, Program, kSPIR_AsanDeviceGlobalMetadata, true, + sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, &GVInfos[0], 0, + nullptr, nullptr); + if (Result != UR_RESULT_SUCCESS) { + context.logger.error("Device Global[{}] Read Failed: {}", + kSPIR_AsanDeviceGlobalMetadata, Result); + return Result; + } + + auto ContextInfo = getContextInfo(Context); + auto DeviceInfo = ContextInfo->getDeviceInfo(Device); + for (size_t i = 0; i < NumOfDeviceGlobal; i++) { + auto AI = std::make_shared(AllocInfo{ + GVInfos[i].Addr, GVInfos[i].Addr, + GVInfos[i].Addr + GVInfos[i].Size, GVInfos[i].SizeWithRedZone, + AllocType::DEVICE_GLOBAL}); + + std::scoped_lock Guard(DeviceInfo->Mutex); + DeviceInfo->AllocInfos.emplace_back(AI); + } + } + + return UR_RESULT_SUCCESS; +} + ur_result_t SanitizerInterceptor::insertContext(ur_context_handle_t Context) { auto ContextInfo = std::make_shared(); diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp index 0753b53d63..c13d1a3b61 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -22,14 +22,20 @@ namespace ur_sanitizer_layer { -enum USMMemoryType { DEVICE, SHARE, HOST, MEM_BUFFER }; +enum class AllocType : uint32_t { + DEVICE_USM, + SHARED_USM, + HOST_USM, + MEM_BUFFER, + DEVICE_GLOBAL +}; -struct USMAllocInfo { +struct AllocInfo { uptr AllocBegin; uptr UserBegin; uptr UserEnd; size_t AllocSize; - USMMemoryType Type; + AllocType Type; }; enum class DeviceType { UNKNOWN, CPU, GPU_PVC, GPU_DG2 }; @@ -42,7 +48,7 @@ struct DeviceInfo { // Lock InitPool & AllocInfos ur_shared_mutex Mutex; - std::vector> AllocInfos; + std::vector> AllocInfos; }; struct QueueInfo { @@ -64,7 +70,7 @@ struct ContextInfo { return QueueMap[Queue]; } - std::shared_ptr getUSMAllocInfo(uptr Address) { + std::shared_ptr getUSMAllocInfo(uptr Address) { std::shared_lock Guard(Mutex); assert(AllocatedUSMMap.find(Address) != AllocatedUSMMap.end()); return AllocatedUSMMap[Address]; @@ -78,7 +84,7 @@ struct ContextInfo { /// key: USMAllocInfo.AllocBegin /// value: USMAllocInfo /// Use AllocBegin as key can help to detect underflow pointer - std::map> AllocatedUSMMap; + std::map> AllocatedUSMMap; }; struct LaunchInfo { @@ -95,6 +101,12 @@ struct LaunchInfo { ~LaunchInfo(); }; +struct DeviceGlobalInfo { + uptr Size; + uptr SizeWithRedZone; + uptr Addr; +}; + class SanitizerInterceptor { public: SanitizerInterceptor(); @@ -105,9 +117,12 @@ class SanitizerInterceptor { ur_device_handle_t Device, const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, - void **ResultPtr, USMMemoryType Type); + void **ResultPtr, AllocType Type); ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr); + ur_result_t registerDeviceGlobals(ur_context_handle_t Context, + ur_program_handle_t Program); + ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, ur_event_handle_t &Event, @@ -131,7 +146,7 @@ class SanitizerInterceptor { ur_result_t enqueueAllocInfo(ur_context_handle_t Context, ur_device_handle_t Device, ur_queue_handle_t Queue, - std::shared_ptr &AlloccInfo, + std::shared_ptr &AI, ur_event_handle_t &LastEvent); /// Initialize Global Variables & Kernel Name at first Launch @@ -163,4 +178,21 @@ class SanitizerInterceptor { bool m_ShadowMemInited; }; +inline const char *ToString(AllocType Type) { + switch (Type) { + case AllocType::DEVICE_USM: + return "Device USM"; + case AllocType::HOST_USM: + return "Host USM"; + case AllocType::SHARED_USM: + return "Shared USM"; + case AllocType::MEM_BUFFER: + return "Memory Buffer"; + case AllocType::DEVICE_GLOBAL: + return "Device Global"; + default: + return "Unknown Type"; + } +} + } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/device_sanitizer_report.hpp b/source/loader/layers/sanitizer/device_sanitizer_report.hpp index 11ae721434..374ffc8d62 100644 --- a/source/loader/layers/sanitizer/device_sanitizer_report.hpp +++ b/source/loader/layers/sanitizer/device_sanitizer_report.hpp @@ -32,6 +32,7 @@ enum class DeviceSanitizerMemoryType : int32_t { LOCAL, PRIVATE, MEM_BUFFER, + DEVICE_GLOBAL, }; struct DeviceSanitizerReport { @@ -58,26 +59,28 @@ struct DeviceSanitizerReport { bool IsRecover = false; }; -inline const char *DeviceSanitizerFormat(DeviceSanitizerMemoryType MemoryType) { +inline const char *ToString(DeviceSanitizerMemoryType MemoryType) { switch (MemoryType) { case DeviceSanitizerMemoryType::USM_DEVICE: - return "USM Device Memory"; + return "Device USM"; case DeviceSanitizerMemoryType::USM_HOST: - return "USM Host Memory"; + return "Host USM"; case DeviceSanitizerMemoryType::USM_SHARED: - return "USM Shared Memory"; + return "Shared USM"; case DeviceSanitizerMemoryType::LOCAL: return "Local Memory"; case DeviceSanitizerMemoryType::PRIVATE: return "Private Memory"; case DeviceSanitizerMemoryType::MEM_BUFFER: return "Memory Buffer"; + case DeviceSanitizerMemoryType::DEVICE_GLOBAL: + return "Device Global"; default: return "Unknown Memory"; } } -inline const char *DeviceSanitizerFormat(DeviceSanitizerErrorType ErrorType) { +inline const char *ToString(DeviceSanitizerErrorType ErrorType) { switch (ErrorType) { case DeviceSanitizerErrorType::OUT_OF_BOUNDS: return "out-of-bounds-access"; diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp index 7bd49e4ff0..f4e4f3ef01 100644 --- a/source/loader/layers/sanitizer/ur_sanddi.cpp +++ b/source/loader/layers/sanitizer/ur_sanddi.cpp @@ -36,7 +36,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( context.logger.debug("==== urUSMHostAlloc"); return context.interceptor->allocateMemory( - hContext, nullptr, pUSMDesc, pool, size, ppMem, USMMemoryType::HOST); + hContext, nullptr, pUSMDesc, pool, size, ppMem, AllocType::HOST_USM); } /////////////////////////////////////////////////////////////////////////////// @@ -61,7 +61,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( context.logger.debug("==== urUSMDeviceAlloc"); return context.interceptor->allocateMemory( - hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::DEVICE); + hContext, hDevice, pUSMDesc, pool, size, ppMem, AllocType::DEVICE_USM); } /////////////////////////////////////////////////////////////////////////////// @@ -86,7 +86,7 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( context.logger.debug("==== urUSMSharedAlloc"); return context.interceptor->allocateMemory( - hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::SHARE); + hContext, hDevice, pUSMDesc, pool, size, ppMem, AllocType::SHARED_USM); } /////////////////////////////////////////////////////////////////////////////// @@ -156,6 +156,28 @@ __urdlllocal ur_result_t UR_APICALL urQueueRelease( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuild +__urdlllocal ur_result_t UR_APICALL urProgramBuild( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_program_handle_t hProgram, ///< [in] handle of the program object + const char *pOptions ///< [in] string of build options +) { + auto pfnProgramBuild = context.urDdiTable.Program.pfnBuild; + + if (nullptr == pfnProgramBuild) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + context.logger.debug("==== urProgramBuild"); + + UR_CALL(pfnProgramBuild(hContext, hProgram, pOptions)); + + UR_CALL(context.interceptor->registerDeviceGlobals(hContext, hProgram)); + + return UR_RESULT_SUCCESS; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueKernelLaunch __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( @@ -227,7 +249,7 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_event_handle_t hEvent{}; ur_result_t result = pfnKernelLaunch( hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numEventsInWaitList, phEventWaitList, &hEvent); + pLocalWorkSize, hEvents.size(), hEvents.data(), &hEvent); if (result == UR_RESULT_SUCCESS) { context.interceptor->postLaunchKernel(hKernel, hQueue, hEvent, @@ -376,6 +398,34 @@ __urdlllocal ur_result_t UR_APICALL urGetContextProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Program table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetProgramProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_program_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_sanitizer_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_sanitizer_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + pDdiTable->pfnBuild = ur_sanitizer_layer::urProgramBuild; + + return UR_RESULT_SUCCESS; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Enqueue table /// with current process' addresses /// @@ -506,6 +556,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Context); } + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::urGetProgramProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->Program); + } + if (UR_RESULT_SUCCESS == result) { result = ur_sanitizer_layer::urGetEnqueueProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Enqueue);