diff --git a/.gitignore b/.gitignore index 85770fe15c..89736ad22a 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,7 @@ out/ # External content */**/external + +# VS clangd +/.cache +/compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt index fcdf90f173..20e8da9d6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ option(UR_USE_UBSAN "enable UndefinedBehaviorSanitizer" OFF) option(UR_USE_MSAN "enable MemorySanitizer" OFF) option(UR_USE_TSAN "enable ThreadSanitizer" OFF) option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF) +option(UR_ENABLE_SANITIZER "enable device sanitizer" ON) option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON) option(UR_BUILD_ADAPTER_L0 "Build the Level-Zero adapter" OFF) @@ -121,6 +122,10 @@ if(UR_ENABLE_TRACING) endif() endif() +if(UR_ENABLE_SANITIZER) + add_compile_definitions(UR_ENABLE_SANITIZER) +endif() + if(UR_USE_ASAN) add_sanitizer_flag(address) endif() diff --git a/README.md b/README.md index 57536f237a..8bc58a92a2 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,7 @@ List of options provided by CMake: | UR_USE_UBSAN | Enable UndefinedBehavior Sanitizer | ON/OFF | OFF | | UR_USE_MSAN | Enable MemorySanitizer (clang only) | ON/OFF | OFF | | UR_ENABLE_TRACING | Enable XPTI-based tracing layer | ON/OFF | OFF | +| UR_ENABLE_SANITIZER | Enable device sanitizer layer | ON/OFF | ON | | UR_CONFORMANCE_TARGET_TRIPLES | SYCL triples to build CTS device binaries for | Comma-separated list | spir64 | | UR_BUILD_ADAPTER_L0 | Build the Level-Zero adapter | ON/OFF | OFF | | UR_BUILD_ADAPTER_OPENCL | Build the OpenCL adapter | ON/OFF | OFF | diff --git a/scripts/core/INTRO.rst b/scripts/core/INTRO.rst index d557193ef0..ae94301d55 100644 --- a/scripts/core/INTRO.rst +++ b/scripts/core/INTRO.rst @@ -179,6 +179,15 @@ Unified Runtime loader implements tracing support through the `XPTI framework (args)...); } +template 
+inline void always(const char *format, Args &&...args) { + get_logger().always(format, std::forward(args)...); +} + inline void setLevel(logger::Level level) { get_logger().setLevel(level); } inline void setFlushLevel(logger::Level level) { diff --git a/source/common/logger/ur_logger_details.hpp b/source/common/logger/ur_logger_details.hpp index 4759a2fd24..6ff279ad1a 100644 --- a/source/common/logger/ur_logger_details.hpp +++ b/source/common/logger/ur_logger_details.hpp @@ -51,6 +51,14 @@ class Logger { log(logger::Level::ERR, format, std::forward(args)...); } + template + void always(const char *format, Args &&...args) { + if (sink) { + sink->log(logger::Level::QUIET, format, + std::forward(args)...); + } + } + template void log(logger::Level level, const char *format, Args &&...args) { if (level < this->level) { diff --git a/source/common/logger/ur_sinks.hpp b/source/common/logger/ur_sinks.hpp index cb8c751e4d..db30f3c8ed 100644 --- a/source/common/logger/ur_sinks.hpp +++ b/source/common/logger/ur_sinks.hpp @@ -22,7 +22,7 @@ class Sink { template void log(logger::Level level, const char *fmt, Args &&...args) { std::ostringstream buffer; - if (!skip_prefix) { + if (!skip_prefix && level != logger::Level::QUIET) { buffer << "<" << logger_name << ">" << "[" << level_to_str(level) << "]: "; } diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index d4f5bc73a5..b0c8bbcb86 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -101,6 +101,23 @@ if(UR_ENABLE_TRACING) ) endif() +if(UR_ENABLE_SANITIZER) + target_sources(ur_loader + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/common.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/device_sanitizer_report.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp + ) + target_include_directories(ur_loader PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../" + ) +endif() + # link validation backtrace dependencies if(UNIX) diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp new file mode 100644 index 0000000000..394405c056 --- /dev/null +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -0,0 +1,605 @@ +//===----------------------------------------------------------------------===// +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file asan_interceptor.cpp + * + */ + +#include "asan_interceptor.hpp" +#include "device_sanitizer_report.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { + +namespace { + +// These magic values are written to shadow for better error +// reporting. 
+constexpr int kUsmDeviceRedzoneMagic = (char)0x81; +constexpr int kUsmHostRedzoneMagic = (char)0x82; +constexpr int kUsmSharedRedzoneMagic = (char)0x83; +constexpr int kMemBufferRedzoneMagic = (char)0x84; + +constexpr auto kSPIR_AsanShadowMemoryGlobalStart = + "__AsanShadowMemoryGlobalStart"; +constexpr auto kSPIR_AsanShadowMemoryGlobalEnd = "__AsanShadowMemoryGlobalEnd"; + +constexpr auto kSPIR_DeviceSanitizerReportMem = "__DeviceSanitizerReportMem"; + +DeviceSanitizerReport SPIR_DeviceSanitizerReportMem; + +uptr MemToShadow_PVC(uptr USM_SHADOW_BASE, uptr UPtr) { + if (UPtr & 0xFF00000000000000ULL) { // Device USM + return USM_SHADOW_BASE + 0x200000000000ULL + + ((UPtr & 0xFFFFFFFFFFFFULL) >> 3); + } else { // Only consider 47bit VA + return USM_SHADOW_BASE + ((UPtr & 0x7FFFFFFFFFFFULL) >> 3); + } +} + +ur_context_handle_t getContext(ur_queue_handle_t Queue) { + ur_context_handle_t Context; + [[maybe_unused]] auto Result = context.urDdiTable.Queue.pfnGetInfo( + Queue, UR_QUEUE_INFO_CONTEXT, sizeof(ur_context_handle_t), &Context, + nullptr); + assert(Result == UR_RESULT_SUCCESS); + return Context; +} + +ur_device_handle_t getDevice(ur_queue_handle_t Queue) { + ur_device_handle_t Device; + [[maybe_unused]] auto Result = context.urDdiTable.Queue.pfnGetInfo( + Queue, UR_QUEUE_INFO_DEVICE, sizeof(ur_device_handle_t), &Device, + nullptr); + assert(Result == UR_RESULT_SUCCESS); + return Device; +} + +ur_program_handle_t getProgram(ur_kernel_handle_t Kernel) { + ur_program_handle_t Program; + [[maybe_unused]] auto Result = context.urDdiTable.Kernel.pfnGetInfo( + Kernel, UR_KERNEL_INFO_PROGRAM, sizeof(ur_program_handle_t), &Program, + nullptr); + assert(Result == UR_RESULT_SUCCESS); + return Program; +} + +} // namespace + +/// The memory chunk allocated from the underlying allocator looks like this: +/// L L L L L L U U U U U U R R +/// L -- left redzone words (0 or more bytes) +/// U -- user memory. 
+/// R -- right redzone (0 or more bytes) +/// +/// ref: "compiler-rt/lib/asan/asan_allocator.cpp" Allocator::Allocate +ur_result_t SanitizerInterceptor::allocateMemory( + ur_context_handle_t Context, ur_device_handle_t Device, + const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, + void **ResultPtr, USMMemoryType Type) { + auto Alignment = Properties->align; + assert(Alignment == 0 || IsPowerOfTwo(Alignment)); + + auto ContextInfo = getContextInfo(Context); + std::shared_ptr DeviceInfo; + if (Device) { + DeviceInfo = ContextInfo->getDeviceInfo(Device); + } + + if (Alignment == 0) { + Alignment = + DeviceInfo ? DeviceInfo->Alignment : ASAN_SHADOW_GRANULARITY; + } + + // Copy from LLVM compiler-rt/lib/asan + uptr RZLog = ComputeRZLog(Size); + uptr RZSize = RZLog2Size(RZLog); + uptr RoundedSize = RoundUpTo(Size, Alignment); + uptr NeededSize = RoundedSize + RZSize * 2; + + void *Allocated = nullptr; + + if (Type == USMMemoryType::DEVICE) { + UR_CALL(context.urDdiTable.USM.pfnDeviceAlloc( + Context, Device, Properties, Pool, NeededSize, &Allocated)); + } else if (Type == USMMemoryType::HOST) { + UR_CALL(context.urDdiTable.USM.pfnHostAlloc(Context, Properties, Pool, + NeededSize, &Allocated)); + } else if (Type == USMMemoryType::SHARE) { + UR_CALL(context.urDdiTable.USM.pfnSharedAlloc( + Context, Device, Properties, Pool, NeededSize, &Allocated)); + } else { + context.logger.error("Unsupport memory type"); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + + // Copy from LLVM compiler-rt/lib/asan + uptr AllocBegin = reinterpret_cast(Allocated); + [[maybe_unused]] uptr AllocEnd = AllocBegin + NeededSize; + uptr UserBegin = AllocBegin + RZSize; + if (!IsAligned(UserBegin, Alignment)) { + UserBegin = RoundUpTo(UserBegin, Alignment); + } + uptr UserEnd = UserBegin + Size; + assert(UserEnd <= AllocEnd); + + *ResultPtr = reinterpret_cast(UserBegin); + + auto AllocInfo = std::make_shared( + USMAllocInfo{AllocBegin, UserBegin, UserEnd, NeededSize, Type}); + 
+ // For updating shadow memory + if (DeviceInfo) { // device/shared USM + std::scoped_lock Guard(DeviceInfo->Mutex); + DeviceInfo->AllocInfos.emplace_back(AllocInfo); + } else { // host USM's AllocInfo needs to insert into all devices + for (auto &pair : ContextInfo->DeviceMap) { + auto DeviceInfo = pair.second; + std::scoped_lock Guard(DeviceInfo->Mutex); + DeviceInfo->AllocInfos.emplace_back(AllocInfo); + } + } + + // For memory release + { + std::scoped_lock Guard(ContextInfo->Mutex); + ContextInfo->AllocatedUSMMap[AllocBegin] = AllocInfo; + } + + context.logger.info( + "AllocInfos(AllocBegin={}, User={}-{}, NeededSize={}, Type={})", + (void *)AllocBegin, (void *)UserBegin, (void *)UserEnd, NeededSize, + Type); + + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context, + void *Ptr) { + auto ContextInfo = getContextInfo(Context); + + std::shared_lock Guard(ContextInfo->Mutex); + + auto Addr = reinterpret_cast(Ptr); + // Find the last element is not greater than key + auto AllocInfoIt = ContextInfo->AllocatedUSMMap.upper_bound((uptr)Addr); + if (AllocInfoIt == ContextInfo->AllocatedUSMMap.begin()) { + context.logger.error( + "Can't find release pointer({}) in AllocatedAddressesMap", Ptr); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + --AllocInfoIt; + auto &AllocInfo = AllocInfoIt->second; + + context.logger.debug("USMAllocInfo(AllocBegin={}, UserBegin={})", + AllocInfo->AllocBegin, AllocInfo->UserBegin); + + if (Addr != AllocInfo->UserBegin) { + context.logger.error("Releasing pointer({}) is not match to {}", Ptr, + AllocInfo->UserBegin); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + + // TODO: Update shadow memory + return context.urDdiTable.USM.pfnFree(Context, + (void *)AllocInfo->AllocBegin); +} + +bool SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + ur_event_handle_t &Event) { + prepareLaunch(Queue, Kernel); + + UR_CALL(updateShadowMemory(Queue)); + + // 
Return LastEvent in QueueInfo + auto Context = getContext(Queue); + auto ContextInfo = getContextInfo(Context); + auto QueueInfo = ContextInfo->getQueueInfo(Queue); + + std::scoped_lock Guard(QueueInfo->Mutex); + Event = QueueInfo->LastEvent; + QueueInfo->LastEvent = nullptr; + + return true; +} + +void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + ur_event_handle_t &Event) { + auto Program = getProgram(Kernel); + ur_event_handle_t ReadEvent{}; + + // If kernel has defined SPIR_DeviceSanitizerReportMem, then we try to read it + // to host, but it's okay that it isn't defined + auto Result = context.urDdiTable.Enqueue.pfnDeviceGlobalVariableRead( + Queue, Program, kSPIR_DeviceSanitizerReportMem, true, + sizeof(SPIR_DeviceSanitizerReportMem), 0, + &SPIR_DeviceSanitizerReportMem, 1, &Event, &ReadEvent); + + if (Result == UR_RESULT_SUCCESS) { + Event = ReadEvent; + + auto AH = &SPIR_DeviceSanitizerReportMem; + if (!AH->Flag) { + return; + } + + const char *File = AH->File[0] ? AH->File : ""; + const char *Func = AH->Func[0] ? AH->Func : ""; + + context.logger.always("\n====ERROR: DeviceSanitizer: {} on {}", + DeviceSanitizerFormat(AH->ErrorType), + DeviceSanitizerFormat(AH->MemoryType)); + context.logger.always( + "{} of size {} at kernel <{}> LID({}, {}, {}) GID({}, " + "{}, {})", + AH->IsWrite ? 
"WRITE" : "READ", AH->AccessSize, Func, AH->LID0, + AH->LID1, AH->LID2, AH->GID0, AH->GID1, AH->GID2); + context.logger.always(" #0 {} {}:{}", Func, File, AH->Line); + if (!AH->IsRecover) { + abort(); + } + } +} + +std::string SanitizerInterceptor::getKernelName(ur_kernel_handle_t Kernel) { + size_t KernelNameSize = 0; + [[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetInfo( + Kernel, UR_KERNEL_INFO_FUNCTION_NAME, 0, nullptr, &KernelNameSize); + assert(Res == UR_RESULT_SUCCESS); + + std::vector KernelNameBuf(KernelNameSize); + Res = context.urDdiTable.Kernel.pfnGetInfo( + Kernel, UR_KERNEL_INFO_FUNCTION_NAME, KernelNameSize, + KernelNameBuf.data(), nullptr); + assert(Res == UR_RESULT_SUCCESS); + + return std::string(KernelNameBuf.data(), KernelNameSize - 1); +} + +ur_result_t SanitizerInterceptor::allocShadowMemory( + ur_context_handle_t Context, std::shared_ptr &DeviceInfo) { + if (DeviceInfo->Type == DeviceType::CPU) { + context.logger.error("Unsupport device type"); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } else if (DeviceInfo->Type == DeviceType::GPU_PVC) { + /// SHADOW MEMORY MAPPING (PVC, with CPU 47bit) + /// Host/Shared USM : 0x0 ~ 0x0fff_ffff_ffff + /// ? 
: 0x1000_0000_0000 ~ 0x1fff_ffff_ffff + /// Device USM : 0x2000_0000_0000 ~ 0x3fff_ffff_ffff + constexpr size_t SHADOW_SIZE = 1ULL << 46; + + // TODO: Protect Bad Zone + auto Result = context.urDdiTable.VirtualMem.pfnReserve( + Context, nullptr, SHADOW_SIZE, (void **)&DeviceInfo->ShadowOffset); + if (Result != UR_RESULT_SUCCESS) { + context.logger.error("Failed to allocate shadow memory on PVC: {}", + Result); + return Result; + } + + DeviceInfo->ShadowOffsetEnd = DeviceInfo->ShadowOffset + SHADOW_SIZE; + } else { + context.logger.error("Unsupport device type"); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + context.logger.info("ShadowMemory(Global): {} - {}", + (void *)DeviceInfo->ShadowOffset, + (void *)DeviceInfo->ShadowOffsetEnd); + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::enqueueMemSetShadow( + ur_context_handle_t Context, ur_device_handle_t Device, + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, + ur_event_handle_t DepEvent, ur_event_handle_t *OutEvent) { + + uint32_t NumEventsInWaitList = DepEvent ? 1 : 0; + const ur_event_handle_t *EventsWaitList = DepEvent ? &DepEvent : nullptr; + + auto ContextInfo = getContextInfo(Context); + auto DeviceInfo = ContextInfo->getDeviceInfo(Device); + + if (DeviceInfo->Type == DeviceType::CPU) { + context.logger.error("Unsupport device type"); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } else if (DeviceInfo->Type == DeviceType::GPU_PVC) { + ur_event_handle_t InternalEvent{}; + ur_event_handle_t *Event = OutEvent ? 
OutEvent : &InternalEvent; + + uptr ShadowBegin = MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr); + uptr ShadowEnd = + MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr + Size - 1); + + { + static const size_t PageSize = [Context, Device]() { + size_t Size; + [[maybe_unused]] auto Result = + context.urDdiTable.VirtualMem.pfnGranularityGetInfo( + Context, Device, + UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, + sizeof(Size), &Size, nullptr); + assert(Result == UR_RESULT_SUCCESS); + context.logger.info("PVC PageSize: {}", Size); + return Size; + }(); + + ur_physical_mem_properties_t Desc{ + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0}; + static ur_physical_mem_handle_t PhysicalMem{}; + + // Make sure [Ptr, Ptr + Size] is mapped to physical memory + for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize); + MappedPtr <= ShadowEnd; MappedPtr += PageSize) { + if (!PhysicalMem) { + auto URes = context.urDdiTable.PhysicalMem.pfnCreate( + Context, Device, PageSize, &Desc, &PhysicalMem); + if (URes != UR_RESULT_SUCCESS) { + context.logger.error("urPhysicalMemCreate(): {}", URes); + return URes; + } + } + + context.logger.debug("urVirtualMemMap: {} ~ {}", + (void *)MappedPtr, + (void *)(MappedPtr + PageSize - 1)); + + // FIXME: No flag to check the failed reason is VA is already mapped + auto URes = context.urDdiTable.VirtualMem.pfnMap( + Context, (void *)MappedPtr, PageSize, PhysicalMem, 0, + UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE); + if (URes != UR_RESULT_SUCCESS) { + context.logger.debug("urVirtualMemMap(): {}", URes); + } + + // Initialize to zero + if (URes == UR_RESULT_SUCCESS) { + // Reset PhysicalMem to null since it's been mapped + PhysicalMem = nullptr; + + const char Pattern[] = {0}; + + auto URes = context.urDdiTable.Enqueue.pfnUSMFill( + Queue, (void *)MappedPtr, 1, Pattern, PageSize, + NumEventsInWaitList, EventsWaitList, Event); + if (URes != UR_RESULT_SUCCESS) { + context.logger.error("urEnqueueUSMFill(): {}", URes); + return URes; + } + + 
NumEventsInWaitList = 1; + EventsWaitList = Event; + } + } + } + + const char Pattern[] = {(char)Value}; + auto URes = context.urDdiTable.Enqueue.pfnUSMFill( + Queue, (void *)ShadowBegin, 1, Pattern, + (ShadowEnd - ShadowBegin + 1), NumEventsInWaitList, EventsWaitList, + Event); + if (URes != UR_RESULT_SUCCESS) { + context.logger.error("urEnqueueUSMFill(): {}", URes); + return URes; + } + } else { + context.logger.error("Unsupport device type"); + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + return UR_RESULT_SUCCESS; +} + +/// Each 8 bytes of application memory are mapped into one byte of shadow memory +/// The meaning of that byte: +/// - Negative: All bytes are not accessible (poisoned) +/// - 0: All bytes are accessible +/// - 1 <= k <= 7: Only the first k bytes is accessible +/// +/// ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#mapping +ur_result_t SanitizerInterceptor::enqueueAllocInfo( + ur_context_handle_t Context, ur_device_handle_t Device, + ur_queue_handle_t Queue, std::shared_ptr &AllocInfo, + ur_event_handle_t &LastEvent) { + // Init zero + UR_CALL(enqueueMemSetShadow(Context, Device, Queue, AllocInfo->AllocBegin, + AllocInfo->AllocSize, 0, LastEvent, + &LastEvent)); + + uptr TailBegin = RoundUpTo(AllocInfo->UserEnd, ASAN_SHADOW_GRANULARITY); + uptr TailEnd = AllocInfo->AllocBegin + AllocInfo->AllocSize; + + // User tail + if (TailBegin != AllocInfo->UserEnd) { + auto Value = AllocInfo->UserEnd - + RoundDownTo(AllocInfo->UserEnd, ASAN_SHADOW_GRANULARITY); + UR_CALL(enqueueMemSetShadow(Context, Device, Queue, AllocInfo->UserEnd, + 1, static_cast(Value), LastEvent, + &LastEvent)); + } + + int ShadowByte; + switch (AllocInfo->Type) { + case USMMemoryType::HOST: + ShadowByte = kUsmHostRedzoneMagic; + break; + case USMMemoryType::DEVICE: + ShadowByte = kUsmDeviceRedzoneMagic; + break; + case USMMemoryType::SHARE: + ShadowByte = kUsmSharedRedzoneMagic; + break; + case USMMemoryType::MEM_BUFFER: + ShadowByte = 
kMemBufferRedzoneMagic; + break; + default: + ShadowByte = 0xff; + assert(false && "Unknow AllocInfo Type"); + } + + // Left red zone + UR_CALL(enqueueMemSetShadow(Context, Device, Queue, AllocInfo->AllocBegin, + AllocInfo->UserBegin - AllocInfo->AllocBegin, + ShadowByte, LastEvent, &LastEvent)); + + // Right red zone + UR_CALL(enqueueMemSetShadow(Context, Device, Queue, TailBegin, + TailEnd - TailBegin, ShadowByte, LastEvent, + &LastEvent)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) { + auto Context = getContext(Queue); + auto Device = getDevice(Queue); + assert(Device != nullptr); + + auto ContextInfo = getContextInfo(Context); + + auto HostInfo = ContextInfo->getDeviceInfo(nullptr); + auto DeviceInfo = ContextInfo->getDeviceInfo(Device); + auto QueueInfo = ContextInfo->getQueueInfo(Queue); + + std::shared_lock HostGuard(HostInfo->Mutex, + std::defer_lock); + std::unique_lock DeviceGuard(DeviceInfo->Mutex, + std::defer_lock); + std::scoped_lock, + std::unique_lock, ur_mutex> + Guard(HostGuard, DeviceGuard, QueueInfo->Mutex); + + ur_event_handle_t LastEvent = QueueInfo->LastEvent; + + for (auto &AllocInfo : DeviceInfo->AllocInfos) { + UR_CALL(enqueueAllocInfo(Context, Device, Queue, AllocInfo, LastEvent)); + } + DeviceInfo->AllocInfos.clear(); + + QueueInfo->LastEvent = LastEvent; + + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::insertContext(ur_context_handle_t Context) { + auto ContextInfo = std::make_shared(); + + std::scoped_lock Guard(m_ContextMapMutex); + assert(m_ContextMap.find(Context) == m_ContextMap.end()); + m_ContextMap.emplace(Context, std::move(ContextInfo)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::eraseContext(ur_context_handle_t Context) { + std::scoped_lock Guard(m_ContextMapMutex); + assert(m_ContextMap.find(Context) != m_ContextMap.end()); + m_ContextMap.erase(Context); + return UR_RESULT_SUCCESS; +} + +ur_result_t 
SanitizerInterceptor::insertDevice(ur_context_handle_t Context, + ur_device_handle_t Device) { + auto DeviceInfo = std::make_shared(); + + // Query device type + ur_device_type_t DeviceType; + UR_CALL(context.urDdiTable.Device.pfnGetInfo( + Device, UR_DEVICE_INFO_TYPE, sizeof(DeviceType), &DeviceType, nullptr)); + switch (DeviceType) { + case UR_DEVICE_TYPE_CPU: + DeviceInfo->Type = DeviceType::CPU; + break; + case UR_DEVICE_TYPE_GPU: + DeviceInfo->Type = DeviceType::GPU_PVC; + break; + default: + DeviceInfo->Type = DeviceType::UNKNOWN; + } + + // Query alignment + UR_CALL(context.urDdiTable.Device.pfnGetInfo( + Device, UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, + sizeof(DeviceInfo->Alignment), &DeviceInfo->Alignment, nullptr)); + + // Allocate shadow memory + UR_CALL(allocShadowMemory(Context, DeviceInfo)); + + auto ContextInfo = getContextInfo(Context); + std::scoped_lock Guard(ContextInfo->Mutex); + ContextInfo->DeviceMap.emplace(Device, std::move(DeviceInfo)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::insertQueue(ur_context_handle_t Context, + ur_queue_handle_t Queue) { + auto QueueInfo = std::make_shared(); + QueueInfo->LastEvent = nullptr; + + auto ContextInfo = getContextInfo(Context); + std::scoped_lock Guard(ContextInfo->Mutex); + ContextInfo->QueueMap.emplace(Queue, std::move(QueueInfo)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t SanitizerInterceptor::eraseQueue(ur_context_handle_t Context, + ur_queue_handle_t Queue) { + auto ContextInfo = getContextInfo(Context); + std::scoped_lock Guard(ContextInfo->Mutex); + assert(ContextInfo->QueueMap.find(Queue) != ContextInfo->QueueMap.end()); + ContextInfo->QueueMap.erase(Queue); + return UR_RESULT_SUCCESS; +} + +void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue, + ur_kernel_handle_t Kernel) { + auto Context = getContext(Queue); + auto Device = getDevice(Queue); + auto Program = getProgram(Kernel); + + auto ContextInfo = getContextInfo(Context); + auto DeviceInfo = 
ContextInfo->getDeviceInfo(Device); + auto QueueInfo = ContextInfo->getQueueInfo(Queue); + + std::scoped_lock Guard(QueueInfo->Mutex); + ur_event_handle_t LastEvent = QueueInfo->LastEvent; + + { + // Set global variable to program + auto EnqueueWriteGlobal = [&](const char *Name, const void *Value) { + ur_event_handle_t NewEvent{}; + uint32_t NumEvents = LastEvent ? 1 : 0; + const ur_event_handle_t *EventsList = + LastEvent ? &LastEvent : nullptr; + auto Result = + context.urDdiTable.Enqueue.pfnDeviceGlobalVariableWrite( + Queue, Program, Name, false, sizeof(uptr), 0, Value, + NumEvents, EventsList, &NewEvent); + if (Result != UR_RESULT_SUCCESS) { + context.logger.warning("Device Global[{}] Write Failed: {}", + Name, Result); + return false; + } + LastEvent = NewEvent; + return true; + }; + + // Device shadow memory offset + EnqueueWriteGlobal(kSPIR_AsanShadowMemoryGlobalStart, + &DeviceInfo->ShadowOffset); + EnqueueWriteGlobal(kSPIR_AsanShadowMemoryGlobalEnd, + &DeviceInfo->ShadowOffsetEnd); + } + + QueueInfo->LastEvent = LastEvent; +} + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp new file mode 100644 index 0000000000..edad3f926e --- /dev/null +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -0,0 +1,141 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ur_sanitizer_layer.cpp + * + */ + +#pragma once + +#include "common.hpp" + +#include +#include +#include +#include + +namespace ur_sanitizer_layer { + +enum USMMemoryType { DEVICE, SHARE, HOST, MEM_BUFFER }; + +struct USMAllocInfo { + uptr AllocBegin; + uptr UserBegin; + uptr UserEnd; + size_t AllocSize; + USMMemoryType Type; +}; + +enum class DeviceType { UNKNOWN, CPU, GPU_PVC, GPU_DG2 }; + +struct DeviceInfo { + DeviceType Type; + size_t Alignment; + uptr ShadowOffset; + uptr ShadowOffsetEnd; + + // Lock InitPool & AllocInfos + ur_shared_mutex Mutex; + std::vector> AllocInfos; +}; + +struct QueueInfo { + ur_mutex Mutex; + ur_event_handle_t LastEvent; +}; + +struct ContextInfo { + + std::shared_ptr getDeviceInfo(ur_device_handle_t Device) { + std::shared_lock Guard(Mutex); + assert(DeviceMap.find(Device) != DeviceMap.end()); + return DeviceMap[Device]; + } + + std::shared_ptr getQueueInfo(ur_queue_handle_t Queue) { + std::shared_lock Guard(Mutex); + assert(QueueMap.find(Queue) != QueueMap.end()); + return QueueMap[Queue]; + } + + std::shared_ptr getUSMAllocInfo(uptr Address) { + std::shared_lock Guard(Mutex); + assert(AllocatedUSMMap.find(Address) != AllocatedUSMMap.end()); + return AllocatedUSMMap[Address]; + } + + ur_shared_mutex Mutex; + std::unordered_map> + DeviceMap; + std::unordered_map> QueueMap; + + /// key: USMAllocInfo.AllocBegin + /// value: USMAllocInfo + /// Use AllocBegin as key can help to detect underflow pointer + std::map> AllocatedUSMMap; +}; + +class SanitizerInterceptor { + public: + ur_result_t allocateMemory(ur_context_handle_t Context, + ur_device_handle_t Device, + const ur_usm_desc_t *Properties, + ur_usm_pool_handle_t Pool, size_t Size, + void **ResultPtr, USMMemoryType Type); + ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr); + + bool preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, + ur_event_handle_t 
&Event); + void postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, + ur_event_handle_t &Event); + + ur_result_t insertContext(ur_context_handle_t Context); + ur_result_t eraseContext(ur_context_handle_t Context); + + ur_result_t insertDevice(ur_context_handle_t Context, + ur_device_handle_t Device); + + ur_result_t insertQueue(ur_context_handle_t Context, + ur_queue_handle_t Queue); + ur_result_t eraseQueue(ur_context_handle_t Context, + ur_queue_handle_t Queue); + + private: + ur_result_t updateShadowMemory(ur_queue_handle_t Queue); + ur_result_t enqueueAllocInfo(ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t Queue, + std::shared_ptr &AlloccInfo, + ur_event_handle_t &LastEvent); + + /// Initialize Global Variables & Kernel Name at first Launch + void prepareLaunch(ur_queue_handle_t Queue, ur_kernel_handle_t Kernel); + + std::string getKernelName(ur_kernel_handle_t Kernel); + ur_result_t allocShadowMemory(ur_context_handle_t Context, + std::shared_ptr &DeviceInfo); + ur_result_t enqueueMemSetShadow(ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t Queue, uptr Addr, + uptr Size, u8 Value, + ur_event_handle_t DepEvent, + ur_event_handle_t *OutEvent); + + std::shared_ptr getContextInfo(ur_context_handle_t Context) { + std::shared_lock Guard(m_ContextMapMutex); + assert(m_ContextMap.find(Context) != m_ContextMap.end()); + return m_ContextMap[Context]; + } + + private: + std::unordered_map> + m_ContextMap; + ur_shared_mutex m_ContextMapMutex; +}; + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/common.hpp b/source/loader/layers/sanitizer/common.hpp new file mode 100644 index 0000000000..8b80814b9c --- /dev/null +++ b/source/loader/layers/sanitizer/common.hpp @@ -0,0 +1,90 @@ +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file common.hpp + * + */ + +#pragma once + +#include "ur/ur.hpp" +#include "ur_ddi.h" + +#include +#include + +namespace ur_sanitizer_layer { + +// ================================================================ +// Copy from LLVM compiler-rt/lib/asan + +using uptr = uintptr_t; +using u8 = unsigned char; +using u32 = unsigned int; + +constexpr unsigned ASAN_SHADOW_SCALE = 3; +constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE; + +inline constexpr bool IsPowerOfTwo(uptr x) { + return (x & (x - 1)) == 0 && x != 0; +} + +inline constexpr uptr RoundUpTo(uptr Size, uptr boundary) { + assert(IsPowerOfTwo(boundary)); + return (Size + boundary - 1) & ~(boundary - 1); +} + +inline constexpr uptr RoundDownTo(uptr x, uptr boundary) { + assert(IsPowerOfTwo(boundary)); + return x & ~(boundary - 1); +} + +inline constexpr bool IsAligned(uptr a, uptr alignment) { + return (a & (alignment - 1)) == 0; +} + +// Valid redzone sizes are 16, 32, 64, ... 2048, so we encode them in 3 bits. +// We use adaptive redzones: for larger allocation larger redzones are used. +inline constexpr uptr RZLog2Size(uptr rz_log) { + assert(rz_log < 8); + return 16 << rz_log; +} + +inline constexpr uptr ComputeRZLog(uptr user_requested_size) { + uptr rz_log = user_requested_size <= 64 - 16 ? 0 + : user_requested_size <= 128 - 32 ? 1 + : user_requested_size <= 512 - 64 ? 2 + : user_requested_size <= 4096 - 128 ? 3 + : user_requested_size <= (1 << 14) - 256 ? 4 + : user_requested_size <= (1 << 15) - 512 ? 5 + : user_requested_size <= (1 << 16) - 1024 ? 6 + : 7; + return rz_log; +} + +// ================================================================ + +// Trace an internal UR call; returns in case of an error. 
+#define UR_CALL(Call)                                                          \
+    {                                                                          \
+        if (PrintTrace)                                                        \
+            context.logger.debug("UR ---> {}", #Call);                         \
+        ur_result_t Result = (Call);                                           \
+        if (PrintTrace)                                                        \
+            context.logger.debug("UR <--- {}({})", #Call, Result);             \
+        if (Result != UR_RESULT_SUCCESS)                                       \
+            return Result;                                                     \
+    }
+// UR_CALL expands to code that reads `PrintTrace` and `context`; both must be
+// visible at the expansion site, and the enclosing function must return
+// ur_result_t.
+
+// Compare the value of `Call` against `Result` in debug builds; in release
+// builds `Call` is still evaluated, but its value is discarded. Arguments are
+// parenthesised so compound expressions expand correctly.
+#ifndef NDEBUG
+#define UR_ASSERT_EQ(Call, Result) assert((Call) == (Result))
+#else
+#define UR_ASSERT_EQ(Call, Result) (void)(Call)
+#endif
+
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/device_sanitizer_report.hpp b/source/loader/layers/sanitizer/device_sanitizer_report.hpp
new file mode 100644
index 0000000000..11ae721434
--- /dev/null
+++ b/source/loader/layers/sanitizer/device_sanitizer_report.hpp
@@ -0,0 +1,95 @@
+/*
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file device_sanitizer_report.hpp
+ *
+ */
+
+#pragma once
+
+#include <cstdint>
+
+namespace ur_sanitizer_layer {
+
+/// Kind of error recorded in a DeviceSanitizerReport.
+enum class DeviceSanitizerErrorType : int32_t {
+    UNKNOWN,
+    OUT_OF_BOUNDS,
+    MISALIGNED,
+    USE_AFTER_FREE,
+    OUT_OF_SHADOW_BOUNDS,
+};
+
+/// Kind of memory the reported access touched.
+enum class DeviceSanitizerMemoryType : int32_t {
+    UNKNOWN,
+    USM_DEVICE,
+    USM_HOST,
+    USM_SHARED,
+    LOCAL,
+    PRIVATE,
+    MEM_BUFFER,
+};
+
+/// Plain record describing one sanitizer error. Every member has a default
+/// initializer, so a default-constructed report is fully defined.
+struct DeviceSanitizerReport {
+    int Flag = 0;
+
+    // Source location strings; 256 characters plus the terminating NUL.
+    char File[256 + 1] = "";
+    char Func[256 + 1] = "";
+
+    int32_t Line = 0;
+
+    uint64_t GID0 = 0;
+    uint64_t GID1 = 0;
+    uint64_t GID2 = 0;
+
+    uint64_t LID0 = 0;
+    uint64_t LID1 = 0;
+    uint64_t LID2 = 0;
+
+    bool IsWrite = false;
+    uint32_t AccessSize = 0;
+    DeviceSanitizerMemoryType MemoryType = DeviceSanitizerMemoryType::UNKNOWN;
+    DeviceSanitizerErrorType ErrorType = DeviceSanitizerErrorType::UNKNOWN;
+
+    bool IsRecover = false;
+};
+
+/// @brief Human-readable name for a memory type; values without an explicit
+/// case (including UNKNOWN) map to "Unknown Memory".
+inline const char *DeviceSanitizerFormat(DeviceSanitizerMemoryType MemoryType) {
+    switch (MemoryType) {
+    case DeviceSanitizerMemoryType::USM_DEVICE:
+        return "USM Device Memory";
+    case DeviceSanitizerMemoryType::USM_HOST:
+        return "USM Host Memory";
+    case DeviceSanitizerMemoryType::USM_SHARED:
+        return "USM Shared Memory";
+    case DeviceSanitizerMemoryType::LOCAL:
+        return "Local Memory";
+    case DeviceSanitizerMemoryType::PRIVATE:
+        return "Private Memory";
+    case DeviceSanitizerMemoryType::MEM_BUFFER:
+        return "Memory Buffer";
+    default:
+        return "Unknown Memory";
+    }
+}
+
+/// @brief Human-readable name for an error type; values without an explicit
+/// case (including UNKNOWN) map to "unknown-error".
+inline const char *DeviceSanitizerFormat(DeviceSanitizerErrorType ErrorType) {
+    switch (ErrorType) {
+    case DeviceSanitizerErrorType::OUT_OF_BOUNDS:
+        return "out-of-bounds-access";
+    case DeviceSanitizerErrorType::MISALIGNED:
+        return "misaligned-access";
+    case DeviceSanitizerErrorType::USE_AFTER_FREE:
+        return "use-after-free";
+    case DeviceSanitizerErrorType::OUT_OF_SHADOW_BOUNDS:
+        return "out-of-shadow-bounds-access";
+    default:
+        return "unknown-error";
+    }
+}
+
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp
new file mode 100644
index 0000000000..64f54752ca
--- /dev/null
+++ b/source/loader/layers/sanitizer/ur_sanddi.cpp
@@ -0,0 +1,485 @@
+/*
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file ur_sanddi.cpp
+ *
+ */
+
+#include "asan_interceptor.hpp"
+#include "ur_sanitizer_layer.hpp"
+
+namespace ur_sanitizer_layer {
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urUSMHostAlloc
+///
+/// Forwards to the interceptor with a null device handle and the HOST memory
+/// type; fails with UNSUPPORTED_FEATURE when the saved table has no
+/// pfnHostAlloc.
+__urdlllocal ur_result_t UR_APICALL urUSMHostAlloc(
+    ur_context_handle_t hContext, ///< [in] handle of the context object
+    const ur_usm_desc_t
+        *pUSMDesc, ///< [in][optional] USM memory allocation descriptor
+    ur_usm_pool_handle_t
+        pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate
+    size_t
+        size, ///< [in] size in bytes of the USM memory object to be allocated
+    void **ppMem ///< [out] pointer to USM host memory object
+) {
+    auto pfnHostAlloc = context.urDdiTable.USM.pfnHostAlloc;
+
+    if (nullptr == pfnHostAlloc) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    return context.interceptor->allocateMemory(
+        hContext, nullptr, pUSMDesc, pool, size, ppMem, USMMemoryType::HOST);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urUSMDeviceAlloc
+///
+/// Forwards to the interceptor with the DEVICE memory type; fails with
+/// UNSUPPORTED_FEATURE when the saved table has no pfnDeviceAlloc.
+__urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc(
+    ur_context_handle_t hContext, ///< [in] handle of the context object
+    ur_device_handle_t hDevice,   ///< [in] handle of the device object
+    const ur_usm_desc_t
+        *pUSMDesc, ///< [in][optional] USM memory allocation descriptor
+    ur_usm_pool_handle_t
+        pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate
+    size_t
+        size, ///< [in] size in bytes of the USM memory object to be allocated
+    void **ppMem ///< [out] pointer to USM device memory object
+) {
+    auto pfnDeviceAlloc = context.urDdiTable.USM.pfnDeviceAlloc;
+
+    if (nullptr == pfnDeviceAlloc) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    return context.interceptor->allocateMemory(
+        hContext, hDevice, pUSMDesc, pool, size, ppMem,
+        USMMemoryType::DEVICE);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urUSMSharedAlloc
+///
+/// Forwards to the interceptor with the SHARE memory type; fails with
+/// UNSUPPORTED_FEATURE when the saved table has no pfnSharedAlloc.
+__urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc(
+    ur_context_handle_t hContext, ///< [in] handle of the context object
+    ur_device_handle_t hDevice,   ///< [in] handle of the device object
+    const ur_usm_desc_t *
+        pUSMDesc, ///< [in][optional] Pointer to USM memory allocation descriptor.
+    ur_usm_pool_handle_t
+        pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate
+    size_t
+        size, ///< [in] size in bytes of the USM memory object to be allocated
+    void **ppMem ///< [out] pointer to USM shared memory object
+) {
+    auto pfnSharedAlloc = context.urDdiTable.USM.pfnSharedAlloc;
+
+    if (nullptr == pfnSharedAlloc) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    return context.interceptor->allocateMemory(
+        hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::SHARE);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urUSMFree
+///
+/// Forwards to the interceptor's releaseMemory; fails with
+/// UNSUPPORTED_FEATURE when the saved table has no pfnFree.
+__urdlllocal ur_result_t UR_APICALL urUSMFree(
+    ur_context_handle_t hContext, ///< [in] handle of the context object
+    void *pMem                    ///< [in] pointer to USM memory object
+) {
+    auto pfnFree = context.urDdiTable.USM.pfnFree;
+
+    if (nullptr == pfnFree) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    return context.interceptor->releaseMemory(hContext, pMem);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urQueueCreate
+///
+/// Creates the queue through the saved table and, on success, registers it
+/// with the interceptor. The interceptor's result is what the caller sees.
+__urdlllocal ur_result_t UR_APICALL urQueueCreate(
+    ur_context_handle_t hContext, ///< [in] handle of the context object
+    ur_device_handle_t hDevice,   ///< [in] handle of the device object
+    const ur_queue_properties_t
+        *pProperties, ///< [in][optional] pointer to queue creation properties.
+    ur_queue_handle_t
+        *phQueue ///< [out] pointer to handle of queue object created
+) {
+    auto pfnCreate = context.urDdiTable.Queue.pfnCreate;
+
+    if (nullptr == pfnCreate) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    ur_result_t result = pfnCreate(hContext, hDevice, pProperties, phQueue);
+    if (result == UR_RESULT_SUCCESS) {
+        result = context.interceptor->insertQueue(hContext, *phQueue);
+    }
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urQueueRelease
+///
+/// Looks up the queue's context, removes the queue from the interceptor and
+/// then releases it through the saved table. Either of the first two steps
+/// failing returns early (UR_CALL) without releasing the queue.
+__urdlllocal ur_result_t UR_APICALL urQueueRelease(
+    ur_queue_handle_t hQueue ///< [in] handle of the queue object to release
+) {
+    auto pfnRelease = context.urDdiTable.Queue.pfnRelease;
+
+    if (nullptr == pfnRelease) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    ur_context_handle_t hContext;
+    UR_CALL(context.urDdiTable.Queue.pfnGetInfo(hQueue, UR_QUEUE_INFO_CONTEXT,
+                                                sizeof(ur_context_handle_t),
+                                                &hContext, nullptr));
+    UR_CALL(context.interceptor->eraseQueue(hContext, hQueue));
+
+    ur_result_t result = pfnRelease(hQueue);
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urEnqueueKernelLaunch
+///
+/// Runs the interceptor's pre-launch hook, launches the kernel with the
+/// caller's wait list extended by the hook's event (when it produced one),
+/// and runs the post-launch hook when the launch succeeded.
+__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
+    ur_queue_handle_t hQueue,   ///< [in] handle of the queue object
+    ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
+    uint32_t
+        workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and
+                 ///< work-group work-items
+    const size_t *
+        pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the
+    ///< offset used to calculate the global ID of a work-item
+    const size_t *
+        pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the
+    ///< number of global work-items in workDim that will execute the kernel
+    ///< function
+    const size_t *
+        pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that
+    ///< specify the number of local work-items forming a work-group that will
+    ///< execute the kernel function.
+    ///< If nullptr, the runtime implementation will choose the work-group
+    ///< size.
+    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t *
+        phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
+    ///< events that must be complete before the kernel execution.
+    ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait
+    ///< event.
+    ur_event_handle_t *
+        phEvent ///< [out][optional] return an event object that identifies this particular
+                ///< kernel execution instance.
+) {
+    auto pfnKernelLaunch = context.urDdiTable.Enqueue.pfnKernelLaunch;
+
+    if (nullptr == pfnKernelLaunch) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    // Start from an EMPTY vector and only reserve capacity. Constructing it
+    // with a size would pre-fill it with null handles that push_back then
+    // appends after.
+    std::vector<ur_event_handle_t> events;
+    events.reserve(static_cast<size_t>(numEventsInWaitList) + 1);
+    for (uint32_t i = 0; i < numEventsInWaitList; ++i) {
+        events.push_back(phEventWaitList[i]);
+    }
+
+    // launchKernel must append to num_events_in_wait_list, not prepend
+    ur_event_handle_t hPreEvent{};
+    context.interceptor->preLaunchKernel(hKernel, hQueue, hPreEvent);
+    if (hPreEvent) {
+        events.push_back(hPreEvent);
+    }
+
+    // Pass the merged list so the launch also waits on the pre-launch event.
+    // An empty list is passed as (0, nullptr).
+    ur_event_handle_t hEvent{};
+    ur_result_t result = pfnKernelLaunch(
+        hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
+        pLocalWorkSize, static_cast<uint32_t>(events.size()),
+        events.empty() ? nullptr : events.data(), &hEvent);
+
+    if (result == UR_RESULT_SUCCESS) {
+        context.interceptor->postLaunchKernel(hKernel, hQueue, hEvent);
+    }
+
+    if (phEvent) {
+        *phEvent = hEvent;
+    }
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urContextCreate
+///
+/// Creates the context through the saved table and, on success, registers the
+/// context and each of its devices with the interceptor. The first
+/// registration failure is returned immediately.
+__urdlllocal ur_result_t UR_APICALL urContextCreate(
+    uint32_t numDevices, ///< [in] the number of devices given in phDevices
+    const ur_device_handle_t
+        *phDevices, ///< [in][range(0, numDevices)] array of handle of devices.
+    const ur_context_properties_t *
+        pProperties, ///< [in][optional] pointer to context creation properties.
+    ur_context_handle_t
+        *phContext ///< [out] pointer to handle of context object created
+) {
+    auto pfnCreate = context.urDdiTable.Context.pfnCreate;
+
+    if (nullptr == pfnCreate) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    ur_result_t result =
+        pfnCreate(numDevices, phDevices, pProperties, phContext);
+
+    if (result == UR_RESULT_SUCCESS) {
+        auto Context = *phContext;
+        result = context.interceptor->insertContext(Context);
+        if (result != UR_RESULT_SUCCESS) {
+            return result;
+        }
+        for (uint32_t i = 0; i < numDevices; ++i) {
+            result = context.interceptor->insertDevice(Context, phDevices[i]);
+            if (result != UR_RESULT_SUCCESS) {
+                return result;
+            }
+        }
+    }
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urContextCreateWithNativeHandle
+///
+/// Same registration sequence as urContextCreate, applied to a context
+/// created from a native handle.
+__urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle(
+    ur_native_handle_t
+        hNativeContext,  ///< [in][nocheck] the native handle of the context.
+    uint32_t numDevices, ///< [in] number of devices associated with the context
+    const ur_device_handle_t *
+        phDevices, ///< [in][range(0, numDevices)] list of devices associated with the context
+    const ur_context_native_properties_t *
+        pProperties, ///< [in][optional] pointer to native context properties struct
+    ur_context_handle_t *
+        phContext ///< [out] pointer to the handle of the context object created.
+) {
+    auto pfnCreateWithNativeHandle =
+        context.urDdiTable.Context.pfnCreateWithNativeHandle;
+
+    if (nullptr == pfnCreateWithNativeHandle) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    ur_result_t result = pfnCreateWithNativeHandle(
+        hNativeContext, numDevices, phDevices, pProperties, phContext);
+
+    if (result == UR_RESULT_SUCCESS) {
+        auto Context = *phContext;
+        result = context.interceptor->insertContext(Context);
+        if (result != UR_RESULT_SUCCESS) {
+            return result;
+        }
+        for (uint32_t i = 0; i < numDevices; ++i) {
+            result = context.interceptor->insertDevice(Context, phDevices[i]);
+            if (result != UR_RESULT_SUCCESS) {
+                return result;
+            }
+        }
+    }
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urContextRelease
+///
+/// Removes the context from the interceptor, then releases it through the
+/// saved table. A failing eraseContext returns early (UR_CALL) without
+/// releasing the context.
+__urdlllocal ur_result_t UR_APICALL urContextRelease(
+    ur_context_handle_t hContext ///< [in] handle of the context to release.
+) {
+    auto pfnRelease = context.urDdiTable.Context.pfnRelease;
+
+    if (nullptr == pfnRelease) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    UR_CALL(context.interceptor->eraseContext(hContext));
+    ur_result_t result = pfnRelease(hContext);
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Exported function for filling application's Context table
+///        with current process' addresses
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
+__urdlllocal ur_result_t UR_APICALL urGetContextProcAddrTable(
+    ur_api_version_t version, ///< [in] API version requested
+    ur_context_dditable_t
+        *pDdiTable ///< [in,out] pointer to table of DDI function pointers
+) {
+    if (nullptr == pDdiTable) {
+        return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+    }
+
+    // Major versions must match; the layer's minor version may not exceed the
+    // requested one.
+    if (UR_MAJOR_VERSION(ur_sanitizer_layer::context.version) !=
+            UR_MAJOR_VERSION(version) ||
+        UR_MINOR_VERSION(ur_sanitizer_layer::context.version) >
+            UR_MINOR_VERSION(version)) {
+        return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
+    }
+
+    ur_result_t result = UR_RESULT_SUCCESS;
+
+    pDdiTable->pfnCreate = ur_sanitizer_layer::urContextCreate;
+    pDdiTable->pfnRelease = ur_sanitizer_layer::urContextRelease;
+
+    pDdiTable->pfnCreateWithNativeHandle =
+        ur_sanitizer_layer::urContextCreateWithNativeHandle;
+
+    return result;
+}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Exported function for filling application's Enqueue table
+///        with current process' addresses
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
+__urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
+    ur_api_version_t version, ///< [in] API version requested
+    ur_enqueue_dditable_t
+        *pDdiTable ///< [in,out] pointer to table of DDI function pointers
+) {
+    if (nullptr == pDdiTable) {
+        return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+    }
+
+    if (UR_MAJOR_VERSION(ur_sanitizer_layer::context.version) !=
+            UR_MAJOR_VERSION(version) ||
+        UR_MINOR_VERSION(ur_sanitizer_layer::context.version) >
+            UR_MINOR_VERSION(version)) {
+        return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
+    }
+
+    ur_result_t result = UR_RESULT_SUCCESS;
+
+    pDdiTable->pfnKernelLaunch = ur_sanitizer_layer::urEnqueueKernelLaunch;
+
+    return result;
+}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Exported function for filling application's Queue table
+///        with current process' addresses
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
+__urdlllocal ur_result_t UR_APICALL urGetQueueProcAddrTable(
+    ur_api_version_t version, ///< [in] API version requested
+    ur_queue_dditable_t
+        *pDdiTable ///< [in,out] pointer to table of DDI function pointers
+) {
+    if (nullptr == pDdiTable) {
+        return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+    }
+
+    if (UR_MAJOR_VERSION(ur_sanitizer_layer::context.version) !=
+            UR_MAJOR_VERSION(version) ||
+        UR_MINOR_VERSION(ur_sanitizer_layer::context.version) >
+            UR_MINOR_VERSION(version)) {
+        return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
+    }
+
+    ur_result_t result = UR_RESULT_SUCCESS;
+
+    pDdiTable->pfnCreate = ur_sanitizer_layer::urQueueCreate;
+    pDdiTable->pfnRelease = ur_sanitizer_layer::urQueueRelease;
+
+    return result;
+}
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Exported function for filling application's USM table
+///        with current process' addresses
+///
+/// @returns
+///     - ::UR_RESULT_SUCCESS
+///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
+__urdlllocal ur_result_t UR_APICALL urGetUSMProcAddrTable(
+    ur_api_version_t version, ///< [in] API version requested
+    ur_usm_dditable_t
+        *pDdiTable ///< [in,out] pointer to table of DDI function pointers
+) {
+    if (nullptr == pDdiTable) {
+        return UR_RESULT_ERROR_INVALID_NULL_POINTER;
+    }
+
+    if (UR_MAJOR_VERSION(ur_sanitizer_layer::context.version) !=
+            UR_MAJOR_VERSION(version) ||
+        UR_MINOR_VERSION(ur_sanitizer_layer::context.version) >
+            UR_MINOR_VERSION(version)) {
+        return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
+    }
+
+    ur_result_t result = UR_RESULT_SUCCESS;
+
+    // NOTE(review): urUSMHostAlloc, urUSMSharedAlloc and urUSMFree are defined
+    // in this file but only the device allocation intercept is installed
+    // here - confirm whether that is intentional.
+    pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::urUSMDeviceAlloc;
+
+    return result;
+}
+
+/// @brief Select the sanitizer from the enabled layer names and, for
+/// AddressSanitizer, save the incoming DDI table and install the intercepts.
+///
+/// Any selection other than AddressSanitizer returns success without touching
+/// the table. Missing VirtualMem/PhysicalMem entry points are reported through
+/// die().
+ur_result_t context_t::init(ur_dditable_t *dditable,
+                            const std::set<std::string> &enabledLayerNames,
+                            [[maybe_unused]] codeloc_data codelocData) {
+    ur_result_t result = UR_RESULT_SUCCESS;
+
+    if (enabledLayerNames.count("UR_LAYER_ASAN")) {
+        context.enabledType = SanitizerType::AddressSanitizer;
+    } else if (enabledLayerNames.count("UR_LAYER_MSAN")) {
+        context.enabledType = SanitizerType::MemorySanitizer;
+    } else if (enabledLayerNames.count("UR_LAYER_TSAN")) {
+        context.enabledType = SanitizerType::ThreadSanitizer;
+    }
+
+    // Only support AddressSanitizer now
+    if (context.enabledType != SanitizerType::AddressSanitizer) {
+        return result;
+    }
+
+    // From here on the enabled type is AddressSanitizer.
+    if (!(dditable->VirtualMem.pfnReserve && dditable->VirtualMem.pfnMap &&
+          dditable->VirtualMem.pfnGranularityGetInfo)) {
+        die("Some VirtualMem APIs are needed to enable UR_LAYER_ASAN");
+    }
+
+    if (!dditable->PhysicalMem.pfnCreate) {
+        die("Some PhysicalMem APIs are needed to enable UR_LAYER_ASAN");
+    }
+
+    // Keep a copy of the original entry points before they are overwritten;
+    // the intercept functions call through this copy.
+    urDdiTable = *dditable;
+
+    if (UR_RESULT_SUCCESS == result) {
+        result = ur_sanitizer_layer::urGetContextProcAddrTable(
+            UR_API_VERSION_CURRENT, &dditable->Context);
+    }
+
+    if (UR_RESULT_SUCCESS == result) {
+        result = ur_sanitizer_layer::urGetEnqueueProcAddrTable(
+            UR_API_VERSION_CURRENT, &dditable->Enqueue);
+    }
+
+    if (UR_RESULT_SUCCESS == result) {
+        result = ur_sanitizer_layer::urGetQueueProcAddrTable(
+            UR_API_VERSION_CURRENT, &dditable->Queue);
+    }
+
+    if (UR_RESULT_SUCCESS == result) {
+        result = ur_sanitizer_layer::urGetUSMProcAddrTable(
+            UR_API_VERSION_CURRENT, &dditable->USM);
+    }
+
+    return result;
+}
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp b/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp
new file mode 100644
index 0000000000..0df123b6c2
--- /dev/null
+++ b/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp
@@ -0,0 +1,30 @@
+/*
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file ur_sanitizer_layer.cpp
+ *
+ */
+
+#include "ur_sanitizer_layer.hpp"
+#include "asan_interceptor.hpp"
+
+namespace ur_sanitizer_layer {
+// The single layer context object, declared `extern` in ur_sanitizer_layer.hpp.
+context_t context;
+
+///////////////////////////////////////////////////////////////////////////////
+/// Creates the interceptor and a logger named "sanitizer".
+context_t::context_t()
+    : interceptor(std::make_unique<SanitizerInterceptor>()),
+      logger(logger::create_logger("sanitizer")) {}
+
+/// The layer always reports itself as available.
+bool context_t::isAvailable() const { return true; }
+
+/// Nothing is torn down here; always succeeds.
+ur_result_t context_t::tearDown() { return UR_RESULT_SUCCESS; }
+
+///////////////////////////////////////////////////////////////////////////////
+context_t::~context_t() {}
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp b/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp
new file mode 100644
index 0000000000..018d9f4a80
--- /dev/null
+++ b/source/loader/layers/sanitizer/ur_sanitizer_layer.hpp
@@ -0,0 +1,55 @@
+/*
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file ur_sanitizer_layer.hpp
+ *
+ */
+
+#pragma once
+
+#include "logger/ur_logger.hpp"
+#include "ur_proxy_layer.hpp"
+
+#define SANITIZER_COMP_NAME "sanitizer layer"
+
+namespace ur_sanitizer_layer {
+
+class SanitizerInterceptor;
+
+/// Which sanitizer the layer was asked to enable; None until init() picks one.
+enum class SanitizerType {
+    None,
+    AddressSanitizer,
+    MemorySanitizer,
+    ThreadSanitizer,
+};
+
+///////////////////////////////////////////////////////////////////////////////
+/// Proxy-layer context for the sanitizer layer.
+class __urdlllocal context_t : public proxy_layer_context_t {
+  public:
+    /// Copy of the DDI table taken in init() before intercepts are installed.
+    ur_dditable_t urDdiTable = {};
+    std::unique_ptr<SanitizerInterceptor> interceptor;
+    logger::Logger logger;
+    SanitizerType enabledType = SanitizerType::None;
+
+    context_t();
+    ~context_t();
+
+    bool isAvailable() const override;
+
+    /// Layer names this context answers to.
+    std::vector<std::string> getNames() const override {
+        return {"UR_LAYER_ASAN", "UR_LAYER_MSAN", "UR_LAYER_TSAN"};
+    }
+    ur_result_t init(ur_dditable_t *dditable,
+                     const std::set<std::string> &enabledLayerNames,
+                     codeloc_data codelocData) override;
+
+    ur_result_t tearDown() override;
+};
+
+extern context_t context;
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/ur_lib.hpp b/source/loader/ur_lib.hpp
index 9d1e02a67e..41ab7cb52e 100644
--- a/source/loader/ur_lib.hpp
+++ b/source/loader/ur_lib.hpp
@@ -23,6 +23,9 @@
 #if UR_ENABLE_TRACING
 #include "tracing/ur_tracing_layer.hpp"
 #endif
+#if UR_ENABLE_SANITIZER
+#include "sanitizer/ur_sanitizer_layer.hpp"
+#endif
 
 #include
 #include
@@ -69,7 +72,10 @@ class __urdlllocal context_t {
     const std::vector layers = {
         &ur_validation_layer::context,
 #if UR_ENABLE_TRACING
-        &ur_tracing_layer::context
+        &ur_tracing_layer::context,
+#endif
+#if UR_ENABLE_SANITIZER
+        &ur_sanitizer_layer::context
 #endif
     };
     std::string availableLayers;