diff --git a/CMakeLists.txt b/CMakeLists.txt index fd5fd91cf8..c09b5f0a3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,15 @@ if(UR_ENABLE_TRACING) endif() if(UR_ENABLE_SANITIZER) - add_compile_definitions(UR_ENABLE_SANITIZER) + if(APPLE) + message(WARNING "Sanitizer layer isn't supported on macOS") + set(UR_ENABLE_SANITIZER OFF) + elseif(WIN32) + message(WARNING "Sanitizer layer isn't supported on Windows") + set(UR_ENABLE_SANITIZER OFF) + else() + add_compile_definitions(UR_ENABLE_SANITIZER) + endif() endif() if(UR_USE_ASAN) diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index 255e934f94..474fa6c79b 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -114,7 +114,14 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp ) + + target_sources(ur_loader + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/linux/san_utils.cpp + ) + target_include_directories(ur_loader PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer" "${CMAKE_CURRENT_SOURCE_DIR}/../" ) endif() diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index 118eb858d0..88a3b464c2 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -12,7 +12,6 @@ */ #include "asan_interceptor.hpp" -#include "device_sanitizer_report.hpp" #include "ur_sanitizer_layer.hpp" namespace ur_sanitizer_layer { @@ -29,11 +28,20 @@ constexpr int kMemBufferRedzoneMagic = (char)0x84; constexpr auto kSPIR_AsanShadowMemoryGlobalStart = "__AsanShadowMemoryGlobalStart"; constexpr auto kSPIR_AsanShadowMemoryGlobalEnd = "__AsanShadowMemoryGlobalEnd"; +constexpr auto kSPIR_AsanShadowMemoryLocalStart = + "__AsanShadowMemoryLocalStart"; +constexpr auto kSPIR_AsanShadowMemoryLocalEnd = "__AsanShadowMemoryLocalEnd"; + +constexpr auto kSPIR_DeviceType = "__DeviceType"; constexpr auto kSPIR_DeviceSanitizerReportMem = "__DeviceSanitizerReportMem"; DeviceSanitizerReport SPIR_DeviceSanitizerReportMem; +uptr MemToShadow_CPU(uptr USM_SHADOW_BASE, uptr UPtr) { + return USM_SHADOW_BASE + (UPtr >> 3); +} + uptr MemToShadow_PVC(uptr USM_SHADOW_BASE, uptr UPtr) { if (UPtr & 0xFF00000000000000ULL) { // Device USM return USM_SHADOW_BASE + 0x200000000000ULL + @@ -70,8 +78,42 @@ ur_program_handle_t getProgram(ur_kernel_handle_t Kernel) { return Program; } +size_t getLocalMemorySize(ur_device_handle_t Device) { + size_t LocalMemorySize; + [[maybe_unused]] auto Result = context.urDdiTable.Device.pfnGetInfo( + Device, UR_DEVICE_INFO_LOCAL_MEM_SIZE, sizeof(LocalMemorySize), + &LocalMemorySize, nullptr); + assert(Result == UR_RESULT_SUCCESS); + return LocalMemorySize; +} + +std::string getKernelName(ur_kernel_handle_t Kernel) { + size_t KernelNameSize = 0; + [[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetInfo( + Kernel, UR_KERNEL_INFO_FUNCTION_NAME, 0, nullptr, &KernelNameSize); + assert(Res == UR_RESULT_SUCCESS); + + std::vector KernelNameBuf(KernelNameSize); + Res = context.urDdiTable.Kernel.pfnGetInfo( + Kernel, UR_KERNEL_INFO_FUNCTION_NAME, KernelNameSize, + KernelNameBuf.data(), nullptr); + assert(Res == UR_RESULT_SUCCESS); + + return std::string(KernelNameBuf.data(), KernelNameSize - 1); +} + } // namespace +SanitizerInterceptor::SanitizerInterceptor() + : m_IsInASanContext(IsInASanContext()), + m_ShadowMemInited(m_IsInASanContext) {} + +SanitizerInterceptor::~SanitizerInterceptor() { + if (!m_IsInASanContext && m_ShadowMemInited && !DestroyShadowMem()) { + context.logger.error("Failed to destroy shadow memory"); + } +} + /// The memory chunk allocated from the underlying allocator looks like this: /// L L L L L L U U U U U U R R /// L -- left redzone words (0 or more bytes) @@ -191,10 +233,12 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context, (void *)AllocInfo->AllocBegin); } -bool SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, - ur_queue_handle_t Queue, - ur_event_handle_t &Event) { - prepareLaunch(Queue, Kernel); +ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + ur_event_handle_t &Event, + LaunchInfo &LaunchInfo, + uint32_t numWorkgroup) { + UR_CALL(prepareLaunch(Queue, Kernel, LaunchInfo, numWorkgroup)); UR_CALL(updateShadowMemory(Queue)); @@ -207,32 +251,35 @@ bool SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, Event = QueueInfo->LastEvent; QueueInfo->LastEvent = nullptr; - return true; + return UR_RESULT_SUCCESS; } void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - ur_event_handle_t &Event) { + ur_event_handle_t &Event, + LaunchInfo &LaunchInfo) { auto Program = getProgram(Kernel); ur_event_handle_t ReadEvent{}; // If kernel has defined SPIR_DeviceSanitizerReportMem, then we try to read it // to host, but it's okay that it isn't defined + // FIXME: We must use block operation here, until we support urEventSetCallback auto Result = context.urDdiTable.Enqueue.pfnDeviceGlobalVariableRead( Queue, Program, kSPIR_DeviceSanitizerReportMem, true, - sizeof(SPIR_DeviceSanitizerReportMem), 0, - &SPIR_DeviceSanitizerReportMem, 1, &Event, &ReadEvent); + sizeof(LaunchInfo.SPIR_DeviceSanitizerReportMem), 0, + &LaunchInfo.SPIR_DeviceSanitizerReportMem, 1, &Event, &ReadEvent); if (Result == UR_RESULT_SUCCESS) { Event = ReadEvent; - auto AH = &SPIR_DeviceSanitizerReportMem; + auto AH = &LaunchInfo.SPIR_DeviceSanitizerReportMem; if (!AH->Flag) { return; } const char *File = AH->File[0] ? AH->File : ""; const char *Func = AH->Func[0] ? AH->Func : ""; + auto KernelName = getKernelName(Kernel); context.logger.always("\n====ERROR: DeviceSanitizer: {} on {}", DeviceSanitizerFormat(AH->ErrorType), @@ -240,52 +287,57 @@ void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, context.logger.always( "{} of size {} at kernel <{}> LID({}, {}, {}) GID({}, " "{}, {})", - AH->IsWrite ? "WRITE" : "READ", AH->AccessSize, Func, AH->LID0, - AH->LID1, AH->LID2, AH->GID0, AH->GID1, AH->GID2); + AH->IsWrite ? "WRITE" : "READ", AH->AccessSize, KernelName.c_str(), + AH->LID0, AH->LID1, AH->LID2, AH->GID0, AH->GID1, AH->GID2); context.logger.always(" #0 {} {}:{}", Func, File, AH->Line); if (!AH->IsRecover) { - abort(); + exit(1); } } } -std::string SanitizerInterceptor::getKernelName(ur_kernel_handle_t Kernel) { - size_t KernelNameSize = 0; - [[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetInfo( - Kernel, UR_KERNEL_INFO_FUNCTION_NAME, 0, nullptr, &KernelNameSize); - assert(Res == UR_RESULT_SUCCESS); - - std::vector KernelNameBuf(KernelNameSize); - Res = context.urDdiTable.Kernel.pfnGetInfo( - Kernel, UR_KERNEL_INFO_FUNCTION_NAME, KernelNameSize, - KernelNameBuf.data(), nullptr); - assert(Res == UR_RESULT_SUCCESS); - - return std::string(KernelNameBuf.data(), KernelNameSize - 1); -} - ur_result_t SanitizerInterceptor::allocShadowMemory( ur_context_handle_t Context, std::shared_ptr &DeviceInfo) { if (DeviceInfo->Type == DeviceType::CPU) { - context.logger.error("Unsupport device type"); - return UR_RESULT_ERROR_INVALID_ARGUMENT; + if (!m_IsInASanContext) { + static std::once_flag OnceFlag; + bool Result = true; + std::call_once(OnceFlag, [&]() { + Result = m_ShadowMemInited = SetupShadowMem(); + }); + + if (!Result) { + context.logger.error("Failed to allocate shadow memory"); + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } + } + + DeviceInfo->ShadowOffset = LOW_SHADOW_BEGIN; + DeviceInfo->ShadowOffsetEnd = HIGH_SHADOW_END; } else if (DeviceInfo->Type == DeviceType::GPU_PVC) { /// SHADOW MEMORY MAPPING (PVC, with CPU 47bit) /// Host/Shared USM : 0x0 ~ 0x0fff_ffff_ffff /// ? : 0x1000_0000_0000 ~ 0x1fff_ffff_ffff /// Device USM : 0x2000_0000_0000 ~ 0x3fff_ffff_ffff constexpr size_t SHADOW_SIZE = 1ULL << 46; - - // TODO: Protect Bad Zone - auto Result = context.urDdiTable.VirtualMem.pfnReserve( - Context, nullptr, SHADOW_SIZE, (void **)&DeviceInfo->ShadowOffset); - if (Result != UR_RESULT_SUCCESS) { - context.logger.error("Failed to allocate shadow memory on PVC: {}", - Result); - return Result; + // FIXME: Currently, Level-Zero doesn't create independent VAs for each contexts, + // which will cause out-of-resource error when users use multiple contexts + static uptr ShadowOffset, ShadowOffsetEnd; + + if (!ShadowOffset) { + // TODO: Protect Bad Zone + auto Result = context.urDdiTable.VirtualMem.pfnReserve( + Context, nullptr, SHADOW_SIZE, (void **)&ShadowOffset); + if (Result != UR_RESULT_SUCCESS) { + context.logger.error( + "Failed to allocate shadow memory on PVC: {}", Result); + return Result; + } + ShadowOffsetEnd = ShadowOffset + SHADOW_SIZE; } - DeviceInfo->ShadowOffsetEnd = DeviceInfo->ShadowOffset + SHADOW_SIZE; + DeviceInfo->ShadowOffset = ShadowOffset; + DeviceInfo->ShadowOffsetEnd = ShadowOffsetEnd; } else { context.logger.error("Unsupport device type"); return UR_RESULT_ERROR_INVALID_ARGUMENT; @@ -301,23 +353,40 @@ ur_result_t SanitizerInterceptor::enqueueMemSetShadow( ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, ur_event_handle_t DepEvent, ur_event_handle_t *OutEvent) { - uint32_t NumEventsInWaitList = DepEvent ? 1 : 0; - const ur_event_handle_t *EventsWaitList = DepEvent ? &DepEvent : nullptr; - auto ContextInfo = getContextInfo(Context); auto DeviceInfo = ContextInfo->getDeviceInfo(Device); if (DeviceInfo->Type == DeviceType::CPU) { - context.logger.error("Unsupport device type"); - return UR_RESULT_ERROR_INVALID_ARGUMENT; - } else if (DeviceInfo->Type == DeviceType::GPU_PVC) { - ur_event_handle_t InternalEvent{}; - ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + uptr ShadowBegin = MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr); + uptr ShadowEnd = + MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr + Size - 1); + + // Poison shadow memory outside of asan runtime is not allowed, so we + // need to avoid memset's call from being intercepted. + static auto MemSet = + (void *(*)(void *, int, size_t))GetMemFunctionPointer("memset"); + if (!MemSet) { + context.logger.error( + "Failed to get 'memset' function from libc.so.6"); + return UR_RESULT_ERROR_UNKNOWN; + } + MemSet((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + context.logger.debug( + "enqueueMemSetShadow (addr={}, count={}, value={})", + (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, + (void *)(size_t)Value); + } else if (DeviceInfo->Type == DeviceType::GPU_PVC) { uptr ShadowBegin = MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr); uptr ShadowEnd = MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr + Size - 1); + uint32_t NumEventsInWaitList = DepEvent ? 1 : 0; + const ur_event_handle_t *EventsWaitList = + DepEvent ? &DepEvent : nullptr; + ur_event_handle_t InternalEvent{}; + ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent; + { static const size_t PageSize = [Context, Device]() { size_t Size; @@ -382,9 +451,12 @@ ur_result_t SanitizerInterceptor::enqueueMemSetShadow( const char Pattern[] = {(char)Value}; auto URes = context.urDdiTable.Enqueue.pfnUSMFill( - Queue, (void *)ShadowBegin, 1, Pattern, - (ShadowEnd - ShadowBegin + 1), NumEventsInWaitList, EventsWaitList, - Event); + Queue, (void *)ShadowBegin, 1, Pattern, ShadowEnd - ShadowBegin + 1, + NumEventsInWaitList, EventsWaitList, Event); + context.logger.debug( + "enqueueMemSetShadow (addr={}, count={}, value={}): {}", + (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, + (void *)(size_t)Value, URes); if (URes != UR_RESULT_SUCCESS) { context.logger.error("urEnqueueUSMFill(): {}", URes); return URes; @@ -463,17 +535,13 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) { auto ContextInfo = getContextInfo(Context); - auto HostInfo = ContextInfo->getDeviceInfo(nullptr); auto DeviceInfo = ContextInfo->getDeviceInfo(Device); auto QueueInfo = ContextInfo->getQueueInfo(Queue); - std::shared_lock HostGuard(HostInfo->Mutex, - std::defer_lock); std::unique_lock DeviceGuard(DeviceInfo->Mutex, std::defer_lock); - std::scoped_lock, - std::unique_lock, ur_mutex> - Guard(HostGuard, DeviceGuard, QueueInfo->Mutex); + std::scoped_lock, ur_mutex> Guard( + DeviceGuard, QueueInfo->Mutex); ur_event_handle_t LastEvent = QueueInfo->LastEvent; @@ -501,6 +569,7 @@ ur_result_t SanitizerInterceptor::eraseContext(ur_context_handle_t Context) { std::scoped_lock Guard(m_ContextMapMutex); assert(m_ContextMap.find(Context) != m_ContextMap.end()); m_ContextMap.erase(Context); + // TODO: Remove devices in each context return UR_RESULT_SUCCESS; } @@ -559,12 +628,16 @@ ur_result_t SanitizerInterceptor::eraseQueue(ur_context_handle_t Context, return UR_RESULT_SUCCESS; } -void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue, - ur_kernel_handle_t Kernel) { +ur_result_t SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue, + ur_kernel_handle_t Kernel, + LaunchInfo &LaunchInfo, + uint32_t numWorkgroup) { auto Context = getContext(Queue); auto Device = getDevice(Queue); auto Program = getProgram(Kernel); + LaunchInfo.Context = Context; + auto ContextInfo = getContextInfo(Context); auto DeviceInfo = ContextInfo->getDeviceInfo(Device); auto QueueInfo = ContextInfo->getQueueInfo(Queue); @@ -572,7 +645,7 @@ void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue, std::scoped_lock Guard(QueueInfo->Mutex); ur_event_handle_t LastEvent = QueueInfo->LastEvent; - { + do { // Set global variable to program auto EnqueueWriteGlobal = [&](const char *Name, const void *Value) { ur_event_handle_t NewEvent{}; @@ -592,14 +665,80 @@ void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue, return true; }; - // Device shadow memory offset + // Write shadow memory offset for global memory EnqueueWriteGlobal(kSPIR_AsanShadowMemoryGlobalStart, &DeviceInfo->ShadowOffset); EnqueueWriteGlobal(kSPIR_AsanShadowMemoryGlobalEnd, &DeviceInfo->ShadowOffsetEnd); - } + + // Write device type + EnqueueWriteGlobal(kSPIR_DeviceType, &DeviceInfo->Type); + + if (DeviceInfo->Type == DeviceType::CPU) { + break; + } + + // Write shadow memory offset for local memory + auto LocalMemorySize = getLocalMemorySize(Device); + auto LocalShadowMemorySize = + (numWorkgroup * LocalMemorySize) >> ASAN_SHADOW_SCALE; + + context.logger.info("LocalInfo(WorkGroup={}, LocalMemorySize={}, " + "LocalShadowMemorySize={})", + numWorkgroup, LocalMemorySize, + LocalShadowMemorySize); + + ur_usm_desc_t Desc{UR_STRUCTURE_TYPE_USM_HOST_DESC, nullptr, 0, 0}; + auto Result = context.urDdiTable.USM.pfnDeviceAlloc( + Context, Device, &Desc, nullptr, LocalShadowMemorySize, + (void **)&LaunchInfo.LocalShadowOffset); + if (Result != UR_RESULT_SUCCESS) { + context.logger.error( + "Failed to allocate shadow memory for local memory: {}", + numWorkgroup, Result); + context.logger.error("Maybe the number of workgroup too large"); + return Result; + } + LaunchInfo.LocalShadowOffsetEnd = + LaunchInfo.LocalShadowOffset + LocalShadowMemorySize - 1; + + EnqueueWriteGlobal(kSPIR_AsanShadowMemoryLocalStart, + &LaunchInfo.LocalShadowOffset); + EnqueueWriteGlobal(kSPIR_AsanShadowMemoryLocalEnd, + &LaunchInfo.LocalShadowOffsetEnd); + + { + ur_event_handle_t NewEvent{}; + uint32_t NumEvents = LastEvent ? 1 : 0; + const ur_event_handle_t *EventsList = + LastEvent ? &LastEvent : nullptr; + const char Pattern[] = {0}; + + auto URes = context.urDdiTable.Enqueue.pfnUSMFill( + Queue, (void *)LaunchInfo.LocalShadowOffset, 1, Pattern, + LocalShadowMemorySize, NumEvents, EventsList, &NewEvent); + if (URes != UR_RESULT_SUCCESS) { + context.logger.error("urEnqueueUSMFill(): {}", URes); + return URes; + } + LastEvent = NewEvent; + } + + context.logger.info("ShadowMemory(Local, {} - {})", + (void *)LaunchInfo.LocalShadowOffset, + (void *)LaunchInfo.LocalShadowOffsetEnd); + } while (false); QueueInfo->LastEvent = LastEvent; + return UR_RESULT_SUCCESS; +} + +LaunchInfo::~LaunchInfo() { + if (LocalShadowOffset) { + [[maybe_unused]] auto Result = + context.urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset); + assert(Result == UR_RESULT_SUCCESS); + } } } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp index edad3f926e..0753b53d63 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -6,13 +6,14 @@ * See LICENSE.TXT * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * - * @file ur_sanitizer_layer.cpp + * @file asan_interceptor.hpp * */ #pragma once #include "common.hpp" +#include "device_sanitizer_report.hpp" #include #include @@ -80,8 +81,26 @@ struct ContextInfo { std::map> AllocatedUSMMap; }; +struct LaunchInfo { + uptr LocalShadowOffset; + uptr LocalShadowOffsetEnd; + ur_context_handle_t Context; + + DeviceSanitizerReport SPIR_DeviceSanitizerReportMem; + + size_t LocalWorkSize[3]; + + LaunchInfo() + : LocalShadowOffset(0), LocalShadowOffsetEnd(0), Context(nullptr) {} + ~LaunchInfo(); +}; + class SanitizerInterceptor { public: + SanitizerInterceptor(); + + ~SanitizerInterceptor(); + ur_result_t allocateMemory(ur_context_handle_t Context, ur_device_handle_t Device, const ur_usm_desc_t *Properties, @@ -89,10 +108,12 @@ class SanitizerInterceptor { void **ResultPtr, USMMemoryType Type); ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr); - bool preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - ur_event_handle_t &Event); + ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + ur_event_handle_t &Event, + LaunchInfo &LaunchInfo, uint32_t numWorkgroup); void postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, - ur_event_handle_t &Event); + ur_event_handle_t &Event, LaunchInfo &LaunchInfo); ur_result_t insertContext(ur_context_handle_t Context); ur_result_t eraseContext(ur_context_handle_t Context); @@ -114,9 +135,10 @@ class SanitizerInterceptor { ur_event_handle_t &LastEvent); /// Initialize Global Variables & Kernel Name at first Launch - void prepareLaunch(ur_queue_handle_t Queue, ur_kernel_handle_t Kernel); + ur_result_t prepareLaunch(ur_queue_handle_t Queue, + ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo, + uint32_t numWorkgroup); - std::string getKernelName(ur_kernel_handle_t Kernel); ur_result_t allocShadowMemory(ur_context_handle_t Context, std::shared_ptr &DeviceInfo); ur_result_t enqueueMemSetShadow(ur_context_handle_t Context, @@ -136,6 +158,9 @@ class SanitizerInterceptor { std::unordered_map> m_ContextMap; ur_shared_mutex m_ContextMapMutex; + + bool m_IsInASanContext; + bool m_ShadowMemInited; }; } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/common.hpp b/source/loader/layers/sanitizer/common.hpp index 8b80814b9c..001d4f2e5d 100644 --- a/source/loader/layers/sanitizer/common.hpp +++ b/source/loader/layers/sanitizer/common.hpp @@ -30,6 +30,18 @@ using u32 = unsigned int; constexpr unsigned ASAN_SHADOW_SCALE = 3; constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE; +// Based on "compiler-rt/lib/asan/asan_mapping.h" +// Typical shadow mapping on Linux/x86_64 with SHADOW_OFFSET == 0x00007fff8000: +constexpr uptr LOW_SHADOW_BEGIN = 0x00007fff8000ULL; +constexpr uptr LOW_SHADOW_END = 0x00008fff6fffULL; +constexpr uptr SHADOW_GAP_BEGIN = 0x00008fff7000ULL; +constexpr uptr SHADOW_GAP_END = 0x02008fff6fffULL; +constexpr uptr HIGH_SHADOW_BEGIN = 0x02008fff7000ULL; +constexpr uptr HIGH_SHADOW_END = 0x10007fff7fffULL; +constexpr uptr LOW_SHADOW_SIZE = LOW_SHADOW_END - LOW_SHADOW_BEGIN; +constexpr uptr SHADOW_GAP_SIZE = SHADOW_GAP_END - SHADOW_GAP_BEGIN; +constexpr uptr HIGH_SHADOW_SIZE = HIGH_SHADOW_END - HIGH_SHADOW_BEGIN; + inline constexpr bool IsPowerOfTwo(uptr x) { return (x & (x - 1)) == 0 && x != 0; } @@ -87,4 +99,12 @@ inline constexpr uptr ComputeRZLog(uptr user_requested_size) { #define UR_ASSERT_EQ(Call, Result) (void)Call #endif +bool IsInASanContext(); + +bool SetupShadowMem(); + +bool DestroyShadowMem(); + +void *GetMemFunctionPointer(const char *); + } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/linux/san_utils.cpp b/source/loader/layers/sanitizer/linux/san_utils.cpp new file mode 100644 index 0000000000..fa28d0a9d2 --- /dev/null +++ b/source/loader/layers/sanitizer/linux/san_utils.cpp @@ -0,0 +1,85 @@ +//===----------------------------------------------------------------------===// +/* + * + * Copyright (C) 2023 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file san_utils.cpp + * + */ + +#include "common.hpp" + +#include +#include +#include +#include + +extern "C" __attribute__((weak)) void __asan_init(void); + +namespace ur_sanitizer_layer { + +bool IsInASanContext() { return __asan_init != nullptr; } + +static bool ReserveShadowMem(uptr Addr, uptr Size) { + Size = RoundUpTo(Size, EXEC_PAGESIZE); + Addr = RoundDownTo(Addr, EXEC_PAGESIZE); + void *P = + mmap((void *)Addr, Size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANONYMOUS, -1, 0); + return Addr == (uptr)P; +} + +static bool ProtectShadowGap(uptr Addr, uptr Size) { + void *P = + mmap((void *)Addr, Size, PROT_NONE, + MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANONYMOUS, -1, 0); + return Addr == (uptr)P; +} + +bool SetupShadowMem() { + if (!ReserveShadowMem(LOW_SHADOW_BEGIN, LOW_SHADOW_SIZE)) { + return false; + } + + if (!ReserveShadowMem(HIGH_SHADOW_BEGIN, HIGH_SHADOW_SIZE)) { + return false; + } + + if (!ProtectShadowGap(SHADOW_GAP_BEGIN, SHADOW_GAP_SIZE)) { + return false; + } + return true; +} + +bool DestroyShadowMem() { + if (munmap((void *)LOW_SHADOW_BEGIN, LOW_SHADOW_SIZE) == -1) { + return false; + } + + if (munmap((void *)HIGH_SHADOW_BEGIN, HIGH_SHADOW_SIZE) == -1) { + return false; + } + + if (munmap((void *)SHADOW_GAP_BEGIN, SHADOW_GAP_SIZE) == -1) { + return false; + } + return true; +} + +void *GetMemFunctionPointer(const char *FuncName) { + void *handle = dlopen(LIBC_SO, RTLD_LAZY); + if (!handle) { + return (void *)nullptr; + } + void *ptr = dlsym(handle, FuncName); + if (!ptr) { + return (void *)nullptr; + } + return ptr; +} + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp index 64f54752ca..7bd49e4ff0 100644 --- a/source/loader/layers/sanitizer/ur_sanddi.cpp +++ b/source/loader/layers/sanitizer/ur_sanddi.cpp @@ -33,6 +33,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urUSMHostAlloc"); + return context.interceptor->allocateMemory( hContext, nullptr, pUSMDesc, pool, size, ppMem, USMMemoryType::HOST); } @@ -56,6 +58,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urUSMDeviceAlloc"); + return context.interceptor->allocateMemory( hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::DEVICE); } @@ -79,6 +83,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urUSMSharedAlloc"); + return context.interceptor->allocateMemory( hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::SHARE); } @@ -95,6 +101,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMFree( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urUSMFree"); + return context.interceptor->releaseMemory(hContext, pMem); } @@ -114,6 +122,8 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urQueueCreate"); + ur_result_t result = pfnCreate(hContext, hDevice, pProperties, phQueue); if (result == UR_RESULT_SUCCESS) { result = context.interceptor->insertQueue(hContext, *phQueue); @@ -133,6 +143,8 @@ __urdlllocal ur_result_t UR_APICALL urQueueRelease( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urQueueRelease"); + ur_context_handle_t hContext; UR_CALL(context.urDdiTable.Queue.pfnGetInfo(hQueue, UR_QUEUE_INFO_CONTEXT, sizeof(ur_context_handle_t), @@ -181,16 +193,35 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_event_handle_t hPreEvent{}; - std::vector events(numEventsInWaitList + 1); - for (unsigned i = 0; i < numEventsInWaitList; ++i) { - events.push_back(phEventWaitList[i]); + context.logger.debug("==== urEnqueueKernelLaunch"); + + LaunchInfo LaunchInfo; + const size_t *pUserLocalWorkSize = pLocalWorkSize; + if (!pUserLocalWorkSize) { + pUserLocalWorkSize = LaunchInfo.LocalWorkSize; + // FIXME: This is W/A until urKernelSuggestGroupSize is added + LaunchInfo.LocalWorkSize[0] = 1; + LaunchInfo.LocalWorkSize[1] = 1; + LaunchInfo.LocalWorkSize[2] = 1; } - // launchKernel must append to num_events_in_wait_list, not prepend - context.interceptor->preLaunchKernel(hKernel, hQueue, hPreEvent); + uint32_t numWork = 1; + for (uint32_t dim = 0; dim < workDim; ++dim) { + numWork *= (pGlobalWorkSize[dim] + pUserLocalWorkSize[dim] - 1) / + pUserLocalWorkSize[dim]; + } + + std::vector hEvents; + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + hEvents.push_back(phEventWaitList[i]); + } + + // preLaunchKernel must append to num_events_in_wait_list, not prepend + ur_event_handle_t hPreEvent{}; + UR_CALL(context.interceptor->preLaunchKernel(hKernel, hQueue, hPreEvent, + LaunchInfo, numWork)); if (hPreEvent) { - events.push_back(hPreEvent); + hEvents.push_back(hPreEvent); } ur_event_handle_t hEvent{}; @@ -199,7 +230,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( pLocalWorkSize, numEventsInWaitList, phEventWaitList, &hEvent); if (result == UR_RESULT_SUCCESS) { - context.interceptor->postLaunchKernel(hKernel, hQueue, hEvent); + context.interceptor->postLaunchKernel(hKernel, hQueue, hEvent, + LaunchInfo); } if (phEvent) { @@ -226,6 +258,8 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urContextCreate"); + ur_result_t result = pfnCreate(numDevices, phDevices, pProperties, phContext); @@ -266,6 +300,8 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urContextCreateWithNativeHandle"); + ur_result_t result = pfnCreateWithNativeHandle( hNativeContext, numDevices, phDevices, pProperties, phContext); @@ -297,6 +333,8 @@ __urdlllocal ur_result_t UR_APICALL urContextRelease( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + context.logger.debug("==== urContextRelease"); + UR_CALL(context.interceptor->eraseContext(hContext)); ur_result_t result = pfnRelease(hContext); @@ -425,6 +463,9 @@ __urdlllocal ur_result_t UR_APICALL urGetUSMProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::urUSMDeviceAlloc; + pDdiTable->pfnHostAlloc = ur_sanitizer_layer::urUSMHostAlloc; + pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::urUSMSharedAlloc; + pDdiTable->pfnFree = ur_sanitizer_layer::urUSMFree; return result; }