diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd5fd91cf8..c09b5f0a3c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -126,7 +126,15 @@ if(UR_ENABLE_TRACING)
 endif()
 
 if(UR_ENABLE_SANITIZER)
-    add_compile_definitions(UR_ENABLE_SANITIZER)
+    if(APPLE)
+        message(WARNING "Sanitizer layer isn't supported on macOS")
+        set(UR_ENABLE_SANITIZER OFF) # warn and turn the option off instead of failing
+    elseif(WIN32)
+        message(WARNING "Sanitizer layer isn't supported on Windows")
+        set(UR_ENABLE_SANITIZER OFF) # same fallback as the macOS branch
+    else()
+        add_compile_definitions(UR_ENABLE_SANITIZER) # defines the preprocessor macro
+    endif()
 endif()
 
 if(UR_USE_ASAN)
diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt
index 255e934f94..474fa6c79b 100644
--- a/source/loader/CMakeLists.txt
+++ b/source/loader/CMakeLists.txt
@@ -114,7 +114,14 @@ if(UR_ENABLE_SANITIZER)
         ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp
         ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp
     )
+
+    target_sources(ur_loader
+        PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/linux/san_utils.cpp
+    )
+
     target_include_directories(ur_loader PRIVATE
+        "${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer"
         "${CMAKE_CURRENT_SOURCE_DIR}/../"
     )
 endif()
diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp
index 118eb858d0..88a3b464c2 100644
--- a/source/loader/layers/sanitizer/asan_interceptor.cpp
+++ b/source/loader/layers/sanitizer/asan_interceptor.cpp
@@ -12,7 +12,6 @@
  */
 
 #include "asan_interceptor.hpp"
-#include "device_sanitizer_report.hpp"
 #include "ur_sanitizer_layer.hpp"
 
 namespace ur_sanitizer_layer {
@@ -29,11 +28,20 @@ constexpr int kMemBufferRedzoneMagic = (char)0x84;
 constexpr auto kSPIR_AsanShadowMemoryGlobalStart =
     "__AsanShadowMemoryGlobalStart";
 constexpr auto kSPIR_AsanShadowMemoryGlobalEnd = "__AsanShadowMemoryGlobalEnd";
+constexpr auto kSPIR_AsanShadowMemoryLocalStart =
+    "__AsanShadowMemoryLocalStart";
+constexpr auto kSPIR_AsanShadowMemoryLocalEnd = "__AsanShadowMemoryLocalEnd";
+
+constexpr auto kSPIR_DeviceType = "__DeviceType";
 
 constexpr auto kSPIR_DeviceSanitizerReportMem = "__DeviceSanitizerReportMem";
 
 DeviceSanitizerReport SPIR_DeviceSanitizerReportMem;
 
+uptr MemToShadow_CPU(uptr USM_SHADOW_BASE, uptr UPtr) {
+    return USM_SHADOW_BASE + (UPtr >> ASAN_SHADOW_SCALE); // shared scale const
+}
+
 uptr MemToShadow_PVC(uptr USM_SHADOW_BASE, uptr UPtr) {
     if (UPtr & 0xFF00000000000000ULL) { // Device USM
         return USM_SHADOW_BASE + 0x200000000000ULL +
@@ -70,8 +78,42 @@ ur_program_handle_t getProgram(ur_kernel_handle_t Kernel) {
     return Program;
 }
 
+size_t getLocalMemorySize(ur_device_handle_t Device) {
+    size_t LocalMemorySize = 0; // defined result even if the query fails
+    [[maybe_unused]] auto Result = context.urDdiTable.Device.pfnGetInfo(
+        Device, UR_DEVICE_INFO_LOCAL_MEM_SIZE, sizeof(LocalMemorySize),
+        &LocalMemorySize, nullptr);
+    assert(Result == UR_RESULT_SUCCESS);
+    return LocalMemorySize;
+}
+
+std::string getKernelName(ur_kernel_handle_t Kernel) {
+    size_t KernelNameSize = 0;
+    [[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetInfo(
+        Kernel, UR_KERNEL_INFO_FUNCTION_NAME, 0, nullptr, &KernelNameSize);
+    assert(Res == UR_RESULT_SUCCESS);
+    std::vector<char> KernelNameBuf(KernelNameSize);
+    Res = context.urDdiTable.Kernel.pfnGetInfo(
+        Kernel, UR_KERNEL_INFO_FUNCTION_NAME, KernelNameSize,
+        KernelNameBuf.data(), nullptr);
+    assert(Res == UR_RESULT_SUCCESS);
+    // Drop the last byte of the reply; guard the unsigned "- 1" for size 0.
+    const size_t NameLength = KernelNameSize ? KernelNameSize - 1 : 0;
+    return std::string(KernelNameBuf.data(), NameLength);
+}
+
 } // namespace
 
+SanitizerInterceptor::SanitizerInterceptor()
+    : m_IsInASanContext(IsInASanContext()),
+      m_ShadowMemInited(m_IsInASanContext) {} // starts out "inited" under ASan
+
+SanitizerInterceptor::~SanitizerInterceptor() { // unmaps self-mapped shadow
+    if (!m_IsInASanContext && m_ShadowMemInited && !DestroyShadowMem()) {
+        context.logger.error("Failed to destroy shadow memory");
+    }
+}
+
 /// The memory chunk allocated from the underlying allocator looks like this:
 /// L L L L L L U U U U U U R R
 ///   L -- left redzone words (0 or more bytes)
@@ -191,10 +233,12 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context,
                                           (void *)AllocInfo->AllocBegin);
 }
 
-bool SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
-                                           ur_queue_handle_t Queue,
-                                           ur_event_handle_t &Event) {
-    prepareLaunch(Queue, Kernel);
+ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
+                                                  ur_queue_handle_t Queue,
+                                                  ur_event_handle_t &Event,
+                                                  LaunchInfo &LaunchInfo,
+                                                  uint32_t numWorkgroup) {
+    UR_CALL(prepareLaunch(Queue, Kernel, LaunchInfo, numWorkgroup));
 
     UR_CALL(updateShadowMemory(Queue));
 
@@ -207,32 +251,35 @@ bool SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
     Event = QueueInfo->LastEvent;
     QueueInfo->LastEvent = nullptr;
 
-    return true;
+    return UR_RESULT_SUCCESS;
 }
 
 void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
                                             ur_queue_handle_t Queue,
-                                            ur_event_handle_t &Event) {
+                                            ur_event_handle_t &Event,
+                                            LaunchInfo &LaunchInfo) {
     auto Program = getProgram(Kernel);
     ur_event_handle_t ReadEvent{};
 
     // If kernel has defined SPIR_DeviceSanitizerReportMem, then we try to read it
     // to host, but it's okay that it isn't defined
+    // FIXME: We must use block operation here, until we support urEventSetCallback
     auto Result = context.urDdiTable.Enqueue.pfnDeviceGlobalVariableRead(
         Queue, Program, kSPIR_DeviceSanitizerReportMem, true,
-        sizeof(SPIR_DeviceSanitizerReportMem), 0,
-        &SPIR_DeviceSanitizerReportMem, 1, &Event, &ReadEvent);
+        sizeof(LaunchInfo.SPIR_DeviceSanitizerReportMem), 0,
+        &LaunchInfo.SPIR_DeviceSanitizerReportMem, 1, &Event, &ReadEvent);
 
     if (Result == UR_RESULT_SUCCESS) {
         Event = ReadEvent;
 
-        auto AH = &SPIR_DeviceSanitizerReportMem;
+        auto AH = &LaunchInfo.SPIR_DeviceSanitizerReportMem;
         if (!AH->Flag) {
             return;
         }
 
         const char *File = AH->File[0] ? AH->File : "<unknown file>";
         const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
+        auto KernelName = getKernelName(Kernel);
 
         context.logger.always("\n====ERROR: DeviceSanitizer: {} on {}",
                               DeviceSanitizerFormat(AH->ErrorType),
@@ -240,52 +287,57 @@ void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
         context.logger.always(
             "{} of size {} at kernel <{}> LID({}, {}, {}) GID({}, "
             "{}, {})",
-            AH->IsWrite ? "WRITE" : "READ", AH->AccessSize, Func, AH->LID0,
-            AH->LID1, AH->LID2, AH->GID0, AH->GID1, AH->GID2);
+            AH->IsWrite ? "WRITE" : "READ", AH->AccessSize, KernelName.c_str(),
+            AH->LID0, AH->LID1, AH->LID2, AH->GID0, AH->GID1, AH->GID2);
         context.logger.always("  #0 {} {}:{}", Func, File, AH->Line);
         if (!AH->IsRecover) {
-            abort();
+            exit(1);
         }
     }
 }
 
-std::string SanitizerInterceptor::getKernelName(ur_kernel_handle_t Kernel) {
-    size_t KernelNameSize = 0;
-    [[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetInfo(
-        Kernel, UR_KERNEL_INFO_FUNCTION_NAME, 0, nullptr, &KernelNameSize);
-    assert(Res == UR_RESULT_SUCCESS);
-
-    std::vector<char> KernelNameBuf(KernelNameSize);
-    Res = context.urDdiTable.Kernel.pfnGetInfo(
-        Kernel, UR_KERNEL_INFO_FUNCTION_NAME, KernelNameSize,
-        KernelNameBuf.data(), nullptr);
-    assert(Res == UR_RESULT_SUCCESS);
-
-    return std::string(KernelNameBuf.data(), KernelNameSize - 1);
-}
-
 ur_result_t SanitizerInterceptor::allocShadowMemory(
     ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo) {
     if (DeviceInfo->Type == DeviceType::CPU) {
-        context.logger.error("Unsupport device type");
-        return UR_RESULT_ERROR_INVALID_ARGUMENT;
+        if (!m_IsInASanContext) {
+            // Set up the host shadow mapping once. Check the stored flag, not
+            // a per-call local: callers that do not run the call_once body
+            // must still observe a failed setup.
+            static std::once_flag OnceFlag;
+            std::call_once(OnceFlag,
+                           [&]() { m_ShadowMemInited = SetupShadowMem(); });
+            if (!m_ShadowMemInited) {
+                context.logger.error("Failed to allocate shadow memory");
+                return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+            }
+        }
+
+        DeviceInfo->ShadowOffset = LOW_SHADOW_BEGIN;
+        DeviceInfo->ShadowOffsetEnd = HIGH_SHADOW_END;
     } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
         /// SHADOW MEMORY MAPPING (PVC, with CPU 47bit)
         ///   Host/Shared USM : 0x0              ~ 0x0fff_ffff_ffff
         ///   ?               : 0x1000_0000_0000 ~ 0x1fff_ffff_ffff
         ///   Device USM      : 0x2000_0000_0000 ~ 0x3fff_ffff_ffff
         constexpr size_t SHADOW_SIZE = 1ULL << 46;
-
-        // TODO: Protect Bad Zone
-        auto Result = context.urDdiTable.VirtualMem.pfnReserve(
-            Context, nullptr, SHADOW_SIZE, (void **)&DeviceInfo->ShadowOffset);
-        if (Result != UR_RESULT_SUCCESS) {
-            context.logger.error("Failed to allocate shadow memory on PVC: {}",
-                                 Result);
-            return Result;
+        // FIXME: Currently, Level-Zero doesn't create independent VAs for each contexts,
+        // which will cause out-of-resource error when users use multiple contexts
+        static uptr ShadowOffset, ShadowOffsetEnd;
+
+        if (!ShadowOffset) {
+            // TODO: Protect Bad Zone
+            auto Result = context.urDdiTable.VirtualMem.pfnReserve(
+                Context, nullptr, SHADOW_SIZE, (void **)&ShadowOffset);
+            if (Result != UR_RESULT_SUCCESS) {
+                context.logger.error(
+                    "Failed to allocate shadow memory on PVC: {}", Result);
+                return Result;
+            }
+            ShadowOffsetEnd = ShadowOffset + SHADOW_SIZE;
         }
 
-        DeviceInfo->ShadowOffsetEnd = DeviceInfo->ShadowOffset + SHADOW_SIZE;
+        DeviceInfo->ShadowOffset = ShadowOffset;
+        DeviceInfo->ShadowOffsetEnd = ShadowOffsetEnd;
     } else {
         context.logger.error("Unsupport device type");
         return UR_RESULT_ERROR_INVALID_ARGUMENT;
@@ -301,23 +353,40 @@ ur_result_t SanitizerInterceptor::enqueueMemSetShadow(
     ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value,
     ur_event_handle_t DepEvent, ur_event_handle_t *OutEvent) {
 
-    uint32_t NumEventsInWaitList = DepEvent ? 1 : 0;
-    const ur_event_handle_t *EventsWaitList = DepEvent ? &DepEvent : nullptr;
-
     auto ContextInfo = getContextInfo(Context);
     auto DeviceInfo = ContextInfo->getDeviceInfo(Device);
 
     if (DeviceInfo->Type == DeviceType::CPU) {
-        context.logger.error("Unsupport device type");
-        return UR_RESULT_ERROR_INVALID_ARGUMENT;
-    } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
-        ur_event_handle_t InternalEvent{};
-        ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
+        uptr ShadowBegin = MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr);
+        uptr ShadowEnd =
+            MemToShadow_CPU(DeviceInfo->ShadowOffset, Ptr + Size - 1);
+
+        // Poison shadow memory outside of asan runtime is not allowed, so we
+        // need to avoid memset's call from being intercepted.
+        static auto MemSet =
+            (void *(*)(void *, int, size_t))GetMemFunctionPointer("memset");
+        if (!MemSet) {
+            context.logger.error(
+                "Failed to get 'memset' function from libc.so.6");
+            return UR_RESULT_ERROR_UNKNOWN;
+        }
 
+        MemSet((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1);
+        context.logger.debug(
+            "enqueueMemSetShadow (addr={}, count={}, value={})",
+            (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1,
+            (void *)(size_t)Value);
+    } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
         uptr ShadowBegin = MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr);
         uptr ShadowEnd =
             MemToShadow_PVC(DeviceInfo->ShadowOffset, Ptr + Size - 1);
 
+        uint32_t NumEventsInWaitList = DepEvent ? 1 : 0;
+        const ur_event_handle_t *EventsWaitList =
+            DepEvent ? &DepEvent : nullptr;
+        ur_event_handle_t InternalEvent{};
+        ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
+
         {
             static const size_t PageSize = [Context, Device]() {
                 size_t Size;
@@ -382,9 +451,12 @@ ur_result_t SanitizerInterceptor::enqueueMemSetShadow(
 
         const char Pattern[] = {(char)Value};
         auto URes = context.urDdiTable.Enqueue.pfnUSMFill(
-            Queue, (void *)ShadowBegin, 1, Pattern,
-            (ShadowEnd - ShadowBegin + 1), NumEventsInWaitList, EventsWaitList,
-            Event);
+            Queue, (void *)ShadowBegin, 1, Pattern, ShadowEnd - ShadowBegin + 1,
+            NumEventsInWaitList, EventsWaitList, Event);
+        context.logger.debug(
+            "enqueueMemSetShadow (addr={}, count={}, value={}): {}",
+            (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1,
+            (void *)(size_t)Value, URes);
         if (URes != UR_RESULT_SUCCESS) {
             context.logger.error("urEnqueueUSMFill(): {}", URes);
             return URes;
@@ -463,17 +535,13 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) {
 
     auto ContextInfo = getContextInfo(Context);
 
-    auto HostInfo = ContextInfo->getDeviceInfo(nullptr);
     auto DeviceInfo = ContextInfo->getDeviceInfo(Device);
     auto QueueInfo = ContextInfo->getQueueInfo(Queue);
 
-    std::shared_lock<ur_shared_mutex> HostGuard(HostInfo->Mutex,
-                                                std::defer_lock);
     std::unique_lock<ur_shared_mutex> DeviceGuard(DeviceInfo->Mutex,
                                                   std::defer_lock);
-    std::scoped_lock<std::shared_lock<ur_shared_mutex>,
-                     std::unique_lock<ur_shared_mutex>, ur_mutex>
-        Guard(HostGuard, DeviceGuard, QueueInfo->Mutex);
+    std::scoped_lock<std::unique_lock<ur_shared_mutex>, ur_mutex> Guard(
+        DeviceGuard, QueueInfo->Mutex);
 
     ur_event_handle_t LastEvent = QueueInfo->LastEvent;
 
@@ -501,6 +569,7 @@ ur_result_t SanitizerInterceptor::eraseContext(ur_context_handle_t Context) {
     std::scoped_lock<ur_shared_mutex> Guard(m_ContextMapMutex);
     assert(m_ContextMap.find(Context) != m_ContextMap.end());
     m_ContextMap.erase(Context);
+    // TODO: Remove devices in each context
     return UR_RESULT_SUCCESS;
 }
 
@@ -559,12 +628,16 @@ ur_result_t SanitizerInterceptor::eraseQueue(ur_context_handle_t Context,
     return UR_RESULT_SUCCESS;
 }
 
-void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue,
-                                         ur_kernel_handle_t Kernel) {
+ur_result_t SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue,
+                                                ur_kernel_handle_t Kernel,
+                                                LaunchInfo &LaunchInfo,
+                                                uint32_t numWorkgroup) {
     auto Context = getContext(Queue);
     auto Device = getDevice(Queue);
     auto Program = getProgram(Kernel);
 
+    LaunchInfo.Context = Context;
+
     auto ContextInfo = getContextInfo(Context);
     auto DeviceInfo = ContextInfo->getDeviceInfo(Device);
     auto QueueInfo = ContextInfo->getQueueInfo(Queue);
@@ -572,7 +645,7 @@ void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue,
     std::scoped_lock<ur_mutex> Guard(QueueInfo->Mutex);
     ur_event_handle_t LastEvent = QueueInfo->LastEvent;
 
-    {
+    do {
         // Set global variable to program
         auto EnqueueWriteGlobal = [&](const char *Name, const void *Value) {
             ur_event_handle_t NewEvent{};
@@ -592,14 +665,80 @@ void SanitizerInterceptor::prepareLaunch(ur_queue_handle_t Queue,
             return true;
         };
 
-        // Device shadow memory offset
+        // Write shadow memory offset for global memory
         EnqueueWriteGlobal(kSPIR_AsanShadowMemoryGlobalStart,
                            &DeviceInfo->ShadowOffset);
         EnqueueWriteGlobal(kSPIR_AsanShadowMemoryGlobalEnd,
                            &DeviceInfo->ShadowOffsetEnd);
-    }
+
+        // Write device type
+        EnqueueWriteGlobal(kSPIR_DeviceType, &DeviceInfo->Type);
+
+        if (DeviceInfo->Type == DeviceType::CPU) {
+            break;
+        }
+
+        // Write shadow memory offset for local memory
+        auto LocalMemorySize = getLocalMemorySize(Device);
+        auto LocalShadowMemorySize =
+            (numWorkgroup * LocalMemorySize) >> ASAN_SHADOW_SCALE;
+
+        context.logger.info("LocalInfo(WorkGroup={}, LocalMemorySize={}, "
+                            "LocalShadowMemorySize={})",
+                            numWorkgroup, LocalMemorySize,
+                            LocalShadowMemorySize);
+
+        ur_usm_desc_t Desc{UR_STRUCTURE_TYPE_USM_DESC, nullptr, 0, 0};
+        auto Result = context.urDdiTable.USM.pfnDeviceAlloc(
+            Context, Device, &Desc, nullptr, LocalShadowMemorySize,
+            (void **)&LaunchInfo.LocalShadowOffset);
+        if (Result != UR_RESULT_SUCCESS) {
+            context.logger.error(
+                "Failed to allocate shadow memory for local memory: {}",
+                Result); // the single placeholder must receive the error code
+            context.logger.error("Maybe the number of workgroup too large");
+            return Result;
+        }
+        LaunchInfo.LocalShadowOffsetEnd =
+            LaunchInfo.LocalShadowOffset + LocalShadowMemorySize - 1;
+
+        EnqueueWriteGlobal(kSPIR_AsanShadowMemoryLocalStart,
+                           &LaunchInfo.LocalShadowOffset);
+        EnqueueWriteGlobal(kSPIR_AsanShadowMemoryLocalEnd,
+                           &LaunchInfo.LocalShadowOffsetEnd);
+
+        {
+            ur_event_handle_t NewEvent{};
+            uint32_t NumEvents = LastEvent ? 1 : 0;
+            const ur_event_handle_t *EventsList =
+                LastEvent ? &LastEvent : nullptr;
+            const char Pattern[] = {0};
+
+            auto URes = context.urDdiTable.Enqueue.pfnUSMFill(
+                Queue, (void *)LaunchInfo.LocalShadowOffset, 1, Pattern,
+                LocalShadowMemorySize, NumEvents, EventsList, &NewEvent);
+            if (URes != UR_RESULT_SUCCESS) {
+                context.logger.error("urEnqueueUSMFill(): {}", URes);
+                return URes;
+            }
+            LastEvent = NewEvent;
+        }
+
+        context.logger.info("ShadowMemory(Local, {} - {})",
+                            (void *)LaunchInfo.LocalShadowOffset,
+                            (void *)LaunchInfo.LocalShadowOffsetEnd);
+    } while (false);
 
     QueueInfo->LastEvent = LastEvent;
+    return UR_RESULT_SUCCESS;
+}
+
+LaunchInfo::~LaunchInfo() {
+    if (LocalShadowOffset) { // zero-initialized; filled in by prepareLaunch
+        [[maybe_unused]] auto Result =
+            context.urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset);
+        assert(Result == UR_RESULT_SUCCESS);
+    }
+}
 
 } // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp
index edad3f926e..0753b53d63 100644
--- a/source/loader/layers/sanitizer/asan_interceptor.hpp
+++ b/source/loader/layers/sanitizer/asan_interceptor.hpp
@@ -6,13 +6,14 @@
  * See LICENSE.TXT
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
- * @file ur_sanitizer_layer.cpp
+ * @file asan_interceptor.hpp
  *
  */
 
 #pragma once
 
 #include "common.hpp"
+#include "device_sanitizer_report.hpp"
 
 #include <map>
 #include <memory>
@@ -80,8 +81,26 @@ struct ContextInfo {
     std::map<uptr, std::shared_ptr<USMAllocInfo>> AllocatedUSMMap;
 };
 
+struct LaunchInfo {
+    uptr LocalShadowOffset = 0;
+    uptr LocalShadowOffsetEnd = 0;
+    ur_context_handle_t Context = nullptr;
+    DeviceSanitizerReport SPIR_DeviceSanitizerReportMem;
+    size_t LocalWorkSize[3] = {};
+
+    LaunchInfo() = default;
+    // The destructor frees LocalShadowOffset, so copies would free it twice.
+    LaunchInfo(const LaunchInfo &) = delete;
+    LaunchInfo &operator=(const LaunchInfo &) = delete;
+    ~LaunchInfo();
+};
+
 class SanitizerInterceptor {
   public:
+    SanitizerInterceptor();
+
+    ~SanitizerInterceptor();
+
     ur_result_t allocateMemory(ur_context_handle_t Context,
                                ur_device_handle_t Device,
                                const ur_usm_desc_t *Properties,
@@ -89,10 +108,12 @@ class SanitizerInterceptor {
                                void **ResultPtr, USMMemoryType Type);
     ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr);
 
-    bool preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue,
-                         ur_event_handle_t &Event);
+    ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel,
+                                ur_queue_handle_t Queue,
+                                ur_event_handle_t &Event,
+                                LaunchInfo &LaunchInfo, uint32_t numWorkgroup);
     void postLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue,
-                          ur_event_handle_t &Event);
+                          ur_event_handle_t &Event, LaunchInfo &LaunchInfo);
 
     ur_result_t insertContext(ur_context_handle_t Context);
     ur_result_t eraseContext(ur_context_handle_t Context);
@@ -114,9 +135,10 @@ class SanitizerInterceptor {
                                  ur_event_handle_t &LastEvent);
 
     /// Initialize Global Variables & Kernel Name at first Launch
-    void prepareLaunch(ur_queue_handle_t Queue, ur_kernel_handle_t Kernel);
+    ur_result_t prepareLaunch(ur_queue_handle_t Queue,
+                              ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo,
+                              uint32_t numWorkgroup);
 
-    std::string getKernelName(ur_kernel_handle_t Kernel);
     ur_result_t allocShadowMemory(ur_context_handle_t Context,
                                   std::shared_ptr<DeviceInfo> &DeviceInfo);
     ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
@@ -136,6 +158,9 @@ class SanitizerInterceptor {
     std::unordered_map<ur_context_handle_t, std::shared_ptr<ContextInfo>>
         m_ContextMap;
     ur_shared_mutex m_ContextMapMutex;
+
+    bool m_IsInASanContext;
+    bool m_ShadowMemInited;
 };
 
 } // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/common.hpp b/source/loader/layers/sanitizer/common.hpp
index 8b80814b9c..001d4f2e5d 100644
--- a/source/loader/layers/sanitizer/common.hpp
+++ b/source/loader/layers/sanitizer/common.hpp
@@ -30,6 +30,18 @@ using u32 = unsigned int;
 constexpr unsigned ASAN_SHADOW_SCALE = 3;
 constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE;
 
+// Based on "compiler-rt/lib/asan/asan_mapping.h"
+// Typical shadow mapping on Linux/x86_64 with SHADOW_OFFSET == 0x00007fff8000:
+constexpr uptr LOW_SHADOW_BEGIN = 0x00007fff8000ULL;
+constexpr uptr LOW_SHADOW_END = 0x00008fff6fffULL; // *_END: last byte, inclusive
+constexpr uptr SHADOW_GAP_BEGIN = 0x00008fff7000ULL;
+constexpr uptr SHADOW_GAP_END = 0x02008fff6fffULL;
+constexpr uptr HIGH_SHADOW_BEGIN = 0x02008fff7000ULL;
+constexpr uptr HIGH_SHADOW_END = 0x10007fff7fffULL;
+constexpr uptr LOW_SHADOW_SIZE = LOW_SHADOW_END - LOW_SHADOW_BEGIN + 1;
+constexpr uptr SHADOW_GAP_SIZE = SHADOW_GAP_END - SHADOW_GAP_BEGIN + 1;
+constexpr uptr HIGH_SHADOW_SIZE = HIGH_SHADOW_END - HIGH_SHADOW_BEGIN + 1;
+
 inline constexpr bool IsPowerOfTwo(uptr x) {
     return (x & (x - 1)) == 0 && x != 0;
 }
@@ -87,4 +99,12 @@ inline constexpr uptr ComputeRZLog(uptr user_requested_size) {
 #define UR_ASSERT_EQ(Call, Result) (void)Call
 #endif
 
+bool IsInASanContext();
+
+bool SetupShadowMem();
+
+bool DestroyShadowMem();
+
+void *GetMemFunctionPointer(const char *);
+
 } // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/linux/san_utils.cpp b/source/loader/layers/sanitizer/linux/san_utils.cpp
new file mode 100644
index 0000000000..fa28d0a9d2
--- /dev/null
+++ b/source/loader/layers/sanitizer/linux/san_utils.cpp
@@ -0,0 +1,85 @@
+//===----------------------------------------------------------------------===//
+/*
+ *
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file san_utils.cpp
+ *
+ */
+
+#include "common.hpp"
+
+#include <asm/param.h>
+#include <dlfcn.h>
+#include <gnu/lib-names.h>
+#include <sys/mman.h>
+
+extern "C" __attribute__((weak)) void __asan_init(void);
+
+namespace ur_sanitizer_layer {
+
+bool IsInASanContext() { return __asan_init != nullptr; } // weak: null if undefined
+
+static bool ReserveShadowMem(uptr Addr, uptr Size) {
+    const uptr End = RoundUpTo(Addr + Size, EXEC_PAGESIZE); // align outwards
+    Addr = RoundDownTo(Addr, EXEC_PAGESIZE);
+    void *P =
+        mmap((void *)Addr, End - Addr, PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANONYMOUS, -1, 0);
+    return Addr == (uptr)P;
+}
+
+static bool ProtectShadowGap(uptr Addr, uptr Size) {
+    // PROT_NONE mapping placed with MAP_FIXED; succeeds only if it lands at Addr.
+    const int Flags = MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANONYMOUS;
+    void *Mapped = mmap((void *)Addr, Size, PROT_NONE, Flags, -1, 0);
+    return (uptr)Mapped == Addr;
+}
+
+bool SetupShadowMem() {
+    // Reserve the low and high shadow ranges, then make the gap between them
+    // inaccessible. Each step runs only if the previous one succeeded (&&
+    // short-circuits). Nothing is unmapped here when a later step fails.
+    const bool LowReserved =
+        ReserveShadowMem(LOW_SHADOW_BEGIN, LOW_SHADOW_SIZE);
+    const bool HighReserved =
+        LowReserved && ReserveShadowMem(HIGH_SHADOW_BEGIN, HIGH_SHADOW_SIZE);
+
+    const bool GapProtected =
+        HighReserved && ProtectShadowGap(SHADOW_GAP_BEGIN, SHADOW_GAP_SIZE);
+
+    return GapProtected;
+}
+
+bool DestroyShadowMem() {
+    // Attempt all three unmaps so one failure does not skip the other regions.
+    bool Success = true;
+    if (munmap((void *)LOW_SHADOW_BEGIN, LOW_SHADOW_SIZE) == -1) {
+        Success = false;
+    }
+    if (munmap((void *)HIGH_SHADOW_BEGIN, HIGH_SHADOW_SIZE) == -1) {
+        Success = false;
+    }
+    if (munmap((void *)SHADOW_GAP_BEGIN, SHADOW_GAP_SIZE) == -1) {
+        Success = false;
+    }
+    return Success;
+}
+
+void *GetMemFunctionPointer(const char *FuncName) {
+    // Open libc (LIBC_SO) and look the requested symbol up in it. The library
+    // handle is not dlclose()d by this function.
+    void *LibC = dlopen(LIBC_SO, RTLD_LAZY);
+    if (LibC == nullptr) {
+        return nullptr;
+    }
+
+    // dlsym() reports a missing symbol as nullptr, which is returned as-is.
+    return dlsym(LibC, FuncName);
+}
+
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp
index 64f54752ca..7bd49e4ff0 100644
--- a/source/loader/layers/sanitizer/ur_sanddi.cpp
+++ b/source/loader/layers/sanitizer/ur_sanddi.cpp
@@ -33,6 +33,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urUSMHostAlloc");
+
     return context.interceptor->allocateMemory(
         hContext, nullptr, pUSMDesc, pool, size, ppMem, USMMemoryType::HOST);
 }
@@ -56,6 +58,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urUSMDeviceAlloc");
+
     return context.interceptor->allocateMemory(
         hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::DEVICE);
 }
@@ -79,6 +83,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urUSMSharedAlloc");
+
     return context.interceptor->allocateMemory(
         hContext, hDevice, pUSMDesc, pool, size, ppMem, USMMemoryType::SHARE);
 }
@@ -95,6 +101,8 @@ __urdlllocal ur_result_t UR_APICALL urUSMFree(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urUSMFree");
+
     return context.interceptor->releaseMemory(hContext, pMem);
 }
 
@@ -114,6 +122,8 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urQueueCreate");
+
     ur_result_t result = pfnCreate(hContext, hDevice, pProperties, phQueue);
     if (result == UR_RESULT_SUCCESS) {
         result = context.interceptor->insertQueue(hContext, *phQueue);
@@ -133,6 +143,8 @@ __urdlllocal ur_result_t UR_APICALL urQueueRelease(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urQueueRelease");
+
     ur_context_handle_t hContext;
     UR_CALL(context.urDdiTable.Queue.pfnGetInfo(hQueue, UR_QUEUE_INFO_CONTEXT,
                                                 sizeof(ur_context_handle_t),
@@ -181,16 +193,35 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
-    ur_event_handle_t hPreEvent{};
-    std::vector<ur_event_handle_t> events(numEventsInWaitList + 1);
-    for (unsigned i = 0; i < numEventsInWaitList; ++i) {
-        events.push_back(phEventWaitList[i]);
+    context.logger.debug("==== urEnqueueKernelLaunch");
+
+    LaunchInfo LaunchInfo;
+    const size_t *pUserLocalWorkSize = pLocalWorkSize;
+    if (!pUserLocalWorkSize) {
+        pUserLocalWorkSize = LaunchInfo.LocalWorkSize;
+        // FIXME: This is W/A until urKernelSuggestGroupSize is added
+        LaunchInfo.LocalWorkSize[0] = 1;
+        LaunchInfo.LocalWorkSize[1] = 1;
+        LaunchInfo.LocalWorkSize[2] = 1;
     }
 
-    // launchKernel must append to num_events_in_wait_list, not prepend
-    context.interceptor->preLaunchKernel(hKernel, hQueue, hPreEvent);
+    uint32_t numWork = 1;
+    for (uint32_t dim = 0; dim < workDim; ++dim) {
+        numWork *= (pGlobalWorkSize[dim] + pUserLocalWorkSize[dim] - 1) /
+                   pUserLocalWorkSize[dim];
+    }
+
+    std::vector<ur_event_handle_t> hEvents;
+    for (uint32_t i = 0; i < numEventsInWaitList; ++i) {
+        hEvents.push_back(phEventWaitList[i]);
+    }
+
+    // preLaunchKernel must append to num_events_in_wait_list, not prepend
+    ur_event_handle_t hPreEvent{};
+    UR_CALL(context.interceptor->preLaunchKernel(hKernel, hQueue, hPreEvent,
+                                                 LaunchInfo, numWork));
     if (hPreEvent) {
-        events.push_back(hPreEvent);
+        hEvents.push_back(hPreEvent);
     }
 
     ur_event_handle_t hEvent{};
@@ -199,7 +230,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
         pLocalWorkSize, numEventsInWaitList, phEventWaitList, &hEvent);
 
     if (result == UR_RESULT_SUCCESS) {
-        context.interceptor->postLaunchKernel(hKernel, hQueue, hEvent);
+        context.interceptor->postLaunchKernel(hKernel, hQueue, hEvent,
+                                              LaunchInfo);
     }
 
     if (phEvent) {
@@ -226,6 +258,8 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urContextCreate");
+
     ur_result_t result =
         pfnCreate(numDevices, phDevices, pProperties, phContext);
 
@@ -266,6 +300,8 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urContextCreateWithNativeHandle");
+
     ur_result_t result = pfnCreateWithNativeHandle(
         hNativeContext, numDevices, phDevices, pProperties, phContext);
 
@@ -297,6 +333,8 @@ __urdlllocal ur_result_t UR_APICALL urContextRelease(
         return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
     }
 
+    context.logger.debug("==== urContextRelease");
+
     UR_CALL(context.interceptor->eraseContext(hContext));
     ur_result_t result = pfnRelease(hContext);
 
@@ -425,6 +463,9 @@ __urdlllocal ur_result_t UR_APICALL urGetUSMProcAddrTable(
     ur_result_t result = UR_RESULT_SUCCESS;
 
     pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::urUSMDeviceAlloc;
+    pDdiTable->pfnHostAlloc = ur_sanitizer_layer::urUSMHostAlloc;
+    pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::urUSMSharedAlloc;
+    pDdiTable->pfnFree = ur_sanitizer_layer::urUSMFree;
 
     return result;
 }