diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index e3c6c2d2ee34f..6c728f316fc89 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -60,6 +60,61 @@ Compilation::~Compilation() {
       delete Arg.second;
 }
 
+static void HandleXarchArgs(DerivedArgList *OffloadArgList, const Driver &D,
+                            bool IsDevice) {
+  if (!OffloadArgList)
+    return;
+
+  if (IsDevice && !OffloadArgList->hasArg(options::OPT_Xarch_device))
+    return;
+
+  if (!IsDevice && !OffloadArgList->hasArg(options::OPT_Xarch_host))
+    return;
+
+  bool NeedHandle = false;
+  std::vector<std::string> XarchValues;
+  XarchValues = IsDevice
+                    ? OffloadArgList->getAllArgValues(options::OPT_Xarch_device)
+                    : OffloadArgList->getAllArgValues(options::OPT_Xarch_host);
+  SmallVector<StringRef, 8> XarchValueRefs;
+  for (auto XarchV : XarchValues) {
+    if (XarchV.find(' ') != std::string::npos) {
+      NeedHandle = true;
+      StringRef XarchVRef(XarchV);
+      SmallVector<StringRef, 8> XarchVecs;
+      XarchVRef.trim().split(XarchVecs, ' ', -1, false);
+      size_t Index;
+      const size_t XSize = XarchVecs.size();
+      for (Index = 0; Index < XSize; ++Index) {
+        if (XarchVecs[Index].compare("-mllvm") == 0) {
+          if (Index < (XSize - 1)) {
+            XarchValueRefs.push_back(OffloadArgList->MakeArgStringRef(
+                (StringRef("-mllvm=") + XarchVecs[Index + 1]).str()));
+            Index++;
+            continue;
+          } else
+            D.Diag(clang::diag::err_drv_missing_argument) << "-mllvm" << 1;
+        } else
+          XarchValueRefs.push_back(
+              OffloadArgList->MakeArgStringRef(XarchVecs[Index]));
+      }
+    } else
+      XarchValueRefs.push_back(StringRef(XarchV));
+  }
+
+  if (NeedHandle) {
+    auto Xarch_OPT =
+        IsDevice ? options::OPT_Xarch_device : options::OPT_Xarch_host;
+    OffloadArgList->eraseArg(Xarch_OPT);
+    for (auto XarchV : XarchValueRefs) {
+      Arg *A = OffloadArgList->MakeSeparateArg(
+          nullptr, D.getOpts().getOption(Xarch_OPT), XarchV);
+      A->claim();
+      OffloadArgList->append(A);
+    }
+  }
+}
+
 const DerivedArgList &
 Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
                                  Action::OffloadKind DeviceOffloadKind) {
@@ -82,9 +137,11 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
 
     DerivedArgList *NewDAL = nullptr;
     if (!OffloadArgs) {
+      HandleXarchArgs(TranslatedArgs, getDriver(), false);
       NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch,
                                       DeviceOffloadKind, &AllocatedArgs);
     } else {
+      HandleXarchArgs(OffloadArgs, getDriver(), true);
       NewDAL = TC->TranslateXarchArgs(*OffloadArgs, BoundArch, DeviceOffloadKind,
                                       &AllocatedArgs);
       if (!NewDAL)
diff --git a/clang/test/Driver/sycl-xarch.cpp b/clang/test/Driver/sycl-xarch.cpp
new file mode 100644
index 0000000000000..200a8de643170
--- /dev/null
+++ b/clang/test/Driver/sycl-xarch.cpp
@@ -0,0 +1,162 @@
+///
+/// Perform several driver tests for SYCL -Xarch_device/host on Linux
+///
+
+// UNSUPPORTED: system-windows
+
+/// ###########################################################################
+
+/// test behavior of -Xarch_device with 1 option for SYCL compiler, the flag
+/// should be passed to device compilation only.
+// RUN: %clangxx -fsycl %s -Xarch_device -fsanitize=address -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTION
+// RUN: %clangxx -fsycl %s -Xarch_device -fsanitize=address -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_ONLY
+// SYCL_XARCH_DEVICE_OPTION: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_DEVICE_OPTION-SAME: -fsanitize=address
+// SYCL_XARCH_DEVICE_OPTION-SAME: -fsanitize-address-use-after-return=never
+// SYCL_XARCH_DEVICE_OPTION-SAME: -fno-sanitize-address-use-after-scope
+// SYCL_XARCH_DEVICE_OPTION-SAME: "-mllvm" "-asan-instrumentation-with-call-threshold=0"
+// SYCL_XARCH_DEVICE_OPTION-SAME: "-mllvm" "-asan-stack=0"
+// SYCL_XARCH_DEVICE_OPTION-SAME: "-mllvm" "-asan-globals=0"
+// SYCL_XARCH_DEVICE_ONLY: llc{{.*}} "-filetype=obj"
+// SYCL_XARCH_DEVICE_ONLY-NOT: fsanitize=address
+
+/// test behavior of -Xarch_device with multiple options for SYCL compiler, the
+/// flags should be passed to device compilation only.
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS1
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS1
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS2
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS3
+// SYCL_XARCH_DEVICE_OPTIONS1: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_DEVICE_OPTIONS1-SAME: -fsanitize=address
+// SYCL_XARCH_DEVICE_OPTIONS1-SAME: -fsanitize-address-use-after-return=never
+// SYCL_XARCH_DEVICE_OPTIONS1-SAME: -fno-sanitize-address-use-after-scope
+// SYCL_XARCH_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-instrumentation-with-call-threshold=0"
+// SYCL_XARCH_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-stack=0"
+// SYCL_XARCH_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-globals=0"
+// SYCL_XARCH_DEVICE_OPTIONS2: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_DEVICE_OPTIONS2-SAME: XARCH_DEVICE_TEST
+// SYCL_XARCH_DEVICE_OPTIONS3: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_DEVICE_OPTIONS3-SAME: "-mllvm" "-enable-merge-functions"
+
+
+/// test behavior of -Xarch_host with 1 option for SYCL compiler, the flag
+/// should be passed to host compilation only.
+// RUN: %clangxx -fsycl %s -Xarch_host -fsanitize=address -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTION
+// RUN: %clangxx -fsycl %s -Xarch_host -fsanitize=address -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_ONLY
+// SYCL_XARCH_HOST_OPTION: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_OPTION-SAME: -fsanitize=address
+// SYCL_XARCH_HOST_OPTION-SAME: -fsanitize-address-use-after-scope
+// SYCL_XARCH_HOST_OPTION-NEXT: libclang_rt.asan
+// SYCL_XARCH_HOST_ONLY: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_HOST_ONLY-NOT: -fsanitize=address
+// SYCL_XARCH_HOST_ONLY: clang{{.*}} "-fsycl-is-host"
+
+/// test behavior of -Xarch_host with multiple options for SYCL compiler, the
+/// flags should be passed to host compilation only.
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -DXARCH_HOST_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTIONS1
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -DXARCH_HOST_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTIONS2
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -DXARCH_HOST_TEST -mllvm -enable-merge-functions" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTIONS3
+// SYCL_XARCH_HOST_OPTIONS1: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_OPTIONS1-SAME: -fsanitize=address
+// SYCL_XARCH_HOST_OPTIONS1-SAME: -fsanitize-address-use-after-scope
+// SYCL_XARCH_HOST_OPTIONS2: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_OPTIONS2-SAME: XARCH_HOST_TEST
+// SYCL_XARCH_HOST_OPTIONS3: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_OPTIONS3-SAME: "-mllvm" "-enable-merge-functions"
+
+// test behavior of combination of -Xarch_device and -Xarch_device.
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_DEVICE_OPTIONS1
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_DEVICE_OPTIONS2
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_NO_DEVICE
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_HOST_OPTIONS1
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_HOST_OPTIONS2
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_HOST_OPTIONS3
+// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_COM_NO_HOST
+// SYCL_XARCH_COM_DEVICE_OPTIONS1: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: -fsanitize=address
+// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: -fsanitize-address-use-after-return=never
+// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: -fno-sanitize-address-use-after-scope
+// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-instrumentation-with-call-threshold=0"
+// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-stack=0"
+// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-globals=0"
+// SYCL_XARCH_COM_DEVICE_OPTIONS2: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_COM_DEVICE_OPTIONS2-SAME: "-mllvm" "-enable-merge-functions"
+// SYCL_XARCH_COM_NO_DEVICE: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_COM_NO_DEVICE-NOT: USE_XARCH_HOST
+// SYCL_XARCH_COM_NO_DEVICE: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_COM_HOST_OPTIONS1: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_COM_HOST_OPTIONS1-SAME: -fsanitize=memory
+// SYCL_XARCH_COM_HOST_OPTIONS1-NEXT: libclang_rt.msan
+// SYCL_XARCH_COM_HOST_OPTIONS2: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_COM_HOST_OPTIONS2-SAME: USE_XARCH_HOST
+// SYCL_XARCH_COM_HOST_OPTIONS3: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_COM_HOST_OPTIONS3-SAME: -fno-builtin
+// SYCL_XARCH_COM_NO_HOST: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_COM_NO_HOST-NOT: "-mllvm" "-enable-merge-functions"
+
+
+// test behavior of multiple usage of -Xarch_host in single command line
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE1
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE2
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE3
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE4
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_NO_DEVICE_MULTIPLE1
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_NO_DEVICE_MULTIPLE2
+// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \
+// RUN:   -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=SYCL_XARCH_NO_DEVICE_MULTIPLE3
+// SYCL_XARCH_HOST_MULTIPLE1: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_MULTIPLE1-SAME: -fsanitize=address
+// SYCL_XARCH_HOST_MULTIPLE1-NEXT: libclang_rt.asan
+// SYCL_XARCH_HOST_MULTIPLE2: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_MULTIPLE2-SAME: "-mllvm" "-enable-merge-functions"
+// SYCL_XARCH_HOST_MULTIPLE3: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_MULTIPLE3-SAME: "FOO"
+// SYCL_XARCH_HOST_MULTIPLE4: clang{{.*}} "-fsycl-is-host"
+// SYCL_XARCH_HOST_MULTIPLE4-SAME: "FOO1"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE1: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE1-NOT: -fsanitize=address
+// SYCL_XARCH_NO_DEVICE_MULTIPLE1: llc{{.*}} "-filetype=obj"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE2: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE2-NOT: "-mllvm" "-enable-merge-functions"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE2: llc{{.*}} "-filetype=obj"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE3: clang{{.*}} "-fsycl-is-device"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE3-NOT: "FOO"
+// SYCL_XARCH_NO_DEVICE_MULTIPLE3: llc{{.*}} "-filetype=obj"
diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp
index 429792f94eb1d..3b6d1cf71f441 100644
--- a/libdevice/atomic.hpp
+++ b/libdevice/atomic.hpp
@@ -62,6 +62,11 @@ __spirv_AtomicCompareExchange(int SPIR_GLOBAL *, __spv::Scope::Flag,
                               __spv::MemorySemanticsMask::Flag,
                               __spv::MemorySemanticsMask::Flag, int, int);
 
+extern DEVICE_EXTERNAL int
+__spirv_AtomicCompareExchange(int *, __spv::Scope::Flag,
+                              __spv::MemorySemanticsMask::Flag,
+                              __spv::MemorySemanticsMask::Flag, int, int);
+
 extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *,
                                               __spv::Scope::Flag,
                                               __spv::MemorySemanticsMask::Flag);
@@ -70,6 +75,10 @@ extern DEVICE_EXTERNAL void
 __spirv_AtomicStore(int SPIR_GLOBAL *, __spv::Scope::Flag,
                     __spv::MemorySemanticsMask::Flag, int);
 
+extern DEVICE_EXTERNAL void
+__spirv_AtomicStore(int *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag,
+                    int);
+
 /// Atomically set the value in *Ptr with Desired if and only if it is Expected
 /// Return the value which already was in *Ptr
 static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired,
@@ -80,6 +89,13 @@ static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired,
       __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected);
 }
 
+static inline int atomicCompareAndSet(int *Ptr, int Desired, int Expected) {
+  return __spirv_AtomicCompareExchange(
+      Ptr, __spv::Scope::Device,
+      __spv::MemorySemanticsMask::SequentiallyConsistent,
+      __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected);
+}
+
 static inline int atomicLoad(SPIR_GLOBAL int *Ptr) {
   return __spirv_AtomicLoad(Ptr, __spv::Scope::Device,
                             __spv::MemorySemanticsMask::SequentiallyConsistent);
@@ -90,4 +106,9 @@ static inline void atomicStore(SPIR_GLOBAL int *Ptr, int V) {
                       __spv::MemorySemanticsMask::SequentiallyConsistent, V);
 }
 
+static inline void atomicStore(int *Ptr, int V) {
+  __spirv_AtomicStore(Ptr, __spv::Scope::Device,
+                      __spv::MemorySemanticsMask::SequentiallyConsistent, V);
+}
+
 #endif // __SPIR__
diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake
index f8477c7b5d2d0..1d2e1b4de64f5 100644
--- a/libdevice/cmake/modules/SYCLLibdevice.cmake
+++ b/libdevice/cmake/modules/SYCLLibdevice.cmake
@@ -109,7 +109,7 @@ set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp imf_rounding_op.hpp im
 set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler)
 set(bfloat16_obj_deps sycl-headers sycl-compiler)
 if (NOT MSVC)
-  set(sanitizer_obj_deps device.h sycl-compiler)
+  set(sanitizer_obj_deps device.h atomic.hpp spirv_vars.h include/sanitizer_device_utils.hpp include/spir_global_var.hpp sycl-compiler)
 endif()
 
 add_devicelib_obj(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps})
@@ -126,9 +126,9 @@ add_devicelib_obj(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps})
 add_devicelib_obj(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps})
 add_devicelib_obj(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} )
 if(MSVC)
-add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps})
+  add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps})
 else()
-add_devicelib_obj(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps})
+  add_devicelib_obj(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code)
 endif()
 
 add_fallback_devicelib(libsycl-fallback-cassert SRC fallback-cassert.cpp DEP ${crt_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code)
diff --git a/libdevice/include/device-sanitizer-report.hpp b/libdevice/include/device-sanitizer-report.hpp
new file mode 100644
index 0000000000000..477fac6d4a5d4
--- /dev/null
+++ b/libdevice/include/device-sanitizer-report.hpp
@@ -0,0 +1,58 @@
+//==-- device-sanitizer-report.hpp - Structure and declaration for assert
+// support --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+
+// Treat this header as system one to workaround frontend's restriction
+#pragma clang system_header
+
+#include <cinttypes>
+
+enum class DeviceSanitizerErrorType : int32_t {
+  UNKNOWN,
+  OUT_OF_BOUND,
+  MISALIGNED,
+  USE_AFTER_FREE,
+  OUT_OF_SHADOW_BOUND,
+};
+
+enum class DeviceSanitizerMemoryType : int32_t {
+  UNKNOWN,
+  USM_DEVICE,
+  USM_HOST,
+  USM_SHARED,
+  LOCAL,
+  PRIVATE,
+  MEM_BUFFER,
+};
+
+// NOTE Layout of this structure should be aligned with the one in
+// sycl/include/sycl/detail/device_sanitizer_report.hpp
+struct DeviceSanitizerReport {
+  int Flag = 0;
+
+  char File[256 + 1] = "";
+  char Func[256 + 1] = "";
+
+  int32_t Line = 0;
+
+  uint64_t GID0 = 0;
+  uint64_t GID1 = 0;
+  uint64_t GID2 = 0;
+
+  uint64_t LID0 = 0;
+  uint64_t LID1 = 0;
+  uint64_t LID2 = 0;
+
+  bool IsWrite = false;
+  uint32_t AccessSize = 0;
+  DeviceSanitizerMemoryType MemoryType = DeviceSanitizerMemoryType::UNKNOWN;
+  DeviceSanitizerErrorType ErrorType = DeviceSanitizerErrorType::UNKNOWN;
+
+  bool IsRecover = false;
+};
diff --git a/libdevice/include/sanitizer_device_utils.hpp b/libdevice/include/sanitizer_device_utils.hpp
new file mode 100644
index 0000000000000..ab472d2139abc
--- /dev/null
+++ b/libdevice/include/sanitizer_device_utils.hpp
@@ -0,0 +1,49 @@
+//==-- sanitizer_device_utils.hpp - Declaration for sanitizer global var ---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+
+#include "spir_global_var.hpp"
+#include <cstdint>
+
+// Treat this header as system one to workaround frontend's restriction
+#pragma clang system_header
+
+template <typename T>
+class
+#ifdef __SYCL_DEVICE_ONLY__
+    [[__sycl_detail__::global_variable_allowed, __sycl_detail__::device_global,
+      __sycl_detail__::add_ir_attributes_global_variable(
+          "sycl-device-global-size", "sycl-device-image-scope", sizeof(T),
+          nullptr)]]
+#endif
+    DeviceGlobal {
+public:
+  DeviceGlobal() = default;
+  DeviceGlobal(const DeviceGlobal &) = delete;
+  DeviceGlobal(const DeviceGlobal &&) = delete;
+  DeviceGlobal &operator=(const DeviceGlobal &) = delete;
+  DeviceGlobal &operator=(const DeviceGlobal &&) = delete;
+
+  DeviceGlobal &operator=(const T newValue) noexcept {
+    val = newValue;
+    return *this;
+  }
+
+  operator T &() noexcept { return val; }
+
+  operator const T &() const noexcept { return val; }
+
+  T &get() noexcept { return val; }
+
+  const T &get() const noexcept { return val; }
+
+private:
+  T val;
+};
+
+enum DeviceType : uintptr_t { UNKNOWN, CPU, GPU_PVC, GPU_DG2 };
diff --git a/libdevice/include/spir_global_var.hpp b/libdevice/include/spir_global_var.hpp
new file mode 100644
index 0000000000000..4bf1cb8daefae
--- /dev/null
+++ b/libdevice/include/spir_global_var.hpp
@@ -0,0 +1,26 @@
+//==- spir_global_var.hpp - Declaration for device global variable support -==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+// Treat this header as system one to workaround frontend's restriction
+#pragma clang system_header
+
+#ifndef SPIR_GLOBAL_VAR
+#ifdef __SYCL_DEVICE_ONLY__
+#define SPIR_GLOBAL_VAR __attribute__((sycl_global_var))
+#else
+#warning "SPIR_GLOBAL_VAR not defined in host mode. Defining as empty macro."
+#define SPIR_GLOBAL_VAR
+#endif
+#endif
+
+#define __SYCL_GLOBAL__ __attribute__((opencl_global))
+#define __SYCL_LOCAL__ __attribute__((opencl_local))
+#define __SYCL_PRIVATE__ __attribute__((opencl_private))
+#define __SYCL_CONSTANT__ __attribute__((opencl_constant))
diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp
index c0d4cbba112c3..3e4fd11ec811c 100644
--- a/libdevice/sanitizer_utils.cpp
+++ b/libdevice/sanitizer_utils.cpp
@@ -6,22 +6,525 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "atomic.hpp"
 #include "device.h"
+#include "spirv_vars.h"
+
+#include "include/device-sanitizer-report.hpp"
+#include "include/sanitizer_device_utils.hpp"
+
+#include <cstddef>
 #include <cstdint>
 
-using uptr = uint64_t;
+using uptr = uintptr_t;
+using s8 = char;
+using u8 = unsigned char;
+using s16 = short;
+using u16 = unsigned short;
+
+#define ASAN_SHADOW_SCALE 3
+#define ASAN_SHADOW_GRANULARITY (1ULL << ASAN_SHADOW_SCALE)
+
+DeviceGlobal<uptr> __AsanShadowMemoryGlobalStart;
+DeviceGlobal<uptr> __AsanShadowMemoryGlobalEnd;
+DeviceGlobal<uptr> __AsanShadowMemoryLocalStart;
+DeviceGlobal<uptr> __AsanShadowMemoryLocalEnd;
+
+DeviceGlobal<DeviceSanitizerReport> __DeviceSanitizerReportMem;
+
+DeviceGlobal<DeviceType> __DeviceType;
+
 #if defined(__SPIR__)
-// TODO: add real implementation in __asan_load_n.
-DEVICE_EXTERN_C_NOINLINE
-void __asan_load_n(uptr addr, unsigned n) {
-  (void)addr;
-  (void)n;
-  return;
+
+#ifdef __SYCL_DEVICE_ONLY__
+extern SYCL_EXTERNAL int
+__spirv_ocl_printf(const __SYCL_CONSTANT__ char *Format, ...);
+
+extern SYCL_EXTERNAL __SYCL_GLOBAL__ void *
+__spirv_GenericCastToPtrExplicit_ToGlobal(void *, int);
+extern SYCL_EXTERNAL __SYCL_LOCAL__ void *
+__spirv_GenericCastToPtrExplicit_ToLocal(void *, int);
+extern SYCL_EXTERNAL __SYCL_PRIVATE__ void *
+__spirv_GenericCastToPtrExplicit_ToPrivate(void *, int);
+#endif
+
+// These magic values are written to shadow for better error
+// reporting.
+const int kUsmDeviceRedzoneMagic = (char)0x81;
+const int kUsmHostRedzoneMagic = (char)0x82;
+const int kUsmSharedRedzoneMagic = (char)0x83;
+const int kMemBufferRedzoneMagic = (char)0x84;
+
+const int kUsmDeviceDeallocatedMagic = (char)0x91;
+const int kUsmHostDeallocatedMagic = (char)0x92;
+const int kUsmSharedDeallocatedMagic = (char)0x93;
+
+const int kSharedLocalRedzoneMagic = (char)0xa1;
+
+// Same with Asan Stack
+const int kPrivateLeftRedzoneMagic = (char)0xf1;
+const int kPrivateMidRedzoneMagic = (char)0xf2;
+const int kPrivateRightRedzoneMagic = (char)0xf3;
+
+static const __SYCL_CONSTANT__ char __asan_shadow_value_start[] =
+    "%p(%d) -> %p:";
+static const __SYCL_CONSTANT__ char __asan_shadow_value[] = " %02X";
+static const __SYCL_CONSTANT__ char __asan_current_shadow_value[] = ">%02X";
+static const __SYCL_CONSTANT__ char __newline[] = "\n";
+
+static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] =
+    "ERROR: Global shadow memory out-of-bound (ptr: %p -> %p, base: %p)\n";
+static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] =
+    "ERROR: Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: "
+    "%p)\n";
+
+static const __SYCL_CONSTANT__ char __unsupport_device_type[] =
+    "ERROR: Unsupport device type: %d\n";
+
+#define ASAN_REPORT_NONE 0
+#define ASAN_REPORT_START 1
+#define ASAN_REPORT_FINISH 2
+
+#define AS_PRIVATE 0
+#define AS_GLOBAL 1
+#define AS_CONSTANT 2
+#define AS_LOCAL 3
+#define AS_GENERIC 4
+
+namespace {
+
+__SYCL_GLOBAL__ void *ToGlobal(void *ptr) {
+  return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5);
+}
+__SYCL_LOCAL__ void *ToLocal(void *ptr) {
+  return __spirv_GenericCastToPtrExplicit_ToLocal(ptr, 4);
+}
+__SYCL_PRIVATE__ void *ToPrivate(void *ptr) {
+  return __spirv_GenericCastToPtrExplicit_ToPrivate(ptr, 7);
+}
+
+inline uptr MemToShadow_CPU(uptr addr, int32_t as) {
+  return __AsanShadowMemoryGlobalStart + (addr >> 3);
+}
+
+inline uptr MemToShadow_DG2(uptr addr, int32_t as) {
+  uptr shadow_ptr = 0;
+  if (addr & (~0xffffffffffff)) {
+    shadow_ptr =
+        (((addr & 0xffffffffffff) >> 3) + __AsanShadowMemoryGlobalStart) |
+        (~0xffffffffffff);
+  } else {
+    shadow_ptr = (addr >> 3) + __AsanShadowMemoryGlobalStart;
+  }
+
+  if (shadow_ptr > __AsanShadowMemoryGlobalEnd) {
+    __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr);
+  }
+
+  return shadow_ptr;
+}
+
+inline uptr MemToShadow_PVC(uptr addr, int32_t as) {
+  uptr shadow_ptr = 0;
+
+  if (as == AS_GENERIC) {
+    if ((shadow_ptr = (uptr)ToGlobal((void *)addr))) {
+      as = AS_GLOBAL;
+    } else if ((shadow_ptr = (uptr)ToPrivate((void *)addr))) {
+      as = AS_PRIVATE;
+    } else if ((shadow_ptr = (uptr)ToLocal((void *)addr))) {
+      as = AS_LOCAL;
+    } else {
+      return 0;
+    }
+  }
+
+  if (as == AS_PRIVATE) {            // private
+  } else if (as == AS_GLOBAL) {      // global
+    if (addr & 0xFF00000000000000) { // Device USM
+      shadow_ptr = __AsanShadowMemoryGlobalStart + 0x200000000000 +
+                   ((addr & 0xFFFFFFFFFFFF) >> 3);
+    } else { // Only consider 47bit VA
+      shadow_ptr =
+          __AsanShadowMemoryGlobalStart + ((addr & 0x7FFFFFFFFFFF) >> 3);
+    }
+
+    if (shadow_ptr > __AsanShadowMemoryGlobalEnd) {
+      __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr,
+                         (uptr)__AsanShadowMemoryGlobalStart);
+      shadow_ptr = 0;
+    }
+  } else if (as == AS_CONSTANT) { // constant
+  } else if (as == AS_LOCAL) {    // local
+    // The size of SLM is 128KB on PVC
+    constexpr unsigned slm_size = 128 * 1024;
+    const auto wg_lid =
+        __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
+            __spirv_BuiltInNumWorkgroups.z +
+        __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
+        __spirv_BuiltInWorkgroupId.z;
+
+    shadow_ptr = __AsanShadowMemoryLocalStart + ((wg_lid * slm_size) >> 3) +
+                 ((addr & (slm_size - 1)) >> 3);
+
+    if (shadow_ptr > __AsanShadowMemoryLocalEnd) {
+      __spirv_ocl_printf(__local_shadow_out_of_bound, addr, shadow_ptr, wg_lid,
+                         (uptr)__AsanShadowMemoryLocalStart);
+      shadow_ptr = 0;
+    }
+  }
+
+  return shadow_ptr;
+}
+
+inline uptr MemToShadow(uptr addr, int32_t as) {
+  uptr shadow_ptr = 0;
+
+  if (__DeviceType == DeviceType::CPU) {
+    shadow_ptr = MemToShadow_CPU(addr, as);
+  } else if (__DeviceType == DeviceType::GPU_PVC) {
+    shadow_ptr = MemToShadow_PVC(addr, as);
+  } else {
+    __spirv_ocl_printf(__unsupport_device_type, (int)__DeviceType);
+    return shadow_ptr;
+  }
+
+  return shadow_ptr;
+}
+
+inline constexpr uptr RoundUpTo(uptr size, uptr boundary) {
+  return (size + boundary - 1) & ~(boundary - 1);
+}
+
+inline constexpr uptr RoundDownTo(uptr x, uptr boundary) {
+  return x & ~(boundary - 1);
+}
+
+bool MemIsZero(const char *beg, uptr size) {
+  const char *end = beg + size;
+  uptr *aligned_beg = (uptr *)RoundUpTo((uptr)beg, sizeof(uptr));
+  uptr *aligned_end = (uptr *)RoundDownTo((uptr)end, sizeof(uptr));
+  uptr all = 0;
+  // Prologue.
+  for (const char *mem = beg; mem < (char *)aligned_beg && mem < end; mem++)
+    all |= *mem;
+  // Aligned loop.
+  for (; aligned_beg < aligned_end; aligned_beg++)
+    all |= *aligned_beg;
+  // Epilogue.
+  if ((char *)aligned_end >= beg) {
+    for (const char *mem = (char *)aligned_end; mem < end; mem++)
+      all |= *mem;
+  }
+  return all == 0;
 }
 
-DEVICE_EXTERN_C_NOINLINE
-void __asan_load4(uptr addr) { __asan_load_n(addr, 4); }
+void print_shadow_memory(uptr addr, int32_t as) {
+  uptr shadow_address = MemToShadow(addr, as);
+  uptr p = shadow_address & (~0xf);
+  __spirv_ocl_printf(__asan_shadow_value_start, addr, as, p);
+  for (int i = 0; i < 0xf; ++i) {
+    u8 shadow_value = *(u8 *)(p + i);
+    if (p + i == shadow_address) {
+      __spirv_ocl_printf(__asan_current_shadow_value, shadow_value);
+    } else {
+      __spirv_ocl_printf(__asan_shadow_value, shadow_value);
+    }
+  }
+  __spirv_ocl_printf(__newline);
+}
+
+} // namespace
+
+bool __asan_region_is_value(uptr addr, int32_t as, std::size_t size,
+                            char value) {
+  if (size == 0)
+    return true;
+  while (size--) {
+    char *shadow = (char *)MemToShadow(addr, as);
+    if (*shadow != value) {
+      return false;
+    }
+    ++addr;
+  }
+  return true;
+}
+
+static void __asan_internal_report_save(
+    uptr ptr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line,
+    const char __SYCL_CONSTANT__ *func, bool is_write, uint32_t access_size,
+    DeviceSanitizerMemoryType memory_type, DeviceSanitizerErrorType error_type,
+    bool is_recover = false) {
+
+  const int Expected = ASAN_REPORT_NONE;
+  int Desired = ASAN_REPORT_START;
+  if (atomicCompareAndSet(&__DeviceSanitizerReportMem.get().Flag, Desired,
+                          Expected) == Expected) {
+
+    int FileLength = 0;
+    int FuncLength = 0;
+
+    if (file)
+      for (auto *C = file; *C != '\0'; ++C, ++FileLength)
+        ;
+    if (func)
+      for (auto *C = func; *C != '\0'; ++C, ++FuncLength)
+        ;
+
+    int MaxFileIdx = sizeof(__DeviceSanitizerReportMem.get().File) - 1;
+    int MaxFuncIdx = sizeof(__DeviceSanitizerReportMem.get().Func) - 1;
+
+    if (FileLength < MaxFileIdx)
+      MaxFileIdx = FileLength;
+    if (FuncLength < MaxFuncIdx)
+      MaxFuncIdx = FuncLength;
+
+    for (int Idx = 0; Idx < MaxFileIdx; ++Idx)
+      __DeviceSanitizerReportMem.get().File[Idx] = file[Idx];
+    __DeviceSanitizerReportMem.get().File[MaxFileIdx] = '\0';
+
+    for (int Idx = 0; Idx < MaxFuncIdx; ++Idx)
+      __DeviceSanitizerReportMem.get().Func[Idx] = func[Idx];
+    __DeviceSanitizerReportMem.get().Func[MaxFuncIdx] = '\0';
+
+    __DeviceSanitizerReportMem.get().Line = line;
+    __DeviceSanitizerReportMem.get().GID0 = __spirv_GlobalInvocationId_x();
+    __DeviceSanitizerReportMem.get().GID1 = __spirv_GlobalInvocationId_y();
+    __DeviceSanitizerReportMem.get().GID2 = __spirv_GlobalInvocationId_z();
+    __DeviceSanitizerReportMem.get().LID0 = __spirv_LocalInvocationId_x();
+    __DeviceSanitizerReportMem.get().LID1 = __spirv_LocalInvocationId_y();
+    __DeviceSanitizerReportMem.get().LID2 = __spirv_LocalInvocationId_z();
+
+    __DeviceSanitizerReportMem.get().IsWrite = is_write;
+    __DeviceSanitizerReportMem.get().AccessSize = access_size;
+    __DeviceSanitizerReportMem.get().ErrorType = error_type;
+    __DeviceSanitizerReportMem.get().MemoryType = memory_type;
+    __DeviceSanitizerReportMem.get().IsRecover = is_recover;
+
+    // Show we've done copying
+    atomicStore(&__DeviceSanitizerReportMem.get().Flag, ASAN_REPORT_FINISH);
+  }
+}
+
+///
+/// ASAN Error Reporters
+///
+
+void __asan_report_access_error(uptr addr, int32_t as, size_t size,
+                                bool is_write, uptr poisoned_addr,
+                                const char __SYCL_CONSTANT__ *file,
+                                int32_t line,
+                                const char __SYCL_CONSTANT__ *func,
+                                bool is_recover = false) {
+  // Check Error Type
+  s8 *shadow_address = (s8 *)MemToShadow(poisoned_addr, as);
+  int shadow_value = *shadow_address;
+  if (shadow_value > 0) {
+    shadow_value = *(shadow_address + 1);
+  }
+  // FIXME: check if shadow_address out-of-bound
+
+  DeviceSanitizerMemoryType memory_type;
+  DeviceSanitizerErrorType error_type;
+
+  switch (shadow_value) {
+  case kUsmDeviceRedzoneMagic:
+    memory_type = DeviceSanitizerMemoryType::USM_DEVICE;
+    error_type = DeviceSanitizerErrorType::OUT_OF_BOUND;
+    break;
+  case kUsmHostRedzoneMagic:
+    memory_type = DeviceSanitizerMemoryType::USM_HOST;
+    error_type = DeviceSanitizerErrorType::OUT_OF_BOUND;
+    break;
+  case kUsmSharedRedzoneMagic:
+    memory_type = DeviceSanitizerMemoryType::USM_SHARED;
+    error_type = DeviceSanitizerErrorType::OUT_OF_BOUND;
+    break;
+  case kUsmDeviceDeallocatedMagic:
+    memory_type = DeviceSanitizerMemoryType::USM_DEVICE;
+    error_type = DeviceSanitizerErrorType::USE_AFTER_FREE;
+    break;
+  case kUsmHostDeallocatedMagic:
+    memory_type = DeviceSanitizerMemoryType::USM_HOST;
+    error_type = DeviceSanitizerErrorType::USE_AFTER_FREE;
+    break;
+  case kUsmSharedDeallocatedMagic:
+    memory_type = DeviceSanitizerMemoryType::USM_SHARED;
+    error_type = DeviceSanitizerErrorType::USE_AFTER_FREE;
+    break;
+  case kPrivateLeftRedzoneMagic:
+  case kPrivateMidRedzoneMagic:
+  case kPrivateRightRedzoneMagic:
+    memory_type = DeviceSanitizerMemoryType::PRIVATE;
+    error_type = DeviceSanitizerErrorType::OUT_OF_BOUND;
+    break;
+  case kMemBufferRedzoneMagic:
+    memory_type = DeviceSanitizerMemoryType::MEM_BUFFER;
+    error_type = DeviceSanitizerErrorType::OUT_OF_BOUND;
+    break;
+  case kSharedLocalRedzoneMagic:
+    memory_type = DeviceSanitizerMemoryType::LOCAL;
+    error_type = DeviceSanitizerErrorType::OUT_OF_BOUND;
+    break;
+  default:
+    memory_type = DeviceSanitizerMemoryType::UNKNOWN;
+    error_type = DeviceSanitizerErrorType::UNKNOWN;
+  }
+
+  __asan_internal_report_save(addr, as, file, line, func, is_write, size,
+                              memory_type, error_type, is_recover);
+}
+
+///
+/// Check if memory is poisoned
+///
+
+// NOTE: size < 8
+inline int __asan_address_is_poisoned(uptr a, int32_t as, size_t size) {
+  auto *shadow_address = (s8 *)MemToShadow(a, as);
+  if (shadow_address) {
+    auto shadow_value = *shadow_address;
+    if (shadow_value) {
+      s8 last_accessed_byte = (a & (ASAN_SHADOW_GRANULARITY - 1)) + size - 1;
+      return (last_accessed_byte >= shadow_value);
+    }
+  }
+  return false;
+}
+
+// NOTE: size = 1
+inline int __asan_address_is_poisoned(uptr a, int32_t as) {
+  return __asan_address_is_poisoned(a, as, 1);
+}
+
+inline uptr __asan_region_is_poisoned(uptr beg, int32_t as, size_t size) {
+  if (!size)
+    return 0;
+
+  uptr end = beg + size;
+  uptr aligned_b = RoundUpTo(beg, ASAN_SHADOW_GRANULARITY);
+  uptr aligned_e = RoundDownTo(end, ASAN_SHADOW_GRANULARITY);
+
+  uptr shadow_beg = MemToShadow(aligned_b, as);
+  if (!shadow_beg) {
+    return 0;
+  }
+  uptr shadow_end = MemToShadow(aligned_e, as);
+  if (!shadow_end) {
+    return 0;
+  }
+
+  // First check the first and the last application bytes,
+  // then check the ASAN_SHADOW_GRANULARITY-aligned region by calling
+  // MemIsZero on the corresponding shadow.
+  if (!__asan_address_is_poisoned(beg, as) &&
+      !__asan_address_is_poisoned(end - 1, as) &&
+      (shadow_end <= shadow_beg ||
+       MemIsZero((const char *)shadow_beg, shadow_end - shadow_beg)))
+    return 0;
+
+  // The fast check failed, so we have a poisoned byte somewhere.
+  // Find it slowly.
+  for (; beg < end; beg++)
+    if (__asan_address_is_poisoned(beg, as))
+      return beg;
+
+  return 0;
+}
+
+///
+/// ASAN Load/Store Report Built-ins
+///
+
+#define ASAN_REPORT_ERROR(type, is_write, size)                                \
+  DEVICE_EXTERN_C_NOINLINE void __asan_##type##size(                           \
+      uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \
+      const char __SYCL_CONSTANT__ *func) {                                    \
+    if (__asan_address_is_poisoned(addr, as, size)) {                          \
+      __asan_report_access_error(addr, as, size, is_write, addr, file, line,   \
+                                 func);                                        \
+    }                                                                          \
+  }                                                                            \
+  DEVICE_EXTERN_C_NOINLINE void __asan_##type##size##_noabort(                 \
+      uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \
+      const char __SYCL_CONSTANT__ *func) {                                    \
+    if (__asan_address_is_poisoned(addr, as, size)) {                          \
+      __asan_report_access_error(addr, as, size, is_write, addr, file, line,   \
+                                 func, true);                                  \
+    }                                                                          \
+  }
+
+ASAN_REPORT_ERROR(load, false, 1)
+ASAN_REPORT_ERROR(load, false, 2)
+ASAN_REPORT_ERROR(load, false, 4)
+ASAN_REPORT_ERROR(store, true, 1)
+ASAN_REPORT_ERROR(store, true, 2)
+ASAN_REPORT_ERROR(store, true, 4)
+
+#define ASAN_REPORT_ERROR_BYTE(type, is_write, size)                           \
+  DEVICE_EXTERN_C_NOINLINE void __asan_##type##size(                           \
+      uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \
+      const char __SYCL_CONSTANT__ *func) {                                    \
+    u##size *shadow_address = (u##size *)MemToShadow(addr, as);                \
+    if (shadow_address && *shadow_address) {                                   \
+      __asan_report_access_error(addr, as, size, is_write, addr, file, line,   \
+                                 func);                                        \
+    }                                                                          \
+  }                                                                            \
+  DEVICE_EXTERN_C_NOINLINE void __asan_##type##size##_noabort(                 \
+      uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \
+      const char __SYCL_CONSTANT__ *func) {                                    \
+    u##size *shadow_address = (u##size *)MemToShadow(addr, as);                \
+    if (shadow_address && *shadow_address) {                                   \
+      __asan_report_access_error(addr, as, size, is_write, addr, file, line,   \
+                                 func, true);                                  \
+    }                                                                          \
+  }
+
+ASAN_REPORT_ERROR_BYTE(load, false, 8)
+ASAN_REPORT_ERROR_BYTE(load, false, 16)
+ASAN_REPORT_ERROR_BYTE(store, true, 8)
+ASAN_REPORT_ERROR_BYTE(store, true, 16)
+
+#define ASAN_REPORT_ERROR_N(type, is_write)                                    \
+  DEVICE_EXTERN_C_NOINLINE void __asan_##type##N(                              \
+      uptr addr, size_t size, int32_t as, const char __SYCL_CONSTANT__ *file,  \
+      int32_t line, const char __SYCL_CONSTANT__ *func) {                      \
+    if (auto poisoned_addr = __asan_region_is_poisoned(addr, as, size)) {      \
+      __asan_report_access_error(addr, as, size, is_write, poisoned_addr,      \
+                                 file, line, func);                            \
+    }                                                                          \
+  }                                                                            \
+  DEVICE_EXTERN_C_NOINLINE void __asan_##type##N_noabort(                      \
+      uptr addr, size_t size, int32_t as, const char __SYCL_CONSTANT__ *file,  \
+      int32_t line, const char __SYCL_CONSTANT__ *func) {                      \
+    if (auto poisoned_addr = __asan_region_is_poisoned(addr, as, size)) {      \
+      __asan_report_access_error(addr, as, size, is_write, poisoned_addr,      \
+                                 file, line, func, true);                      \
+    }                                                                          \
+  }
+
+ASAN_REPORT_ERROR_N(load, false)
+ASAN_REPORT_ERROR_N(store, true)
+
+DEVICE_EXTERN_C_NOINLINE void
+__asan_set_shadow_local_memory(uptr ptr, size_t size,
+                               size_t size_with_redzone) {
+  uptr aligned_size = RoundUpTo(size, ASAN_SHADOW_GRANULARITY);
+
+  {
+    auto shadow_address = MemToShadow(ptr + aligned_size, AS_LOCAL);
+    auto count = (size_with_redzone - aligned_size) / ASAN_SHADOW_GRANULARITY;
+    for (size_t i = 0; i < count; ++i) {
+      ((u8 *)shadow_address)[i] = kSharedLocalRedzoneMagic;
+    }
+  }
+
+  if (size != aligned_size) {
+    auto user_end = ptr + size - 1;
+    auto *shadow_end = (s8 *)MemToShadow(user_end, AS_LOCAL);
+    *shadow_end = user_end - RoundDownTo(user_end, ASAN_SHADOW_GRANULARITY);
+  }
+}
 
-DEVICE_EXTERN_C_NOINLINE
-void __asan_load8(uptr addr) { __asan_load_n(addr, 8); }
-#endif // __SPIR__
+#endif
diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h
index 640f0dbf3cd9e..0387c322965ce 100644
--- a/libdevice/spirv_vars.h
+++ b/libdevice/spirv_vars.h
@@ -21,6 +21,7 @@ typedef size_t size_t_vec __attribute__((ext_vector_type(3)));
 __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId;
 __SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId;
 __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId;
+__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInNumWorkgroups;
 __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId;
 __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize;
 
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8a2864a078731..22080d30e6c57 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -727,6 +727,13 @@ struct AddressSanitizer {
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
   bool maybeInsertDynamicShadowAtFunctionEntry(Function &F);
   void markEscapedLocalAllocas(Function &F);
+  void instrumentSyclAllocateLocalMemory(CallInst *CI);
+
+  GlobalVariable *GetOrCreateGlobalString(Module &M, StringRef Name,
+                                          StringRef Value,
+                                          unsigned AddressSpace);
+  void AppendDebugInfoToArgs(Instruction *InsertBefore, Value *Addr,
+                             SmallVectorImpl<Value *> &Args);
 
 private:
   friend struct FunctionStackPoisoner;
@@ -768,7 +775,10 @@ struct AddressSanitizer {
   ShadowMapping Mapping;
   FunctionCallee AsanHandleNoReturnFunc;
   FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
+  FunctionCallee AsanSetShadowDeviceLocalFunc;
   Constant *AsanShadowGlobal;
+  Constant *AsanShadowDevicePrivate;
+  StringMap<GlobalVariable *> GlobalStringMap;
 
   // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize).
   FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes];
@@ -1234,7 +1244,78 @@ static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
   return false;
 }
 
+static bool isUnsupportedSPIRAccess(Value *Addr, Function *Func) {
+  Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+  // Private address space: skip kernel arguments
+  if (PtrTy->getPointerAddressSpace() == 0) {
+    return Func->getCallingConv() == CallingConv::SPIR_KERNEL &&
+           isa<Argument>(Addr);
+  }
+
+  // All the rest address spaces: skip SPIR-V built-in varibles
+  auto *OrigValue = Addr->stripPointerCasts();
+  return OrigValue->getName().starts_with("__spirv_BuiltIn");
+}
+
+GlobalVariable *AddressSanitizer::GetOrCreateGlobalString(
+    Module &M, StringRef Name, StringRef Value, unsigned AddressSpace) {
+  GlobalVariable *StringGV = nullptr;
+  if (GlobalStringMap.find(Value.str()) != GlobalStringMap.end())
+    return GlobalStringMap.at(Value.str());
+
+  auto *Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), Value.size() + 1);
+  StringGV = new GlobalVariable(
+      M, Ty, true, GlobalValue::InternalLinkage,
+      ConstantDataArray::getString(M.getContext(), Value), Name, nullptr,
+      GlobalValue::NotThreadLocal, AddressSpace);
+  GlobalStringMap[Value.str()] = StringGV;
+
+  return StringGV;
+}
+
+void AddressSanitizer::AppendDebugInfoToArgs(Instruction *InsertBefore,
+                                             Value *Addr,
+                                             SmallVectorImpl<Value *> &Args) {
+  auto *M = InsertBefore->getModule();
+  auto &C = InsertBefore->getContext();
+  auto &Loc = InsertBefore->getDebugLoc();
+
+  // SPIR constant address space
+  constexpr unsigned ConstantAS = 2;
+  PointerType *ConstASPtrTy = Type::getInt8Ty(C)->getPointerTo(ConstantAS);
+
+  // Address Space
+  Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+  Args.push_back(
+      ConstantInt::get(Type::getInt32Ty(C), PtrTy->getPointerAddressSpace()));
+
+  // File & Line
+  if (Loc) {
+    StringRef FileName = Loc->getFilename();
+    auto *FileNameGV =
+        GetOrCreateGlobalString(*M, "__asan_file", FileName, ConstantAS);
+    Args.push_back(ConstantExpr::getPointerCast(FileNameGV, ConstASPtrTy));
+    Args.push_back(ConstantInt::get(Type::getInt32Ty(C), Loc.getLine()));
+  } else {
+    Args.push_back(ConstantPointerNull::get(ConstASPtrTy));
+    Args.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+  }
+
+  // Function
+  auto FuncName = InsertBefore->getFunction()->getName();
+  auto *FuncNameGV = GetOrCreateGlobalString(*M, "__asan_func",
+                                             demangle(FuncName), ConstantAS);
+  Args.push_back(ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy));
+}
+
 Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+  if (TargetTriple.isSPIR()) {
+    // ((Shadow & 0xffffffff) >> 3) + __AsanShadowMemoryPrivateStart;
+    Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, 0xffffffff));
+    Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+    Value *ShadowBase = IRB.CreateLoad(IntptrTy, AsanShadowDevicePrivate);
+    return IRB.CreateAdd(Shadow, ShadowBase);
+  }
   // Shadow >> scale
   Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
   if (Mapping.Offset == 0) return Shadow;
@@ -1250,6 +1331,53 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
     return IRB.CreateAdd(Shadow, ShadowBase);
 }
 
+static uint64_t getSizeAndRedzoneSizeForLocal(uint64_t Size,
+                                              uint64_t Granularity,
+                                              uint64_t Alignment) {
+  uint64_t Res = 0;
+  if (Size <= 4)
+    Res = 16;
+  else if (Size <= 16)
+    Res = 32;
+  else if (Size <= 128)
+    Res = Size + 32;
+  else if (Size <= 512)
+    Res = Size + 64;
+  else if (Size <= 4096)
+    Res = Size + 128;
+  else
+    Res = Size + 256;
+  return alignTo(std::max(Res, 2 * Granularity), Alignment);
+}
+
+// Instument __sycl_allocateLocalMemory
+void AddressSanitizer::instrumentSyclAllocateLocalMemory(CallInst *CI) {
+  InstrumentationIRBuilder IRB(CI->getNextNode());
+  auto *Size = cast<ConstantInt>(CI->getArgOperand(0));
+  auto *Alignment = cast<ConstantInt>(CI->getArgOperand(1));
+
+  const auto Granularity = 1 << Mapping.Scale;
+  // The base address of local memory needs to align to granularity
+  const auto Align = alignTo(Alignment->getZExtValue(), Granularity);
+
+  auto *SizeWithRedZone = ConstantInt::get(
+      IntptrTy, getSizeAndRedzoneSizeForLocal(Size->getZExtValue(), Granularity,
+                                              Alignment->getZExtValue()));
+
+  auto *NewCI =
+      IRB.CreateCall(CI->getCalledFunction(),
+                     {SizeWithRedZone, ConstantInt::get(IntptrTy, Align)});
+
+  /// __asan_set_shadow_local_memory(uptr beg, size_t size, size_t
+  /// size_with_redzone)
+  IRB.CreateCall(
+      AsanSetShadowDeviceLocalFunc,
+      {IRB.CreatePointerCast(NewCI, IntptrTy), Size, SizeWithRedZone});
+
+  CI->replaceAllUsesWith(NewCI);
+  CI->eraseFromParent();
+}
+
 // Instrument memset/memmove/memcpy
 void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
   InstrumentationIRBuilder IRB(MI);
@@ -1295,11 +1423,18 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
 }
 
 bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
-  // Instrument accesses from different address spaces only for AMDGPU.
-  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
-  if (PtrTy->getPointerAddressSpace() != 0 &&
-      !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr)))
-    return true;
+  // SPIR has its own rules to filter the instrument accesses
+  if (TargetTriple.isSPIR()) {
+    if (isUnsupportedSPIRAccess(Ptr, Inst->getFunction()))
+      return true;
+  } else {
+    // Instrument accesses from different address spaces only for AMDGPU.
+    Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
+    if (PtrTy->getPointerAddressSpace() != 0 &&
+        !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr))) {
+      return true;
+    }
+  }
 
   // Ignore swifterror addresses.
   // swifterror memory addresses are mem2reg promoted by instruction
@@ -1778,12 +1913,21 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
   if (UseCalls) {
-    if (Exp == 0)
-      IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
-                     AddrLong);
-    else
+    if (Exp == 0) {
+      if (TargetTriple.isSPIR()) {
+        SmallVector<Value *, 5> Args;
+        Args.push_back(AddrLong);
+        AppendDebugInfoToArgs(InsertBefore, Addr, Args);
+        IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+                       Args);
+      } else {
+        IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+                       AddrLong);
+      }
+    } else {
       IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex],
                      {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+    }
     return;
   }
 
@@ -1849,10 +1993,18 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
 
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
   if (UseCalls) {
-    if (Exp == 0)
-      IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0],
-                     {AddrLong, Size});
-    else
+    if (Exp == 0) {
+      if (TargetTriple.isSPIR()) {
+        SmallVector<Value *, 6> Args;
+        Args.push_back(AddrLong);
+        Args.push_back(Size);
+        AppendDebugInfoToArgs(InsertBefore, Addr, Args);
+        IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], Args);
+      } else {
+        IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0],
+                       {AddrLong, Size});
+      }
+    } else
       IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1],
                      {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)});
   } else {
@@ -2667,6 +2819,12 @@ bool ModuleAddressSanitizer::instrumentModule(Module &M) {
     }
   }
 
+  // SPIR kernel needn't AsanCtorFunction & AsanDtorFunction
+  if (TargetTriple.isSPIR()) {
+    AsanCtorFunction = nullptr;
+    AsanDtorFunction = nullptr;
+  }
+
   const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple);
 
   // Put the constructor and destructor in comdat if both
@@ -2714,6 +2872,23 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T
           AL1 = AL1.addParamAttribute(*C, 1, AK);
         }
       }
+
+      // Extend __asan_load/store arguments: unsigned int address_space, char*
+      // file, unsigned int line, char* func
+      if (TargetTriple.isSPIR()) {
+        constexpr unsigned ConstantAS = 2;
+        auto *Int8PtrTy = Type::getInt8Ty(*C)->getPointerTo(ConstantAS);
+
+        Args1.push_back(Type::getInt32Ty(*C)); // address_space
+        Args1.push_back(Int8PtrTy);            // file
+        Args1.push_back(Type::getInt32Ty(*C)); // line
+        Args1.push_back(Int8PtrTy);            // func
+
+        Args2.push_back(Type::getInt32Ty(*C)); // address_space
+        Args2.push_back(Int8PtrTy);            // file
+        Args2.push_back(Type::getInt32Ty(*C)); // line
+        Args2.push_back(Int8PtrTy);            // func
+      }
       AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
           kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
           FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
@@ -2761,6 +2936,20 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T
     AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow",
                                            ArrayType::get(IRB.getInt8Ty(), 0));
 
+  if (TargetTriple.isSPIR()) {
+    AsanShadowDevicePrivate =
+        M.getOrInsertGlobal("__AsanShadowMemoryPrivateStart", IntptrTy, [&] {
+          return new GlobalVariable(M, IntptrTy, true,
+                                    GlobalVariable::ExternalLinkage, nullptr,
+                                    "__AsanShadowMemoryPrivateStart", nullptr,
+                                    GlobalVariable::NotThreadLocal, 1);
+        });
+
+    AsanSetShadowDeviceLocalFunc =
+        M.getOrInsertFunction("__asan_set_shadow_local_memory", IRB.getVoidTy(),
+                              IntptrTy, IntptrTy, IntptrTy);
+  }
+
   AMDGPUAddressShared =
       M.getOrInsertFunction(kAMDGPUAddressSharedName, IRB.getInt1Ty(), PtrTy);
   AMDGPUAddressPrivate =
@@ -2856,6 +3045,8 @@ bool AddressSanitizer::instrumentFunction(Function &F,
   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
   if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
   if (F.getName().starts_with("__asan_")) return false;
+  if (F.getName().contains("__sycl_service_kernel__"))
+    return false;
 
   bool FunctionModified = false;
 
@@ -2891,6 +3082,7 @@ bool AddressSanitizer::instrumentFunction(Function &F,
   SmallVector<Instruction *, 8> NoReturnCalls;
   SmallVector<BasicBlock *, 16> AllBlocks;
   SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
+  SmallVector<CallInst *, 8> SyclAllocateLocalMemoryCalls;
 
   // Fill the set of memory operations to instrument.
   for (auto &BB : F) {
@@ -2939,8 +3131,16 @@ bool AddressSanitizer::instrumentFunction(Function &F,
           if (CB->doesNotReturn())
             NoReturnCalls.push_back(CB);
         }
-        if (CallInst *CI = dyn_cast<CallInst>(&Inst))
-          maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
+        if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+          if (TargetTriple.isSPIR() && CI->getCalledFunction() &&
+              CI->getCalledFunction()->getCallingConv() ==
+                  llvm::CallingConv::SPIR_FUNC &&
+              CI->getCalledFunction()->getName() ==
+                  "__sycl_allocateLocalMemory")
+            SyclAllocateLocalMemoryCalls.push_back(CI);
+          else
+            maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
+        }
       }
       if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break;
     }
@@ -2962,10 +3162,17 @@ bool AddressSanitizer::instrumentFunction(Function &F,
                     F.getParent()->getDataLayout());
     FunctionModified = true;
   }
-  for (auto *Inst : IntrinToInstrument) {
-    if (!suppressInstrumentationSiteForDebug(NumInstrumented))
-      instrumentMemIntrinsic(Inst);
-    FunctionModified = true;
+  if (TargetTriple.isSPIR()) {
+    for (auto *CI : SyclAllocateLocalMemoryCalls) {
+      instrumentSyclAllocateLocalMemory(CI);
+      FunctionModified = true;
+    }
+  } else {
+    for (auto *Inst : IntrinToInstrument) {
+      if (!suppressInstrumentationSiteForDebug(NumInstrumented))
+        instrumentMemIntrinsic(Inst);
+      FunctionModified = true;
+    }
   }
 
   FunctionStackPoisoner FSP(F, *this);
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3721564890ddb..956a2e940e96b 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -294,6 +294,10 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
   if (Name == "memset" || Name == "memcpy")
     return false;
 
+  // Prevent from asan interception in kernel
+  if (Name == "__asan_set_shadow_local_memory")
+    return false;
+
   // Determine if code size heuristics need to be applied.
   ApplyCodeSizeHeuristics =
       L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
diff --git a/llvm/test/Instrumentation/AddressSanitizer/spir.ll b/llvm/test/Instrumentation/AddressSanitizer/spir.ll
new file mode 100644
index 0000000000000..cee6a67605c5d
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/spir.ll
@@ -0,0 +1,156 @@
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -S | FileCheck %s
+
+; ModuleID = 'spir.cpp'
+source_filename = "spir.cpp"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" }
+%"class.sycl::_V1::detail::array" = type { [1 x i64] }
+%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" }
+
+$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10FillBuffer = comdat any
+
+@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+; CHECK: __AsanShadowMemoryPrivateStart
+
+; Function Attrs: convergent mustprogress norecurse nounwind sanitize_address uwtable
+define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10FillBuffer(ptr addrspace(1) noundef align 8 %_arg_Sum, ptr addrspace(1) noundef align 8 %_arg_Accessor, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_Accessor1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_Accessor2, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_Accessor3) local_unnamed_addr #0 comdat !srcloc !65 !kernel_arg_buffer_location !66 !kernel_arg_runtime_aligned !67 !kernel_arg_exclusive_ptr !67 !sycl_fixed_targets !68 {
+entry:
+  %0 = load i64, ptr %_arg_Accessor3, align 8
+  %1 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !68
+  %call.i10 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef 16, i64 noundef 4) #5, !noalias !69
+  ; CHECK: __asan_set_shadow_local_memory
+  %cmp.i = icmp eq i64 %1, 0
+  br i1 %cmp.i, label %if.then.i, label %_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_.exit
+
+if.then.i:                                        ; preds = %entry
+  call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %call.i10, i8 0, i64 16, i1 false), !noalias !69
+  br label %_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_.exit
+
+_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_.exit: ; preds = %entry, %if.then.i
+  %add.ptr.i = getelementptr inbounds i64, ptr addrspace(1) %_arg_Accessor, i64 %0
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) #5, !noalias !69
+  %cmp.i14 = icmp ult i64 %1, 2147483648
+  tail call void @llvm.assume(i1 %cmp.i14)
+  %arrayidx.i16 = getelementptr inbounds i64, ptr addrspace(1) %add.ptr.i, i64 %1
+  %2 = load i64, ptr addrspace(1) %arrayidx.i16, align 8, !tbaa !72
+  ; CHECK: __asan_load8
+  %arrayidx.i = getelementptr inbounds [4 x i32], ptr addrspace(3) %call.i10, i64 0, i64 %1
+  %3 = load i32, ptr addrspace(3) %arrayidx.i, align 4, !tbaa !76
+  ; CHECK: __asan_load4
+  %conv.i = sext i32 %3 to i64
+  %add.i = add i64 %2, %conv.i
+  %4 = load i64, ptr addrspace(1) %_arg_Sum, align 8, !tbaa !72
+  ; CHECK: __asan_load8
+  %add5.i = add i64 %4, %add.i
+  store i64 %add5.i, ptr addrspace(1) %_arg_Sum, align 8, !tbaa !72
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #1
+
+; Function Attrs: convergent nounwind
+declare dso_local spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef %0, i64 noundef %1) local_unnamed_addr #2
+
+; Function Attrs: convergent nounwind
+declare dso_local spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef %0, i32 noundef %1, i32 noundef %2) local_unnamed_addr #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef %0) #3
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #4
+
+attributes #0 = { convergent mustprogress norecurse nounwind sanitize_address uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "uniform-work-group-size"="true" }
+attributes #1 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #5 = { convergent nounwind }
+
+!llvm.module.flags = !{!0, !1, !2}
+!opencl.spir.version = !{!3}
+!spirv.Source = !{!4}
+!sycl_aspects = !{!5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57, !58, !59, !60, !61, !62, !63}
+!llvm.ident = !{!64}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 2}
+!2 = !{i32 7, !"frame-pointer", i32 2}
+!3 = !{i32 1, i32 2}
+!4 = !{i32 4, i32 100000}
+!5 = !{!"cpu", i32 1}
+!6 = !{!"gpu", i32 2}
+!7 = !{!"accelerator", i32 3}
+!8 = !{!"custom", i32 4}
+!9 = !{!"fp16", i32 5}
+!10 = !{!"fp64", i32 6}
+!11 = !{!"image", i32 9}
+!12 = !{!"online_compiler", i32 10}
+!13 = !{!"online_linker", i32 11}
+!14 = !{!"queue_profiling", i32 12}
+!15 = !{!"usm_device_allocations", i32 13}
+!16 = !{!"usm_host_allocations", i32 14}
+!17 = !{!"usm_shared_allocations", i32 15}
+!18 = !{!"usm_system_allocations", i32 17}
+!19 = !{!"ext_intel_pci_address", i32 18}
+!20 = !{!"ext_intel_gpu_eu_count", i32 19}
+!21 = !{!"ext_intel_gpu_eu_simd_width", i32 20}
+!22 = !{!"ext_intel_gpu_slices", i32 21}
+!23 = !{!"ext_intel_gpu_subslices_per_slice", i32 22}
+!24 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23}
+!25 = !{!"ext_intel_max_mem_bandwidth", i32 24}
+!26 = !{!"ext_intel_mem_channel", i32 25}
+!27 = !{!"usm_atomic_host_allocations", i32 26}
+!28 = !{!"usm_atomic_shared_allocations", i32 27}
+!29 = !{!"atomic64", i32 28}
+!30 = !{!"ext_intel_device_info_uuid", i32 29}
+!31 = !{!"ext_oneapi_srgb", i32 30}
+!32 = !{!"ext_oneapi_native_assert", i32 31}
+!33 = !{!"host_debuggable", i32 32}
+!34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33}
+!35 = !{!"ext_oneapi_cuda_async_barrier", i32 34}
+!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35}
+!37 = !{!"ext_intel_free_memory", i32 36}
+!38 = !{!"ext_intel_device_id", i32 37}
+!39 = !{!"ext_intel_memory_clock_rate", i32 38}
+!40 = !{!"ext_intel_memory_bus_width", i32 39}
+!41 = !{!"emulated", i32 40}
+!42 = !{!"ext_intel_legacy_image", i32 41}
+!43 = !{!"ext_oneapi_bindless_images", i32 42}
+!44 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43}
+!45 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44}
+!46 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45}
+!47 = !{!"ext_oneapi_interop_memory_import", i32 46}
+!48 = !{!"ext_oneapi_interop_memory_export", i32 47}
+!49 = !{!"ext_oneapi_interop_semaphore_import", i32 48}
+!50 = !{!"ext_oneapi_interop_semaphore_export", i32 49}
+!51 = !{!"ext_oneapi_mipmap", i32 50}
+!52 = !{!"ext_oneapi_mipmap_anisotropy", i32 51}
+!53 = !{!"ext_oneapi_mipmap_level_reference", i32 52}
+!54 = !{!"ext_intel_esimd", i32 53}
+!55 = !{!"ext_oneapi_ballot_group", i32 54}
+!56 = !{!"ext_oneapi_fixed_size_group", i32 55}
+!57 = !{!"ext_oneapi_opportunistic_group", i32 56}
+!58 = !{!"ext_oneapi_tangle_group", i32 57}
+!59 = !{!"int64_base_atomics", i32 7}
+!60 = !{!"int64_extended_atomics", i32 8}
+!61 = !{!"usm_system_allocator", i32 17}
+!62 = !{!"usm_restricted_shared_allocations", i32 16}
+!63 = !{!"host", i32 0}
+!64 = !{!"clang version 18.0.0git (https://github.com/intel/llvm.git caecf6b928648a83c8ceb84988231cb246c4365e)"}
+!65 = !{i32 419}
+!66 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
+!67 = !{i1 false, i1 true, i1 false, i1 false, i1 false}
+!68 = !{}
+!69 = !{!70}
+!70 = distinct !{!70, !71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_: %agg.result"}
+!71 = distinct !{!71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_"}
+!72 = !{!73, !73, i64 0}
+!73 = !{!"long", !74, i64 0}
+!74 = !{!"omnipotent char", !75, i64 0}
+!75 = !{!"Simple C++ TBAA"}
+!76 = !{!77, !77, i64 0}
+!77 = !{!"int", !74, i64 0}
diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp
index 50cb225f6cb18..85728be8aabf0 100644
--- a/llvm/tools/sycl-post-link/sycl-post-link.cpp
+++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp
@@ -313,8 +313,9 @@ std::vector<StringRef> getKernelNamesUsingAssert(const Module &M) {
 }
 
 bool isModuleUsingAsan(const Module &M) {
-  auto *AsanInitFunction = M.getFunction("__asan_init");
-  return AsanInitFunction;
+  return llvm::any_of(M.functions(), [](const Function &F) {
+    return F.getName().starts_with("__asan_");
+  });
 }
 
 // Gets reqd_work_group_size information for function Func.
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
index dadccc099e016..16a950af3151a 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
@@ -973,7 +973,7 @@ int main() {
     syclex::if_architecture_is<syclex::architecture::intel_gpu_pvc>([&]{
       // Code for PVC
     }).otherwise([&]{
-      // Fallback code
+      // Generic code
     });
   });
 
@@ -987,7 +987,7 @@ int main() {
                                        syclex::architecture::amd_gpu_gfx1013>([&]{
       // Code for AMD devices between gfx1010 and gfx1013 (inclusive)
     }).otherwise([&]{
-      // Fallback code
+      // Generic code
     });
   });
 }
diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc
index 8d2b31b3ecd32..73c9c5a9ac9bc 100644
--- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc
+++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc
@@ -152,7 +152,7 @@ executes the `if_device_has` function has **all** of the aspects listed in this
 pack.  If the condition is `true`, the implementation calls `fn`.  Otherwise,
 the function `fn` is potentially discarded as described below.
 
-=== Fallback code
+=== Generic code
 
 The value returned by `if_device_has` is an object _F_ of an unspecified type,
 which provides the following member functions:
@@ -200,7 +200,7 @@ void frob() {
   }).else_if_device_has<aspect::bar>([] {
     // code that uses features tied to "bar" aspect
   }).otherwise([] {
-    // fallback code that works on all devices
+    // generic code that works on all devices
   });
 }
 ```
diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h
index 31c6a86f3300d..0c02a7705b51d 100644
--- a/sycl/include/sycl/detail/pi.h
+++ b/sycl/include/sycl/detail/pi.h
@@ -151,9 +151,10 @@
 // 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM
 // 15.43 Changed the signature of piextMemGetNativeHandle to also take a
 // pi_device
+// 15.44 Add coarse-grain memory advice flag for HIP.
 
 #define _PI_H_VERSION_MAJOR 15
-#define _PI_H_VERSION_MINOR 43
+#define _PI_H_VERSION_MINOR 44
 
 #define _PI_STRING_HELPER(a) #a
 #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -582,6 +583,8 @@ typedef enum {
   PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 1 << 7,
   PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 1 << 8,
   PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 1 << 9,
+  PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED = 1 << 10,
+  PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED = 1 << 11,
   PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF,
 } _pi_mem_advice;
 
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index 7de174f03701c..ad28f40d21edd 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,14 +57,14 @@ endif()
 if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
-  # commit 9363574db721d2388c7d76a10edb128764872352
-  # Merge: 553a6b82 5e513738
+  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
+  # commit cd97e172cbbfc411fccb0b80e0fff6f9126574f4
+  # Merge: bd745d10 2a9ded6f
   # Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
-  # Date:   Thu Feb 1 11:50:16 2024 +0000
-  #     Merge pull request #1302 from kbenzie/benie/cl-binary-type-intermediate
-  #     [CL] Handle INTERMEDIATE binary type
-  set(UNIFIED_RUNTIME_TAG 9363574db721d2388c7d76a10edb128764872352)
+  # Date:   Fri Feb 2 14:24:16 2024 +0000
+  #     Merge pull request #1249 from GeorgeWeb/georgi/hip_memadvise_coarse_grained
+  #     [HIP] Implement coarse-grained memory advice for the HIP adapter
+  set(UNIFIED_RUNTIME_TAG cd97e172cbbfc411fccb0b80e0fff6f9126574f4)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
@@ -203,6 +203,13 @@ endif()
 
 if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS)
     add_dependencies(sycl-runtime-libraries ur_adapter_opencl)
+
+    # Install the UR adapters too
+    install(TARGETS ur_adapter_opencl
+      LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev
+      ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev
+      RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev
+    )
 endif()
 
 if ("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS)
diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp
index c19c93a6af53a..4f0c7a8a1f885 100644
--- a/sycl/plugins/unified_runtime/pi2ur.hpp
+++ b/sycl/plugins/unified_runtime/pi2ur.hpp
@@ -3375,6 +3375,12 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
   if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) {
     UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST;
   }
+  if (Advice & PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED) {
+    UrAdvice |= UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
+  }
+  if (Advice & PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED) {
+    UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
+  }
   if (Advice & PI_MEM_ADVICE_RESET) {
     UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT;
   }
diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp
index 92bccc2cea2b2..4dbeaccc9baf2 100644
--- a/sycl/source/detail/pi.cpp
+++ b/sycl/source/detail/pi.cpp
@@ -685,45 +685,39 @@ static uint16_t getELFHeaderType(const unsigned char *ImgData, size_t ImgSize) {
 sycl::detail::pi::PiDeviceBinaryType
 getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize) {
   // Top-level magic numbers for the recognized binary image formats.
-  struct {
-    sycl::detail::pi::PiDeviceBinaryType Fmt;
-    const uint32_t Magic;
-  } Fmts[] = {{PI_DEVICE_BINARY_TYPE_SPIRV, 0x07230203},
-              {PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE, 0xDEC04342},
-              // 'I', 'N', 'T', 'C' ; Intel native
-              {PI_DEVICE_BINARY_TYPE_NATIVE, 0x43544E49}};
-
-  if (ImgSize >= sizeof(Fmts[0].Magic)) {
-    std::remove_const_t<decltype(Fmts[0].Magic)> Hdr = 0;
-    std::copy(ImgData, ImgData + sizeof(Hdr), reinterpret_cast<char *>(&Hdr));
-
-    // Check headers for direct formats.
-    for (const auto &Fmt : Fmts) {
-      if (Hdr == Fmt.Magic)
-        return Fmt.Fmt;
-    }
-
-    // ELF e_type for recognized binary image formats.
-    struct {
-      sycl::detail::pi::PiDeviceBinaryType Fmt;
-      const uint16_t Magic;
-    } ELFFmts[] = {{PI_DEVICE_BINARY_TYPE_NATIVE, 0xFF04},  // OpenCL executable
-                   {PI_DEVICE_BINARY_TYPE_NATIVE, 0xFF12}}; // ZEBIN executable
-
-    // ELF files need to be parsed separately. The header type ends after 18
-    // bytes.
-    if (Hdr == 0x464c457F && ImgSize >= 18) {
-      uint16_t HdrType = getELFHeaderType(ImgData, ImgSize);
-      for (const auto &ELFFmt : ELFFmts) {
-        if (HdrType == ELFFmt.Magic)
-          return ELFFmt.Fmt;
-      }
-      // Newer ZEBIN format does not have a special header type, but can instead
-      // be identified by having a required .ze_info section.
-      if (checkELFSectionPresent(".ze_info", ImgData, ImgSize))
-        return PI_DEVICE_BINARY_TYPE_NATIVE;
-    }
+  auto MatchMagicNumber = [&](auto Number) {
+    return ImgSize >= sizeof(Number) &&
+           std::memcmp(ImgData, &Number, sizeof(Number)) == 0;
+  };
+
+  if (MatchMagicNumber(uint32_t{0x07230203}))
+    return PI_DEVICE_BINARY_TYPE_SPIRV;
+
+  if (MatchMagicNumber(uint32_t{0xDEC04342}))
+    return PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE;
+
+  if (MatchMagicNumber(uint32_t{0x43544E49}))
+    // 'I', 'N', 'T', 'C' ; Intel native
+    return PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE;
+
+  // Check for ELF format, size requirements include data we'll read in case of
+  // succesful match.
+  if (ImgSize >= 18 && MatchMagicNumber(uint32_t{0x464c457F})) {
+    uint16_t ELFHdrType = getELFHeaderType(ImgData, ImgSize);
+    if (ELFHdrType == 0xFF04)
+      // OpenCL executable.
+      return PI_DEVICE_BINARY_TYPE_NATIVE;
+
+    if (ELFHdrType == 0xFF12)
+      // ZEBIN executable.
+      return PI_DEVICE_BINARY_TYPE_NATIVE;
+
+    // Newer ZEBIN format does not have a special header type, but can instead
+    // be identified by having a required .ze_info section.
+    if (checkELFSectionPresent(".ze_info", ImgData, ImgSize))
+      return PI_DEVICE_BINARY_TYPE_NATIVE;
   }
+
   return PI_DEVICE_BINARY_TYPE_NONE;
 }
 
diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp
new file mode 100644
index 0000000000000..84e28fd6e0883
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp
@@ -0,0 +1,40 @@
+// REQUIRES: linux, cpu
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s
+#include <sycl/sycl.hpp>
+
+int main() {
+  sycl::queue Q;
+  constexpr std::size_t N = 12345;
+#if defined(MALLOC_HOST)
+  auto *array = sycl::malloc_host<char>(N, Q);
+#elif defined(MALLOC_SHARED)
+  auto *array = sycl::malloc_shared<char>(N, Q);
+#elif defined(MALLOC_DEVICE)
+  auto *array = sycl::malloc_device<char>(N, Q);
+#elif defined(MALLOC_SYSTEM)
+  auto *array = new char[N];
+#else
+#error "Must provide malloc type to run the test"
+#endif
+
+  Q.submit([&](sycl::handler &h) {
+    h.parallel_for<class MyKernelR_4>(
+        sycl::nd_range<1>(N + 1, 1),
+        [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; });
+  });
+  Q.wait();
+  // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory
+  // CHECK-HOST:   ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory
+  // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory
+  // CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12345, 0, 0\)}}
+  // CHECK: {{  #0 .* .*parallel_for_char.cpp:}}[[@LINE-7]]
+
+  return 0;
+}
diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp
new file mode 100644
index 0000000000000..6644d6ba81f06
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp
@@ -0,0 +1,40 @@
+// REQUIRES: linux, cpu, aspect-fp64
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s
+#include <sycl/sycl.hpp>
+
+int main() {
+  sycl::queue Q;
+  constexpr std::size_t N = 123456;
+#if defined(MALLOC_HOST)
+  auto *array = sycl::malloc_host<double>(N, Q);
+#elif defined(MALLOC_SHARED)
+  auto *array = sycl::malloc_shared<double>(N, Q);
+#elif defined(MALLOC_DEVICE)
+  auto *array = sycl::malloc_device<double>(N, Q);
+#elif defined(MALLOC_SYSTEM)
+  auto *array = new double[N];
+#else
+#error "Must provide malloc type to run the test"
+#endif
+
+  Q.submit([&](sycl::handler &h) {
+    h.parallel_for<class MyKernelR_4>(
+        sycl::nd_range<1>(N + 1, 1),
+        [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; });
+  });
+  Q.wait();
+  // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory
+  // CHECK-HOST:   ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory
+  // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory
+  // CHECK: {{READ of size 8 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(123456, 0, 0\)}}
+  // CHECK: {{  #0 .* .*parallel_for_double.cpp:}}[[@LINE-7]]
+
+  return 0;
+}
diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp
new file mode 100644
index 0000000000000..631effebe4515
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp
@@ -0,0 +1,42 @@
+// REQUIRES: linux, cpu
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s
+#include <sycl/sycl.hpp>
+
+__attribute__((noinline)) void foo(int *array, size_t i) { array[i] = 1; }
+// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory
+// CHECK-HOST:   ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory
+// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory
+// CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(0, 0, 0\) GID\(123, 0, 0\)}}
+// CHECK: {{  #0 foo\(int\*, unsigned long\) .*parallel_for_func.cpp:}}[[@LINE-5]]
+
+int main() {
+  sycl::queue Q;
+  constexpr std::size_t N = 123;
+#if defined(MALLOC_HOST)
+  auto *array = sycl::malloc_host<int>(N, Q);
+#elif defined(MALLOC_SHARED)
+  auto *array = sycl::malloc_shared<int>(N, Q);
+#elif defined(MALLOC_DEVICE)
+  auto *array = sycl::malloc_device<int>(N, Q);
+#elif defined(MALLOC_SYSTEM)
+  auto *array = new int[N];
+#else
+#error "Must provide malloc type to run the test"
+#endif
+
+  Q.submit([&](sycl::handler &h) {
+    h.parallel_for<class MyKernel>(
+        sycl::nd_range<1>(N + 1, 1),
+        [=](sycl::nd_item<1> item) { foo(array, item.get_global_id(0)); });
+  });
+  Q.wait();
+
+  return 0;
+}
diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp
new file mode 100644
index 0000000000000..5fef6ec395bfc
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp
@@ -0,0 +1,40 @@
+// REQUIRES: linux, cpu
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s
+#include <sycl/sycl.hpp>
+
+int main() {
+  sycl::queue Q;
+  constexpr std::size_t N = 1234567;
+#if defined(MALLOC_HOST)
+  auto *array = sycl::malloc_host<int>(N, Q);
+#elif defined(MALLOC_SHARED)
+  auto *array = sycl::malloc_shared<int>(N, Q);
+#elif defined(MALLOC_DEVICE)
+  auto *array = sycl::malloc_device<int>(N, Q);
+#elif defined(MALLOC_SYSTEM)
+  auto *array = new int[N];
+#else
+#error "Must provide malloc type to run the test"
+#endif
+
+  Q.submit([&](sycl::handler &h) {
+    h.parallel_for<class MyKernelR_4>(
+        sycl::nd_range<1>(N + 1, 1),
+        [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; });
+  });
+  Q.wait();
+  // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory
+  // CHECK-HOST:   ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory
+  // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory
+  // CHECK: {{READ of size 4 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(1234567, 0, 0\)}}
+  // CHECK: {{  #0 .* .*parallel_for_int.cpp:}}[[@LINE-7]]
+
+  return 0;
+}
diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp
new file mode 100644
index 0000000000000..85f81619f858f
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp
@@ -0,0 +1,40 @@
+// REQUIRES: linux, cpu
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s
+// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s
+#include <sycl/sycl.hpp>
+
+int main() {
+  sycl::queue Q;
+  constexpr std::size_t N = 123456789;
+#if defined(MALLOC_HOST)
+  auto *array = sycl::malloc_host<short>(N, Q);
+#elif defined(MALLOC_SHARED)
+  auto *array = sycl::malloc_shared<short>(N, Q);
+#elif defined(MALLOC_DEVICE)
+  auto *array = sycl::malloc_device<short>(N, Q);
+#elif defined(MALLOC_SYSTEM)
+  auto *array = new short[N];
+#else
+#error "Must provide malloc type to run the test"
+#endif
+
+  Q.submit([&](sycl::handler &h) {
+    h.parallel_for<class MyKernelR_4>(
+        sycl::nd_range<1>(N + 1, 1),
+        [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; });
+  });
+  Q.wait();
+  // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory
+  // CHECK-HOST:   ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory
+  // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory
+  // CHECK: {{READ of size 2 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(123456789, 0, 0\)}}
+  // CHECK: {{  #0 .* .*parallel_for_short.cpp:}}[[@LINE-7]]
+
+  return 0;
+}
diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local-overflow-1.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local-overflow-1.cpp
new file mode 100644
index 0000000000000..b8278b209aa6a
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local-overflow-1.cpp
@@ -0,0 +1,29 @@
+// REQUIRES: linux, cpu
+// RUN: %{build} %device_sanitizer_flags -g -o %t.out
+// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t.out 2>&1 | FileCheck %s
+#include <sycl/sycl.hpp>
+
+constexpr std::size_t N = 16;
+constexpr std::size_t group_size = 8;
+
+int main() {
+  sycl::queue Q;
+  auto *data = sycl::malloc_host<int>(1, Q);
+
+  Q.submit([&](sycl::handler &h) {
+    h.parallel_for<class MyKernel>(
+        sycl::nd_range<1>(N, group_size), [=](sycl::nd_item<1> item) {
+          sycl::multi_ptr<int[N], sycl::access::address_space::local_space>
+              ptr = sycl::ext::oneapi::group_local_memory<int[N]>(
+                  item.get_group());
+          auto &ref = *ptr;
+          ref[item.get_local_linear_id() * 2 + 4] = 42;
+          // CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Local Memory
+          // CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(6, 0, 0\) GID\(.*, 0, 0\)}}
+          // CHECK: {{  #0 .* .*local-overflow-1.cpp:}}[[@LINE-3]]
+        });
+  });
+
+  Q.wait();
+  return 0;
+}
diff --git a/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp b/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp
index a1d40ad0489a1..062f5952236ad 100644
--- a/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp
+++ b/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp
@@ -3,6 +3,9 @@
 // https://github.com/intel/llvm/issues/7634
 // UNSUPPORTED: hip
 //
+// https://github.com/intel/llvm/issues/8832
+// UNSUPPORTED: cuda
+//
 // FIXME: Remove XFAIL one intel/llvm#11364 is resolved
 // XFAIL: (opencl && gpu)
 
diff --git a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp
index e5db927f33e11..1bf2ecdc98418 100644
--- a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp
+++ b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp
@@ -3,6 +3,9 @@
 // https://github.com/intel/llvm/issues/7634
 // UNSUPPORTED: hip
 //
+// https://github.com/intel/llvm/issues/8832
+// UNSUPPORTED: cuda
+//
 // FIXME: Remove XFAIL one intel/llvm#11364 is resolved
 // XFAIL: (opencl && gpu)
 
diff --git a/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp b/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp
index ab3154380fe07..5e9022054cb33 100644
--- a/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp
+++ b/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp
@@ -10,14 +10,14 @@
 
 // RUN: %clangxx -fsycl %S/Inputs/filter_list_queries.cpp -o %t.out
 
-// RUN: env ONEAPI_DEVICE_SELECTOR="*:acc" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-ONLY
+// RUN: env ONEAPI_DEVICE_SELECTOR="*:fpga" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-ONLY
 // RUN: env ONEAPI_DEVICE_SELECTOR="*:gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-GPU-ONLY
 // RUN: env ONEAPI_DEVICE_SELECTOR="*:cpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-CPU-ONLY
 //
-// RUN: env ONEAPI_DEVICE_SELECTOR="*:acc,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU
-// RUN: env ONEAPI_DEVICE_SELECTOR="*:acc,cpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-CPU
+// RUN: env ONEAPI_DEVICE_SELECTOR="*:fpga,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU
+// RUN: env ONEAPI_DEVICE_SELECTOR="*:fpga,cpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-CPU
 //
-// RUN: env ONEAPI_DEVICE_SELECTOR="*:cpu,acc,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU-CPU
+// RUN: env ONEAPI_DEVICE_SELECTOR="*:cpu,fpga,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU-CPU
 //
 // CHECK-ACC-ONLY: Device: acc
 // CHECK-ACC-ONLY-NOT: Device: cpu
diff --git a/sycl/test-e2e/FilterSelector/select_device.cpp b/sycl/test-e2e/FilterSelector/select_device.cpp
index 7516545ab59fd..ddb725b0523a4 100644
--- a/sycl/test-e2e/FilterSelector/select_device.cpp
+++ b/sycl/test-e2e/FilterSelector/select_device.cpp
@@ -4,7 +4,7 @@
 // RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run-unfiltered-devices} %t.out
 // RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run-unfiltered-devices} %t.out
 // RUN: env ONEAPI_DEVICE_SELECTOR='*:cpu;level_zero:gpu' %{run-unfiltered-devices} %t.out
-// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:acc %{run-unfiltered-devices} %t.out
+// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:fpga %{run-unfiltered-devices} %t.out
 //
 // Checks if only specified device types can be acquired from select_device
 // when ONEAPI_DEVICE_SELECTOR is set
@@ -45,7 +45,7 @@ int main() {
     device d = cs.select_device();
   }
   if (!envVal || forcedPIs == "*" ||
-      forcedPIs.find("acc") != std::string::npos) {
+      forcedPIs.find("fpga") != std::string::npos) {
     accelerator_selector as;
     device d = as.select_device();
   }
diff --git a/sycl/test-e2e/FilterSelector/select_device_acc.cpp b/sycl/test-e2e/FilterSelector/select_device_acc.cpp
index 38d97652f8441..f5b94b535bbd4 100644
--- a/sycl/test-e2e/FilterSelector/select_device_acc.cpp
+++ b/sycl/test-e2e/FilterSelector/select_device_acc.cpp
@@ -1,5 +1,5 @@
 // RUN: %{build} -o %t.out
-// RUN: env ONEAPI_DEVICE_SELECTOR='*:acc' %{run-unfiltered-devices} %t.out
+// RUN: env ONEAPI_DEVICE_SELECTOR='*:fpga' %{run-unfiltered-devices} %t.out
 //
 // Checks if only specified device types can be acquired from select_device
 // when ONEAPI_DEVICE_SELECTOR is set
diff --git a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp
index 6dcc4690880d6..042874b90290c 100644
--- a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp
+++ b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp
@@ -3,6 +3,6 @@
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga,spir64_gen -Xsycl-target-backend "-device *" %S/Inputs/is_compatible_with_env.cpp -o %t.out
 
 // RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out
-// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:acc %{run} %t.out
+// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:fpga %{run} %t.out
 // RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} %t.out
 // RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run} %t.out
diff --git a/sycl/test-e2e/Sampler/basic-rw.cpp b/sycl/test-e2e/Sampler/basic-rw.cpp
index 4e5aae87de067..f73ce7e391ea0 100644
--- a/sycl/test-e2e/Sampler/basic-rw.cpp
+++ b/sycl/test-e2e/Sampler/basic-rw.cpp
@@ -15,8 +15,8 @@
     ONEAPI_DEVICE_SELECTOR=level_zero:gpu ./binx.bin
     ONEAPI_DEVICE_SELECTOR=opencl:cpu ./binx.bin
 
-    ONEAPI_DEVICE_SELECTOR=opecl:acc ../binx.bin    <--  does not support image
-   operations at this time.
+    ONEAPI_DEVICE_SELECTOR=opencl:fpga ../binx.bin    <--  does not support
+   image operations at this time.
 
 */
 
diff --git a/sycl/test-e2e/USM/memadvise_flags.cpp b/sycl/test-e2e/USM/memadvise_flags.cpp
index 542ee7a1ea7a2..d7c28a71b5ca4 100755
--- a/sycl/test-e2e/USM/memadvise_flags.cpp
+++ b/sycl/test-e2e/USM/memadvise_flags.cpp
@@ -59,6 +59,8 @@ int main() {
     valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST);
     valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST);
     valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST);
+    valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED);
+    valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED);
   } else {
     // Skip
     return 0;
diff --git a/sycl/test-e2e/USM/memory_coherency_hip.cpp b/sycl/test-e2e/USM/memory_coherency_hip.cpp
new file mode 100644
index 0000000000000..dec0182812c70
--- /dev/null
+++ b/sycl/test-e2e/USM/memory_coherency_hip.cpp
@@ -0,0 +1,147 @@
+// RUN: %{build} -o %t1.out
+// REQUIRES: hip_amd
+// RUN: %{run} %t1.out
+
+//==---- memory_coherency_hip.cpp  -----------------------------------------==//
+// USM coarse/fine grain memory coherency test for the HIP-AMD backend.
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <sycl/sycl.hpp>
+
+#include <chrono>
+#include <iostream>
+#include <thread>
+
+namespace kernels {
+class SquareKrnl final {
+  int *mPtr;
+
+public:
+  SquareKrnl(int *ptr) : mPtr{ptr} {}
+
+  void operator()(sycl::id<1>) const { *mPtr = (*mPtr) * (*mPtr); }
+};
+
+class CoherencyTestKrnl final {
+  int *mPtr;
+
+public:
+  CoherencyTestKrnl(int *ptr) : mPtr{ptr} {}
+
+  void operator()(sycl::id<1>) const {
+    auto atm = sycl::atomic_ref<int, sycl::memory_order::relaxed,
+                                sycl::memory_scope::device>(mPtr[0]);
+
+    // mPtr was initialized to 1 by the host, now set it to 2.
+    atm.fetch_add(1);
+
+    // spin until mPtr is 3, then change it to 4.
+    int expected{3};
+    int old = atm.load();
+    while (true) {
+      old = atm.load();
+      if (old == expected) {
+        if (atm.compare_exchange_strong(old, 4)) {
+          break;
+        }
+      }
+    }
+  }
+};
+} // namespace kernels
+
+int main() {
+  sycl::queue q{};
+  sycl::device dev = q.get_device();
+  sycl::context ctx = q.get_context();
+  if (!dev.get_info<sycl::info::device::usm_shared_allocations>()) {
+    std::cout << "Shared USM is not supported. Skipping test.\n";
+    return 0;
+  }
+
+  bool coherent{false};
+
+  int *ptr = sycl::malloc_shared<int>(1, q);
+
+  // Coherency test 1
+  //
+  // The following test validates if memory access is fine with memory allocated
+  // using malloc_managed() and COARSE_GRAINED advice set via mem_advise().
+  //
+  // Coarse grained memory is only guaranteed to be coherent outside of GPU
+  // kernels that modify it. Changes applied to coarse-grained memory by a GPU
+  // kernel are only visible to the rest of the system (CPU or other GPUs) when
+  // the kernel has completed. A GPU kernel is only guaranteed to see changes
+  // applied to coarse grained memory by the rest of the system (CPU or other
+  // GPUs) if those changes were made before the kernel launched.
+
+  // Hint to use coarse-grain memory.
+  q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED});
+
+  int init_val{9};
+  int expected{init_val * init_val};
+
+  *ptr = init_val;
+  q.parallel_for(sycl::range{1}, kernels::SquareKrnl{ptr});
+  // Synchronise the underlying stream.
+  q.wait();
+
+  // Check if caches are flushed correctly and same memory is between devices.
+  if (*ptr == expected) {
+    coherent = true;
+  } else {
+    std::cerr << "Coherency test failed. Value: " << *ptr
+              << " (expected: " << expected << ")\n";
+    coherent = false;
+  }
+
+  // Coherency test 2
+  //
+  // The following test validates if fine-grain behavior is observed or not with
+  // memory allocated using malloc_managed().
+  //
+  // Fine grained memory allows CPUs and GPUs to synchronize (via atomics) and
+  // coherently communicate with each other while the GPU kernel is running.
+
+  // Hint to use fine-grain memory.
+  q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED});
+
+  init_val = 1;
+  expected = 4;
+
+  *ptr = init_val;
+  q.parallel_for(sycl::range{1}, kernels::CoherencyTestKrnl{ptr});
+
+  // wait until ptr is 2 from the kernel (or 3 seconds), then increment to 3.
+  while (*ptr == 2) {
+    using std::chrono_literals::operator""s;
+    std::this_thread::sleep_for(3s);
+    break;
+  }
+  *ptr += 1;
+
+  // Synchronise the underlying stream.
+  q.wait();
+
+  // Check if caches are flushed correctly and same memory is between devices.
+  if (*ptr == expected) {
+    coherent &= true;
+  } else {
+    std::cerr << "Coherency test failed. Value: " << *ptr
+              << " (expected: " << expected << ")\n";
+    coherent = false;
+  }
+
+  // Cleanup
+  sycl::free(ptr, q);
+
+  // Check if all coherency tests passed.
+  assert(coherent);
+  // The above assert won't trigger with NDEBUG, so ensure the right exit code.
+  return coherent ? 0 : 1;
+}
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
index 8c35fdfa2b65b..e1b2b4a1ef28d 100644
--- a/sycl/test-e2e/lit.cfg.py
+++ b/sycl/test-e2e/lit.cfg.py
@@ -686,3 +686,7 @@
     lit_config.maxIndividualTestTime = 600
 except ImportError:
     pass
+
+config.substitutions.append(
+    ("%device_sanitizer_flags", "-Xsycl-target-frontend -fsanitize=address")
+)
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
index 3c6b95c1eb4af..d004a37bcc610 100644
--- a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
+++ b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
@@ -32,7 +32,7 @@
 
 // UNSUPPORTED: hip || (windows && level_zero)
 
-// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
+// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <sycl/sycl.hpp>
@@ -41,8 +41,8 @@
 #include "../common.hpp"
 #include "atomic_fixt.hpp"
 
-constexpr size_t numBlocks = 64;
-constexpr size_t numThreads = 256;
+constexpr size_t numBlocks = 1;
+constexpr size_t numThreads = 1;
 constexpr size_t numData = 6;
 
 template <typename T, typename AtomicType>
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
index 6b31bcc626ee0..4ccc67fbff53e 100644
--- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
+++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
@@ -32,7 +32,7 @@
 
 // UNSUPPORTED: hip
 
-// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <iostream>
diff --git a/sycl/unittests/pi/CMakeLists.txt b/sycl/unittests/pi/CMakeLists.txt
index 0c78c9a634010..861fc41069c7e 100644
--- a/sycl/unittests/pi/CMakeLists.txt
+++ b/sycl/unittests/pi/CMakeLists.txt
@@ -1,9 +1,7 @@
 set(CMAKE_CXX_EXTENSIONS OFF)
 
 add_sycl_unittest(PiTests OBJECT
-  EnqueueMemTest.cpp
   PiMock.cpp
-  PlatformTest.cpp
   PiUtility.cpp
   pi_arguments_handler.cpp
   piInteropRetain.cpp
@@ -13,11 +11,3 @@ add_dependencies(PiTests sycl)
 target_include_directories(PiTests PRIVATE SYSTEM ${sycl_inc_dir})
 target_include_directories(PiTests PRIVATE ${sycl_src_dir}/../tools/xpti_helpers)
 
-if("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
-    add_subdirectory(cuda)
-endif()
-
-if("hip" IN_LIST SYCL_ENABLE_PLUGINS)
-    add_subdirectory(hip)
-endif()
-
diff --git a/sycl/unittests/pi/EnqueueMemTest.cpp b/sycl/unittests/pi/EnqueueMemTest.cpp
deleted file mode 100644
index d6439654f7bbb..0000000000000
--- a/sycl/unittests/pi/EnqueueMemTest.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//==---- EnqueueMemTest.cpp --- PI unit tests ------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <gtest/gtest.h>
-#include <sycl/detail/pi.hpp>
-
-using namespace sycl;
-
-namespace {
-class EnqueueMemTest : public testing::TestWithParam<detail::PluginPtr> {
-protected:
-  constexpr static size_t _numElementsX = 8;
-  constexpr static size_t _numElementsY = 4;
-
-  pi_device _device = nullptr;
-  pi_context _context = nullptr;
-  pi_queue _queue = nullptr;
-  pi_mem _mem = nullptr;
-
-  EnqueueMemTest() = default;
-
-  ~EnqueueMemTest() = default;
-
-  void SetUp() override {
-
-    const detail::PluginPtr &plugin = GetParam();
-
-    pi_platform platform = nullptr;
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  1, &platform, nullptr)),
-              PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform, PI_DEVICE_TYPE_DEFAULT, 1, &_device, nullptr)),
-              PI_SUCCESS);
-
-    pi_result result = PI_ERROR_INVALID_VALUE;
-    result = plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-        nullptr, 1u, &_device, nullptr, nullptr, &_context);
-    ASSERT_EQ(result, PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  _context, _device, 0, &_queue)),
-              PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                  _context, PI_MEM_FLAGS_ACCESS_RW,
-                  _numElementsX * _numElementsY * sizeof(pi_int32), nullptr,
-                  &_mem, nullptr)),
-              PI_SUCCESS);
-  }
-
-  void TearDown() override {
-
-    const detail::PluginPtr &plugin = GetParam();
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(_mem)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueRelease>(_queue)),
-              PI_SUCCESS);
-    ASSERT_EQ(
-        (plugin->call_nocheck<detail::PiApiKind::piContextRelease>(_context)),
-        PI_SUCCESS);
-  }
-
-  template <typename T> void TestBufferFill(const T &pattern) {
-
-    const detail::PluginPtr &plugin = GetParam();
-
-    T inValues[_numElementsX] = {};
-
-    for (size_t i = 0; i < _numElementsX; ++i) {
-      ASSERT_NE(pattern, inValues[i]);
-    }
-
-    pi_event event;
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-                  _queue, _mem, PI_TRUE, 0, _numElementsX * sizeof(T), inValues,
-                  0, nullptr, &event)),
-              PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferFill>(
-                  _queue, _mem, &pattern, sizeof(T), 0, sizeof(inValues), 0,
-                  nullptr, &event)),
-              PI_SUCCESS);
-    ASSERT_EQ(
-        (plugin->call_nocheck<detail::PiApiKind::piEventsWait>(1, &event)),
-        PI_SUCCESS);
-
-    T outValues[_numElementsX] = {};
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                  _queue, _mem, PI_TRUE, 0, _numElementsX * sizeof(T),
-                  outValues, 0, nullptr, &event)),
-              PI_SUCCESS);
-
-    for (size_t i = 0; i < _numElementsX; ++i) {
-      ASSERT_EQ(pattern, outValues[i]);
-    }
-  }
-};
-
-INSTANTIATE_TEST_SUITE_P(
-    EnqueueMemTestImpl, EnqueueMemTest,
-    testing::ValuesIn(pi::initializeAndRemoveInvalid()),
-    [](const testing::TestParamInfo<EnqueueMemTest::ParamType> &info) {
-      return pi::GetBackendString(info.param);
-    });
-
-template <typename T> struct vec4 {
-  T x, y, z, w;
-
-  bool operator==(const vec4 &rhs) const {
-    return x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w;
-  }
-
-  bool operator!=(const vec4 &rhs) const { return !(*this == rhs); }
-};
-
-template <typename T> struct vec2 {
-  T x, y;
-
-  bool operator==(const vec2 &rhs) const { return x == rhs.x && y == rhs.y; }
-
-  bool operator!=(const vec2 &rhs) const { return !(*this == rhs); }
-};
-
-TEST_P(EnqueueMemTest, piEnqueueMemBufferFill) {
-
-  TestBufferFill(float{1});
-  TestBufferFill(vec2<float>{1, 2});
-  TestBufferFill(vec4<float>{1, 2, 3, 4});
-
-  TestBufferFill(uint8_t{1});
-  TestBufferFill(vec2<uint8_t>{1, 2});
-  TestBufferFill(vec4<uint8_t>{1, 2, 3, 4});
-
-  TestBufferFill(uint16_t{1});
-  TestBufferFill(vec2<uint16_t>{1, 2});
-  TestBufferFill(vec4<uint16_t>{1, 2, 3, 4});
-
-  TestBufferFill(uint32_t{1});
-  TestBufferFill(vec2<uint32_t>{1, 2});
-  TestBufferFill(vec4<uint32_t>{1, 2, 3, 4});
-}
-} // namespace
diff --git a/sycl/unittests/pi/PlatformTest.cpp b/sycl/unittests/pi/PlatformTest.cpp
deleted file mode 100644
index 61834dbb14fff..0000000000000
--- a/sycl/unittests/pi/PlatformTest.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-//==---- PlatformTest.cpp --- PI unit tests --------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <gtest/gtest.h>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-#include <vector>
-
-namespace {
-
-using namespace sycl;
-
-class PlatformTest : public testing::TestWithParam<detail::PluginPtr> {
-protected:
-  std::vector<pi_platform> _platforms;
-  PlatformTest() : _platforms{} {};
-
-  ~PlatformTest() override = default;
-
-  void SetUp() override {
-
-    const detail::PluginPtr &plugin = GetParam();
-
-    ASSERT_NO_FATAL_FAILURE(Test::SetUp());
-
-    const static char *platform_count_key = "PiPlatformCount";
-
-    pi_uint32 platform_count = 0u;
-
-    // Initialize the logged number of platforms before the following assertion.
-    RecordProperty(platform_count_key, platform_count);
-
-    // TODO: Change the test to check this for all plugins present.
-    // Currently, it is only checking for the first plugin attached.
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &platform_count)),
-              PI_SUCCESS);
-
-    // Overwrite previous log value with queried number of platforms.
-    RecordProperty(platform_count_key, platform_count);
-
-    if (platform_count == 0u) {
-      std::cout << "WARNING: piPlatformsGet does not find any PI platforms.\n";
-
-      // Do not call into OpenCL below as a platform count of 0 might fail with
-      // OpenCL implementations if the platforms pointer is not `nullptr`.
-      return;
-    }
-
-    _platforms.resize(platform_count, nullptr);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  _platforms.size(), _platforms.data(), nullptr)),
-              PI_SUCCESS);
-  }
-};
-
-INSTANTIATE_TEST_SUITE_P(
-    PlatformTestImpl, PlatformTest,
-    testing::ValuesIn(pi::initializeAndRemoveInvalid()),
-    [](const testing::TestParamInfo<PlatformTest::ParamType> &info) {
-      return pi::GetBackendString(info.param);
-    });
-
-TEST_P(PlatformTest, piPlatformsGet) {
-  // The PlatformTest::SetUp method is called to prepare for this test case
-  // implicitly tests the calls to `piPlatformsGet`.
-}
-
-TEST_P(PlatformTest, piPlatformGetInfo) {
-
-  const detail::PluginPtr &plugin = GetParam();
-
-  auto get_info_test = [&](pi_platform platform, _pi_platform_info info) {
-    size_t reported_string_length = 0;
-    EXPECT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformGetInfo>(
-                  platform, info, 0u, nullptr, &reported_string_length)),
-              PI_SUCCESS);
-
-    // Create a larger result string to catch overwrites.
-    std::vector<char> param_value(reported_string_length * 2u, '\0');
-    EXPECT_EQ(
-        (plugin->call_nocheck<detail::PiApiKind::piPlatformGetInfo>(
-            platform, info, param_value.size(), param_value.data(), nullptr)),
-        PI_SUCCESS)
-        << "piPlatformGetInfo for " << detail::pi::platformInfoToString(info)
-        << " failed.\n";
-
-    const auto returned_string_length = strlen(param_value.data()) + 1;
-
-    EXPECT_EQ(returned_string_length, reported_string_length)
-        << "Returned string length " << returned_string_length
-        << " does not equal reported string length " << reported_string_length
-        << ".\n";
-  };
-
-  for (const auto &platform : _platforms) {
-    get_info_test(platform, PI_PLATFORM_INFO_NAME);
-    get_info_test(platform, PI_PLATFORM_INFO_VENDOR);
-    get_info_test(platform, PI_PLATFORM_INFO_PROFILE);
-    get_info_test(platform, PI_PLATFORM_INFO_VERSION);
-    get_info_test(platform, PI_PLATFORM_INFO_EXTENSIONS);
-    get_info_test(platform, PI_EXT_PLATFORM_INFO_BACKEND);
-  }
-}
-} // namespace
diff --git a/sycl/unittests/pi/cuda/CMakeLists.txt b/sycl/unittests/pi/cuda/CMakeLists.txt
deleted file mode 100644
index 7808340cc4302..0000000000000
--- a/sycl/unittests/pi/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-add_sycl_unittest(PiCudaTests OBJECT
-  test_base_objects.cpp
-  test_commands.cpp
-  test_contexts.cpp
-  test_device.cpp
-  test_interop_get_native.cpp
-  test_kernels.cpp
-  test_mem_obj.cpp
-  test_primary_context.cpp
-  test_sampler_properties.cpp
-)
-
-add_dependencies(PiCudaTests sycl)
-
-target_compile_definitions(PiCudaTests
-  PRIVATE
-    GTEST_HAS_COMBINE=1)
-
-target_include_directories(PiCudaTests
-  PRIVATE
-    "../"
-    "${sycl_inc_dir}/sycl/detail/"
-    "${sycl_inc_dir}"
-    "${sycl_plugin_dir}/cuda/"
-    "${sycl_plugin_dir}/unified_runtime/"
-)
-
-target_link_libraries(PiCudaTests
-  PRIVATE
-    cudadrv
-    UnifiedRuntime-Headers
-)
diff --git a/sycl/unittests/pi/cuda/CudaUtils.hpp b/sycl/unittests/pi/cuda/CudaUtils.hpp
deleted file mode 100644
index f7cb8b40492d3..0000000000000
--- a/sycl/unittests/pi/cuda/CudaUtils.hpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#pragma once
-
-#include <cuda.h>
-
-namespace pi {
-
-// utility function to clear the CUDA context stack
-inline void clearCudaContext() {
-  CUcontext ctxt = nullptr;
-  do {
-    cuCtxSetCurrent(nullptr);
-    cuCtxGetCurrent(&ctxt);
-  } while (ctxt != nullptr);
-}
-
-} // namespace pi
diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp
deleted file mode 100644
index d0799a08cfff3..0000000000000
--- a/sycl/unittests/pi/cuda/test_base_objects.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-//==---- test_base_objects.cpp --- PI unit tests ---------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_cuda.hpp>
-#include <sycl/detail/cuda_definitions.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-#include <thread>
-
-const unsigned int LATEST_KNOWN_CUDA_DRIVER_API_VERSION = 3020u;
-
-using namespace sycl;
-
-class CudaBaseObjectsTest : public ::testing::Test {
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-  }
-
-  CudaBaseObjectsTest() = default;
-
-  ~CudaBaseObjectsTest() = default;
-};
-
-TEST_F(CudaBaseObjectsTest, piContextCreate) {
-  pi_uint32 numPlatforms = 0;
-  pi_platform platform = nullptr;
-  pi_device device;
-  ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                0, nullptr, &numPlatforms)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                numPlatforms, &platform, nullptr)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_GE(numPlatforms, 1u);
-  ASSERT_NE(platform, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
-            PI_SUCCESS)
-      << "piDevicesGet failed.\n";
-
-  pi_context ctxt = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device, nullptr, nullptr, &ctxt)),
-            PI_SUCCESS)
-      << "piContextCreate failed.\n";
-
-  EXPECT_NE(ctxt, nullptr);
-  EXPECT_EQ(ctxt->get_device(), device);
-
-  // Retrieve the cuCtxt to check information is correct
-  CUcontext cudaContext = ctxt->get();
-  unsigned int version = 0;
-  cuCtxGetApiVersion(cudaContext, &version);
-  EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaBaseObjectsTest, piContextCreateChildThread) {
-  pi_uint32 numPlatforms = 0;
-  pi_platform platform;
-  pi_device device;
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                0, nullptr, &numPlatforms)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                numPlatforms, &platform, nullptr)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
-            PI_SUCCESS);
-
-  pi_context ctxt;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device, nullptr, nullptr, &ctxt)),
-            PI_SUCCESS);
-  EXPECT_NE(ctxt, nullptr);
-
-  // Retrieve the cuCtxt to check information is correct
-  auto checkValue = [=]() {
-    CUcontext cudaContext = ctxt->get();
-    unsigned int version = 0;
-    auto cuErr = cuCtxGetApiVersion(cudaContext, &version);
-    EXPECT_EQ(cuErr, CUDA_SUCCESS);
-    EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
-
-    // The current context is different from the current thread
-    CUcontext current;
-    cuErr = cuCtxGetCurrent(&current);
-    EXPECT_EQ(cuErr, CUDA_SUCCESS);
-    EXPECT_NE(cudaContext, current);
-
-    // Set the context from PI API as the current one
-    cuErr = cuCtxPushCurrent(cudaContext);
-    EXPECT_EQ(cuErr, CUDA_SUCCESS);
-
-    cuErr = cuCtxGetCurrent(&current);
-    EXPECT_EQ(cuErr, CUDA_SUCCESS);
-    EXPECT_EQ(cudaContext, current);
-  };
-  auto callContextFromOtherThread = std::thread(checkValue);
-
-  callContextFromOtherThread.join();
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
-            PI_SUCCESS);
-}
diff --git a/sycl/unittests/pi/cuda/test_commands.cpp b/sycl/unittests/pi/cuda/test_commands.cpp
deleted file mode 100644
index 6c794fe51c899..0000000000000
--- a/sycl/unittests/pi/cuda/test_commands.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-//==---- test_commands.cpp --- PI unit tests -------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "CudaUtils.hpp"
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_cuda.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct CudaCommandsTest : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-  pi_queue queue_;
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi::clearCudaContext();
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    ASSERT_NE(context_, nullptr);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context_, device_, 0, &queue_)),
-              PI_SUCCESS);
-    ASSERT_NE(queue_, nullptr);
-    auto tmpCtxt = queue_->get_context();
-    ASSERT_EQ(tmpCtxt, context_);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  CudaCommandsTest() = default;
-
-  ~CudaCommandsTest() = default;
-};
-
-TEST_F(CudaCommandsTest, PIEnqueueReadBufferBlocking) {
-  constexpr const size_t memSize = 10u;
-  constexpr const size_t bytes = memSize * sizeof(int);
-  const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-  int output[memSize] = {};
-
-  pi_mem memObj;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-          context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)),
-      PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-                queue_, memObj, true, 0, bytes, data, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                queue_, memObj, true, 0, bytes, output, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  bool isSame =
-      std::equal(std::begin(output), std::end(output), std::begin(data));
-  EXPECT_TRUE(isSame);
-  if (!isSame) {
-    std::for_each(std::begin(output), std::end(output),
-                  [](int &elem) { std::cout << elem << ","; });
-    std::cout << std::endl;
-  }
-}
-
-TEST_F(CudaCommandsTest, PIEnqueueReadBufferNonBlocking) {
-  constexpr const size_t memSize = 10u;
-  constexpr const size_t bytes = memSize * sizeof(int);
-  const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-  int output[memSize] = {};
-
-  pi_mem memObj;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-          context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)),
-      PI_SUCCESS);
-
-  pi_event cpIn, cpOut;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-                queue_, memObj, false, 0, bytes, data, 0, nullptr, &cpIn)),
-            PI_SUCCESS);
-  ASSERT_NE(cpIn, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                queue_, memObj, false, 0, bytes, output, 0, nullptr, &cpOut)),
-            PI_SUCCESS);
-  ASSERT_NE(cpOut, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEventsWait>(1, &cpOut)),
-            PI_SUCCESS);
-
-  bool isSame =
-      std::equal(std::begin(output), std::end(output), std::begin(data));
-  EXPECT_TRUE(isSame);
-  if (!isSame) {
-    std::for_each(std::begin(output), std::end(output),
-                  [](int &elem) { std::cout << elem << ","; });
-    std::cout << std::endl;
-  }
-}
diff --git a/sycl/unittests/pi/cuda/test_contexts.cpp b/sycl/unittests/pi/cuda/test_contexts.cpp
deleted file mode 100644
index 7113537ebf147..0000000000000
--- a/sycl/unittests/pi/cuda/test_contexts.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-//==---- test_contexts.cpp --- PI unit tests -------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include <cuda.h>
-
-#include "CudaUtils.hpp"
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_cuda.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct CudaContextsTest : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-
-  pi_platform platform_;
-  pi_device device_;
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-  }
-
-  void TearDown() override {}
-
-  CudaContextsTest() = default;
-
-  ~CudaContextsTest() = default;
-};
-
-TEST_F(CudaContextsTest, ContextLifetime) {
-  // start with no active context
-  pi::clearCudaContext();
-
-  // create a context
-  pi_context context;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context)),
-            PI_SUCCESS);
-  ASSERT_NE(context, nullptr);
-
-  // create a queue from the context, this should use the ScopedContext
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-
-  // ensure the queue has the correct context
-  ASSERT_EQ(context, queue->get_context());
-
-  // check that the context is now the active CUDA context
-  CUcontext cudaCtxt = nullptr;
-  cuCtxGetCurrent(&cudaCtxt);
-  ASSERT_EQ(cudaCtxt, context->get());
-
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-  plugin->call<detail::PiApiKind::piContextRelease>(context);
-
-  // check that the context was cleaned up properly by the destructor
-  cuCtxGetCurrent(&cudaCtxt);
-  ASSERT_EQ(cudaCtxt, nullptr);
-}
-
-TEST_F(CudaContextsTest, ContextLifetimeExisting) {
-  // start by setting up a CUDA context on the thread
-  CUcontext original;
-  cuCtxCreate(&original, CU_CTX_MAP_HOST, device_->get());
-
-  // ensure the CUDA context is active
-  CUcontext current = nullptr;
-  cuCtxGetCurrent(&current);
-  ASSERT_EQ(original, current);
-
-  // create a PI context
-  pi_context context;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context)),
-            PI_SUCCESS);
-  ASSERT_NE(context, nullptr);
-
-  // create a queue from the context, this should use the ScopedContext
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-
-  // ensure the queue has the correct context
-  ASSERT_EQ(context, queue->get_context());
-
-  // check that the context is now the active CUDA context
-  cuCtxGetCurrent(&current);
-  ASSERT_EQ(current, context->get());
-
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-  plugin->call<detail::PiApiKind::piContextRelease>(context);
-
-  // check that the context was cleaned up, the old context will be restored
-  // automatically by cuCtxDestroy in piContextRelease, as it was pushed on the
-  // stack bu cuCtxCreate
-  cuCtxGetCurrent(&current);
-  ASSERT_EQ(current, original);
-
-  // release original context
-  cuCtxDestroy(original);
-}
-
-// In some cases (for host_task), the SYCL runtime may call PI API functions
-// from threads of the thread pool, this can cause issues because with the CUDA
-// plugin these functions will set an active CUDA context on these threads, but
-// never clean it up, as it will only get cleaned up in the main thread.
-//
-// So the following test aims to reproduce the scenario where there is a
-// dangling deleted context in a separate thread and seeing if the PI calls are
-// still able to work correctly in that thread.
-TEST_F(CudaContextsTest, ContextThread) {
-  // start with no active context
-  pi::clearCudaContext();
-
-  // create two PI contexts
-  pi_context context1;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context1)),
-            PI_SUCCESS);
-  ASSERT_NE(context1, nullptr);
-
-  pi_context context2;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context2)),
-            PI_SUCCESS);
-  ASSERT_NE(context2, nullptr);
-
-  // setup synchronization variables between the main thread and the testing
-  // thread
-  std::mutex m;
-  std::condition_variable cv;
-  bool released = false;
-  bool thread_done = false;
-
-  // create a testing thread that will create a queue with the first context,
-  // release the queue, then wait for the main thread to release the first
-  // context, and then create and release another queue with the second context
-  // this time
-  auto test_thread = std::thread([&] {
-    CUcontext current = nullptr;
-
-    // create a queue with the first context
-    pi_queue queue;
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context1, device_, 0, &queue)),
-              PI_SUCCESS);
-    ASSERT_NE(queue, nullptr);
-
-    // ensure the queue has the correct context
-    ASSERT_EQ(context1, queue->get_context());
-
-    // check that the first context is now the active CUDA context
-    cuCtxGetCurrent(&current);
-    ASSERT_EQ(current, context1->get());
-
-    plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-
-    // mark the first set of processing as done and notify the main thread
-    std::unique_lock<std::mutex> lock(m);
-    thread_done = true;
-    lock.unlock();
-    cv.notify_one();
-
-    // wait for the main thread to release the first context
-    lock.lock();
-    cv.wait(lock, [&] { return released; });
-
-    // check that the first context is still active, this is because deleting a
-    // context only cleans up the current thread
-    cuCtxGetCurrent(&current);
-    ASSERT_EQ(current, context1->get());
-
-    // create a queue with the second context
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context2, device_, 0, &queue)),
-              PI_SUCCESS);
-    ASSERT_NE(queue, nullptr);
-
-    // ensure the queue has the correct context
-    ASSERT_EQ(context2, queue->get_context());
-
-    // check that the second context is now the active CUDA context
-    cuCtxGetCurrent(&current);
-    ASSERT_EQ(current, context2->get());
-
-    plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-  });
-
-  // wait for the thread to be done with the first queue to release the first
-  // context
-  std::unique_lock<std::mutex> lock(m);
-  cv.wait(lock, [&] { return thread_done; });
-  plugin->call<detail::PiApiKind::piContextRelease>(context1);
-
-  // notify the other thread that the context was released
-  released = true;
-  lock.unlock();
-  cv.notify_one();
-
-  // wait for the thread to finish
-  test_thread.join();
-
-  plugin->call<detail::PiApiKind::piContextRelease>(context2);
-
-  // check that there is no context set on the main thread
-  CUcontext current = nullptr;
-  cuCtxGetCurrent(&current);
-  ASSERT_EQ(current, nullptr);
-}
diff --git a/sycl/unittests/pi/cuda/test_device.cpp b/sycl/unittests/pi/cuda/test_device.cpp
deleted file mode 100644
index 9ddb62ca1a512..0000000000000
--- a/sycl/unittests/pi/cuda/test_device.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-//==---- test_device.cpp --- PI unit tests ---------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_cuda.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct CudaDeviceTests : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    EXPECT_NE(context_, nullptr);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  CudaDeviceTests() = default;
-  ~CudaDeviceTests() = default;
-};
-
-TEST_F(CudaDeviceTests, PIDeviceGetInfoSimple) {
-
-  size_t return_size = 0;
-  pi_device_type device_type;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_TYPE, sizeof(pi_device_type),
-                &device_type, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(return_size, sizeof(pi_device_type));
-  EXPECT_EQ(
-      device_type,
-      PI_DEVICE_TYPE_GPU); // backend pre-defined value, device must be a GPU
-
-  pi_device parent_device = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_PARENT_DEVICE, sizeof(pi_device),
-                &parent_device, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(return_size, sizeof(pi_device));
-  EXPECT_EQ(parent_device,
-            nullptr); // backend pre-set value, device cannot have a parent
-
-  pi_platform platform = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_PLATFORM, sizeof(pi_platform),
-                &platform, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(return_size, sizeof(pi_platform));
-  EXPECT_EQ(platform, platform_); // test fixture device was created from the
-                                  // test fixture platform
-
-  cl_device_partition_property device_partition_property = -1;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_PARTITION_TYPE,
-                sizeof(cl_device_partition_property),
-                &device_partition_property, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(device_partition_property,
-            0); // PI CUDA backend will not support device partitioning, this
-                // function should just return 0.
-  EXPECT_EQ(return_size, sizeof(cl_device_partition_property));
-}
diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp
deleted file mode 100644
index 903d44043cda6..0000000000000
--- a/sycl/unittests/pi/cuda/test_interop_get_native.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-//==------- test_interop_get_native.cpp - SYCL CUDA get_native tests -------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "TestGetPlatforms.hpp"
-#include <sycl/sycl.hpp>
-
-#include <iostream>
-
-using namespace sycl;
-
-struct CudaInteropGetNativeTests : public ::testing::TestWithParam<platform> {
-
-protected:
-  std::unique_ptr<queue> syclQueue_;
-  device syclDevice_;
-
-  void SetUp() override {
-    syclDevice_ = GetParam().get_devices()[0];
-    syclQueue_ = std::unique_ptr<queue>{new queue{syclDevice_}};
-  }
-
-  void TearDown() override { syclQueue_.reset(); }
-};
-
-TEST_P(CudaInteropGetNativeTests, getNativeDevice) {
-  CUdevice cudaDevice = get_native<backend::ext_oneapi_cuda>(syclDevice_);
-  char cudaDeviceName[2] = {0, 0};
-  CUresult result = cuDeviceGetName(cudaDeviceName, 2, cudaDevice);
-  ASSERT_EQ(result, CUDA_SUCCESS);
-  ASSERT_NE(cudaDeviceName[0], 0);
-}
-
-TEST_P(CudaInteropGetNativeTests, getNativeContext) {
-  CUcontext cudaContext =
-      get_native<backend::ext_oneapi_cuda>(syclQueue_->get_context());
-  ASSERT_NE(cudaContext, nullptr);
-}
-
-TEST_P(CudaInteropGetNativeTests, getNativeQueue) {
-  CUstream cudaStream = get_native<backend::ext_oneapi_cuda>(*syclQueue_);
-  ASSERT_NE(cudaStream, nullptr);
-
-  CUcontext streamContext = nullptr;
-  CUresult result = cuStreamGetCtx(cudaStream, &streamContext);
-  ASSERT_EQ(result, CUDA_SUCCESS);
-
-  CUcontext cudaContext =
-      get_native<backend::ext_oneapi_cuda>(syclQueue_->get_context());
-  ASSERT_EQ(streamContext, cudaContext);
-}
-
-TEST_P(CudaInteropGetNativeTests, interopTaskGetMem) {
-  buffer<int, 1> syclBuffer(range<1>{1});
-  syclQueue_->submit([&](handler &cgh) {
-    auto syclAccessor = syclBuffer.get_access<access::mode::read>(cgh);
-    cgh.host_task([=](interop_handle ih) {
-      CUdeviceptr cudaPtr =
-          ih.get_native_mem<backend::ext_oneapi_cuda>(syclAccessor);
-      CUdeviceptr cudaPtrBase;
-      size_t cudaPtrSize = 0;
-      CUcontext cudaContext =
-          get_native<backend::ext_oneapi_cuda>(syclQueue_->get_context());
-      ASSERT_EQ(CUDA_SUCCESS, cuCtxPushCurrent(cudaContext));
-      ASSERT_EQ(CUDA_SUCCESS,
-                cuMemGetAddressRange(&cudaPtrBase, &cudaPtrSize, cudaPtr));
-      ASSERT_EQ(CUDA_SUCCESS, cuCtxPopCurrent(nullptr));
-      ASSERT_EQ(sizeof(int), cudaPtrSize);
-    });
-  });
-}
-
-TEST_P(CudaInteropGetNativeTests, interopTaskGetQueue) {
-  CUstream cudaStream = get_native<backend::ext_oneapi_cuda>(*syclQueue_);
-  syclQueue_->submit([&](handler &cgh) {
-    cgh.host_task([=](interop_handle ih) {
-      CUstream cudaInteropStream =
-          ih.get_native_queue<backend::ext_oneapi_cuda>();
-      ASSERT_EQ(cudaInteropStream, cudaStream);
-    });
-  });
-}
-
-TEST_P(CudaInteropGetNativeTests, hostTaskGetNativeMem) {
-  buffer<int, 1> syclBuffer(range<1>{1});
-  syclQueue_->submit([&](handler &cgh) {
-    auto syclAccessor = syclBuffer.get_access<access::mode::read>(cgh);
-    cgh.host_task([=](interop_handle ih) {
-      CUdeviceptr cudaPtr =
-          ih.get_native_mem<backend::ext_oneapi_cuda>(syclAccessor);
-      CUdeviceptr cudaPtrBase;
-      size_t cudaPtrSize = 0;
-      CUcontext cudaContext =
-          get_native<backend::ext_oneapi_cuda>(syclQueue_->get_context());
-      ASSERT_EQ(CUDA_SUCCESS, cuCtxPushCurrent(cudaContext));
-      ASSERT_EQ(CUDA_SUCCESS,
-                cuMemGetAddressRange(&cudaPtrBase, &cudaPtrSize, cudaPtr));
-      ASSERT_EQ(CUDA_SUCCESS, cuCtxPopCurrent(nullptr));
-      ASSERT_EQ(sizeof(int), cudaPtrSize);
-    });
-  });
-}
-
-TEST_P(CudaInteropGetNativeTests, hostTaskGetNativeQueue) {
-  CUstream cudaStream = get_native<backend::ext_oneapi_cuda>(*syclQueue_);
-  syclQueue_->submit([&](handler &cgh) {
-    cgh.host_task([=](interop_handle ih) {
-      CUstream cudaInteropStream =
-          ih.get_native_queue<backend::ext_oneapi_cuda>();
-      ASSERT_EQ(cudaInteropStream, cudaStream);
-    });
-  });
-}
-
-TEST_P(CudaInteropGetNativeTests, hostTaskGetNativeContext) {
-  CUcontext cudaContext =
-      get_native<backend::ext_oneapi_cuda>(syclQueue_->get_context());
-  syclQueue_->submit([&](handler &cgh) {
-    cgh.host_task([=](interop_handle ih) {
-      CUcontext cudaInteropContext =
-          ih.get_native_context<backend::ext_oneapi_cuda>();
-      ASSERT_EQ(cudaInteropContext, cudaContext);
-    });
-  });
-}
-
-INSTANTIATE_TEST_SUITE_P(
-    OnCudaPlatform, CudaInteropGetNativeTests,
-    ::testing::ValuesIn(pi::getPlatformsWithName("CUDA BACKEND")));
diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp
deleted file mode 100644
index 736e266b6566e..0000000000000
--- a/sycl/unittests/pi/cuda/test_kernels.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-//==---- test_kernels.cpp --- PI unit tests --------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_cuda.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-// PI CUDA kernels carry an additional argument for the implicit global offset.
-#define NUM_IMPLICIT_ARGS 1
-
-using namespace sycl;
-
-struct CudaKernelsTest : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-  pi_queue queue_;
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    ASSERT_NE(context_, nullptr);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context_, device_, 0, &queue_)),
-              PI_SUCCESS);
-    ASSERT_NE(queue_, nullptr);
-    ASSERT_EQ(queue_->get_context(), context_);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  CudaKernelsTest() = default;
-
-  ~CudaKernelsTest() = default;
-};
-
-const char *ptxSource = "\n\
-.version 3.2\n\
-.target sm_20\n\
-.address_size 64\n\
-.visible .entry _Z8myKernelPi(\n\
-	.param .u64 _Z8myKernelPi_param_0\n\
-)\n\
-{\n\
-	.reg .s32 	%r<5>;\n\
-	.reg .s64 	%rd<5>;\n\
-	ld.param.u64 	%rd1, [_Z8myKernelPi_param_0];\n\
-	cvta.to.global.u64 	%rd2, %rd1;\n\
-	.loc 1 3 1\n\
-	mov.u32 	%r1, %ntid.x;\n\
-	mov.u32 	%r2, %ctaid.x;\n\
-	mov.u32 	%r3, %tid.x;\n\
-	mad.lo.s32 	%r4, %r1, %r2, %r3;\n\
-	mul.wide.s32 	%rd3, %r4, 4;\n\
-	add.s64 	%rd4, %rd2, %rd3;\n\
-	.loc 1 4 1\n\
-	st.global.u32 	[%rd4], %r4;\n\
-	.loc 1 5 2\n\
-	ret;\n\
-    ret;\
-\n\
-}\
-\n\
-";
-
-const char *twoParams = "\n\
-.version 3.2\n\
-.target sm_20\n\
-.address_size 64\n\
-.visible .entry twoParamKernel(\n\
-	.param .u64 twoParamKernel_param_0,\n\
-  .param .u64 twoParamKernel_param_1\n\
-)\n\
-{\n\
-  ret;\
-  \n\
-}\n\
-";
-
-const char *threeParamsTwoLocal = "\n\
-.version 3.2\n\
-.target sm_20\n\
-.address_size 64\n\
-.visible .entry twoParamKernelLocal(\n\
-	.param .u64 twoParamKernel_param_0,\n\
-  .param .u32 twoParamKernel_param_1,\n\
-  .param .u32 twoParamKernel_param_2\n\
-)\n\
-{\n\
-  ret;\
-  \n\
-}\n\
-";
-
-TEST_F(CudaKernelsTest, PICreateProgramAndKernel) {
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0,
-          nullptr, &binary_status, &prog)),
-      PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "_Z8myKernelPi", &kern)),
-            PI_SUCCESS);
-  ASSERT_NE(kern, nullptr);
-}
-
-TEST_F(CudaKernelsTest, PICreateProgramAndKernelWithMetadata) {
-
-  std::vector<uint32_t> reqdWorkGroupSizeMD;
-  reqdWorkGroupSizeMD.reserve(5);
-  // 64-bit representing bit size
-  reqdWorkGroupSizeMD.push_back(96);
-  reqdWorkGroupSizeMD.push_back(0);
-  // reqd_work_group_size x
-  reqdWorkGroupSizeMD.push_back(8);
-  // reqd_work_group_size y
-  reqdWorkGroupSizeMD.push_back(16);
-  // reqd_work_group_size z
-  reqdWorkGroupSizeMD.push_back(32);
-
-  const char *reqdWorkGroupSizeMDConstName =
-      "_Z8myKernelPi@reqd_work_group_size";
-  std::vector<char> reqdWorkGroupSizeMDName(
-      reqdWorkGroupSizeMDConstName,
-      reqdWorkGroupSizeMDConstName + strlen(reqdWorkGroupSizeMDConstName) + 1);
-  _pi_device_binary_property_struct reqdWorkGroupSizeMDProp = {
-      reqdWorkGroupSizeMDName.data(), reqdWorkGroupSizeMD.data(),
-      pi_property_type::PI_PROPERTY_TYPE_BYTE_ARRAY,
-      sizeof(std::uint64_t) + sizeof(std::uint32_t) * 3};
-  pi_device_binary_property reqdWorkGroupSizeMDPropPointer =
-      &reqdWorkGroupSizeMDProp;
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 1,
-          &reqdWorkGroupSizeMDPropPointer, &binary_status, &prog)),
-      PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "_Z8myKernelPi", &kern)),
-            PI_SUCCESS);
-  ASSERT_NE(kern, nullptr);
-
-  size_t compileWGSize[3] = {0};
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelGetGroupInfo>(
-                kern, device_, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE,
-                sizeof(size_t) * 3, compileWGSize, nullptr)),
-            PI_SUCCESS);
-  for (int i = 0; i < 3; ++i) {
-    ASSERT_EQ(compileWGSize[i], reqdWorkGroupSizeMD[i + 2]);
-  }
-}
-
-TEST_F(CudaKernelsTest, PIKernelArgumentSimple) {
-
-  pi_program prog;
-  /// NOTE: `binary_status` currently unsused in the CUDA backend but in case we
-  /// use it at some point in the future, pass it anyway and check the result.
-  /// Same goes for all the other tests in this file.
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0,
-          nullptr, &binary_status, &prog)),
-      PI_SUCCESS);
-  ASSERT_EQ(binary_status, PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "_Z8myKernelPi", &kern)),
-            PI_SUCCESS);
-
-  int number = 10;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 0, sizeof(int), &number)),
-            PI_SUCCESS);
-  const auto &kernArgs = kern->get_arg_indices();
-  ASSERT_EQ(kernArgs.size(), (size_t)1 + NUM_IMPLICIT_ARGS);
-  int storedValue = *(static_cast<const int *>(kernArgs[0]));
-  ASSERT_EQ(storedValue, number);
-}
-
-TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) {
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0,
-          nullptr, &binary_status, &prog)),
-      PI_SUCCESS);
-  ASSERT_EQ(binary_status, PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "_Z8myKernelPi", &kern)),
-            PI_SUCCESS);
-
-  int number = 10;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 0, sizeof(int), &number)),
-            PI_SUCCESS);
-  const auto &kernArgs = kern->get_arg_indices();
-  ASSERT_GT(kernArgs.size(), (size_t)0 + NUM_IMPLICIT_ARGS);
-  int storedValue = *(static_cast<const int *>(kernArgs[0]));
-  ASSERT_EQ(storedValue, number);
-
-  int otherNumber = 934;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 0, sizeof(int), &otherNumber)),
-            PI_SUCCESS);
-  const auto &kernArgs2 = kern->get_arg_indices();
-  ASSERT_EQ(kernArgs2.size(), (size_t)1 + NUM_IMPLICIT_ARGS);
-  storedValue = *(static_cast<const int *>(kernArgs2[0]));
-  ASSERT_EQ(storedValue, otherNumber);
-}
-
-TEST_F(CudaKernelsTest, PIKernelSetMemObj) {
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0,
-          nullptr, &binary_status, &prog)),
-      PI_SUCCESS);
-  ASSERT_EQ(binary_status, PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "_Z8myKernelPi", &kern)),
-            PI_SUCCESS);
-
-  size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 0, sizeof(pi_mem), &memObj)),
-            PI_SUCCESS);
-  const auto &kernArgs = kern->get_arg_indices();
-  ASSERT_EQ(kernArgs.size(), (size_t)1 + NUM_IMPLICIT_ARGS);
-  pi_mem storedValue = *(static_cast<pi_mem *>(kernArgs[0]));
-  ASSERT_EQ(storedValue, memObj);
-}
-
-TEST_F(CudaKernelsTest, PIkerneldispatch) {
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0,
-          nullptr, &binary_status, &prog)),
-      PI_SUCCESS);
-  ASSERT_EQ(binary_status, PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "_Z8myKernelPi", &kern)),
-            PI_SUCCESS);
-
-  size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piextKernelSetArgMemObj>(
-                kern, 0, nullptr, &memObj)),
-            PI_SUCCESS);
-
-  size_t workDim = 1;
-  size_t globalWorkOffset[] = {0};
-  size_t globalWorkSize[] = {1};
-  size_t localWorkSize[] = {1};
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueKernelLaunch>(
-                queue_, kern, workDim, globalWorkOffset, globalWorkSize,
-                localWorkSize, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaKernelsTest, PIkerneldispatchTwo) {
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-          context_, 1, &device_, nullptr, (const unsigned char **)&twoParams, 0,
-          nullptr, &binary_status, &prog)),
-      PI_SUCCESS);
-  ASSERT_EQ(binary_status, PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "twoParamKernel", &kern)),
-            PI_SUCCESS);
-
-  size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-
-  pi_mem memObj2;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj2,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piextKernelSetArgMemObj>(
-                kern, 0, nullptr, &memObj)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piextKernelSetArgMemObj>(
-                kern, 1, nullptr, &memObj2)),
-            PI_SUCCESS);
-
-  size_t workDim = 1;
-  size_t globalWorkOffset[] = {0};
-  size_t globalWorkSize[] = {1};
-  size_t localWorkSize[] = {1};
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueKernelLaunch>(
-                queue_, kern, workDim, globalWorkOffset, globalWorkSize,
-                localWorkSize, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj2)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaKernelsTest, PIKernelArgumentSetTwiceOneLocal) {
-
-  pi_program prog;
-  pi_int32 binary_status = PI_SUCCESS;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramCreateWithBinary>(
-                context_, 1, &device_, nullptr,
-                (const unsigned char **)&threeParamsTwoLocal, 0, nullptr,
-                &binary_status, &prog)),
-            PI_SUCCESS);
-  ASSERT_EQ(binary_status, PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piProgramBuild>(
-                prog, 1, &device_, "", nullptr, nullptr)),
-            PI_SUCCESS);
-
-  pi_kernel kern;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelCreate>(
-                prog, "twoParamKernelLocal", &kern)),
-            PI_SUCCESS);
-
-  int number = 10;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 0, sizeof(int), &number)),
-            PI_SUCCESS);
-  const auto &kernArgs = kern->get_arg_indices();
-  ASSERT_GT(kernArgs.size(), (size_t)0 + NUM_IMPLICIT_ARGS);
-  int storedValue = *(static_cast<const int *>(kernArgs[0]));
-  ASSERT_EQ(storedValue, number);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 1, sizeof(int), nullptr)),
-            PI_SUCCESS);
-  const auto &kernArgs2 = kern->get_arg_indices();
-  ASSERT_EQ(kernArgs2.size(), (size_t)2 + NUM_IMPLICIT_ARGS);
-  storedValue = *(static_cast<const int *>(kernArgs2[1]));
-  ASSERT_EQ(storedValue, 0);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
-                kern, 2, sizeof(int), nullptr)),
-            PI_SUCCESS);
-  const auto &kernArgs3 = kern->get_arg_indices();
-  ASSERT_EQ(kernArgs3.size(), (size_t)3 + NUM_IMPLICIT_ARGS);
-  storedValue = *(static_cast<const int *>(kernArgs3[2]));
-  ASSERT_EQ(storedValue, static_cast<int>(sizeof(int)));
-}
diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp
deleted file mode 100644
index b0693ff30830c..0000000000000
--- a/sycl/unittests/pi/cuda/test_mem_obj.cpp
+++ /dev/null
@@ -1,207 +0,0 @@
-//==---- test_mem_obj.cpp --- PI unit tests --------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "CudaUtils.hpp"
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_cuda.hpp>
-#include <sycl/detail/cuda_definitions.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct CudaTestMemObj : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi::clearCudaContext();
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    EXPECT_NE(context_, nullptr);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  CudaTestMemObj() = default;
-
-  ~CudaTestMemObj() = default;
-};
-
-TEST_F(CudaTestMemObj, piMemBufferCreateSimple) {
-  const size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaTestMemObj, piMemBufferAllocHost) {
-  const size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
-                memSize, nullptr, &memObj, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaTestMemObj, piMemBufferCreateNoActiveContext) {
-  const size_t memSize = 1024u;
-  // Context has been destroyed
-
-  CUcontext current = nullptr;
-
-  // pop CUDA contexts until there is not a cuda context bound to the thread
-  do {
-    CUcontext oldContext = nullptr;
-    auto cuErr = cuCtxPopCurrent(&oldContext);
-    EXPECT_EQ(cuErr, CUDA_SUCCESS);
-
-    // There should not be any active CUDA context
-    cuErr = cuCtxGetCurrent(&current);
-    ASSERT_EQ(cuErr, CUDA_SUCCESS);
-  } while (current != nullptr);
-
-  // The context object is passed, even if its not active it should be used
-  // to allocate the memory object
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-  ASSERT_NE(memObj, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaTestMemObj, piMemBufferPinnedMappedRead) {
-  const size_t memSize = sizeof(int);
-  const int value = 20;
-
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context_, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-  ASSERT_EQ(queue->get_context(), context_);
-
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
-                memSize, nullptr, &memObj, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-          queue, memObj, true, 0, sizeof(int), &value, 0, nullptr, nullptr)),
-      PI_SUCCESS);
-
-  int *host_ptr = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferMap>(
-                queue, memObj, true, PI_MAP_READ, 0, sizeof(int), 0, nullptr,
-                nullptr, (void **)&host_ptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ(*host_ptr, value);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemUnmap>(
-                queue, memObj, host_ptr, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-}
-
-TEST_F(CudaTestMemObj, piMemBufferPinnedMappedWrite) {
-  const size_t memSize = sizeof(int);
-  const int value = 30;
-
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context_, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-  ASSERT_EQ(queue->get_context(), context_);
-
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
-                memSize, nullptr, &memObj, nullptr)),
-            PI_SUCCESS);
-
-  int *host_ptr = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferMap>(
-                queue, memObj, true, PI_MAP_WRITE, 0, sizeof(int), 0, nullptr,
-                nullptr, (void **)&host_ptr)),
-            PI_SUCCESS);
-
-  *host_ptr = value;
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemUnmap>(
-                queue, memObj, host_ptr, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  int read_value = 0;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                queue, memObj, true, 0, sizeof(int), &read_value, 0, nullptr,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ(read_value, value);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-}
diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp
deleted file mode 100644
index f9ce627d126ad..0000000000000
--- a/sycl/unittests/pi/cuda/test_primary_context.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//==---------- pi_primary_context.cpp - PI unit tests ----------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <cuda.h>
-
-#include "TestGetPlatforms.hpp"
-#include <pi_cuda.hpp>
-#include <sycl/sycl.hpp>
-
-#include <iostream>
-
-using namespace sycl;
-
-struct CudaPrimaryContextTests : public ::testing::TestWithParam<platform> {
-
-protected:
-  device deviceA_;
-  device deviceB_;
-
-  void SetUp() override {
-    std::vector<device> CudaDevices = GetParam().get_devices();
-
-    deviceA_ = CudaDevices[0];
-    deviceB_ = CudaDevices.size() > 1 ? CudaDevices[1] : deviceA_;
-  }
-
-  void TearDown() override {}
-};
-
-TEST_P(CudaPrimaryContextTests, piSingleContext) {
-  std::cout << "create single context" << std::endl;
-  context Context(
-      deviceA_, async_handler{},
-      {sycl::ext::oneapi::cuda::property::context::use_primary_context{}});
-
-  CUdevice CudaDevice = get_native<backend::ext_oneapi_cuda>(deviceA_);
-  CUcontext CudaContext = get_native<backend::ext_oneapi_cuda>(Context);
-
-  CUcontext PrimaryCudaContext;
-  cuDevicePrimaryCtxRetain(&PrimaryCudaContext, CudaDevice);
-
-  ASSERT_EQ(CudaContext, PrimaryCudaContext);
-
-  cuDevicePrimaryCtxRelease(CudaDevice);
-}
-
-TEST_P(CudaPrimaryContextTests, piMultiContextSingleDevice) {
-  std::cout << "create multiple contexts for one device" << std::endl;
-  context ContextA(
-      deviceA_, async_handler{},
-      {sycl::ext::oneapi::cuda::property::context::use_primary_context{}});
-  context ContextB(
-      deviceA_, async_handler{},
-      {sycl::ext::oneapi::cuda::property::context::use_primary_context{}});
-
-  CUcontext CudaContextA = get_native<backend::ext_oneapi_cuda>(ContextA);
-  CUcontext CudaContextB = get_native<backend::ext_oneapi_cuda>(ContextB);
-
-  ASSERT_EQ(CudaContextA, CudaContextB);
-}
-
-TEST_P(CudaPrimaryContextTests, piMultiContextMultiDevice) {
-  if (deviceA_ == deviceB_)
-    return;
-
-  CUdevice CudaDeviceA = get_native<backend::ext_oneapi_cuda>(deviceA_);
-  CUdevice CudaDeviceB = get_native<backend::ext_oneapi_cuda>(deviceB_);
-
-  ASSERT_NE(CudaDeviceA, CudaDeviceB);
-
-  std::cout << "create multiple contexts for multiple devices" << std::endl;
-  context ContextA(
-      deviceA_, async_handler{},
-      {sycl::ext::oneapi::cuda::property::context::use_primary_context{}});
-  context ContextB(
-      deviceB_, async_handler{},
-      {sycl::ext::oneapi::cuda::property::context::use_primary_context{}});
-
-  CUcontext CudaContextA = get_native<backend::ext_oneapi_cuda>(ContextA);
-  CUcontext CudaContextB = get_native<backend::ext_oneapi_cuda>(ContextB);
-
-  ASSERT_NE(CudaContextA, CudaContextB);
-}
-
-INSTANTIATE_TEST_SUITE_P(
-    OnCudaPlatform, CudaPrimaryContextTests,
-    ::testing::ValuesIn(pi::getPlatformsWithName("CUDA BACKEND")));
diff --git a/sycl/unittests/pi/cuda/test_sampler_properties.cpp b/sycl/unittests/pi/cuda/test_sampler_properties.cpp
deleted file mode 100644
index 793703d2bd1ca..0000000000000
--- a/sycl/unittests/pi/cuda/test_sampler_properties.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//==---- PlatformTest.cpp --- PI unit tests --------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-#include <vector>
-
-namespace {
-
-using namespace sycl;
-
-class SamplerPropertiesTest
-    : public ::testing::TestWithParam<std::tuple<
-          pi_bool, pi_sampler_filter_mode, pi_sampler_addressing_mode>> {
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_cuda);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-  pi_sampler sampler_;
-
-  pi_bool normalizedCoords_;
-  pi_sampler_filter_mode filterMode_;
-  pi_sampler_addressing_mode addressMode_;
-
-  SamplerPropertiesTest() = default;
-
-  ~SamplerPropertiesTest() override = default;
-
-  void SetUp() override {
-    // skip the tests if the CUDA backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    std::tie(normalizedCoords_, filterMode_, addressMode_) = GetParam();
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    EXPECT_NE(context_, nullptr);
-
-    pi_sampler_properties sampler_properties[] = {
-        PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS,
-        static_cast<pi_sampler_properties>(normalizedCoords_),
-        PI_SAMPLER_PROPERTIES_ADDRESSING_MODE,
-        static_cast<pi_sampler_properties>(addressMode_),
-        PI_SAMPLER_PROPERTIES_FILTER_MODE,
-        static_cast<pi_sampler_properties>(filterMode_),
-        0};
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piSamplerCreate>(
-                  context_, sampler_properties, &sampler_)),
-              PI_SUCCESS);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piSamplerRelease>(sampler_);
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-};
-
-TEST_P(SamplerPropertiesTest, piCheckNormalizedCoords) {
-  pi_bool actualNormalizedCoords = !normalizedCoords_;
-
-  plugin->call<detail::PiApiKind::piSamplerGetInfo>(
-      sampler_, PI_SAMPLER_INFO_NORMALIZED_COORDS, sizeof(pi_bool),
-      &actualNormalizedCoords, nullptr);
-
-  ASSERT_EQ(actualNormalizedCoords, normalizedCoords_);
-}
-
-TEST_P(SamplerPropertiesTest, piCheckFilterMode) {
-  pi_sampler_filter_mode actualFilterMode;
-
-  plugin->call<detail::PiApiKind::piSamplerGetInfo>(
-      sampler_, PI_SAMPLER_INFO_FILTER_MODE, sizeof(pi_sampler_filter_mode),
-      &actualFilterMode, nullptr);
-
-  ASSERT_EQ(actualFilterMode, filterMode_);
-}
-
-TEST_P(SamplerPropertiesTest, piCheckAddressingMode) {
-  pi_sampler_addressing_mode actualAddressMode;
-
-  plugin->call<detail::PiApiKind::piSamplerGetInfo>(
-      sampler_, PI_SAMPLER_INFO_ADDRESSING_MODE,
-      sizeof(pi_sampler_addressing_mode), &actualAddressMode, nullptr);
-
-  ASSERT_EQ(actualAddressMode, addressMode_);
-}
-
-INSTANTIATE_TEST_SUITE_P(
-    SamplerPropertiesTestImpl, SamplerPropertiesTest,
-    ::testing::Combine(
-        ::testing::Values(PI_TRUE, PI_FALSE),
-        ::testing::Values(PI_SAMPLER_FILTER_MODE_LINEAR,
-                          PI_SAMPLER_FILTER_MODE_NEAREST),
-        ::testing::Values(PI_SAMPLER_ADDRESSING_MODE_CLAMP,
-                          PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE,
-                          PI_SAMPLER_ADDRESSING_MODE_NONE,
-                          PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT,
-                          PI_SAMPLER_ADDRESSING_MODE_REPEAT)));
-} // namespace
diff --git a/sycl/unittests/pi/hip/CMakeLists.txt b/sycl/unittests/pi/hip/CMakeLists.txt
deleted file mode 100644
index eee75b0447551..0000000000000
--- a/sycl/unittests/pi/hip/CMakeLists.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-add_sycl_unittest(PiHipTests OBJECT
-  test_base_objects.cpp
-  test_commands.cpp
-  test_contexts.cpp
-  test_device.cpp
-  test_interop_get_native.cpp
-  test_kernels.cpp
-  test_mem_obj.cpp
-  test_primary_context.cpp
-  test_sampler_properties.cpp
-)
-
-add_dependencies(PiHipTests sycl)
-
-target_compile_definitions(PiHipTests
-  PRIVATE
-    GTEST_HAS_COMBINE=1)
-
-target_include_directories(PiHipTests
-  PRIVATE
-    "../"
-    "${sycl_inc_dir}/sycl/detail/"
-    "${sycl_inc_dir}"
-    "${sycl_plugin_dir}/hip/"
-    "${sycl_plugin_dir}/unified_runtime/"
-)
-
-if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD")
-  # Set HIP define to select AMD platform
-  target_compile_definitions(PiHipTests PRIVATE __HIP_PLATFORM_AMD__)
-elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA")
-  # Set HIP define to select NVIDIA platform
-  target_compile_definitions(PiHipTests PRIVATE __HIP_PLATFORM_NVIDIA__)
-else()
-  message(FATAL_ERROR "Unspecified PI HIP platform, please set SYCL_BUILD_PI_HIP_PLATFORM to 'AMD' or 'NVIDIA'")
-endif()
-
-target_link_libraries(PiHipTests
-  PRIVATE
-    rocmdrv
-    UnifiedRuntime-Headers
-)
diff --git a/sycl/unittests/pi/hip/HipUtils.hpp b/sycl/unittests/pi/hip/HipUtils.hpp
deleted file mode 100644
index c62bf7dfefb08..0000000000000
--- a/sycl/unittests/pi/hip/HipUtils.hpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#pragma once
-
-#include <hip/hip_runtime.h>
-
-namespace pi {
-
-// utility function to clear the HIP context stack
-inline void clearHipContext() {
-  hipCtx_t ctxt = nullptr;
-  do {
-    hipCtxSetCurrent(nullptr);
-    hipCtxGetCurrent(&ctxt);
-  } while (ctxt != nullptr);
-}
-
-} // namespace pi
diff --git a/sycl/unittests/pi/hip/test_base_objects.cpp b/sycl/unittests/pi/hip/test_base_objects.cpp
deleted file mode 100644
index 86458aa62c0b1..0000000000000
--- a/sycl/unittests/pi/hip/test_base_objects.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//==---- test_base_objects.cpp --- PI unit tests ---------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_hip.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-#include <thread>
-
-// https://sep5.readthedocs.io/en/latest/ROCm_API_References/
-// HIP_API/Context-Management.html#_CPPv419hipCtxGetApiVersion8hipCtx_tPi
-const int HIP_DRIVER_API_VERSION = 4;
-
-using namespace sycl;
-
-class HipBaseObjectsTest : public ::testing::Test {
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-  }
-
-  HipBaseObjectsTest() = default;
-
-  ~HipBaseObjectsTest() = default;
-};
-
-TEST_F(HipBaseObjectsTest, piContextCreate) {
-  pi_uint32 numPlatforms = 0;
-  pi_platform platform = nullptr;
-  pi_device device;
-  ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                0, nullptr, &numPlatforms)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                numPlatforms, &platform, nullptr)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_GE(numPlatforms, 1u);
-  ASSERT_NE(platform, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
-            PI_SUCCESS)
-      << "piDevicesGet failed.\n";
-
-  pi_context ctxt = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device, nullptr, nullptr, &ctxt)),
-            PI_SUCCESS)
-      << "piContextCreate failed.\n";
-
-  EXPECT_NE(ctxt, nullptr);
-  EXPECT_EQ(ctxt->get_device(), device);
-
-  // Retrieve the hipCtxt to check information is correct
-  hipCtx_t hipContext = ctxt->get();
-  int version = 0;
-  auto hipErr = hipCtxGetApiVersion(hipContext, &version);
-  EXPECT_EQ(hipErr, PI_SUCCESS);
-  EXPECT_EQ(version, HIP_DRIVER_API_VERSION);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
-            PI_SUCCESS);
-}
-
-TEST_F(HipBaseObjectsTest, piContextCreateChildThread) {
-  pi_uint32 numPlatforms = 0;
-  pi_platform platform;
-  pi_device device;
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                0, nullptr, &numPlatforms)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                numPlatforms, &platform, nullptr)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
-            PI_SUCCESS);
-
-  pi_context ctxt;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device, nullptr, nullptr, &ctxt)),
-            PI_SUCCESS);
-  EXPECT_NE(ctxt, nullptr);
-
-  // Retrieve the cuCtxt to check information is correct
-  auto checkValue = [=]() {
-    hipCtx_t hipContext = ctxt->get();
-    int version = 0;
-    auto hipErr = hipCtxGetApiVersion(hipContext, &version);
-    EXPECT_EQ(hipErr, PI_SUCCESS);
-    EXPECT_EQ(version, HIP_DRIVER_API_VERSION);
-
-    // The current context is different from the current thread
-    hipCtx_t current;
-    hipErr = hipCtxGetCurrent(&current);
-    EXPECT_EQ(hipErr, PI_SUCCESS);
-    EXPECT_NE(hipContext, current);
-
-    // Set the context from PI API as the current one
-    hipErr = hipCtxPushCurrent(hipContext);
-    EXPECT_EQ(hipErr, PI_SUCCESS);
-
-    hipErr = hipCtxGetCurrent(&current);
-    EXPECT_EQ(hipErr, PI_SUCCESS);
-    EXPECT_EQ(hipContext, current);
-  };
-  auto callContextFromOtherThread = std::thread(checkValue);
-
-  callContextFromOtherThread.join();
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
-            PI_SUCCESS);
-}
diff --git a/sycl/unittests/pi/hip/test_commands.cpp b/sycl/unittests/pi/hip/test_commands.cpp
deleted file mode 100644
index d453e9b9da1ea..0000000000000
--- a/sycl/unittests/pi/hip/test_commands.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-//==---- test_commands.cpp --- PI unit tests -------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include "HipUtils.hpp"
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_hip.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct HipCommandsTest : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-  pi_queue queue_;
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi::clearHipContext();
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    ASSERT_NE(context_, nullptr);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context_, device_, 0, &queue_)),
-              PI_SUCCESS);
-    ASSERT_NE(queue_, nullptr);
-    auto tmpCtxt = queue_->get_context();
-    ASSERT_EQ(tmpCtxt, context_);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  HipCommandsTest() = default;
-
-  ~HipCommandsTest() = default;
-};
-
-TEST_F(HipCommandsTest, PIEnqueueReadBufferBlocking) {
-  constexpr const size_t memSize = 10u;
-  constexpr const size_t bytes = memSize * sizeof(int);
-  const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-  int output[memSize] = {};
-
-  pi_mem memObj;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-          context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)),
-      PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-                queue_, memObj, true, 0, bytes, data, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                queue_, memObj, true, 0, bytes, output, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  bool isSame =
-      std::equal(std::begin(output), std::end(output), std::begin(data));
-  EXPECT_TRUE(isSame);
-  if (!isSame) {
-    std::for_each(std::begin(output), std::end(output),
-                  [](int &elem) { std::cout << elem << ","; });
-    std::cout << std::endl;
-  }
-}
-
-TEST_F(HipCommandsTest, PIEnqueueReadBufferNonBlocking) {
-  constexpr const size_t memSize = 10u;
-  constexpr const size_t bytes = memSize * sizeof(int);
-  const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-  int output[memSize] = {};
-
-  pi_mem memObj;
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-          context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)),
-      PI_SUCCESS);
-
-  pi_event cpIn, cpOut;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-                queue_, memObj, false, 0, bytes, data, 0, nullptr, &cpIn)),
-            PI_SUCCESS);
-  ASSERT_NE(cpIn, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                queue_, memObj, false, 0, bytes, output, 0, nullptr, &cpOut)),
-            PI_SUCCESS);
-  ASSERT_NE(cpOut, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEventsWait>(1, &cpOut)),
-            PI_SUCCESS);
-
-  bool isSame =
-      std::equal(std::begin(output), std::end(output), std::begin(data));
-  EXPECT_TRUE(isSame);
-  if (!isSame) {
-    std::for_each(std::begin(output), std::end(output),
-                  [](int &elem) { std::cout << elem << ","; });
-    std::cout << std::endl;
-  }
-}
diff --git a/sycl/unittests/pi/hip/test_contexts.cpp b/sycl/unittests/pi/hip/test_contexts.cpp
deleted file mode 100644
index fa20e101d00e2..0000000000000
--- a/sycl/unittests/pi/hip/test_contexts.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-//==---- test_contexts.cpp --- PI unit tests -------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include <hip/hip_runtime.h>
-
-#include "HipUtils.hpp"
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_hip.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct HipContextsTest : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-
-  pi_platform platform_;
-  pi_device device_;
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-  }
-
-  void TearDown() override {}
-
-  HipContextsTest() = default;
-
-  ~HipContextsTest() = default;
-};
-
-TEST_F(HipContextsTest, ContextLifetime) {
-  // start with no active context
-  pi::clearHipContext();
-
-  // create a context
-  pi_context context;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context)),
-            PI_SUCCESS);
-  ASSERT_NE(context, nullptr);
-
-  // create a queue from the context, this should use the ScopedContext
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-
-  // ensure the queue has the correct context
-  ASSERT_EQ(context, queue->get_context());
-
-  // check that the context is now the active HIP context
-  hipCtx_t hipCtxt = nullptr;
-  hipCtxGetCurrent(&hipCtxt);
-  ASSERT_EQ(hipCtxt, context->get());
-
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-  plugin->call<detail::PiApiKind::piContextRelease>(context);
-
-  // check that the context was cleaned up properly by the destructor
-  hipCtxGetCurrent(&hipCtxt);
-  ASSERT_EQ(hipCtxt, nullptr);
-}
-
-TEST_F(HipContextsTest, ContextLifetimeExisting) {
-  // start by setting up a HIP context on the thread
-  hipCtx_t original;
-  hipCtxCreate(&original, hipDeviceMapHost, device_->get());
-
-  // ensure the HIP context is active
-  hipCtx_t current = nullptr;
-  hipCtxGetCurrent(&current);
-  ASSERT_EQ(original, current);
-
-  // create a PI context
-  pi_context context;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context)),
-            PI_SUCCESS);
-  ASSERT_NE(context, nullptr);
-
-  // create a queue from the context, this should use the ScopedContext
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-
-  // ensure the queue has the correct context
-  ASSERT_EQ(context, queue->get_context());
-
-  // check that the context is now the active HIP context
-  hipCtxGetCurrent(&current);
-  ASSERT_EQ(current, context->get());
-
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-  plugin->call<detail::PiApiKind::piContextRelease>(context);
-
-  // check that the context was cleaned up, the old context will be restored
-  // automatically by hipCtxDestroy in piContextRelease, as it was pushed on the
-  // stack bu hipCtxCreate
-  hipCtxGetCurrent(&current);
-  ASSERT_EQ(current, original);
-
-  // release original context
-  hipCtxDestroy(original);
-}
-
-// In some cases (for host_task), the SYCL runtime may call PI API functions
-// from threads of the thread pool, this can cause issues because with the HIP
-// plugin these functions will set an active HIP context on these threads, but
-// never clean it up, as it will only get cleaned up in the main thread.
-//
-// So the following test aims to reproduce the scenario where there is a
-// dangling deleted context in a separate thread and seeing if the PI calls are
-// still able to work correctly in that thread.
-TEST_F(HipContextsTest, ContextThread) {
-  // start with no active context
-  pi::clearHipContext();
-
-  // create two PI contexts
-  pi_context context1;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context1)),
-            PI_SUCCESS);
-  ASSERT_NE(context1, nullptr);
-
-  pi_context context2;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                nullptr, 1, &device_, nullptr, nullptr, &context2)),
-            PI_SUCCESS);
-  ASSERT_NE(context2, nullptr);
-
-  // setup synchronization variables between the main thread and the testing
-  // thread
-  std::mutex m;
-  std::condition_variable cv;
-  bool released = false;
-  bool thread_done = false;
-
-  // create a testing thread that will create a queue with the first context,
-  // release the queue, then wait for the main thread to release the first
-  // context, and then create and release another queue with the second context
-  // this time
-  auto test_thread = std::thread([&] {
-    hipCtx_t current = nullptr;
-
-    // create a queue with the first context
-    pi_queue queue;
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context1, device_, 0, &queue)),
-              PI_SUCCESS);
-    ASSERT_NE(queue, nullptr);
-
-    // ensure the queue has the correct context
-    ASSERT_EQ(context1, queue->get_context());
-
-    // check that the first context is now the active HIP context
-    hipCtxGetCurrent(&current);
-    ASSERT_EQ(current, context1->get());
-
-    plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-
-    // mark the first set of processing as done and notify the main thread
-    std::unique_lock<std::mutex> lock(m);
-    thread_done = true;
-    lock.unlock();
-    cv.notify_one();
-
-    // wait for the main thread to release the first context
-    lock.lock();
-    cv.wait(lock, [&] { return released; });
-
-    // check that the first context is still active, this is because deleting a
-    // context only cleans up the current thread
-    hipCtxGetCurrent(&current);
-    ASSERT_EQ(current, context1->get());
-
-    // create a queue with the second context
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context2, device_, 0, &queue)),
-              PI_SUCCESS);
-    ASSERT_NE(queue, nullptr);
-
-    // ensure the queue has the correct context
-    ASSERT_EQ(context2, queue->get_context());
-
-    // check that the second context is now the active HIP context
-    hipCtxGetCurrent(&current);
-    ASSERT_EQ(current, context2->get());
-
-    plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-  });
-
-  // wait for the thread to be done with the first queue to release the first
-  // context
-  std::unique_lock<std::mutex> lock(m);
-  cv.wait(lock, [&] { return thread_done; });
-  plugin->call<detail::PiApiKind::piContextRelease>(context1);
-
-  // notify the other thread that the context was released
-  released = true;
-  lock.unlock();
-  cv.notify_one();
-
-  // wait for the thread to finish
-  test_thread.join();
-
-  plugin->call<detail::PiApiKind::piContextRelease>(context2);
-
-  // check that there is no context set on the main thread
-  hipCtx_t current = nullptr;
-  hipCtxGetCurrent(&current);
-  ASSERT_EQ(current, nullptr);
-}
diff --git a/sycl/unittests/pi/hip/test_device.cpp b/sycl/unittests/pi/hip/test_device.cpp
deleted file mode 100644
index a2d61c6ee87fc..0000000000000
--- a/sycl/unittests/pi/hip/test_device.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-//==---- test_device.cpp --- PI unit tests ---------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_hip.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct HipDeviceTests : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    EXPECT_NE(context_, nullptr);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  HipDeviceTests() = default;
-  ~HipDeviceTests() = default;
-};
-
-TEST_F(HipDeviceTests, PIDeviceGetInfoSimple) {
-
-  size_t return_size = 0;
-  pi_device_type device_type;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_TYPE, sizeof(pi_device_type),
-                &device_type, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(return_size, sizeof(pi_device_type));
-  EXPECT_EQ(
-      device_type,
-      PI_DEVICE_TYPE_GPU); // backend pre-defined value, device must be a GPU
-
-  pi_device parent_device = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_PARENT_DEVICE, sizeof(pi_device),
-                &parent_device, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(return_size, sizeof(pi_device));
-  EXPECT_EQ(parent_device,
-            nullptr); // backend pre-set value, device cannot have a parent
-
-  pi_platform platform = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_PLATFORM, sizeof(pi_platform),
-                &platform, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(return_size, sizeof(pi_platform));
-  EXPECT_EQ(platform, platform_); // test fixture device was created from the
-                                  // test fixture platform
-
-  cl_device_partition_property device_partition_property = -1;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
-                device_, PI_DEVICE_INFO_PARTITION_TYPE,
-                sizeof(cl_device_partition_property),
-                &device_partition_property, &return_size)),
-            PI_SUCCESS);
-  EXPECT_EQ(device_partition_property,
-            0); // PI HIP backend will not support device partitioning, this
-                // function should just return 0.
-  EXPECT_EQ(return_size, sizeof(cl_device_partition_property));
-}
diff --git a/sycl/unittests/pi/hip/test_interop_get_native.cpp b/sycl/unittests/pi/hip/test_interop_get_native.cpp
deleted file mode 100644
index 39e357430036b..0000000000000
--- a/sycl/unittests/pi/hip/test_interop_get_native.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-//==------- test_interop_get_native.cpp - SYCL HIP get_native tests --------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include <sycl/sycl.hpp>
-
-#include <sycl/ext/oneapi/backend/hip.hpp>
-
-#include "TestGetPlatforms.hpp"
-
-#include <iostream>
-
-using namespace sycl;
-
-struct HipInteropGetNativeTests : public ::testing::TestWithParam<platform> {
-
-protected:
-  std::unique_ptr<queue> syclQueue_;
-  device syclDevice_;
-
-  void SetUp() override {
-    syclDevice_ = GetParam().get_devices()[0];
-    syclQueue_ = std::unique_ptr<queue>{new queue{syclDevice_}};
-  }
-
-  void TearDown() override { syclQueue_.reset(); }
-};
-
-TEST_P(HipInteropGetNativeTests, getNativeDevice) {
-  hipDevice_t hipDevice = get_native<backend::ext_oneapi_hip>(syclDevice_);
-  char hipDeviceName[2] = {0, 0};
-  hipError_t result = hipDeviceGetName(hipDeviceName, 2, hipDevice);
-  ASSERT_EQ(result, PI_SUCCESS);
-  ASSERT_NE(hipDeviceName[0], 0);
-}
-
-TEST_P(HipInteropGetNativeTests, getNativeContext) {
-  hipCtx_t hipContext =
-      get_native<backend::ext_oneapi_hip>(syclQueue_->get_context());
-  ASSERT_NE(hipContext, nullptr);
-}
-
-TEST_P(HipInteropGetNativeTests, interopTaskGetMem) {
-  buffer<int, 1> syclBuffer(range<1>{1});
-  syclQueue_->submit([&](handler &cgh) {
-    auto syclAccessor = syclBuffer.get_access<access::mode::read>(cgh);
-    cgh.host_task([=](interop_handle ih) {
-      hipDeviceptr_t hipPtr =
-          ih.get_native_mem<backend::ext_oneapi_hip>(syclAccessor);
-      hipDeviceptr_t hipPtrBase;
-      size_t hipPtrSize = 0;
-      hipCtx_t hipContext =
-          get_native<backend::ext_oneapi_hip>(syclQueue_->get_context());
-      ASSERT_EQ(PI_SUCCESS, hipCtxPushCurrent(hipContext));
-      ASSERT_EQ(PI_SUCCESS,
-                hipMemGetAddressRange(&hipPtrBase, &hipPtrSize, hipPtr));
-      ASSERT_EQ(PI_SUCCESS, hipCtxPopCurrent(nullptr));
-      ASSERT_EQ(sizeof(int), hipPtrSize);
-    });
-  });
-}
-
-TEST_P(HipInteropGetNativeTests, interopTaskGetQueue) {
-  hipStream_t hipStream = get_native<backend::ext_oneapi_hip>(*syclQueue_);
-  syclQueue_->submit([&](handler &cgh) {
-    cgh.host_task([=](interop_handle ih) {
-      hipStream_t hipInteropStream =
-          ih.get_native_queue<backend::ext_oneapi_hip>();
-      ASSERT_EQ(hipInteropStream, hipStream);
-    });
-  });
-}
-
-TEST_P(HipInteropGetNativeTests, hostTaskGetNativeMem) {
-  buffer<int, 1> syclBuffer(range<1>{1});
-  syclQueue_->submit([&](handler &cgh) {
-    auto syclAccessor = syclBuffer.get_access<access::mode::read>(cgh);
-    cgh.host_task([=](interop_handle ih) {
-      hipDeviceptr_t hipPtr =
-          ih.get_native_mem<backend::ext_oneapi_hip>(syclAccessor);
-      hipDeviceptr_t hipPtrBase;
-      size_t hipPtrSize = 0;
-      hipCtx_t hipContext =
-          get_native<backend::ext_oneapi_hip>(syclQueue_->get_context());
-      ASSERT_EQ(PI_SUCCESS, hipCtxPushCurrent(hipContext));
-      ASSERT_EQ(PI_SUCCESS,
-                hipMemGetAddressRange(&hipPtrBase, &hipPtrSize, hipPtr));
-      ASSERT_EQ(PI_SUCCESS, hipCtxPopCurrent(nullptr));
-      ASSERT_EQ(sizeof(int), hipPtrSize);
-    });
-  });
-}
-
-TEST_P(HipInteropGetNativeTests, hostTaskGetNativeQueue) {
-  hipStream_t hipStream = get_native<backend::ext_oneapi_hip>(*syclQueue_);
-  syclQueue_->submit([&](handler &cgh) {
-    cgh.host_task([=](interop_handle ih) {
-      hipStream_t hipInteropStream =
-          ih.get_native_queue<backend::ext_oneapi_hip>();
-      ASSERT_EQ(hipInteropStream, hipStream);
-    });
-  });
-}
-
-TEST_P(HipInteropGetNativeTests, hostTaskGetNativeContext) {
-  hipCtx_t hipContext =
-      get_native<backend::ext_oneapi_hip>(syclQueue_->get_context());
-  syclQueue_->submit([&](handler &cgh) {
-    cgh.host_task([=](interop_handle ih) {
-      hipCtx_t hipInteropContext =
-          ih.get_native_context<backend::ext_oneapi_hip>();
-      ASSERT_EQ(hipInteropContext, hipContext);
-    });
-  });
-}
-
-INSTANTIATE_TEST_SUITE_P(
-    OnHipPlatform, HipInteropGetNativeTests,
-    ::testing::ValuesIn(pi::getPlatformsWithName("HIP BACKEND")));
diff --git a/sycl/unittests/pi/hip/test_kernels.cpp b/sycl/unittests/pi/hip/test_kernels.cpp
deleted file mode 100644
index 51463a5e23c29..0000000000000
--- a/sycl/unittests/pi/hip/test_kernels.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//==---- test_kernels.cpp --- PI unit tests --------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_hip.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-// PI HIP kernels carry an additional argument for the implicit global offset.
-#define NUM_IMPLICIT_ARGS 1
-
-using namespace sycl;
-
-struct HipKernelsTest : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-  pi_queue queue_;
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    ASSERT_NE(context_, nullptr);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                  context_, device_, 0, &queue_)),
-              PI_SUCCESS);
-    ASSERT_NE(queue_, nullptr);
-    ASSERT_EQ(queue_->get_context(), context_);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piQueueRelease>(queue_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  HipKernelsTest() = default;
-
-  ~HipKernelsTest() = default;
-};
diff --git a/sycl/unittests/pi/hip/test_mem_obj.cpp b/sycl/unittests/pi/hip/test_mem_obj.cpp
deleted file mode 100644
index 382a510bf97d9..0000000000000
--- a/sycl/unittests/pi/hip/test_mem_obj.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//==---- test_mem_obj.cpp --- PI unit tests --------------------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include "HipUtils.hpp"
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <pi_hip.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-
-struct HipTestMemObj : public ::testing::Test {
-
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    pi::clearHipContext();
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    EXPECT_NE(context_, nullptr);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-
-  HipTestMemObj() = default;
-
-  ~HipTestMemObj() = default;
-};
-
-TEST_F(HipTestMemObj, piMemBufferCreateSimple) {
-  const size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(HipTestMemObj, piMemBufferAllocHost) {
-  const size_t memSize = 1024u;
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
-                memSize, nullptr, &memObj, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(HipTestMemObj, piMemBufferCreateNoActiveContext) {
-  const size_t memSize = 1024u;
-  // Context has been destroyed
-
-  hipCtx_t current = nullptr;
-
-  // pop HIP contexts until there is not a HIP context bound to the thread
-  do {
-    hipCtx_t oldContext = nullptr;
-    auto hipErr = hipCtxPopCurrent(&oldContext);
-    EXPECT_EQ(hipErr, PI_SUCCESS);
-
-    // There should not be any active HIP context
-    hipErr = hipCtxGetCurrent(&current);
-    ASSERT_EQ(hipErr, PI_SUCCESS);
-  } while (current != nullptr);
-
-  // The context object is passed, even if its not active it should be used
-  // to allocate the memory object
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
-                nullptr)),
-            PI_SUCCESS);
-  ASSERT_NE(memObj, nullptr);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-}
-
-TEST_F(HipTestMemObj, piMemBufferPinnedMappedRead) {
-  const size_t memSize = sizeof(int);
-  const int value = 20;
-
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context_, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-  ASSERT_EQ(queue->get_context(), context_);
-
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
-                memSize, nullptr, &memObj, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ(
-      (plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
-          queue, memObj, true, 0, sizeof(int), &value, 0, nullptr, nullptr)),
-      PI_SUCCESS);
-
-  int *host_ptr = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferMap>(
-                queue, memObj, true, PI_MAP_READ, 0, sizeof(int), 0, nullptr,
-                nullptr, (void **)&host_ptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ(*host_ptr, value);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemUnmap>(
-                queue, memObj, host_ptr, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-}
-
-TEST_F(HipTestMemObj, piMemBufferPinnedMappedWrite) {
-  const size_t memSize = sizeof(int);
-  const int value = 30;
-
-  pi_queue queue;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
-                context_, device_, 0, &queue)),
-            PI_SUCCESS);
-  ASSERT_NE(queue, nullptr);
-  ASSERT_EQ(queue->get_context(), context_);
-
-  pi_mem memObj;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemBufferCreate>(
-                context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC,
-                memSize, nullptr, &memObj, nullptr)),
-            PI_SUCCESS);
-
-  int *host_ptr = nullptr;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferMap>(
-                queue, memObj, true, PI_MAP_WRITE, 0, sizeof(int), 0, nullptr,
-                nullptr, (void **)&host_ptr)),
-            PI_SUCCESS);
-
-  *host_ptr = value;
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemUnmap>(
-                queue, memObj, host_ptr, 0, nullptr, nullptr)),
-            PI_SUCCESS);
-
-  int read_value = 0;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piEnqueueMemBufferRead>(
-                queue, memObj, true, 0, sizeof(int), &read_value, 0, nullptr,
-                nullptr)),
-            PI_SUCCESS);
-
-  ASSERT_EQ(read_value, value);
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piMemRelease>(memObj)),
-            PI_SUCCESS);
-  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
-}
diff --git a/sycl/unittests/pi/hip/test_primary_context.cpp b/sycl/unittests/pi/hip/test_primary_context.cpp
deleted file mode 100644
index 1ed80159d32d9..0000000000000
--- a/sycl/unittests/pi/hip/test_primary_context.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-//==---------- test_primary_context.cpp - PI unit tests --------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include <hip/hip_runtime.h>
-
-#include "TestGetPlatforms.hpp"
-#include <pi_hip.hpp>
-#include <sycl/ext/oneapi/backend/hip.hpp>
-#include <sycl/sycl.hpp>
-
-#include <iostream>
-
-using namespace sycl;
-
-struct HipPrimaryContextTests : public ::testing::TestWithParam<platform> {
-
-protected:
-  device deviceA_;
-  device deviceB_;
-
-  void SetUp() override {
-    std::vector<device> HipDevices = GetParam().get_devices();
-
-    deviceA_ = HipDevices[0];
-    deviceB_ = HipDevices.size() > 1 ? HipDevices[1] : deviceA_;
-  }
-
-  void TearDown() override {}
-};
-
-TEST_P(HipPrimaryContextTests, piSingleContext) {
-  std::cout << "create single context" << std::endl;
-  context Context(deviceA_, async_handler{});
-
-  hipDevice_t HipDevice = get_native<backend::ext_oneapi_hip>(deviceA_);
-  hipCtx_t HipContext = get_native<backend::ext_oneapi_hip>(Context);
-
-  hipCtx_t PrimaryHipContext;
-  hipDevicePrimaryCtxRetain(&PrimaryHipContext, HipDevice);
-
-  ASSERT_EQ(HipContext, PrimaryHipContext);
-
-  hipDevicePrimaryCtxRelease(HipDevice);
-}
-
-TEST_P(HipPrimaryContextTests, piMultiContextSingleDevice) {
-  std::cout << "create multiple contexts for one device" << std::endl;
-  context ContextA(deviceA_, async_handler{});
-  context ContextB(deviceA_, async_handler{});
-
-  hipCtx_t HipContextA = get_native<backend::ext_oneapi_hip>(ContextA);
-  hipCtx_t HipContextB = get_native<backend::ext_oneapi_hip>(ContextB);
-
-  ASSERT_EQ(HipContextA, HipContextB);
-}
-
-TEST_P(HipPrimaryContextTests, piMultiContextMultiDevice) {
-  if (deviceA_ == deviceB_)
-    return;
-
-  hipDevice_t HipDeviceA = get_native<backend::ext_oneapi_hip>(deviceA_);
-  hipDevice_t HipDeviceB = get_native<backend::ext_oneapi_hip>(deviceB_);
-
-  ASSERT_NE(HipDeviceA, HipDeviceB);
-
-  std::cout << "create multiple contexts for multiple devices" << std::endl;
-  context ContextA(deviceA_, async_handler{});
-  context ContextB(deviceB_, async_handler{});
-
-  hipCtx_t HipContextA = get_native<backend::ext_oneapi_hip>(ContextA);
-  hipCtx_t HipContextB = get_native<backend::ext_oneapi_hip>(ContextB);
-
-  ASSERT_NE(HipContextA, HipContextB);
-}
-
-INSTANTIATE_TEST_SUITE_P(
-    OnHipPlatform, HipPrimaryContextTests,
-    ::testing::ValuesIn(pi::getPlatformsWithName("HIP BACKEND")));
diff --git a/sycl/unittests/pi/hip/test_sampler_properties.cpp b/sycl/unittests/pi/hip/test_sampler_properties.cpp
deleted file mode 100644
index bed0bb9b053a4..0000000000000
--- a/sycl/unittests/pi/hip/test_sampler_properties.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//==---- test_sampler_properties.cpp --- PI unit tests ---------------------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-
-#include "TestGetPlugin.hpp"
-#include <detail/plugin.hpp>
-#include <sycl/detail/pi.hpp>
-#include <sycl/sycl.hpp>
-
-#include <vector>
-
-namespace {
-
-using namespace sycl;
-
-class SamplerPropertiesTest
-    : public ::testing::TestWithParam<std::tuple<
-          pi_bool, pi_sampler_filter_mode, pi_sampler_addressing_mode>> {
-protected:
-  std::optional<detail::PluginPtr> &plugin =
-      pi::initializeAndGet(backend::ext_oneapi_hip);
-
-  pi_platform platform_;
-  pi_device device_;
-  pi_context context_;
-  pi_sampler sampler_;
-
-  pi_bool normalizedCoords_;
-  pi_sampler_filter_mode filterMode_;
-  pi_sampler_addressing_mode addressMode_;
-
-  SamplerPropertiesTest() = default;
-
-  ~SamplerPropertiesTest() override = default;
-
-  void SetUp() override {
-    // skip the tests if the HIP backend is not available
-    if (!plugin.has_value()) {
-      GTEST_SKIP();
-    }
-
-    std::tie(normalizedCoords_, filterMode_, addressMode_) = GetParam();
-
-    pi_uint32 numPlatforms = 0;
-    ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS);
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  0, nullptr, &numPlatforms)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                  numPlatforms, &platform_, nullptr)),
-              PI_SUCCESS)
-        << "piPlatformsGet failed.\n";
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                  platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
-              PI_SUCCESS);
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                  nullptr, 1, &device_, nullptr, nullptr, &context_)),
-              PI_SUCCESS);
-    EXPECT_NE(context_, nullptr);
-
-    pi_sampler_properties sampler_properties[] = {
-        PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS,
-        static_cast<pi_sampler_properties>(normalizedCoords_),
-        PI_SAMPLER_PROPERTIES_ADDRESSING_MODE,
-        static_cast<pi_sampler_properties>(addressMode_),
-        PI_SAMPLER_PROPERTIES_FILTER_MODE,
-        static_cast<pi_sampler_properties>(filterMode_),
-        0};
-
-    ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piSamplerCreate>(
-                  context_, sampler_properties, &sampler_)),
-              PI_SUCCESS);
-  }
-
-  void TearDown() override {
-    if (plugin.has_value()) {
-      plugin->call<detail::PiApiKind::piSamplerRelease>(sampler_);
-      plugin->call<detail::PiApiKind::piDeviceRelease>(device_);
-      plugin->call<detail::PiApiKind::piContextRelease>(context_);
-    }
-  }
-};
-
-TEST_P(SamplerPropertiesTest, piCheckNormalizedCoords) {
-  pi_bool actualNormalizedCoords = !normalizedCoords_;
-
-  plugin->call<detail::PiApiKind::piSamplerGetInfo>(
-      sampler_, PI_SAMPLER_INFO_NORMALIZED_COORDS, sizeof(pi_bool),
-      &actualNormalizedCoords, nullptr);
-
-  ASSERT_EQ(actualNormalizedCoords, normalizedCoords_);
-}
-
-TEST_P(SamplerPropertiesTest, piCheckFilterMode) {
-  pi_sampler_filter_mode actualFilterMode;
-
-  plugin->call<detail::PiApiKind::piSamplerGetInfo>(
-      sampler_, PI_SAMPLER_INFO_FILTER_MODE, sizeof(pi_sampler_filter_mode),
-      &actualFilterMode, nullptr);
-
-  ASSERT_EQ(actualFilterMode, filterMode_);
-}
-
-TEST_P(SamplerPropertiesTest, piCheckAddressingMode) {
-  pi_sampler_addressing_mode actualAddressMode;
-
-  plugin->call<detail::PiApiKind::piSamplerGetInfo>(
-      sampler_, PI_SAMPLER_INFO_ADDRESSING_MODE,
-      sizeof(pi_sampler_addressing_mode), &actualAddressMode, nullptr);
-
-  ASSERT_EQ(actualAddressMode, addressMode_);
-}
-
-INSTANTIATE_TEST_SUITE_P(
-    SamplerPropertiesTestImpl, SamplerPropertiesTest,
-    ::testing::Combine(
-        ::testing::Values(PI_TRUE, PI_FALSE),
-        ::testing::Values(PI_SAMPLER_FILTER_MODE_LINEAR,
-                          PI_SAMPLER_FILTER_MODE_NEAREST),
-        ::testing::Values(PI_SAMPLER_ADDRESSING_MODE_CLAMP,
-                          PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE,
-                          PI_SAMPLER_ADDRESSING_MODE_NONE,
-                          PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT,
-                          PI_SAMPLER_ADDRESSING_MODE_REPEAT)));
-} // namespace