diff --git a/bitcode/ROCm-Device-Libs b/bitcode/ROCm-Device-Libs
index 7eca6d212..b5c3eddd3 160000
--- a/bitcode/ROCm-Device-Libs
+++ b/bitcode/ROCm-Device-Libs
@@ -1 +1 @@
-Subproject commit 7eca6d2125b7e8a1738313326a2f874ce945bb61
+Subproject commit b5c3eddd352a074ce6c8222be859ed152faed9f3
diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake
index 54188c132..e911402fb 100644
--- a/cmake/FindLLVM.cmake
+++ b/cmake/FindLLVM.cmake
@@ -22,16 +22,22 @@
 #   THE SOFTWARE.
 #
 #=============================================================================
+# Drop a cached LLVM_CONFIG_BIN-NOTFOUND value so the find_program() search below runs again.
+if(DEFINED LLVM_CONFIG_BIN AND "${LLVM_CONFIG_BIN}" STREQUAL "LLVM_CONFIG_BIN-NOTFOUND")
+  message(STATUS "LLVM_CONFIG_BIN was set to LLVM_CONFIG_BIN-NOTFOUND. Unsetting...")
+  unset(LLVM_CONFIG_BIN CACHE)
+endif()
 
-if(NOT DEFINED LLVM_CONFIG_BIN)
+if(DEFINED LLVM_CONFIG_BIN)
+  # it was set to a path: check that the path exists (quoted so an empty value stays one argument)
+  if(NOT EXISTS "${LLVM_CONFIG_BIN}")
+    message(FATAL_ERROR "Provided LLVM_CONFIG_BIN (${LLVM_CONFIG_BIN}) does not exist")
+  endif()
+else() # if it was not defined (or was just unset above), look for it
   find_program(LLVM_CONFIG_BIN NAMES llvm-config)
   if(NOT LLVM_CONFIG_BIN)
       message(FATAL_ERROR "Can't find llvm-config. Please provide CMake argument -DLLVM_CONFIG_BIN=/path/to/llvm-config<-version>")
   endif()
-else() # check that LLVM_CONFIG_BIN points to existing binary
-    if(NOT EXISTS ${LLVM_CONFIG_BIN})
-        message(FATAL_ERROR "Provided LLVM_CONFIG_BIN (${LLVM_CONFIG_BIN}) does not exist")
-    endif()
 endif()
 message(STATUS "Using llvm-config: ${LLVM_CONFIG_BIN}")
 
diff --git a/cmake/UnitTests.cmake b/cmake/UnitTests.cmake
index 0106f719a..27bd8c849 100644
--- a/cmake/UnitTests.cmake
+++ b/cmake/UnitTests.cmake
@@ -13,6 +13,7 @@ list(APPEND CPU_POCL_FAILED_TESTS " ")
 list(APPEND GPU_POCL_FAILED_TESTS " ")  # TODO
 list(APPEND NON_PARALLEL_TESTS " ")
 
+list(APPEND NON_PARALLEL_TESTS "hipMultiThreadAddCallback") # added after adding MKL back into testing
 list(APPEND NON_PARALLEL_TESTS "TestLargeGlobalVar")
 list(APPEND NON_PARALLEL_TESTS "cuda-asyncAPI")
 list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpy_Negative")
@@ -1436,6 +1437,8 @@ list(APPEND DGPU_LEVEL0_BASE_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Func
 list(APPEND IGPU_LEVEL0_RCL_FAILED_TESTS "Unit_hipMultiThreadDevice_NearZero") # only happens when ctest -j $(nproc) RCL
 list(APPEND IGPU_LEVEL0_RCL_FAILED_TESTS "Unit_hipMemsetFunctional_PartialSet_3D") # only happens when ctest -j $(nproc) RCL
 
+list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "hip_sycl_interop") # Timeout Using MKL 2023.2.3 
+list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "hip_sycl_interop_no_buffers") # Timeout Using MKL 2023.2.3 
 list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "Unit_hipMemset2DAsync_MultiThread") # Race condition 
 list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "hipStreamSemantics") # SEGFAULT
 list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "deviceMallocCompile") # Unimplemented
diff --git a/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp b/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp
index 277c71467..863764e2d 100644
--- a/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp
+++ b/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp
@@ -113,8 +113,17 @@ int oneMKLGemmTest(uintptr_t* nativeHandlers, const char* hip_backend, float* A,
     std::vector<sycl::device> sycl_devices(1);
     sycl_devices[0] = sycl_device;
     sycl::context sycl_context = sycl::ext::oneapi::level_zero::make_context(sycl_devices, (pi_native_handle)hContext, 1);
+
+    bool isImmCmdList = true;
+    // Query the environment for the CHIP_L0_IMM_CMD_LIST flag; if it is OFF, off or 0, set isImmCmdList to false.
+    char* env = getenv("CHIP_L0_IMM_CMD_LIST");
+    if (env != NULL) {
+      if (!strcmp(env, "OFF") || !strcmp(env, "off") || !strcmp(env, "0")) {
+        isImmCmdList = false;
+      }
+    }
 #if __INTEL_LLVM_COMPILER >= 20240000
-    sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, true, 1, sycl::property::queue::in_order());
+    sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, isImmCmdList, 1, sycl::property::queue::in_order());
 #else
     sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, 1);
 #endif
diff --git a/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp b/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp
index 8ad660f79..b71b42781 100644
--- a/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp
+++ b/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp
@@ -111,9 +111,19 @@ int oneMKLGemmTest(uintptr_t* nativeHandlers, const char* hip_backend, float* A,
     std::vector<sycl::device> sycl_devices(1);
     sycl_devices[0] = sycl_device;
     sycl::context sycl_context = sycl::ext::oneapi::level_zero::make_context(sycl_devices, (pi_native_handle)hContext, 1);
+
+    bool isImmCmdList = true;
+    // Query the environment for the CHIP_L0_IMM_CMD_LIST flag; if it is OFF, off or 0, set isImmCmdList to false.
+    char* env = getenv("CHIP_L0_IMM_CMD_LIST");
+    if (env != NULL) {
+      if (!strcmp(env, "OFF") || !strcmp(env, "off") || !strcmp(env, "0")) {
+        isImmCmdList = false;
+      }
+    }
+
 #if __INTEL_LLVM_COMPILER >= 20240000
     sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue,
-                                                           true, 1, sycl::property::queue::in_order());
+                                                           isImmCmdList, 1, sycl::property::queue::in_order());
 #else
     sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, 1);
 #endif
diff --git a/scripts/check.py b/scripts/check.py
index 3882d18e6..a275058f8 100755
--- a/scripts/check.py
+++ b/scripts/check.py
@@ -67,7 +67,7 @@ def run_cmd(cmd):
 # setup module load line
 modules = ""
 if args.modules == "on":
-  modules =  ". /etc/profile.d/modules.sh && module load "
+  modules =  ". /etc/profile.d/modules.sh && export MODULEPATH=/space/modulefiles && module load "
   if args.backend == "opencl" and args.device_type == "cpu":
       modules += "opencl/cpu"
   elif args.backend == "opencl" and args.device_type == "igpu":
diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh
index 8ecba8d58..505ffb801 100755
--- a/scripts/unit_tests.sh
+++ b/scripts/unit_tests.sh
@@ -7,7 +7,7 @@ set -e
 if [ -f "/opt/actions-runner/num-threads.txt" ]; then
   num_threads=$(cat /opt/actions-runner/num-threads.txt)
 else
-  num_threads=24
+  num_threads=$(nproc)
 fi
 
 num_tries=1
@@ -104,7 +104,10 @@ export CHIP_LOGLEVEL=err
 export POCL_KERNEL_CACHE=0
 
 # Use OpenCL for building/test discovery to prevent Level Zero from being used in multi-thread/multi-process environment
-module load $CLANG opencl/dgpu # leave intel/opencl loaded otherwise hip_sycl_interop samples segfault upon exit
+module use ~/modulefiles
+module use /space/modulefiles
+module load oneapi/mkl/2023.2.3 oneapi/compiler/2023.2.3 $CLANG opencl/dgpu
+which icpx
 
 output=$(clinfo -l 2>&1 | grep "Platform #0")
 echo $output
@@ -138,8 +141,8 @@ else
   cd build
 
   echo "building with $CLANG"
-  cmake ../ -DCMAKE_BUILD_TYPE="$build_type" &> /dev/null
-  make all build_tests install -j 24 #&> /dev/null
+  cmake ../ -DCMAKE_BUILD_TYPE="$build_type"
+  make all build_tests install -j $(nproc) #&> /dev/null
   echo "chipStar build complete." 
 
   # # Build libCEED
@@ -148,7 +151,7 @@ else
   # ../scripts/compile_libceed.sh ${CHIPSTAR_INSTALL_DIR}
 fi
 
-module unload opencl/dgpu
+module unload opencl/dgpu oneapi/compiler/2023.2.3
 
 # module load HIP/hipBLAS/main/release # for libCEED NOTE: Must be after build step otherwise it will cause link issues.
 
diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc
index 73de32ca4..92790fc0b 100644
--- a/src/backend/Level0/CHIPBackendLevel0.cc
+++ b/src/backend/Level0/CHIPBackendLevel0.cc
@@ -1359,7 +1359,6 @@ std::shared_ptr<chipstar::Event> CHIPQueueLevel0::enqueueMarkerImpl() {
 }
 
 std::shared_ptr<chipstar::Event> CHIPQueueLevel0::enqueueMarkerImplReg() {
-  logError("CHIPQueueLevel0::enqueueMarkerImplReg");
   std::shared_ptr<chipstar::Event> MarkerEvent =
       static_cast<CHIPBackendLevel0 *>(Backend)->createEventShared(
           ChipContext_);
@@ -1427,7 +1426,6 @@ std::shared_ptr<chipstar::Event> CHIPQueueLevel0::enqueueBarrierImpl(
 
 std::shared_ptr<chipstar::Event> CHIPQueueLevel0::enqueueBarrierImplReg(
     const std::vector<std::shared_ptr<chipstar::Event>> &EventsToWaitFor) {
-  logError("CHIPQueueLevel0::enqueueBarrierImplReg");
   std::shared_ptr<chipstar::Event> BarrierEvent =
       static_cast<CHIPBackendLevel0 *>(Backend)->createEventShared(
           ChipContext_);