diff --git a/bitcode/ROCm-Device-Libs b/bitcode/ROCm-Device-Libs index 7eca6d212..b5c3eddd3 160000 --- a/bitcode/ROCm-Device-Libs +++ b/bitcode/ROCm-Device-Libs @@ -1 +1 @@ -Subproject commit 7eca6d2125b7e8a1738313326a2f874ce945bb61 +Subproject commit b5c3eddd352a074ce6c8222be859ed152faed9f3 diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake index 54188c132..e911402fb 100644 --- a/cmake/FindLLVM.cmake +++ b/cmake/FindLLVM.cmake @@ -22,16 +22,22 @@ # THE SOFTWARE. # #============================================================================= +if(DEFINED LLVM_CONFIG_BIN) + # if it was cached to NOT_FOUND, unset it + if(LLVM_CONFIG_BIN STREQUAL "LLVM_CONFIG_BIN-NOTFOUND") + message(STATUS "LLVM_CONFIG_BIN was set to LLVM_CONFIG_BIN-NOTFOUND. Unsetting...") + unset(LLVM_CONFIG_BIN CACHE) + endif() -if(NOT DEFINED LLVM_CONFIG_BIN) + # if it was set to a path, check that it exists + if(NOT EXISTS ${LLVM_CONFIG_BIN}) + message(FATAL_ERROR "Provided LLVM_CONFIG_BIN (${LLVM_CONFIG_BIN}) does not exist") + endif() +else() # if it was not defined, look for it find_program(LLVM_CONFIG_BIN NAMES llvm-config) if(NOT LLVM_CONFIG_BIN) message(FATAL_ERROR "Can't find llvm-config. 
Please provide CMake argument -DLLVM_CONFIG_BIN=/path/to/llvm-config<-version>") endif() -else() # check that LLVM_CONFIG_BIN points to existing binary - if(NOT EXISTS ${LLVM_CONFIG_BIN}) - message(FATAL_ERROR "Provided LLVM_CONFIG_BIN (${LLVM_CONFIG_BIN}) does not exist") - endif() endif() message(STATUS "Using llvm-config: ${LLVM_CONFIG_BIN}") diff --git a/cmake/UnitTests.cmake b/cmake/UnitTests.cmake index 0106f719a..27bd8c849 100644 --- a/cmake/UnitTests.cmake +++ b/cmake/UnitTests.cmake @@ -13,6 +13,7 @@ list(APPEND CPU_POCL_FAILED_TESTS " ") list(APPEND GPU_POCL_FAILED_TESTS " ") # TODO list(APPEND NON_PARALLEL_TESTS " ") +list(APPEND NON_PARALLEL_TESTS "hipMultiThreadAddCallback") # added after adding MKL back into testing list(APPEND NON_PARALLEL_TESTS "TestLargeGlobalVar") list(APPEND NON_PARALLEL_TESTS "cuda-asyncAPI") list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpy_Negative") @@ -1436,6 +1437,8 @@ list(APPEND DGPU_LEVEL0_BASE_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Func list(APPEND IGPU_LEVEL0_RCL_FAILED_TESTS "Unit_hipMultiThreadDevice_NearZero") # only happens when ctest -j $(nproc) RCL list(APPEND IGPU_LEVEL0_RCL_FAILED_TESTS "Unit_hipMemsetFunctional_PartialSet_3D") # only happens when ctest -j $(nproc) RCL +list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "hip_sycl_interop") # Timeout Using MKL 2023.2.3 +list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "hip_sycl_interop_no_buffers") # Timeout Using MKL 2023.2.3 list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "Unit_hipMemset2DAsync_MultiThread") # Race condition list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "hipStreamSemantics") # SEGFAULT list(APPEND IGPU_LEVEL0_BASE_FAILED_TESTS "deviceMallocCompile") # Unimplemented diff --git a/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp b/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp index 277c71467..863764e2d 100644 --- a/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp +++ 
b/samples/hip_sycl_interop/onemkl_gemm_wrapper/onemkl_gemm_wrapper.cpp @@ -113,8 +113,17 @@ int oneMKLGemmTest(uintptr_t* nativeHandlers, const char* hip_backend, float* A, std::vector sycl_devices(1); sycl_devices[0] = sycl_device; sycl::context sycl_context = sycl::ext::oneapi::level_zero::make_context(sycl_devices, (pi_native_handle)hContext, 1); + + bool isImmCmdList = true; + // query the environment for CHIP_L0_IMM_CMD_LIST flag, if it's OFF, off or 0, then set isImmCmdList to false + char* env = getenv("CHIP_L0_IMM_CMD_LIST"); + if (env != NULL) { + if (!strcmp(env, "OFF") || !strcmp(env, "off") || !strcmp(env, "0")) { + isImmCmdList = false; + } + } #if __INTEL_LLVM_COMPILER >= 20240000 - sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, true, 1, sycl::property::queue::in_order()); + sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, isImmCmdList, 1, sycl::property::queue::in_order()); #else sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, 1); #endif diff --git a/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp b/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp index 8ad660f79..b71b42781 100644 --- a/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp +++ b/samples/hip_sycl_interop_no_buffers/onemkl_gemm_wrapper_no_buffers/onemkl_gemm_wrapper.cpp @@ -111,9 +111,19 @@ int oneMKLGemmTest(uintptr_t* nativeHandlers, const char* hip_backend, float* A, std::vector sycl_devices(1); sycl_devices[0] = sycl_device; sycl::context sycl_context = sycl::ext::oneapi::level_zero::make_context(sycl_devices, (pi_native_handle)hContext, 1); + + bool isImmCmdList = true; + // query the environment for CHIP_L0_IMM_CMD_LIST flag, if it's OFF, off or 0, then set isImmCmdList to false + 
char* env = getenv("CHIP_L0_IMM_CMD_LIST"); + if (env != NULL) { + if (!strcmp(env, "OFF") || !strcmp(env, "off") || !strcmp(env, "0")) { + isImmCmdList = false; + } + } + #if __INTEL_LLVM_COMPILER >= 20240000 sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, - true, 1, sycl::property::queue::in_order()); + isImmCmdList, 1, sycl::property::queue::in_order()); #else sycl_queue = sycl::ext::oneapi::level_zero::make_queue(sycl_context, sycl_device, (pi_native_handle)hQueue, 1); #endif diff --git a/scripts/check.py b/scripts/check.py index 3882d18e6..a275058f8 100755 --- a/scripts/check.py +++ b/scripts/check.py @@ -67,7 +67,7 @@ def run_cmd(cmd): # setup module load line modules = "" if args.modules == "on": - modules = ". /etc/profile.d/modules.sh && module load " + modules = ". /etc/profile.d/modules.sh && export MODULEPATH=/space/modulefiles && module load " if args.backend == "opencl" and args.device_type == "cpu": modules += "opencl/cpu" elif args.backend == "opencl" and args.device_type == "igpu": diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index 8ecba8d58..505ffb801 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -7,7 +7,7 @@ set -e if [ -f "/opt/actions-runner/num-threads.txt" ]; then num_threads=$(cat /opt/actions-runner/num-threads.txt) else - num_threads=24 + num_threads=$(nproc) fi num_tries=1 @@ -104,7 +104,10 @@ export CHIP_LOGLEVEL=err export POCL_KERNEL_CACHE=0 # Use OpenCL for building/test discovery to prevent Level Zero from being used in multi-thread/multi-process environment -module load $CLANG opencl/dgpu # leave intel/opencl loaded otherwise hip_sycl_interop samples segfault upon exit +module use ~/modulefiles +module use /space/modulefiles +module load oneapi/mkl/2023.2.3 oneapi/compiler/2023.2.3 $CLANG opencl/dgpu +which icpx output=$(clinfo -l 2>&1 | grep "Platform #0") echo $output @@ -138,8 +141,8 @@ else cd build echo "building with $CLANG" - cmake 
../ -DCMAKE_BUILD_TYPE="$build_type" &> /dev/null - make all build_tests install -j 24 #&> /dev/null + cmake ../ -DCMAKE_BUILD_TYPE="$build_type" + make all build_tests install -j $(nproc) #&> /dev/null echo "chipStar build complete." # # Build libCEED @@ -148,7 +151,7 @@ else # ../scripts/compile_libceed.sh ${CHIPSTAR_INSTALL_DIR} fi -module unload opencl/dgpu +module unload opencl/dgpu oneapi/compiler/2023.2.3 # module load HIP/hipBLAS/main/release # for libCEED NOTE: Must be after build step otherwise it will cause link issues. diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 73de32ca4..92790fc0b 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1359,7 +1359,6 @@ std::shared_ptr CHIPQueueLevel0::enqueueMarkerImpl() { } std::shared_ptr CHIPQueueLevel0::enqueueMarkerImplReg() { - logError("CHIPQueueLevel0::enqueueMarkerImplReg"); std::shared_ptr MarkerEvent = static_cast(Backend)->createEventShared( ChipContext_); @@ -1427,7 +1426,6 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImpl( std::shared_ptr CHIPQueueLevel0::enqueueBarrierImplReg( const std::vector> &EventsToWaitFor) { - logError("CHIPQueueLevel0::enqueueBarrierImplReg"); std::shared_ptr BarrierEvent = static_cast(Backend)->createEventShared( ChipContext_);