From 43fb84a8536de7ae265a013b6bd7ab823a7df0de Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Fri, 1 Dec 2023 17:09:37 +0200 Subject: [PATCH 1/7] scripts/check.py: disable tests using double if the device doesn't support doubles --- scripts/check.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/scripts/check.py b/scripts/check.py index 45e7486ba..3882d18e6 100755 --- a/scripts/check.py +++ b/scripts/check.py @@ -86,17 +86,24 @@ def run_cmd(cmd): cmd = f"{modules} {env_vars} ./hipInfo" out, _ = run_cmd(cmd) -texture_support = "maxTexture1DLinear:" in out and 0 < int(out.split("maxTexture1DLinear:")[1].split("\n")[0].strip()) +texture_support = 0 < int(out.split("maxTexture1DLinear:")[1].split("\n")[0].strip()) +double_support = 0 < int(out.split("arch.hasDoubles:")[1].split("\n")[0].strip()) +if double_support: + double_cmd = "" +else: + double_cmd = "|[Dd]ouble" if not texture_support: texture_cmd = "|[Tt]ex" else: texture_cmd = "" + + if args.categories: - cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt" - cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt" - cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt" - cmd_other = 
f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt" + cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt" + cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt" + cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt" + cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt" if(args.dry_run): print(cmd_deviceFunc) print(cmd_graph) From 35ca3998e28c48a2f207b3d793af89261be05b10 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Fri, 1 Dec 2023 17:14:31 +0200 Subject: [PATCH 2/7] fix issue #715 - getHIPCCPath picking up wrong 
hipcc binary --- src/Utils.cc | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/Utils.cc b/src/Utils.cc index 0747270f6..6d771f24b 100644 --- a/src/Utils.cc +++ b/src/Utils.cc @@ -27,6 +27,11 @@ #include #include +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include + bool isConvertibleToInt(const std::string &str) { try { std::stoi(str); @@ -132,16 +137,33 @@ std::optional readFromFile(const fs::path Path) { return std::nullopt; } +static int dlIterateCallback(struct dl_phdr_info *Info, + size_t Size, void *Data) { + std::string *Res = static_cast(Data); + std::string DlName(Info->dlpi_name); + size_t Pos = DlName.find("/libCHIP.so"); + if (Pos == std::string::npos) + return 0; + + DlName.erase(Pos); + Res->assign(DlName); + return 1; +} + std::optional getHIPCCPath() { static std::once_flag Flag; static std::optional HIPCCPath; - std::call_once(Flag, []() { + std::string LibCHIPPath("/dev/null"); + dl_iterate_phdr(dlIterateCallback, static_cast(&LibCHIPPath)); + + std::call_once(Flag, [&]() { for (const auto &ExeCand : { + fs::path(LibCHIPPath) / "bin/hipcc", #if !CHIP_DEBUG_BUILD fs::path(CHIP_INSTALL_DIR) / "bin/hipcc", #endif - fs::path(CHIP_BUILD_DIR) / "bin/hipcc" + fs::path(CHIP_BUILD_DIR) / "bin/hipcc" }) if (canExecuteHipcc(ExeCand)) { HIPCCPath = ExeCand; @@ -149,6 +171,7 @@ std::optional getHIPCCPath() { } }); + logDebug("HIPCC path: {}", HIPCCPath->c_str()); return HIPCCPath; } From 50bd0c86500e6a59674214436ddb0d69aaa37dd3 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 5 Dec 2023 16:57:00 +0200 Subject: [PATCH 3/7] update documentation --- docs/release_notes/chipStar_1.1.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/release_notes/chipStar_1.1.rst b/docs/release_notes/chipStar_1.1.rst index 0ca45cd0b..749443901 100644 --- a/docs/release_notes/chipStar_1.1.rst +++ b/docs/release_notes/chipStar_1.1.rst @@ -62,6 +62,24 @@ speeding up various workloads 
significantly. Workloads that do not exploit parallelism but enqueue a lot of very small kernels (in the 10's of microseconds range) may also benefit as the barrier itself could dominate the execution time. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Improved portability to other platforms & devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The portability of chipStar has been improved and tested with an ARM Mali G52 GPU device. + +To build chipStar that works with ARM Mali G52: + +1) build LLVM and SPIRV-LLVM-Translator as described in the README.md + +2) build chipStar with the ``-DCHIP_MALI_GPU_WORKAROUNDS=ON`` CMake option + +There are some limitations. Kernels that require subgroups will not work, +because chipStar currently implements subgroups with the ``cl_intel_subgroups`` +extension. + + + ============== Minor Features ============== @@ -76,6 +94,11 @@ Minor Features * Level Zero command lists are now recycled (#665). +* Improved support for fast-math compilation. Note that this requires using + Clang's ``-ffast-math`` flag, not CUDA's ``-use_fast_math`` flag. 
+ + + ============== Major Bugfixes ============== From 1b8e9e8cdcdd0dd88342b36c16aa599fe4061ccb Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Mon, 11 Dec 2023 08:28:34 -0600 Subject: [PATCH 4/7] switch to using LLVM module --- scripts/unit_tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index eef4246f2..aff95d13f 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -23,13 +23,13 @@ build_type=$(echo "$1" | tr '[:lower:]' '[:upper:]') if [ "$2" == "llvm-15" ]; then LLVM=llvm-15 - CLANG=clang/clang15-spirv-omp + CLANG=llvm/15.0/dynamic elif [ "$2" == "llvm-16" ]; then LLVM=llvm-16 - CLANG=clang/clang16-spirv-omp + CLANG=llvm/16.0/dynamic elif [ "$2" == "llvm-17" ]; then LLVM=llvm-17 - CLANG=clang/clang17-spirv-omp + CLANG=llvm/17.0/dynamic else echo "$2" echo "Invalid 2nd argument. Use either 'llvm-15', 'llvm-16' or 'llvm-17'." From c9cef4d6c3979a4da24ea1104d3081c0b20e8be5 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 12 Dec 2023 05:43:59 -0600 Subject: [PATCH 5/7] Remove -gdwarf-4 flag from debug build options --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index edf3c7d2c..00c0e277f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -284,7 +284,7 @@ endif() string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) if(uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - add_compile_options(-Wall -gdwarf-4 -O1) + add_compile_options(-Wall -O1) list(APPEND CHIP_SPV_DEFINITIONS SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE) else() list(APPEND CHIP_SPV_DEFINITIONS SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_WARN) From 4d421e1b821d71cd242d686e509b5aea1fd607dd Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 12 Dec 2023 06:07:06 -0600 Subject: [PATCH 6/7] unit_tests.sh updates * build using intel dgpu * use self-built runtime * no event wait override for l0 --- scripts/unit_tests.sh | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index aff95d13f..b247d5cbd 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -95,10 +95,9 @@ export IGC_EnableDPEmulation=1 export OverrideDefaultFP64Settings=1 export CHIP_LOGLEVEL=err export POCL_KERNEL_CACHE=0 -export CHIP_L0_COLLECT_EVENTS_TIMEOUT=30 # Use OpenCL for building/test discovery to prevent Level Zero from being used in multi-thread/multi-process environment -module load $CLANG intel/opencl # leave intel/opencl loaded otherwise hip_sycl_interop samples segfault upon exit +module load $CLANG opencl/dgpu # leave intel/opencl loaded otherwise hip_sycl_interop samples segfault upon exit output=$(clinfo -l 2>&1 | grep "Platform #0") echo $output @@ -142,6 +141,8 @@ else # ../scripts/compile_libceed.sh ${CHIPSTAR_INSTALL_DIR} fi +module unload opencl/dgpu + # module load HIP/hipBLAS/main/release # for libCEED NOTE: Must be after build step otherwise it will cause link issues. # Test Level Zero Regular Cmd Lists iGPU From 3a380e33c590ad611a0ff192e14f1375429f8760 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 12 Dec 2023 08:18:22 -0600 Subject: [PATCH 7/7] set number of parallel CI tests from num-threads.txt --- scripts/unit_tests.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index b247d5cbd..d1e8e0dc1 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -2,8 +2,15 @@ set -e +# read the file /opt/actions-runner/num-threads.txt and set the number of threads to the value in the file +# if the file does not exist, set the number of threads to 24 +if [ -f "/opt/actions-runner/num-threads.txt" ]; then + num_threads=$(cat /opt/actions-runner/num-threads.txt) +else + num_threads=24 +fi + num_tries=1 -num_threads=24 timeout=200 # Check if at least one argument is provided