Skip to content

Commit

Permalink
Merge pull request #724 from CHIP-SPV/fix-hipcc-path
Browse files Browse the repository at this point in the history
some additional 1.1 fixes (on main)
  • Loading branch information
pvelesko authored Dec 12, 2023
2 parents 566c690 + 3a380e3 commit 6f602e7
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 14 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ endif()
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)

if(uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
add_compile_options(-Wall -gdwarf-4 -O1)
add_compile_options(-Wall -O1)
list(APPEND CHIP_SPV_DEFINITIONS SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
else()
list(APPEND CHIP_SPV_DEFINITIONS SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_WARN)
Expand Down
23 changes: 23 additions & 0 deletions docs/release_notes/chipStar_1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,24 @@ speeding up various workloads significantly. Workloads that do not exploit
parallelism but enqueue a lot of very small kernels (in the tens-of-microseconds
range) may also benefit, as the barrier itself could dominate the execution time.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Improved portability to other platforms & devices
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The portability of chipStar has been improved and tested with the ARM Mali G52 GPU device.

To build chipStar that works with ARM Mali G52:

1) build LLVM and SPIRV-LLVM-Translator as described in the README.md

2) build chipStar with -DCHIP_MALI_GPU_WORKAROUNDS=ON cmake option

There are some limitations. Kernels that require subgroups will not work,
because chipStar currently implements subgroups with the ``cl_intel_subgroups``
extension.



==============
Minor Features
==============
Expand All @@ -76,6 +94,11 @@ Minor Features

* Level Zero command lists are now recycled (#665).

* Improved support for fast-math compilation. Note that this requires using
Clang's -ffast-math flag, not CUDA's -use_fast_math flag.



==============
Major Bugfixes
==============
Expand Down
17 changes: 12 additions & 5 deletions scripts/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,24 @@ def run_cmd(cmd):

cmd = f"{modules} {env_vars} ./hipInfo"
out, _ = run_cmd(cmd)
texture_support = "maxTexture1DLinear:" in out and 0 < int(out.split("maxTexture1DLinear:")[1].split("\n")[0].strip())
texture_support = 0 < int(out.split("maxTexture1DLinear:")[1].split("\n")[0].strip())
double_support = 0 < int(out.split("arch.hasDoubles:")[1].split("\n")[0].strip())
if double_support:
double_cmd = ""
else:
double_cmd = "|[Dd]ouble"
if not texture_support:
texture_cmd = "|[Tt]ex"
else:
texture_cmd = ""



if args.categories:
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"
cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt"
cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt"
cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt"
cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt"
if(args.dry_run):
print(cmd_deviceFunc)
print(cmd_graph)
Expand Down
20 changes: 14 additions & 6 deletions scripts/unit_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@

set -e

# read the file /opt/actions-runner/num-threads.txt and set the number of threads to the value in the file
# if the file does not exist, set the number of threads to 24
if [ -f "/opt/actions-runner/num-threads.txt" ]; then
num_threads=$(cat /opt/actions-runner/num-threads.txt)
else
num_threads=24
fi

num_tries=1
num_threads=24
timeout=200

# Check if at least one argument is provided
Expand All @@ -23,13 +30,13 @@ build_type=$(echo "$1" | tr '[:lower:]' '[:upper:]')

if [ "$2" == "llvm-15" ]; then
LLVM=llvm-15
CLANG=clang/clang15-spirv-omp
CLANG=llvm/15.0/dynamic
elif [ "$2" == "llvm-16" ]; then
LLVM=llvm-16
CLANG=clang/clang16-spirv-omp
CLANG=llvm/16.0/dynamic
elif [ "$2" == "llvm-17" ]; then
LLVM=llvm-17
CLANG=clang/clang17-spirv-omp
CLANG=llvm/17.0/dynamic
else
echo "$2"
echo "Invalid 2nd argument. Use either 'llvm-15', 'llvm-16' or 'llvm-17'."
Expand Down Expand Up @@ -95,10 +102,9 @@ export IGC_EnableDPEmulation=1
export OverrideDefaultFP64Settings=1
export CHIP_LOGLEVEL=err
export POCL_KERNEL_CACHE=0
export CHIP_L0_COLLECT_EVENTS_TIMEOUT=30

# Use OpenCL for building/test discovery to prevent Level Zero from being used in multi-thread/multi-process environment
module load $CLANG intel/opencl # leave intel/opencl loaded otherwise hip_sycl_interop samples segfault upon exit
module load $CLANG opencl/dgpu # leave intel/opencl loaded otherwise hip_sycl_interop samples segfault upon exit

output=$(clinfo -l 2>&1 | grep "Platform #0")
echo $output
Expand Down Expand Up @@ -142,6 +148,8 @@ else
# ../scripts/compile_libceed.sh ${CHIPSTAR_INSTALL_DIR}
fi

module unload opencl/dgpu

# module load HIP/hipBLAS/main/release # for libCEED NOTE: Must be after build step otherwise it will cause link issues.

# Test Level Zero Regular Cmd Lists iGPU
Expand Down
27 changes: 25 additions & 2 deletions src/Utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
#include <fstream>
#include <random>

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <link.h>

bool isConvertibleToInt(const std::string &str) {
try {
std::stoi(str);
Expand Down Expand Up @@ -132,23 +137,41 @@ std::optional<std::string> readFromFile(const fs::path Path) {
return std::nullopt;
}

// dl_iterate_phdr callback: when the inspected shared object is libCHIP.so,
// store the directory portion of its path into the std::string passed via
// Data and stop the iteration.
//
// Returns 0 to continue iterating, non-zero (1) to stop once libCHIP.so has
// been found.
static int dlIterateCallback(struct dl_phdr_info *Info, size_t Size,
                             void *Data) {
  auto &Result = *static_cast<std::string *>(Data);
  std::string ObjPath(Info->dlpi_name);

  const size_t LibPos = ObjPath.find("/libCHIP.so");
  if (LibPos == std::string::npos)
    return 0; // Not libCHIP.so; keep walking the loaded objects.

  // Keep only the directory prefix (everything before "/libCHIP.so").
  Result = ObjPath.substr(0, LibPos);
  return 1; // Found it; terminate the iteration.
}

std::optional<fs::path> getHIPCCPath() {
static std::once_flag Flag;
static std::optional<fs::path> HIPCCPath;

std::call_once(Flag, []() {
std::string LibCHIPPath("/dev/null");
dl_iterate_phdr(dlIterateCallback, static_cast<void*>(&LibCHIPPath));

std::call_once(Flag, [&]() {
for (const auto &ExeCand : {
fs::path(LibCHIPPath) / "bin/hipcc",
#if !CHIP_DEBUG_BUILD
fs::path(CHIP_INSTALL_DIR) / "bin/hipcc",
#endif
fs::path(CHIP_BUILD_DIR) / "bin/hipcc"
fs::path(CHIP_BUILD_DIR) / "bin/hipcc"
})
if (canExecuteHipcc(ExeCand)) {
HIPCCPath = ExeCand;
return;
}
});

logDebug("HIPCC path: {}", HIPCCPath->c_str());
return HIPCCPath;
}

Expand Down

0 comments on commit 6f602e7

Please sign in to comment.