Skip to content

Commit

Permalink
Merge pull request #720 from CHIP-SPV/merge-release
Browse files Browse the repository at this point in the history
Merge Release-1.1 into main
  • Loading branch information
pvelesko committed Dec 7, 2023
2 parents c4a6c39 + 7bb9b3f commit 566c690
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 51 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ include(UnitTests)
target_compile_definitions(
CHIP PRIVATE ${HIP_ENABLE_SPIRV} ${CHIP_SPV_DEFINITIONS})

target_link_libraries(CHIP INTERFACE ${CHIP_INTERFACE_LIBS})
target_link_libraries(CHIP PUBLIC ${CHIP_INTERFACE_LIBS})

if(HAS_EXPERIMENTAL_FILESYSTEM)
target_link_libraries(CHIP PUBLIC stdc++fs)
Expand Down
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ It's recommended to use the chipStar forks of LLVM and SPIRV-LLVM-Translator.
For this you can use a script included in the chipStar repository:

```bash
# chipStar/scripts/configure_llvm.sh <version 15/16/17> <install_dir>
chipStar/scripts/configure_llvm.sh 17 /opt/install/llvm/17.0
cd ./llvm-project/llvm/build_17
# chipStar/scripts/configure_llvm.sh <version 15/16/17> <install_dir> <static/dynamic>
chipStar/scripts/configure_llvm.sh 17 /opt/install/llvm/17.0 dynamic
cd llvm-project/llvm/build_17
make -j 16
<sudo> make install
```
Expand Down Expand Up @@ -113,12 +113,16 @@ NOTE: If you don't have libOpenCL.so (for example from the `ocl-icd-opencl-dev`
There's a script `check.py` which can be used to run unit tests and which filters out known failing tests for different platforms. Its usage is as follows.

```bash
# BACKEND={opencl/level0/pocl} # Which backend/driver you wish to test, "opencl" = Intel OpenCL runtime, "level0" = Intel LevelZero runtime, "pocl" = PoCL OpenCL runtime
# BACKEND={opencl/level0-{reg,imm}/pocl}
# ^ Which backend/driver/platform you wish to test:
# "opencl" = Intel OpenCL runtime, "level0" = Intel LevelZero runtime with regular command lists (reg) or immediate command lists (imm), "pocl" = PoCL OpenCL runtime
# DEVICE={cpu,igpu,dgpu} # What kind of device to test.
# ^ This selects the expected test pass lists.
# 'igpu' is a Intel Iris Xe iGPU, 'dgpu' a typical recent Intel dGPU such as Data Center GPU Max series or an Arc.
# PARALLEL={N} # How many tests to run in parallel.
# export CHIP_PLATFORM=N # If there are multiple OpenCL platforms present on the system, selects which one to use

python3 $SOURCE_DIR/scripts/check.py --num-threads $PARALLEL $BUILD_DIR $DEVICE $BACKEND
python3 $SOURCE_DIR/scripts/check.py -m off --num-threads $PARALLEL $BUILD_DIR $DEVICE $BACKEND
```

Please refer to the [user documentation](docs/Using.md) for instructions on how to use the installed chipStar to build CUDA/HIP programs.
Expand All @@ -131,8 +135,8 @@ CHIP_PLATFORM=<N> # If there are multiple platform
CHIP_DEVICE=<N> # If there are multiple devices present on the system, selects which one to use. Defaults to 0
CHIP_LOGLEVEL=<trace/debug/info/warn/err/crit> # Sets the log level. If compiled in RELEASE, only err/crit are available
CHIP_DUMP_SPIRV=<ON/OFF(default)> # Dumps the generated SPIR-V code to a file
CHIP_JIT_FLAGS=<flags> # String to override the default JIT flags. Defaults to -x spir -cl-kernel-arg-info -cl-std=CL3.0
CHIP_L0_COLLECT_EVENTS_TIMEOUT=<N(30s default)> # Timeout in milliseconds for collecting Level Zero events
CHIP_JIT_FLAGS=<flags> # String to override the default JIT flags. Defaults to -cl-kernel-arg-info -cl-std=CL3.0
CHIP_L0_COLLECT_EVENTS_TIMEOUT=<N(30s default)> # Timeout in seconds for collecting Level Zero events
CHIP_L0_IMM_CMD_LISTS=<ON(default)/OFF> # Use immediate command lists in Level Zero
```

Expand Down
3 changes: 2 additions & 1 deletion chipStarConfig.hh.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@

#cmakedefine CHIPSTAR_VERSION "@CHIPSTAR_VERSION@"

#cmakedefine OCML_BASIC_ROUNDED_OPERATIONS "@OCML_BASIC_ROUNDED_OPERATIONS@"
// not implemented yet
#undef OCML_BASIC_ROUNDED_OPERATIONS

#cmakedefine CHIP_SOURCE_DIR "@CHIP_SOURCE_DIR@"

Expand Down
14 changes: 14 additions & 0 deletions cmake/UnitTests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@ list(APPEND CPU_POCL_FAILED_TESTS " ")
list(APPEND GPU_POCL_FAILED_TESTS " ") # TODO
list(APPEND NON_PARALLEL_TESTS " ")

list(APPEND NON_PARALLEL_TESTS "TestLargeGlobalVar")
list(APPEND NON_PARALLEL_TESTS "cuda-asyncAPI")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpy_Negative")
list(APPEND NON_PARALLEL_TESTS "firstTouch")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpy_HalfMemCopy")
list(APPEND NON_PARALLEL_TESTS "Unit_hipStreamBeginCapture_ColligatedStrmCapture_defaultflag")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpyWithStream_TestkindDtoH")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpyWithStream_TestkindDefault")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemsetFunctional_ZeroValue_2D")
list(APPEND NON_PARALLEL_TESTS "Unit_hipHostMalloc_NonCoherent")
list(APPEND NON_PARALLEL_TESTS "Unit_hipStreamAddCallback_WithCreatedStream")
list(APPEND NON_PARALLEL_TESTS "cuda-sortnet")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemset3DAsync_SeekSetArrayPortion")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpyToFromSymbol_SyncAndAsync")
list(APPEND NON_PARALLEL_TESTS "MatrixMultiply")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemcpy2DFromArray_PinnedMemSameGPU")
list(APPEND NON_PARALLEL_TESTS "Unit_hipMemset3D_SeekSetArrayPortion")
Expand Down
5 changes: 5 additions & 0 deletions docs/Development.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ There are several transformations (LLVM passes) done on the LLVM IR of the devic
* HipAbort.cpp - special handling for abort() calls from the device side (to cause a host abort currently).
* HipDefrost.cpp - removes freeze from instructions (workaround for the llvm-spirv translator).
* HipDynMem.cpp - replaces dynamically sized shared-memory variables (`extern __shared__ type variable[];`) with a kernel argument. This is because in OpenCL, dynamically-sized local memory can only be passed as kernel argument.
* HipEmitLoweredNames.cpp - required processing for hiprtcGetLoweredName()
* HipGlobalVariable.cpp - creates special kernels that handle access and modification of global scope variables.
* HipKernelArgSpiller.cpp - Reduces the size of large kernel parameter lists by spilling them into a device buffer
* HipLowerSwitch.cpp - Lowers switch instructions with a "non-standard" integer bitwidth (e.g. i4) to bitwidth supported by SPIRV-LLVM-Translator
* HipLowerZeroLengthArrays.cpp - Lowers occurrences of zero length array types (unsupported by SPIRV-LLVM-Translator)
* HipSanityChecks.cpp - sanity checks on the LLVM IR just before HIP-to-SPIR-V lowering
* HipPasses.cpp - defines a pass plugin that runs a collection of LLVM passes (= rest of the files in this directory).
* HipPrintf.cpp - pass to convert calls to the CUDA/HIP printf() to OpenCL/SPIR-V compatible printf() calls.
* HipStripUsedIntrinsics.cpp - pass to remove llvm.used and llvm.compiler.used intrinsic variables.
Expand Down
6 changes: 3 additions & 3 deletions docs/Device_API_support_matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
| Feature | HIP API # of funcs | # of impl in chipStar | chipStar notes |
|-------------------------------|---------------------|-----------------------|---------------------------|
| Coordinate Built-Ins | 12 | 12 | |
| Warp Size variable | supported | supported | chipStar support probably low effort, but requires guarantee from driver side to respect warpSize (cl_intel_required_subgroup_size) |
| Timer functions | 2 | 0 | missing: clock, clock64; seems already available in intel GPU hardware & driver (TODO: unclear about HW clock bit width), possibly needs software (SPIR-V) support |
| Atomic functions | ~30 | ~30 | all supported, but a few (on float/double types) are emulated, proper impl requires OpenCL/driver/HW support |
| Warp Size variable | supported | supported | implemented, but requires support from driver side to respect warpSize (= cl_intel_required_subgroup_size extension) |
| Timer functions | 2 | 2 | currently only fallback implementations of clock, clock64 are available |
| Atomic functions | ~30 | ~30 | all supported; the implementation is efficient only if cl_ext_float_atomics is present & supported by backend & HW|
| Vector Types | 48 | 48 | |
| Memory-Fence Instructions | 3 | 2 | \_\_threadfence_system is unsupported |
| Synchronization Functions | 4 | 4 | |
Expand Down
4 changes: 3 additions & 1 deletion docs/release_notes/chipStar_1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ Previous versions of chipStar used command queue barriers excessively for
synchronization, which led to limited opportunities for asynchronous execution.
In chipStar 1.1, command queue synchronization is done using event dependencies,
which leads to more task parallelism opportunities presented to the drivers,
speeding up various workloads significantly.
speeding up various workloads significantly. Workloads that do not exploit
parallelism but enqueue a lot of very small kernels (in the 10's of microseconds
range) may also benefit as the barrier itself could dominate the execution time.

==============
Minor Features
Expand Down
2 changes: 1 addition & 1 deletion scripts/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def run_cmd(cmd):

cmd = f"{modules} {env_vars} ./hipInfo"
out, _ = run_cmd(cmd)
texture_support = 0 < int(out.split("maxTexture1DLinear:")[1].split("\n")[0].strip())
texture_support = "maxTexture1DLinear:" in out and 0 < int(out.split("maxTexture1DLinear:")[1].split("\n")[0].strip())
if not texture_support:
texture_cmd = "|[Tt]ex"
else:
Expand Down
135 changes: 135 additions & 0 deletions scripts/clang-format-diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#!/usr/bin/env python3
#
#===- clang-format-diff.py - ClangFormat Diff Reformatter ----*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

"""
This script reads input from a unified diff and reformats all the changed
lines. This is useful to reformat all the lines touched by a specific patch.
Example usage for git/svn users:
git diff -U0 --no-color --relative HEAD^ | clang-format-diff.py -p1 -i
svn diff --diff-cmd=diff -x-U0 | clang-format-diff.py -i
It should be noted that the filename contained in the diff is used unmodified
to determine the source file to update. Users calling this script directly
should be careful to ensure that the path in the diff is correct relative to the
current working directory.
"""
from __future__ import absolute_import, division, print_function

import argparse
import difflib
import re
import subprocess
import sys

if sys.version_info.major >= 3:
from io import StringIO
else:
from io import BytesIO as StringIO


def main():
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=
argparse.RawDescriptionHelpFormatter)
parser.add_argument('-i', action='store_true', default=False,
help='apply edits to files instead of displaying a diff')
parser.add_argument('-p', metavar='NUM', default=0,
help='strip the smallest prefix containing P slashes')
parser.add_argument('-regex', metavar='PATTERN', default=None,
help='custom pattern selecting file paths to reformat '
'(case sensitive, overrides -iregex)')
parser.add_argument('-iregex', metavar='PATTERN', default=
r'.*\.(cpp|cc|c\+\+|cxx|c|cl|h|hh|hpp|hxx|m|mm|inc|js|ts'
r'|proto|protodevel|java|cs|json)',
help='custom pattern selecting file paths to reformat '
'(case insensitive, overridden by -regex)')
parser.add_argument('-sort-includes', action='store_true', default=False,
help='let clang-format sort include blocks')
parser.add_argument('-v', '--verbose', action='store_true',
help='be more verbose, ineffective without -i')
parser.add_argument('-style',
help='formatting style to apply (LLVM, GNU, Google, Chromium, '
'Microsoft, Mozilla, WebKit)')
parser.add_argument('-binary', default='clang-format',
help='location of binary to use for clang-format')
args = parser.parse_args()

# Extract changed lines for each file.
filename = None
lines_by_file = {}
for line in sys.stdin:
match = re.search(r'^\+\+\+\ (.*?/){%s}(\S*)' % args.p, line)
if match:
filename = match.group(2)
if filename is None:
continue

if args.regex is not None:
if not re.match('^%s$' % args.regex, filename):
continue
else:
if not re.match('^%s$' % args.iregex, filename, re.IGNORECASE):
continue

match = re.search(r'^@@.*\+(\d+)(,(\d+))?', line)
if match:
start_line = int(match.group(1))
line_count = 1
if match.group(3):
line_count = int(match.group(3))
if line_count == 0:
continue
end_line = start_line + line_count - 1
lines_by_file.setdefault(filename, []).extend(
['-lines', str(start_line) + ':' + str(end_line)])

# Reformat files containing changes in place.
for filename, lines in lines_by_file.items():
if args.i and args.verbose:
print('Formatting {}'.format(filename))
command = [args.binary, filename]
if args.i:
command.append('-i')
if args.sort_includes:
command.append('-sort-includes')
command.extend(lines)
if args.style:
command.extend(['-style', args.style])

try:
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=None,
stdin=subprocess.PIPE,
universal_newlines=True)
except OSError as e:
# Give the user more context when clang-format isn't
# found/isn't executable, etc.
raise RuntimeError(
'Failed to run "%s" - %s"' % (" ".join(command), e.strerror))

stdout, stderr = p.communicate()
if p.returncode != 0:
sys.exit(p.returncode)

if not args.i:
with open(filename) as f:
code = f.readlines()
formatted_code = StringIO(stdout).readlines()
diff = difflib.unified_diff(code, formatted_code,
filename, filename,
'(before formatting)', '(after formatting)')
diff_string = ''.join(diff)
if len(diff_string) > 0:
sys.stdout.write(diff_string)

if __name__ == '__main__':
main()
71 changes: 44 additions & 27 deletions scripts/configure_llvm.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#!/bin/bash

# if an error is enountered, exit
set -e

# check arguments
if [ $# -ne 3 ]; then
echo "Usage: $0 <version> <install_dir> <build_type>"
if [ $# -ne 4 ]; then
echo "Usage: $0 <version> <install_dir> <link_type> <only_necessary_spirv_exts>"
echo "version: LLVM version 15, 16, 17"
echo "build_type: static or dynamic"
echo "link_type: static or dynamic"
echo "only_necessary_spirv_exts: on or off"
exit 1
fi

Expand All @@ -14,48 +18,61 @@ if [ "$1" != "15" ] && [ "$1" != "16" ] && [ "$1" != "17" ]; then
exit 1
fi

# check build_type argument
# check link_type argument
if [ "$3" != "static" ] && [ "$3" != "dynamic" ]; then
echo "Invalid build_type. Must be 'static' or 'dynamic'."
echo "Invalid link_type. Must be 'static' or 'dynamic'."
exit 1
fi

# if an error is enountered, exit
set -e
# check only-necessary-spirv-exts argument
if [ "$4" != "on" ] && [ "$4" != "off" ]; then
echo "Invalid only_necessary_spirv_exts. Must be 'on' or 'off'."
exit 1
fi

VERSION=$1
INSTALL_DIR=$2
BUILD_TYPE=$3
LINK_TYPE=$3

# set the brach name for checkuot based on only-necessary-spirv-exts
if [ "$4" == "on" ]; then
LLVM_BRANCH="spirv-ext-fixes-${VERSION}"
TRANSLATOR_BRANCH="chipStar-llvm-${VERSION}"
else
LLVM_BRANCH="chipStar-llvm-${VERSION}"
TRANSLATOR_BRANCH="chipStar-llvm-${VERSION}"
fi

export LLVM_DIR=`pwd`/llvm-project/llvm

# check if llvm-project exists, if not clone it
if [ ! -d llvm-project ]; then
git clone https://github.com/CHIP-SPV/llvm-project.git -b chipStar-llvm-${VERSION} --depth 1
git clone https://github.com/CHIP-SPV/llvm-project.git -b ${LLVM_BRANCH} --depth 1
cd ${LLVM_DIR}/projects
git clone https://github.com/CHIP-SPV/SPIRV-LLVM-Translator.git -b chipStar-llvm-${VERSION} --depth 1
git clone https://github.com/CHIP-SPV/SPIRV-LLVM-Translator.git -b ${TRANSLATOR_BRANCH} --depth 1
cd ${LLVM_DIR}
else
# Warn the user, error out
echo "llvm-project directory already exists. Assuming it's cloned from chipStar."
cd ${LLVM_DIR}
# check if already on the desired branch
if [ `git branch --show-current` == "chipStar-llvm-${VERSION}" ]; then
echo "Already on branch chipStar-llvm-${VERSION}"
if [ `git branch --show-current` == "${LLVM_BRANCH}" ]; then
echo "Already on branch ${LLVM_BRANCH}"
else
echo "Switching to branch chipStar-llvm-${VERSION}"
git br -D chipStar-llvm-${VERSION} &> /dev/null
git fetch origin chipStar-llvm-${VERSION}:chipStar-llvm-${VERSION}
git checkout chipStar-llvm-${VERSION}
echo "Switching to branch ${LLVM_BRANCH}"
git br -D ${LLVM_BRANCH} &> /dev/null
git fetch origin ${LLVM_BRANCH}:${LLVM_BRANCH}
git checkout ${LLVM_BRANCH}
fi
cd ${LLVM_DIR}/projects/SPIRV-LLVM-Translator
# check if already on the desired branch
if [ `git branch --show-current` == "chipStar-llvm-${VERSION}" ]; then
echo "Already on branch chipStar-llvm-${VERSION}"
if [ `git branch --show-current` == "${TRANSLATOR_BRANCH}" ]; then
echo "Already on branch ${TRANSLATOR_BRANCH}"
else
echo "Switching to branch chipStar-llvm-${VERSION}"
git br -D chipStar-llvm-${VERSION} &> /dev/null
git fetch origin chipStar-llvm-${VERSION}:chipStar-llvm-${VERSION}
git checkout chipStar-llvm-${VERSION}
echo "Switching to branch ${TRANSLATOR_BRANCH}"
git br -D ${TRANSLATOR_BRANCH} &> /dev/null
git fetch origin ${TRANSLATOR_BRANCH}:${TRANSLATOR_BRANCH}
git checkout ${TRANSLATOR_BRANCH}
fi
cd ${LLVM_DIR}
fi
Expand All @@ -71,24 +88,24 @@ else
fi

# Add build type condition
if [ "$BUILD_TYPE" == "static" ]; then
if [ "$LINK_TYPE" == "static" ]; then
cmake ../ \
-DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_PROJECTS="clang;openmp" \
-DLLVM_TARGETS_TO_BUILD=host
elif [ "$BUILD_TYPE" == "dynamic" ]; then
elif [ "$LINK_TYPE" == "dynamic" ]; then
cmake ../ \
-DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \
-DCMAKE_INSTALL_RPATH=${INSTALL_DIR}/lib \
-DLLVM_ENABLE_PROJECTS="clang;openmp" \
-DLLVM_TARGETS_TO_BUILD=host \
-DLLVM_LINK_LLVM_DYLIB=OFF \
-DLLVM_LINK_LLVM_DYLIB=ON \
-DLLVM_BUILD_LLVM_DYLIB=ON \
-DLLVM_PARALLEL_LINK_JOBS=2 \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_ASSERTIONS=On
else
echo "Invalid build_type. Must be 'static' or 'dynamic'."
echo "Invalid link_type. Must be 'static' or 'dynamic'."
exit 1
fi
Loading

0 comments on commit 566c690

Please sign in to comment.