Skip to content

Commit

Permalink
Merge branch 'main' into georgi/reintroduce-catch-bad-alloc-cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
aarongreig authored Sep 25, 2024
2 parents 5b73907 + 17aa04d commit d07ea63
Show file tree
Hide file tree
Showing 290 changed files with 10,018 additions and 6,231 deletions.
19 changes: 14 additions & 5 deletions .github/workflows/benchmarks_compute.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ on:
default: 'level_zero'
options:
- level_zero
- level_zero_v2
unit:
description: Test unit (cpu/gpu)
type: choice
Expand Down Expand Up @@ -142,17 +143,25 @@ jobs:
--ci-defaults ${{matrix.adapter.sycl_config}}
--cmake-opt="-DLLVM_INSTALL_UTILS=ON"
--cmake-opt="-DSYCL_PI_TESTS=OFF"
--cmake-opt="-DSYCL_PI_UR_USE_FETCH_CONTENT=OFF"
--cmake-opt="-DSYCL_PI_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
--cmake-opt="-DSYCL_UR_USE_FETCH_CONTENT=OFF"
--cmake-opt="-DSYCL_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
--cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
--cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build SYCL
run: cmake --build ${{github.workspace}}/sycl_build -j

- name: Set oneAPI Device Selector
run: |
echo "ONEAPI_DEVICE_SELECTOR=${{ matrix.adapter.str_name }}:${{ matrix.adapter.unit }}" >> $GITHUB_ENV
- name: Configure UR
working-directory: ${{github.workspace}}/ur-repo
run: >
cmake -DCMAKE_BUILD_TYPE=Release
-B${{github.workspace}}/ur-repo/build
-DUR_BUILD_TESTS=OFF
-DUR_BUILD_ADAPTER_L0=ON
-DUR_BUILD_ADAPTER_L0_V2=ON
- name: Build UR
run: cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc)

- name: Run benchmarks
id: benchmarks
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/build-hw-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ on:
required: false
type: string
default: OFF
static_adapter:
required: false
type: string
default: OFF

permissions:
contents: read
Expand All @@ -36,7 +40,7 @@ jobs:
strategy:
matrix:
adapter: [
{name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}"},
# static_adapter is driven by its own workflow input rather than mirroring static_loader.
{name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_adapter}}"},
]
build_type: [Debug, Release]
compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}]
Expand All @@ -49,6 +53,10 @@ jobs:
build_type: Release
- adapter: {static_Loader: ON}
compiler: {c: clang, cxx: clang++}
- adapter: {static_adapter: ON}
build_type: Release
- adapter: {static_adapter: ON}
compiler: {c: clang, cxx: clang++}

runs-on: ${{inputs.runner_name}}

Expand Down Expand Up @@ -76,6 +84,7 @@ jobs:
-DUR_BUILD_TESTS=ON
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
-DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
-DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
-DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }}
Expand Down
17 changes: 14 additions & 3 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
compiler: [{c: gcc, cxx: g++}]
libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=OFF']
pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON', '-DUMF_ENABLE_POOL_TRACKING=OFF']
latency_tracking: ['-DUMF_ENABLE_LATENCY_TRACKING=OFF']
latency_tracking: ['-DUR_ENABLE_LATENCY_HISTOGRAM=OFF']
include:
- os: 'ubuntu-22.04'
build_type: Release
Expand All @@ -40,7 +40,7 @@ jobs:
- os: 'ubuntu-22.04'
build_type: Release
compiler: {c: clang, cxx: clang++}
latency_tracking: '-DUMF_ENABLE_LATENCY_TRACKING=ON'
latency_tracking: '-DUR_ENABLE_LATENCY_HISTOGRAM=ON'
runs-on: ${{ (matrix.os == 'ubuntu-22.04' && github.repository_owner == 'oneapi-src') && 'intel-ubuntu-22.04' || matrix.os }}

steps:
Expand Down Expand Up @@ -155,6 +155,7 @@ jobs:
adapter_name: L0
runner_name: L0
static_loader: ON
static_adapter: ON

opencl:
name: OpenCL
Expand Down Expand Up @@ -216,7 +217,8 @@ jobs:
os: ['windows-2019', 'windows-2022']
adapter: [
{name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'},
{name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
{name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'},
{name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
]

# TODO: building level zero loader on windows-2019 and clang-cl is currently broken
Expand All @@ -225,16 +227,25 @@ jobs:
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}
- os: 'windows-2019'
adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
- os: 'windows-2019'
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
- adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}
compiler: {c: clang-cl, cxx: clang-cl}
- adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
compiler: {c: clang-cl, cxx: clang-cl}
- adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
compiler: {c: clang-cl, cxx: clang-cl}

build_type: [Debug, Release]
compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}]
include:
- compiler: {c: clang-cl, cxx: clang-cl}
toolset: "-T ClangCL"
- os: 'windows-2022'
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
build_type: 'Release'
compiler: {c: cl, cxx: cl}

runs-on: ${{matrix.os}}

steps:
Expand Down
7 changes: 1 addition & 6 deletions .github/workflows/coverity.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation
#
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
Expand Down Expand Up @@ -38,11 +38,6 @@ jobs:
- name: Install pip packages
run: pip install -r third_party/requirements.txt

- name: Install apt packages
run: |
sudo apt-get update
sudo apt-get install -y libhwloc-dev
- name: Configure CMake
run: >
cmake
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/e2e_core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ jobs:
--ci-defaults ${{matrix.adapter.config}}
--cmake-opt="-DLLVM_INSTALL_UTILS=ON"
--cmake-opt="-DSYCL_PI_TESTS=OFF"
--cmake-opt="-DSYCL_PI_UR_USE_FETCH_CONTENT=OFF"
--cmake-opt="-DSYCL_PI_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
--cmake-opt="-DSYCL_UR_USE_FETCH_CONTENT=OFF"
--cmake-opt="-DSYCL_UR_SOURCE_DIR=${{github.workspace}}/ur-repo/"
--cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache
--cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
Expand All @@ -157,7 +157,7 @@ jobs:
- name: Setup SYCL variables
run: |
which clang++ sycl-ls
SYCL_PI_TRACE=-1 sycl-ls
SYCL_UR_TRACE=-1 sycl-ls
- name: Build e2e tests
run: >
Expand All @@ -169,10 +169,6 @@ jobs:
-DCMAKE_CXX_COMPILER="$(which clang++)"
-DLLVM_LIT="${{github.workspace}}/sycl-repo/llvm/utils/lit/lit.py"
- name: Set LIT_XFAIL_NOT
if: inputs.xfail_not != ''
run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV

- name: Set LIT_XFAIL
if: inputs.xfail != ''
run: echo "LIT_XFAIL=${{inputs.xfail}}" >> $GITHUB_ENV
Expand All @@ -181,6 +177,10 @@ jobs:
if: inputs.filter_out != ''
run: echo "LIT_FILTER_OUT=${{inputs.filter_out}}" >> $GITHUB_ENV

- name: Set LIT_XFAIL_NOT
if: inputs.xfail_not != ''
run: echo "LIT_XFAIL_NOT=${{inputs.xfail_not}}" >> $GITHUB_ENV

# TODO: remove once intel/llvm lit tests can properly recognize the GPU
- name: Configure hardware platform feature for L0
if: matrix.adapter.name == 'L0'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/e2e_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ jobs:
config: "--cuda"
unit: "gpu"
extra_lit_flags: "-sv --max-time=3600"
xfail: "Regression/device_num.cpp"
6 changes: 4 additions & 2 deletions .github/workflows/e2e_level_zero.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ jobs:
config: ""
unit: "gpu"
# Failing tests
xfail: "Config/select_device.cpp;DeviceCodeSplit/grf.cpp;ESIMD/grf.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;Plugin/level_zero_barrier_optimization.cpp"
xfail: "DeviceCodeSplit/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_out_bounds.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_rowmajorA_rowmajorB.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/joint_matrix_out_bounds.cpp;Matrix/joint_matrix_unaligned_k.cpp;Matrix/SPVCooperativeMatrix/SG32/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d_cont.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_scalar.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;Matrix/SPVCooperativeMatrix/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp;Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp"
# Unexpectedly Passed Tests
xfail_not: ""
# Flaky tests
filter_out: "ESIMD/named_barriers/loop_extended.cpp"
filter_out: "Basic/accessor/accessor.cpp|DeviceArchitecture/device_architecture_comparison_on_device_aot.cpp|Graph/Explicit/interop-level-zero-launch-kernel.cpp|Graph/RecordReplay/interop-level-zero-launch-kernel.cpp|syclcompat/launch/launch_policy_lmem.cpp"
# These runners by default spawn upwards of 260 workers.
# We also add a time out just in case some test hangs
extra_lit_flags: "--param gpu-intel-pvc=True --param gpu-intel-pvc-1T=True -sv -j 100 --max-time=3600"
2 changes: 1 addition & 1 deletion .github/workflows/e2e_opencl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ jobs:
prefix: ""
config: ""
unit: "cpu"
xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp"
xfail: "AOT/double.cpp;AOT/half.cpp;AOT/reqd-sg-size.cpp;Basic/built-ins/marray_geometric.cpp;KernelCompiler/kernel_compiler_spirv.cpp;KernelCompiler/opencl_queries.cpp;NonUniformGroups/ballot_group.cpp;NonUniformGroups/ballot_group_algorithms.cpp;NonUniformGroups/fixed_size_group_algorithms.cpp;NonUniformGroups/opportunistic_group.cpp;NonUniformGroups/opportunistic_group_algorithms.cpp;NonUniformGroups/tangle_group.cpp;NonUniformGroups/tangle_group_algorithms.cpp"
extra_lit_flags: "-sv --max-time=3600"
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.20.0 FATAL_ERROR)
project(unified-runtime VERSION 0.10.0)
project(unified-runtime VERSION 0.11.0)

# Check if unified runtime is built as a standalone project.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR UR_STANDALONE_BUILD)
Expand Down Expand Up @@ -52,6 +52,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF)
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ List of options provided by CMake:
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `$ENV{ROCM_PATH}` or `/opt/rocm` |
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |
| UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` |
| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib` |
Expand Down
1 change: 1 addition & 0 deletions cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ function(add_ur_target_compile_options name)
)
# Release-only settings for target ${name}: define _FORTIFY_SOURCE=2 and
# compile with -fvisibility=hidden (symbols are hidden unless explicitly exported).
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators
# (Visual Studio, Xcode, Ninja Multi-Config), so this branch never fires there;
# confirm that is intended, or switch to $<CONFIG:Release> generator expressions.
if (CMAKE_BUILD_TYPE STREQUAL "Release")
target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2)
target_compile_options(${name} PRIVATE -fvisibility=hidden)
endif()
if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE
Expand Down
Loading

0 comments on commit d07ea63

Please sign in to comment.