Skip to content

Commit

Permalink
Merge branch 'main' into yc/0806-exclude-shadow-from-coredump
Browse files Browse the repository at this point in the history
  • Loading branch information
yingcong-wu authored Sep 9, 2024
2 parents abcc5ca + cded5d9 commit d619bcd
Show file tree
Hide file tree
Showing 76 changed files with 4,025 additions and 2,745 deletions.
14 changes: 11 additions & 3 deletions .github/workflows/benchmarks_compute.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,17 @@ jobs:
- name: Build SYCL
run: cmake --build ${{github.workspace}}/sycl_build -j

- name: Set oneAPI Device Selector
run: |
echo "ONEAPI_DEVICE_SELECTOR=${{ matrix.adapter.str_name }}:${{ matrix.adapter.unit }}" >> $GITHUB_ENV
- name: Configure UR
working-directory: ${{github.workspace}}/ur-repo
run: >
cmake -DCMAKE_BUILD_TYPE=Release
-B${{github.workspace}}/ur-repo/build
-DUR_BUILD_TESTS=OFF
-DUR_BUILD_ADAPTER_L0=ON
-DUR_BUILD_ADAPTER_L0_V2=ON
- name: Build UR
run: cmake --build ${{github.workspace}}/ur-repo/build -j $(nproc)

- name: Run benchmarks
id: benchmarks
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/build-hw-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ on:
required: false
type: string
default: OFF
static_adapter:
required: false
type: string
default: OFF

permissions:
contents: read
Expand All @@ -36,7 +40,7 @@ jobs:
strategy:
matrix:
adapter: [
{name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}"},
# static_adapter must come from its own workflow input; reusing static_loader
# silently ignores the caller's `static_adapter` setting.
{name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_adapter}}"},
]
build_type: [Debug, Release]
compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}]
Expand All @@ -49,6 +53,10 @@ jobs:
build_type: Release
- adapter: {static_Loader: ON}
compiler: {c: clang, cxx: clang++}
- adapter: {static_adapter: ON}
build_type: Release
- adapter: {static_adapter: ON}
compiler: {c: clang, cxx: clang++}

runs-on: ${{inputs.runner_name}}

Expand Down Expand Up @@ -76,6 +84,7 @@ jobs:
-DUR_BUILD_TESTS=ON
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
-DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
-DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
-DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }}
Expand Down
13 changes: 12 additions & 1 deletion .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ jobs:
adapter_name: L0
runner_name: L0
static_loader: ON
static_adapter: ON

opencl:
name: OpenCL
Expand Down Expand Up @@ -216,7 +217,8 @@ jobs:
os: ['windows-2019', 'windows-2022']
adapter: [
{name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'},
{name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
{name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'},
{name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
]

# TODO: building level zero loader on windows-2019 and clang-cl is currently broken
Expand All @@ -225,16 +227,25 @@ jobs:
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}
- os: 'windows-2019'
adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
- os: 'windows-2019'
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
- adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}
compiler: {c: clang-cl, cxx: clang-cl}
- adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}
compiler: {c: clang-cl, cxx: clang-cl}
- adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
compiler: {c: clang-cl, cxx: clang-cl}

build_type: [Debug, Release]
compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}]
include:
- compiler: {c: clang-cl, cxx: clang-cl}
toolset: "-T ClangCL"
- os: 'windows-2022'
adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'}
build_type: 'Release'
compiler: {c: cl, cxx: cl}

runs-on: ${{matrix.os}}

steps:
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF)
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ List of options provided by CMake:
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `$ENV{ROCM_PATH}` or `/opt/rocm` |
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |
| UR_HIP_HSA_INCLUDE_DIRS | Path of the ROCm HSA include directory | Directory path | `${UR_HIP_ROCM_DIR}/hsa/include;${UR_HIP_ROCM_DIR}/include` |
| UR_HIP_LIB_DIR | Path of the ROCm HIP library directory | Directory path | `${UR_HIP_ROCM_DIR}/lib` |
Expand Down
3 changes: 1 addition & 2 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1569,8 +1569,7 @@ typedef enum ur_device_info_t {
///< ::urDevicePartition
UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups
UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress
UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of sub group sizes supported on Intel
///< device
UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of supported sub group sizes
UR_DEVICE_INFO_USM_HOST_SUPPORT = 83, ///< [::ur_device_usm_access_capability_flags_t] support USM host memory
///< access
UR_DEVICE_INFO_USM_DEVICE_SUPPORT = 84, ///< [::ur_device_usm_access_capability_flags_t] support USM device memory
Expand Down
212 changes: 212 additions & 0 deletions include/ur_api_funcs.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@

/*
*
* Copyright (C) 2024 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* @file ur_api_funcs.def
* @version v0.11-r0
*
*/

// Auto-generated file, do not edit.

_UR_API(urPlatformGet)
_UR_API(urPlatformGetInfo)
_UR_API(urPlatformGetNativeHandle)
_UR_API(urPlatformCreateWithNativeHandle)
_UR_API(urPlatformGetApiVersion)
_UR_API(urPlatformGetBackendOption)
_UR_API(urContextCreate)
_UR_API(urContextRetain)
_UR_API(urContextRelease)
_UR_API(urContextGetInfo)
_UR_API(urContextGetNativeHandle)
_UR_API(urContextCreateWithNativeHandle)
_UR_API(urContextSetExtendedDeleter)
_UR_API(urEventGetInfo)
_UR_API(urEventGetProfilingInfo)
_UR_API(urEventWait)
_UR_API(urEventRetain)
_UR_API(urEventRelease)
_UR_API(urEventGetNativeHandle)
_UR_API(urEventCreateWithNativeHandle)
_UR_API(urEventSetCallback)
_UR_API(urProgramCreateWithIL)
_UR_API(urProgramCreateWithBinary)
_UR_API(urProgramBuild)
_UR_API(urProgramCompile)
_UR_API(urProgramLink)
_UR_API(urProgramRetain)
_UR_API(urProgramRelease)
_UR_API(urProgramGetFunctionPointer)
_UR_API(urProgramGetGlobalVariablePointer)
_UR_API(urProgramGetInfo)
_UR_API(urProgramGetBuildInfo)
_UR_API(urProgramSetSpecializationConstants)
_UR_API(urProgramGetNativeHandle)
_UR_API(urProgramCreateWithNativeHandle)
_UR_API(urProgramBuildExp)
_UR_API(urProgramCompileExp)
_UR_API(urProgramLinkExp)
_UR_API(urKernelCreate)
_UR_API(urKernelGetInfo)
_UR_API(urKernelGetGroupInfo)
_UR_API(urKernelGetSubGroupInfo)
_UR_API(urKernelRetain)
_UR_API(urKernelRelease)
_UR_API(urKernelGetNativeHandle)
_UR_API(urKernelCreateWithNativeHandle)
_UR_API(urKernelGetSuggestedLocalWorkSize)
_UR_API(urKernelSetArgValue)
_UR_API(urKernelSetArgLocal)
_UR_API(urKernelSetArgPointer)
_UR_API(urKernelSetExecInfo)
_UR_API(urKernelSetArgSampler)
_UR_API(urKernelSetArgMemObj)
_UR_API(urKernelSetSpecializationConstants)
_UR_API(urKernelSuggestMaxCooperativeGroupCountExp)
_UR_API(urQueueGetInfo)
_UR_API(urQueueCreate)
_UR_API(urQueueRetain)
_UR_API(urQueueRelease)
_UR_API(urQueueGetNativeHandle)
_UR_API(urQueueCreateWithNativeHandle)
_UR_API(urQueueFinish)
_UR_API(urQueueFlush)
_UR_API(urSamplerCreate)
_UR_API(urSamplerRetain)
_UR_API(urSamplerRelease)
_UR_API(urSamplerGetInfo)
_UR_API(urSamplerGetNativeHandle)
_UR_API(urSamplerCreateWithNativeHandle)
_UR_API(urMemImageCreate)
_UR_API(urMemBufferCreate)
_UR_API(urMemRetain)
_UR_API(urMemRelease)
_UR_API(urMemBufferPartition)
_UR_API(urMemGetNativeHandle)
_UR_API(urMemBufferCreateWithNativeHandle)
_UR_API(urMemImageCreateWithNativeHandle)
_UR_API(urMemGetInfo)
_UR_API(urMemImageGetInfo)
_UR_API(urPhysicalMemCreate)
_UR_API(urPhysicalMemRetain)
_UR_API(urPhysicalMemRelease)
_UR_API(urAdapterGet)
_UR_API(urAdapterRelease)
_UR_API(urAdapterRetain)
_UR_API(urAdapterGetLastError)
_UR_API(urAdapterGetInfo)
_UR_API(urEnqueueKernelLaunch)
_UR_API(urEnqueueEventsWait)
_UR_API(urEnqueueEventsWaitWithBarrier)
_UR_API(urEnqueueMemBufferRead)
_UR_API(urEnqueueMemBufferWrite)
_UR_API(urEnqueueMemBufferReadRect)
_UR_API(urEnqueueMemBufferWriteRect)
_UR_API(urEnqueueMemBufferCopy)
_UR_API(urEnqueueMemBufferCopyRect)
_UR_API(urEnqueueMemBufferFill)
_UR_API(urEnqueueMemImageRead)
_UR_API(urEnqueueMemImageWrite)
_UR_API(urEnqueueMemImageCopy)
_UR_API(urEnqueueMemBufferMap)
_UR_API(urEnqueueMemUnmap)
_UR_API(urEnqueueUSMFill)
_UR_API(urEnqueueUSMMemcpy)
_UR_API(urEnqueueUSMPrefetch)
_UR_API(urEnqueueUSMAdvise)
_UR_API(urEnqueueUSMFill2D)
_UR_API(urEnqueueUSMMemcpy2D)
_UR_API(urEnqueueDeviceGlobalVariableWrite)
_UR_API(urEnqueueDeviceGlobalVariableRead)
_UR_API(urEnqueueReadHostPipe)
_UR_API(urEnqueueWriteHostPipe)
_UR_API(urEnqueueKernelLaunchCustomExp)
_UR_API(urEnqueueCooperativeKernelLaunchExp)
_UR_API(urEnqueueTimestampRecordingExp)
_UR_API(urEnqueueNativeCommandExp)
_UR_API(urBindlessImagesUnsampledImageHandleDestroyExp)
_UR_API(urBindlessImagesSampledImageHandleDestroyExp)
_UR_API(urBindlessImagesImageAllocateExp)
_UR_API(urBindlessImagesImageFreeExp)
_UR_API(urBindlessImagesUnsampledImageCreateExp)
_UR_API(urBindlessImagesSampledImageCreateExp)
_UR_API(urBindlessImagesImageCopyExp)
_UR_API(urBindlessImagesImageGetInfoExp)
_UR_API(urBindlessImagesMipmapGetLevelExp)
_UR_API(urBindlessImagesMipmapFreeExp)
_UR_API(urBindlessImagesImportExternalMemoryExp)
_UR_API(urBindlessImagesMapExternalArrayExp)
_UR_API(urBindlessImagesMapExternalLinearMemoryExp)
_UR_API(urBindlessImagesReleaseExternalMemoryExp)
_UR_API(urBindlessImagesImportExternalSemaphoreExp)
_UR_API(urBindlessImagesReleaseExternalSemaphoreExp)
_UR_API(urBindlessImagesWaitExternalSemaphoreExp)
_UR_API(urBindlessImagesSignalExternalSemaphoreExp)
_UR_API(urUSMHostAlloc)
_UR_API(urUSMDeviceAlloc)
_UR_API(urUSMSharedAlloc)
_UR_API(urUSMFree)
_UR_API(urUSMGetMemAllocInfo)
_UR_API(urUSMPoolCreate)
_UR_API(urUSMPoolRetain)
_UR_API(urUSMPoolRelease)
_UR_API(urUSMPoolGetInfo)
_UR_API(urUSMPitchedAllocExp)
_UR_API(urUSMImportExp)
_UR_API(urUSMReleaseExp)
_UR_API(urCommandBufferCreateExp)
_UR_API(urCommandBufferRetainExp)
_UR_API(urCommandBufferReleaseExp)
_UR_API(urCommandBufferFinalizeExp)
_UR_API(urCommandBufferAppendKernelLaunchExp)
_UR_API(urCommandBufferAppendUSMMemcpyExp)
_UR_API(urCommandBufferAppendUSMFillExp)
_UR_API(urCommandBufferAppendMemBufferCopyExp)
_UR_API(urCommandBufferAppendMemBufferWriteExp)
_UR_API(urCommandBufferAppendMemBufferReadExp)
_UR_API(urCommandBufferAppendMemBufferCopyRectExp)
_UR_API(urCommandBufferAppendMemBufferWriteRectExp)
_UR_API(urCommandBufferAppendMemBufferReadRectExp)
_UR_API(urCommandBufferAppendMemBufferFillExp)
_UR_API(urCommandBufferAppendUSMPrefetchExp)
_UR_API(urCommandBufferAppendUSMAdviseExp)
_UR_API(urCommandBufferEnqueueExp)
_UR_API(urCommandBufferRetainCommandExp)
_UR_API(urCommandBufferReleaseCommandExp)
_UR_API(urCommandBufferUpdateKernelLaunchExp)
_UR_API(urCommandBufferGetInfoExp)
_UR_API(urCommandBufferCommandGetInfoExp)
_UR_API(urUsmP2PEnablePeerAccessExp)
_UR_API(urUsmP2PDisablePeerAccessExp)
_UR_API(urUsmP2PPeerAccessGetInfoExp)
_UR_API(urVirtualMemGranularityGetInfo)
_UR_API(urVirtualMemReserve)
_UR_API(urVirtualMemFree)
_UR_API(urVirtualMemMap)
_UR_API(urVirtualMemUnmap)
_UR_API(urVirtualMemSetAccess)
_UR_API(urVirtualMemGetInfo)
_UR_API(urDeviceGet)
_UR_API(urDeviceGetInfo)
_UR_API(urDeviceRetain)
_UR_API(urDeviceRelease)
_UR_API(urDevicePartition)
_UR_API(urDeviceSelectBinary)
_UR_API(urDeviceGetNativeHandle)
_UR_API(urDeviceCreateWithNativeHandle)
_UR_API(urDeviceGetGlobalTimestamps)
_UR_API(urLoaderConfigCreate)
_UR_API(urLoaderConfigEnableLayer)
_UR_API(urLoaderConfigGetInfo)
_UR_API(urLoaderConfigRelease)
_UR_API(urLoaderConfigRetain)
_UR_API(urLoaderConfigSetCodeLocationCallback)
_UR_API(urLoaderConfigSetMockingEnabled)
_UR_API(urLoaderInit)
_UR_API(urLoaderTearDown)
5 changes: 4 additions & 1 deletion scripts/benchmarks/benches/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
class Benchmark:
def __init__(self, directory):
self.directory = directory
self.adapter_path = os.path.join(options.ur_dir, 'build', 'lib', f"libur_adapter_{options.ur_adapter_name}.so")

def run_bench(self, command, env_vars):
return run(command=command, env_vars=env_vars, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode()
env_vars_with_forced_adapter = env_vars.copy()
env_vars_with_forced_adapter.update({'UR_ADAPTERS_FORCE_LOAD': self.adapter_path})
return run(command=command, env_vars=env_vars_with_forced_adapter, add_sycl=True, cwd=options.benchmark_cwd).stdout.decode()

def create_data_path(self, name):
data_path = os.path.join(self.directory, "data", name)
Expand Down
7 changes: 0 additions & 7 deletions scripts/benchmarks/benches/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class ComputeBench:
def __init__(self, directory):
self.directory = directory
self.built = False
self.adapter_short_name = {'level_zero' : 'L0', "level_zero_v2" : 'L0_V2'}
return

def setup(self):
Expand All @@ -35,19 +34,16 @@ def setup(self):
f"-DALLOW_WARNINGS=ON",
f"-DBUILD_UR=ON",
f"-DUR_BUILD_TESTS=OFF",
f"-DUR_BUILD_ADAPTER_L0=ON",
f"-DUR_BUILD_TESTS=OFF",
f"-DUMF_DISABLE_HWLOC=ON",
f"-DBENCHMARK_UR_SOURCE_DIR={options.ur_dir}",
f"-DUR_BUILD_ADAPTER_{self.adapter_short_name[options.ur_adapter_name]}=ON"
]
run(configure_command, add_sycl=True)

run(f"cmake --build {build_path} -j", add_sycl=True)

self.built = True
self.bins = os.path.join(build_path, 'bin')
self.libs = os.path.join(build_path, 'lib')

class ComputeBenchmark(Benchmark):
def __init__(self, bench, name, test):
Expand Down Expand Up @@ -130,9 +126,6 @@ def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_ur SubmitKernel {order}"

def extra_env_vars(self) -> dict:
return {"UR_ADAPTERS_FORCE_LOAD" : os.path.join(self.bench.libs, f"libur_adapter_{options.ur_adapter_name}.so")}

def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
Expand Down
Loading

0 comments on commit d619bcd

Please sign in to comment.