Skip to content

Commit

Permalink
Merge branch 'main' into cooperative_kernel_functions
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Aziz <michael.aziz@intel.com>
  • Loading branch information
0x12CC committed Jan 18, 2024
2 parents 9eeda47 + 5d58871 commit dd7c94c
Show file tree
Hide file tree
Showing 44 changed files with 1,896 additions and 158 deletions.
17 changes: 10 additions & 7 deletions .github/docker/ubuntu-22.04.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Expand Down Expand Up @@ -51,20 +51,23 @@ RUN apt-get update \
${BASE_DEPS} \
${UR_DEPS} \
${MISC_DEPS} \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean all

RUN pip3 install ${UR_PYTHON_DEPS}
# pip package is pinned to a version, but it's probably improperly parsed here
# hadolint ignore=DL3013
RUN pip3 install --no-cache-dir ${UR_PYTHON_DEPS}

# Install DPC++
COPY install_dpcpp.sh install_dpcpp.sh
COPY install_dpcpp.sh /opt/install_dpcpp.sh
ENV DPCPP_PATH=/opt/dpcpp
RUN ./install_dpcpp.sh
RUN /opt/install_dpcpp.sh

# Install libbacktrace
COPY install_libbacktrace.sh install_libbacktrace.sh
RUN ./install_libbacktrace.sh
COPY install_libbacktrace.sh /opt/install_libbacktrace.sh
RUN /opt/install_libbacktrace.sh

# Add a new (non-root) 'user'
ENV USER user
ENV USERPASS pass
RUN useradd -m $USER -g sudo -p `mkpasswd $USERPASS`
RUN useradd -m "${USER}" -g sudo -p "$(mkpasswd ${USERPASS})"
34 changes: 34 additions & 0 deletions .github/workflows/hadolint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Runs linter for Docker files
name: Hadolint

on:
workflow_dispatch:
push:
pull_request:
paths:
- '.github/docker/*Dockerfile'
- '.github/workflows/hadolint.yml'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read

jobs:
linux:
name: Hadolint
runs-on: ubuntu-latest

steps:
- name: Clone the git repo
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

- name: Run Hadolint
uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
with:
recursive: true
dockerfile: ".github/docker/*Dockerfile"
# ignore pinning apt packages to versions
ignore: DL3008
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,7 @@ out/

# External content
*/**/external

# VS clangd
/.cache
/compile_commands.json
8 changes: 5 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ option(UR_USE_UBSAN "enable UndefinedBehaviorSanitizer" OFF)
option(UR_USE_MSAN "enable MemorySanitizer" OFF)
option(UR_USE_TSAN "enable ThreadSanitizer" OFF)
option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF)
option(UR_ENABLE_SANITIZER "enable device sanitizer" ON)
option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF)
option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON)
option(UR_BUILD_ADAPTER_L0 "Build the Level-Zero adapter" OFF)
Expand Down Expand Up @@ -116,14 +117,15 @@ if(UR_ENABLE_TRACING)
)
if (MSVC)
set(TARGET_XPTI $<IF:$<CONFIG:Release>,xpti,xptid>)

# disable warning C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(xptifw PRIVATE /wd4267)
else()
set(TARGET_XPTI xpti)
endif()
endif()

if(UR_ENABLE_SANITIZER)
add_compile_definitions(UR_ENABLE_SANITIZER)
endif()

if(UR_USE_ASAN)
add_sanitizer_flag(address)
endif()
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ List of options provided by CMake:
| UR_USE_UBSAN | Enable UndefinedBehavior Sanitizer | ON/OFF | OFF |
| UR_USE_MSAN | Enable MemorySanitizer (clang only) | ON/OFF | OFF |
| UR_ENABLE_TRACING | Enable XPTI-based tracing layer | ON/OFF | OFF |
| UR_ENABLE_SANITIZER | Enable device sanitizer layer | ON/OFF | ON |
| UR_CONFORMANCE_TARGET_TRIPLES | SYCL triples to build CTS device binaries for | Comma-separated list | spir64 |
| UR_BUILD_ADAPTER_L0 | Build the Level-Zero adapter | ON/OFF | OFF |
| UR_BUILD_ADAPTER_OPENCL | Build the OpenCL adapter | ON/OFF | OFF |
Expand Down
8 changes: 7 additions & 1 deletion cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,16 @@ function(add_ur_target_compile_options name)
/W3
/MD$<$<CONFIG:Debug>:d>
/GS
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX
)

if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE /WX /GS)
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(${name} PRIVATE
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
)
endif()
endif()
endfunction()
Expand Down
15 changes: 15 additions & 0 deletions scripts/core/INTRO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,15 @@ Unified Runtime loader implements tracing support through the `XPTI framework <h
| **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, arguments, and return value.
- None

Sanitizers
---------------------

Unified Runtime loader implements the runtime part of device-side sanitizers: AddressSanitizer (`UR_LAYER_ASAN`), MemorySanitizer (`UR_LAYER_MSAN`, planned), and ThreadSanitizer (`UR_LAYER_TSAN`, planned).

This layer shouldn't be enabled explicitly (for example, via the environment variable `UR_ENABLE_LAYERS`); instead, it is enabled by the program's runtime (e.g. SYCL/OpenMP Runtime) when the device code is compiled with the flag `-fsanitize=address|memory|thread`.

Currently, AddressSanitizer only supports some of the devices on OpenCL and Level-Zero adapters, and this could be extended to support other devices and adapters if UR virtual memory APIs and shadow memory mapping in libdevice are supported.

Logging
---------------------

Expand Down Expand Up @@ -260,6 +269,8 @@ Layers currently included with the runtime are as follows:
- Enables UR_LAYER_PARAMETER_VALIDATION and UR_LAYER_LEAK_CHECKING.
* - UR_LAYER_TRACING
- Enables the XPTI tracing layer, see Tracing_ for more detail.
* - UR_LAYER_ASAN \| UR_LAYER_MSAN \| UR_LAYER_TSAN
- Enables the device-side sanitizer layer, see Sanitizers_ for more detail.

Environment Variables
---------------------
Expand All @@ -274,6 +285,10 @@ Specific environment variables can be set to control the behavior of unified run

Holds parameters for setting Unified Runtime null adapter logging. The syntax is described in the Logging_ section.

.. envvar:: UR_LOG_SANITIZER

Holds parameters for setting Unified Runtime sanitizer logging. The syntax is described in the Logging_ section.

.. envvar:: UR_LOG_VALIDATION

Holds parameters for setting Unified Runtime validation logging. The syntax is described in the Logging_ section.
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1101,7 +1101,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,

UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
ur_device_handle_t hDevice, ur_native_handle_t *phNativeHandle) {
*phNativeHandle = reinterpret_cast<ur_native_handle_t>(hDevice->get());
*phNativeHandle = reinterpret_cast<ur_native_handle_t>(
static_cast<std::uintptr_t>(hDevice->get()));
return UR_RESULT_SUCCESS;
}

Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type,
std::make_pair(image_channel_type, num_channels));
cuda_format = cuda_format_and_size.first;
pixel_size_bytes = cuda_format_and_size.second;
} catch (std::out_of_range &e) {
} catch (const std::out_of_range &) {
return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
}
}
Expand Down Expand Up @@ -276,7 +276,7 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
ImageTexDesc.mipmapFilterMode = MipFilterMode;
ImageTexDesc.maxMipmapLevelClamp = hSampler->MaxMipmapLevelClamp;
ImageTexDesc.minMipmapLevelClamp = hSampler->MinMipmapLevelClamp;
ImageTexDesc.maxAnisotropy = hSampler->MaxAnisotropy;
ImageTexDesc.maxAnisotropy = static_cast<unsigned>(hSampler->MaxAnisotropy);

// The address modes can interfere with other dimensions
// e.g. 1D texture sampling can be interfered with when setting other
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs);
if (Valid) {
Options.push_back(CU_JIT_MAX_REGISTERS);
OptionVals.push_back(reinterpret_cast<void *>(MaxRegs));
OptionVals.push_back(
reinterpret_cast<void *>(static_cast<std::uintptr_t>(MaxRegs)));
}
}

Expand Down
2 changes: 1 addition & 1 deletion source/adapters/cuda/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc,
new ur_sampler_handle_t_(hContext)};

if (pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
Sampler->Props |= pDesc->normalizedCoords;
Sampler->Props |= static_cast<uint32_t>(pDesc->normalizedCoords);
Sampler->Props |= pDesc->filterMode << 1;
Sampler->Props |= pDesc->addressingMode << 2;
} else {
Expand Down
7 changes: 2 additions & 5 deletions source/adapters/cuda/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "event.hpp"
#include "platform.hpp"
#include "queue.hpp"
#include "ur_util.hpp"
#include "usm.hpp"

#include <cuda.h>
Expand Down Expand Up @@ -227,11 +228,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
return ReturnValue(UR_USM_TYPE_HOST);
}
// should never get here
#ifdef _MSC_VER
__assume(0);
#else
__builtin_unreachable();
#endif
ur::unreachable();
}
case UR_USM_ALLOC_INFO_BASE_PTR: {
#if CUDA_VERSION >= 10020
Expand Down
100 changes: 67 additions & 33 deletions source/adapters/hip/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,9 @@
#include "memory.hpp"
#include "queue.hpp"

namespace {
extern size_t imageElementByteSize(hipArray_Format ArrayFormat);

/// Returns the size in bytes of a single channel element for the given HIP
/// array format: 1 for the 8-bit integer formats, 2 for the 16-bit integer
/// and half formats, 4 for the 32-bit integer and float formats.
///
/// Any other format is reported through detail::ur::die(); should that call
/// ever return, the function yields 0.
static size_t imageElementByteSize(hipArray_Format ArrayFormat) {
  // Stays 0 unless one of the recognised formats below overrides it.
  size_t ElementBytes = 0;

  switch (ArrayFormat) {
  case HIP_AD_FORMAT_UNSIGNED_INT8:
  case HIP_AD_FORMAT_SIGNED_INT8:
    ElementBytes = 1;
    break;
  case HIP_AD_FORMAT_UNSIGNED_INT16:
  case HIP_AD_FORMAT_SIGNED_INT16:
  case HIP_AD_FORMAT_HALF:
    ElementBytes = 2;
    break;
  case HIP_AD_FORMAT_UNSIGNED_INT32:
  case HIP_AD_FORMAT_SIGNED_INT32:
  case HIP_AD_FORMAT_FLOAT:
    ElementBytes = 4;
    break;
  default:
    detail::ur::die("Invalid image format.");
  }

  return ElementBytes;
}
namespace {

ur_result_t enqueueEventsWait(ur_queue_handle_t, hipStream_t Stream,
uint32_t NumEventsInWaitList,
Expand Down Expand Up @@ -1081,8 +1064,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(

hipArray *Array = std::get<SurfaceMem>(hImage->Mem).getArray(Device);

hipArray_Format Format;
size_t NumChannels;
hipArray_Format Format{};
size_t NumChannels{};
UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels));

int ElementByteSize = imageElementByteSize(Format);
Expand Down Expand Up @@ -1142,8 +1125,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
hipArray *Array =
std::get<SurfaceMem>(hImage->Mem).getArray(hQueue->getDevice());

hipArray_Format Format;
size_t NumChannels;
hipArray_Format Format{};
size_t NumChannels{};
UR_CHECK_ERROR(getArrayDesc(Array, Format, NumChannels));

int ElementByteSize = imageElementByteSize(Format);
Expand Down Expand Up @@ -1205,14 +1188,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(

hipArray *SrcArray =
std::get<SurfaceMem>(hImageSrc->Mem).getArray(hQueue->getDevice());
hipArray_Format SrcFormat;
size_t SrcNumChannels;
hipArray_Format SrcFormat{};
size_t SrcNumChannels{};
UR_CHECK_ERROR(getArrayDesc(SrcArray, SrcFormat, SrcNumChannels));

hipArray *DstArray =
std::get<SurfaceMem>(hImageDst->Mem).getArray(hQueue->getDevice());
hipArray_Format DstFormat;
size_t DstNumChannels;
hipArray_Format DstFormat{};
size_t DstNumChannels{};
UR_CHECK_ERROR(getArrayDesc(DstArray, DstFormat, DstNumChannels));

UR_ASSERT(SrcFormat == DstFormat,
Expand Down Expand Up @@ -1707,16 +1690,67 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D(
return Result;
}

namespace {

/// Direction of a device-global transfer, seen from the caller's buffer:
/// `Read` copies from the device global into the buffer, `Write` copies from
/// the buffer into the device global.
enum class GlobalVariableCopy { Read, Write };

/// Shared implementation of the device-global read and write entry points.
///
/// Looks up the HIP symbol for `name` in `hProgram->GlobalIDMD`, resolves its
/// address and size with hipModuleGetGlobal, validates the requested byte
/// range and forwards the transfer to urEnqueueUSMMemcpy.
///
/// @param hQueue               Queue passed through to urEnqueueUSMMemcpy.
/// @param hProgram             Program whose metadata and module are queried.
/// @param name                 Key used to look up the global in GlobalIDMD.
/// @param blocking             Passed through to urEnqueueUSMMemcpy.
/// @param count                Number of bytes to copy.
/// @param offset               Byte offset into the device global.
/// @param ptr                  Caller buffer: source for Write, destination
///                             for Read.
/// @param numEventsInWaitList  Passed through to urEnqueueUSMMemcpy.
/// @param phEventWaitList      Passed through to urEnqueueUSMMemcpy.
/// @param phEvent              Passed through to urEnqueueUSMMemcpy.
/// @param CopyType             Direction of the transfer.
///
/// @return UR_RESULT_ERROR_INVALID_VALUE if `name` has no metadata entry or if
///         `[offset, offset + count)` does not fit inside the global; any
///         ur_result_t thrown inside the try block; otherwise the result of
///         urEnqueueUSMMemcpy.
ur_result_t deviceGlobalCopyHelper(
    ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
    bool blocking, size_t count, size_t offset, void *ptr,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent, GlobalVariableCopy CopyType) {
  // Since HIP requires the global variable to be referenced by name, we use
  // metadata to find the correct name to access it by.
  const auto DeviceGlobalNameIt = hProgram->GlobalIDMD.find(name);
  if (DeviceGlobalNameIt == hProgram->GlobalIDMD.end())
    return UR_RESULT_ERROR_INVALID_VALUE;
  // Bind by reference: the metadata entry is only read, so no copy is needed.
  const std::string &DeviceGlobalName = DeviceGlobalNameIt->second;

  try {
    hipDeviceptr_t DeviceGlobal{};
    size_t DeviceGlobalSize = 0;
    UR_CHECK_ERROR(hipModuleGetGlobal(&DeviceGlobal, &DeviceGlobalSize,
                                      hProgram->get(),
                                      DeviceGlobalName.c_str()));

    // Written as two comparisons so that a huge `offset` or `count` cannot
    // wrap around in `offset + count` and slip past the range check.
    if (offset > DeviceGlobalSize || count > DeviceGlobalSize - offset)
      return UR_RESULT_ERROR_INVALID_VALUE;

    void *const DevicePtr = reinterpret_cast<uint8_t *>(DeviceGlobal) + offset;
    const bool IsWrite = (CopyType == GlobalVariableCopy::Write);
    void *const pSrc = IsWrite ? ptr : DevicePtr;
    void *const pDst = IsWrite ? DevicePtr : ptr;

    return urEnqueueUSMMemcpy(hQueue, blocking, pDst, pSrc, count,
                              numEventsInWaitList, phEventWaitList, phEvent);
  } catch (ur_result_t Err) {
    return Err;
  }
}
} // namespace

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t,
const void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
bool blockingWrite, size_t count, size_t offset, const void *pSrc,
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {
return deviceGlobalCopyHelper(hQueue, hProgram, name, blockingWrite, count,
offset, const_cast<void *>(pSrc),
numEventsInWaitList, phEventWaitList, phEvent,
GlobalVariableCopy::Write);
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
ur_queue_handle_t, ur_program_handle_t, const char *, bool, size_t, size_t,
void *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name,
bool blockingRead, size_t count, size_t offset, void *pDst,
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {
return deviceGlobalCopyHelper(
hQueue, hProgram, name, blockingRead, count, offset, pDst,
numEventsInWaitList, phEventWaitList, phEvent, GlobalVariableCopy::Read);
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex,
hKernel->Args.addMemObjArg(argIndex, hArgValue, Properties->memoryAccess);
if (hArgValue->isImage()) {
auto array = std::get<SurfaceMem>(hArgValue->Mem).getArray(Device);
hipArray_Format Format;
hipArray_Format Format{};
size_t NumChannels;
UR_CHECK_ERROR(getArrayDesc(array, Format, NumChannels));
if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 &&
Expand Down
Loading

0 comments on commit dd7c94c

Please sign in to comment.