Skip to content

Commit

Permalink
Merge pull request #693 from CHIP-SPV/rtdevlib
Browse files Browse the repository at this point in the history
Add capability based HIP device library link
  • Loading branch information
pvelesko authored Dec 12, 2023
2 parents 6f602e7 + 556b3c1 commit 9445545
Show file tree
Hide file tree
Showing 31 changed files with 7,119 additions and 3,624 deletions.
15 changes: 13 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
# =============================================================================

# temporary
set(CMAKE_WARN_DEPRECATED OFF)
add_compile_options(-Wno-format-extra-args -mf16c)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-duplicate-decl-specifier \
Expand Down Expand Up @@ -210,7 +209,6 @@ option(CHIP_USE_EXTERNAL_HIP_TESTS "Use Catch2 tests from the hip-tests submodul
option(CHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE "Enable non-compliant devicelib code such as calling LLVM builtins from inside kernel code. Enables certain unsigned long devicelib func variants" OFF)
option(CHIP_FAST_MATH "Use native_ OpenCL functions which are fast but their precision is implementation defined" OFF)
option(CHIP_USE_INTEL_USM "enable support for cl_intel_unified_shared_memory in the OpenCL backend" OFF)
option(CHIP_EXT_FLOAT_ATOMICS "Use cl_ext_float_atomics." ON)
# This mitigation might be necessary on some systems with an older runtime.
# This mitigation makes memory resident (disable swapping) on the GPU
# This has a significant impact on the cost of a GPU malloc
Expand All @@ -227,6 +225,10 @@ option(OCML_BASIC_ROUNDED_OPERATIONS "Use OCML implementations for devicelib fun
option(CHIP_MALI_GPU_WORKAROUNDS "Apply work-arounds for avoiding SPIR-V \
consumption issues in ARM Mali GPU driver." OFF)

# CHIP_EXT_FLOAT_ATOMICS no longer influences the build. Report that whenever
# the variable is set at all: an explicit -DCHIP_EXT_FLOAT_ATOMICS=OFF is
# ignored just like ON, so it must not be accepted silently.
if(DEFINED CHIP_EXT_FLOAT_ATOMICS)
  message(DEPRECATION "-DCHIP_EXT_FLOAT_ATOMICS is no longer effective.")
endif()

# Warpsize would optimally be a device-specific, queried and made
# effective at runtime. However, we need to fix the warpsize since SPIR-Vs need
# to be portable across multiple devices. It should be more portable to
Expand Down Expand Up @@ -323,6 +325,10 @@ message(STATUS "chipStar will be installed to: ${CMAKE_INSTALL_PREFIX}")
add_subdirectory(llvm_passes)
add_subdirectory(bitcode)

# Embed HIP device built-in implementations which are linked into users' device
# programs at runtime based on device capabilities.
target_sources(CHIP PRIVATE $<TARGET_OBJECTS:rtdevlib>)

set(HIPCC_BUILD_PATH "${CMAKE_BINARY_DIR}/bin")
add_subdirectory(HIPCC)

Expand Down Expand Up @@ -373,6 +379,7 @@ target_include_directories(CHIP
"${CMAKE_BINARY_DIR}/include" # for chipStarConfig.hh, should be first
"${CMAKE_SOURCE_DIR}/src"
"${CMAKE_SOURCE_DIR}/include"
"${CMAKE_BINARY_DIR}/bitcode"
)


Expand Down Expand Up @@ -659,11 +666,15 @@ install(EXPORT CHIPTargets
#
# enable the build_tests target
SET(HIP_PATH ${CMAKE_BINARY_DIR}) # pick up build sources, not install sources

# Configure the Catch test tree with CMake deprecation warnings switched off,
# then put CMAKE_WARN_DEPRECATED back to what it was before.
#
# Keep both ${...} expansions unquoted: when CMAKE_WARN_DEPRECATED was not set,
# the restore line expands to set(CMAKE_WARN_DEPRECATED), which unsets the
# variable again instead of assigning it an empty (false) string.
set(SAVED_WARN_DEPRECATED ${CMAKE_WARN_DEPRECATED})
set(CMAKE_WARN_DEPRECATED OFF)
# Either the hip-tests submodule or the HIP tree provides the Catch tests; both
# are configured into the same "catch" binary directory.
if(CHIP_USE_EXTERNAL_HIP_TESTS)
add_subdirectory(hip-tests/catch catch)
else()
add_subdirectory(HIP/tests/catch catch)
endif()
# Restore the caller's deprecation-warning setting.
set(CMAKE_WARN_DEPRECATED ${SAVED_WARN_DEPRECATED})

if(CHIP_BUILD_TESTS)
add_subdirectory(tests/cuda)
Expand Down
112 changes: 100 additions & 12 deletions bitcode/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#=============================================================================
# CMake build system files
#
# Copyright (c) 2021-22 chipStar developers
# Copyright (c) 2021-23 chipStar developers
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -38,10 +38,6 @@ if(CHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE)
list(APPEND EXTRA_FLAGS "-DCHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE=1")
endif()

if(CHIP_EXT_FLOAT_ATOMICS)
list(APPEND EXTRA_FLAGS "-DCHIP_EXT_FLOAT_ATOMICS")
endif()

if("${LLVM_VERSION}" VERSION_LESS 14.0)
# Definitions for pre-upstreamed HIP-Clang.
set(BC_TRIPLE "spirv64")
Expand All @@ -66,21 +62,31 @@ set(BITCODE_C_COMPILE_FLAGS
# non-OCML sources
set(NON_OCML_SOURCES "devicelib" "_cl_print_str" "texture") # "printf_support"

foreach(SOURCE IN LISTS NON_OCML_SOURCES)
# Registers a build rule that compiles an OpenCL C source to LLVM bitcode.
#
# Arguments:
#   SOURCE - path of the OpenCL C file to compile; also the rule's dependency.
#   OUTPUT - path of the bitcode file the rule produces.
#
# The rule invokes ${CMAKE_CXX_COMPILER} with ${BITCODE_CL_COMPILE_FLAGS}.
function(add_opencl_bitcode SOURCE OUTPUT)
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.cl"
OUTPUT "${OUTPUT}"
DEPENDS "${SOURCE}"
COMMAND "${CMAKE_CXX_COMPILER}" ${BITCODE_CL_COMPILE_FLAGS}
-o "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc"
-c "${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}.cl"
COMMENT "Building ${SOURCE}.bc"
-o "${OUTPUT}" -c "${SOURCE}"
COMMENT "Building ${SOURCE}"
VERBATIM)
endfunction()

# Compile every non-OCML device library source to BC/<name>.bc and record the
# resulting bitcode file in DEPEND_LIST.
foreach(cl_name IN LISTS NON_OCML_SOURCES)
  set(cl_bitcode "${CMAKE_CURRENT_BINARY_DIR}/BC/${cl_name}.bc")
  add_opencl_bitcode(
    "${CMAKE_CURRENT_SOURCE_DIR}/${cl_name}.cl"
    "${cl_bitcode}")
  list(APPEND DEPEND_LIST "${cl_bitcode}")
endforeach()

# required by ROCm-Device-Libs
set(ROCM_DEVICELIB_STANDALONE_BUILD ON)
set(ROCM_DEVICELIB_STANDALONE_BUILD ON)

# Add ROCm-Device-Libs with CMake deprecation warnings switched off, then
# restore the previous CMAKE_WARN_DEPRECATED value. The expansions stay
# unquoted so that an originally unset variable ends up unset again rather
# than being assigned an empty (false) string.
set(SAVED_WARN_DEPRECATED ${CMAKE_WARN_DEPRECATED})
set(CMAKE_WARN_DEPRECATED OFF)
add_subdirectory(ROCm-Device-Libs EXCLUDE_FROM_ALL)
set(CMAKE_WARN_DEPRECATED ${SAVED_WARN_DEPRECATED})

# ROCm-Device-Libs provides OCML and its dependencies (OCLC, OCKL, etc.)
# Since these targets don't seem to get exported as normal targets, we have to link this way.
Expand Down Expand Up @@ -133,3 +139,85 @@ add_custom_target("devicelib_bc"

install(FILES "${CMAKE_BINARY_DIR}/${BC_DESTINATION}/${BC_FILE}"
DESTINATION ${BC_DESTINATION})

# =============================================================================
# Runtime device library (rtdevlib)
#
# A collection of SPIR-V modules which are linked into user's device
# programs based on target capabilities at runtime during JIT compilation.
#
# For example, on OpenCL, HIP/CUDA's atomicAdd(float*, float) may be
# implemented with the corresponding OpenCL atomic operation if the target
# supports the cl_ext_float_atomics extension. In that case the runtime
# links in a module which implements atomicAdd(float*, float) with the
# extension's atomic operations. Otherwise, the runtime links in a
# slower, emulated version.
#
# RTDEVLIB_SOURCES defines OpenCL C sources for the rtdevlib. They are
# compiled to SPIR-V binary and embedded into the CHIP
# library. <build-dir>/bitcode/rtdevlib-modules.h declares the
# embedded modules as 'std::array <basename-of-the-source>'.

# Use only characters allowed in C/C++/OpenCL-C language for the
# source names.
set(RTDEVLIB_SOURCES
atomicAddFloat_native atomicAddFloat_emulation
atomicAddDouble_native atomicAddDouble_emulation)

# Compile each rtdevlib OpenCL C source to LLVM bitcode in the current binary
# directory and collect the produced files in RTDEVLIB_BITCODES.
foreach(rtdevlib_name IN LISTS RTDEVLIB_SOURCES)
  set(rtdevlib_bitcode "${CMAKE_CURRENT_BINARY_DIR}/${rtdevlib_name}.bc")
  add_opencl_bitcode(
    "${CMAKE_CURRENT_SOURCE_DIR}/${rtdevlib_name}.cl"
    "${rtdevlib_bitcode}")
  list(APPEND RTDEVLIB_BITCODES "${rtdevlib_bitcode}")
endforeach()

# Target that drives the bitcode compilation of all rtdevlib sources.
add_custom_target("rtdevlib-bitcodes" DEPENDS ${RTDEVLIB_BITCODES})

# Compile LLVM bitcode to a SPIR-V binary which is then embedded into
# std::array<unsigned char, N> <ARRAY_NAME>.
#
# Arguments:
#   ARRAY_NAME    - name handed to the embedding script for the generated array.
#   BC_SOURCE     - LLVM bitcode file to translate with ${LLVM_SPIRV}.
#   OUTPUT_SOURCE - C++ source file the embedding script generates.
#   OUTPUT_HEADER - header file the embedding script generates.
function(embed_spirv_in_cpp ARRAY_NAME BC_SOURCE OUTPUT_SOURCE OUTPUT_HEADER)
  set(SPIRV_EXTENSIONS "+SPV_EXT_shader_atomic_float_add")
  set(EMBED_SCRIPT "${CMAKE_SOURCE_DIR}/scripts/embed-binary-in-cpp.bash")
  get_filename_component(SOURCE_BASENAME "${BC_SOURCE}" NAME_WLE)
  # Name of the intermediate SPIR-V binary. It is deliberately kept as a bare
  # file name so the path handed to the script is unchanged.
  # NOTE(review): an earlier comment said the C array name is derived from this
  # file path; ARRAY_NAME is passed explicitly now - confirm against the script.
  set(SPIR_BINARY ${SOURCE_BASENAME}.spv)
  add_custom_command(
    OUTPUT "${OUTPUT_SOURCE}" "${OUTPUT_HEADER}"
    # The script is an input as well: editing it must regenerate the outputs.
    DEPENDS "${BC_SOURCE}" "${EMBED_SCRIPT}"
    BYPRODUCTS "${SPIR_BINARY}"
    COMMAND "${LLVM_SPIRV}" --spirv-ext=${SPIRV_EXTENSIONS}
            "${BC_SOURCE}" -o "${SPIR_BINARY}"
    COMMAND "${EMBED_SCRIPT}"
            "${ARRAY_NAME}" "${SPIR_BINARY}"
            "${OUTPUT_SOURCE}" "${OUTPUT_HEADER}"
    COMMENT "Generating embedded SPIR-V binary: ${OUTPUT_SOURCE}"
    VERBATIM
  )
endfunction()

# Embed every rtdevlib module: <name>.bc -> <name>.cc + <name>.h.
#
# The generated files are collected in their own lists. RTDEVLIB_SOURCES holds
# source base names only; appending the generated .cc paths to it would hand
# the extension-less base names to add_library() below as if they were sources.
foreach(SOURCE IN LISTS RTDEVLIB_SOURCES)
  embed_spirv_in_cpp(
    ${SOURCE}
    "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.bc"
    "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.cc"
    "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.h")
  list(APPEND RTDEVLIB_EMBEDDED_SOURCES
    "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.cc")
  list(APPEND RTDEVLIB_HEADERS
    "${CMAKE_CURRENT_BINARY_DIR}/${SOURCE}.h")
endforeach()

# Generate header that declares all the arrays of the embedded SPIR-V binaries.
# It is the concatenation of the per-module headers.
set(RTDEVLIB_HEADER "rtdevlib-modules.h")
string(REPLACE ";" " " RTDEVLIB_HEADERS_WITH_SPACES "${RTDEVLIB_HEADERS}")
add_custom_command(
  OUTPUT ${RTDEVLIB_HEADER}
  DEPENDS ${RTDEVLIB_HEADERS}
  COMMAND bash -c "cat ${RTDEVLIB_HEADERS_WITH_SPACES} > ${RTDEVLIB_HEADER}"
  COMMENT "Generating rtdevlib header."
  VERBATIM
)

add_custom_target(rtdevlib-header DEPENDS ${RTDEVLIB_HEADER})

# Object library with the embedded modules; only the generated .cc files (plus
# the aggregate header) are its sources.
add_library(rtdevlib OBJECT ${RTDEVLIB_EMBEDDED_SOURCES} ${RTDEVLIB_HEADER})

# The bitcode, embedding and header rules are reachable from several targets.
# CMake advises against building one custom-command output from independent
# targets that may run in parallel, so chain the targets explicitly.
add_dependencies(rtdevlib-header rtdevlib-bitcodes)
add_dependencies(rtdevlib rtdevlib-header)
72 changes: 72 additions & 0 deletions bitcode/atomicAddDouble_emulation.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2023 chipStar developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

// Implementations for emulated 64-bit floating point atomic add operations.

#ifndef __opencl_c_generic_address_space
#error __opencl_c_generic_address_space needed!
#endif

#define OVERLOADED __attribute__((overloadable))

/* This code is adapted from AMD's HIP sources */
/* Emulated atomic add on a double in local memory: retry a 64-bit
 * compare-exchange on the value's bit pattern until no other work-item has
 * changed it in between. Returns the value stored before the addition. */
static OVERLOADED double __chip_atomic_add_f64(volatile local double *address,
                                               double val) {
  volatile local ulong *bits = (volatile local ulong *)address;
  ulong observed = *bits;
  ulong expected;

  for (;;) {
    expected = observed;
    ulong desired = as_ulong(val + as_double(expected));
    observed = atom_cmpxchg(bits, expected, desired);
    if (observed == expected)
      break;
  }

  return as_double(expected);
}

/* Emulated atomic add on a double in global memory: retry a 64-bit
 * compare-exchange on the value's bit pattern until no other work-item has
 * changed it in between. Returns the value stored before the addition. */
static OVERLOADED double __chip_atomic_add_f64(volatile global double *address,
                                               double val) {
  volatile global ulong *bits = (volatile global ulong *)address;
  ulong observed = *bits;
  ulong expected;

  for (;;) {
    expected = observed;
    ulong desired = as_ulong(val + as_double(expected));
    observed = atom_cmpxchg(bits, expected, desired);
    if (observed == expected)
      break;
  }

  return as_double(expected);
}

/* Emulated atomic add for a generic pointer. Dispatches on the address space
 * the pointer actually refers to and returns the value stored before the
 * addition. */
double __chip_atomic_add_f64(generic double *address, double val) {
  volatile global double *gi = to_global(address);
  if (gi)
    return __chip_atomic_add_f64(gi, val);
  volatile local double *li = to_local(address);
  if (li)
    return __chip_atomic_add_f64(li, val);
  /* Private memory is visible to the calling work-item only, so a plain
   * read-modify-write is sufficient. Without this branch the addition was
   * silently dropped for private pointers. */
  private double *pi = to_private(address);
  if (pi) {
    double old = *pi;
    *pi = old + val;
    return old;
  }
  /* No address space matched (e.g. a null pointer): nothing to update. */
  return 0;
}

/* System-scope variant of the emulated add; it simply forwards to
 * __chip_atomic_add_f64 and returns that function's result. */
double __chip_atomic_add_system_f64(generic double *address, double val) {
  const double previous = __chip_atomic_add_f64(address, val);
  return previous;
}
50 changes: 50 additions & 0 deletions bitcode/atomicAddDouble_native.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2023 chipStar developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

// Implementations for 64-bit floating point atomic operations using
// OpenCL built-in extension.

#ifndef __opencl_c_generic_address_space
#error __opencl_c_generic_address_space needed!
#endif

#if !defined(__opencl_c_ext_fp64_global_atomic_add) || \
!defined(__opencl_c_ext_fp64_local_atomic_add)
#error cl_ext_float_atomics needed!
#endif

#define OVERLOADED __attribute__((overloadable))

/* Extension reference:
 * https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_float_atomics.html
*/
/* Defines double __chip_atomic_<NAME>_f64(double *address, double i), which
 * forwards to atomic_<OP>_explicit on the pointer cast to a volatile generic
 * double pointer, using memory_order_<ORDER> and memory_scope_<SCOPE>. */
#define DEF_CHIP_ATOMIC2F_ORDER_SCOPE(NAME, OP, ORDER, SCOPE) \
double __chip_atomic_##NAME##_f64(double *address, double i) { \
return atomic_##OP##_explicit((volatile __generic double *)address, i, \
memory_order_##ORDER, memory_scope_##SCOPE); \
}

/* Defines three relaxed-order variants of one operation:
 *   __chip_atomic_<NAME>_f64        - memory_scope_device
 *   __chip_atomic_<NAME>_system_f64 - memory_scope_all_svm_devices
 *   __chip_atomic_<NAME>_block_f64  - memory_scope_work_group */
#define DEF_CHIP_ATOMIC2F(NAME, OP) \
DEF_CHIP_ATOMIC2F_ORDER_SCOPE(NAME, OP, relaxed, device) \
DEF_CHIP_ATOMIC2F_ORDER_SCOPE(NAME##_system, OP, relaxed, all_svm_devices) \
DEF_CHIP_ATOMIC2F_ORDER_SCOPE(NAME##_block, OP, relaxed, work_group)

/* Instantiates __chip_atomic_add_f64, __chip_atomic_add_system_f64 and
 * __chip_atomic_add_block_f64 on top of atomic_fetch_add_explicit. */
DEF_CHIP_ATOMIC2F(add, fetch_add);
Loading

0 comments on commit 9445545

Please sign in to comment.