Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create one bitcode library for AMD #15055

Merged
merged 5 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2738,7 +2738,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
}
if (GenerateIntrinsics &&
!(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
!(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() ||
getTarget().getTriple().isAMDGCN()))) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIacos:
case Builtin::BIacosf:
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5555,7 +5555,7 @@ class OffloadingActionBuilder final {
// AOT compilation.
bool SYCLDeviceLibLinked = false;
Action *NativeCPULib = nullptr;
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) {
bool UseJitLink =
IsSPIR &&
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
Expand Down
13 changes: 8 additions & 5 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,

// spir64 target is actually JIT compilation, so we defer selection of
// bfloat16 libraries to runtime. For AOT we need libraries, but skip
// for Nvidia.
NeedLibs =
Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
// for Nvidia and AMD.
NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch &&
!Triple.isNVPTX() && !Triple.isAMDGCN();
UseNative = false;
if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
C.hasOffloadToolChain<Action::OFK_SYCL>()) {
Expand Down Expand Up @@ -212,9 +212,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
SmallVector<std::string, 8> LibraryList;
const llvm::opt::ArgList &Args = C.getArgs();

// For NVPTX we only use one single bitcode library and ignore
// For NVPTX and AMDGCN we only use one single bitcode library and ignore
// manually specified SYCL device libraries.
bool IgnoreSingleLibs = TargetTriple.isNVPTX();
bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();

struct DeviceLibOptInfo {
StringRef DeviceLibName;
Expand Down Expand Up @@ -278,6 +278,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));
frasercrmck marked this conversation as resolved.
Show resolved Hide resolved

if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));

if (IgnoreSingleLibs)
return LibraryList;

Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"

Expand Down
Empty file.
44 changes: 44 additions & 0 deletions clang/test/Driver/sycl-device-lib-amdgcn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa`
// Verify that the correct devicelib linking actions are spawned by the driver.
// Check also if the correct warnings are generated.

// UNSUPPORTED: system-windows

// Check if internal libraries are still linked against when linkage of all
// device libs is manually excluded.
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s

// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)

// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ALL %s

// Check that the -fsycl-device-lib flag has no effect when subsets of libs
// are specified.
// RUN: %clangxx -ccc-print-phases -std=c++11 --sysroot=%S/Inputs/SYCL \
// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ALL %s

// Check that -fno-sycl-device-lib is ignored when it does not contain "all".
// A warning should be printed that the flag got ignored.
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl --sysroot=%S/Inputs/SYCL \
// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s

// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)

26 changes: 14 additions & 12 deletions clang/test/Driver/sycl-offload-amdgcn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

/// Check phases w/out specifying a compute capability.
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-lib=all -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
// CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl)
// CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
Expand All @@ -37,17 +37,19 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object
// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object
// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)

/// Check that we only unbundle an archive once.
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \
Expand Down
13 changes: 10 additions & 3 deletions libdevice/cmake/modules/SYCLLibdevice.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ foreach(filetype IN LISTS filetypes)
add_dependencies(libsycldevice libsycldevice-${filetype})
endforeach()

# For NVPTX each device library is compiled into a single bitcode
# For NVPTX and AMDGCN each device library is compiled into a single bitcode
# file and all files created this way are linked into one large bitcode
# library.
# Additional compilation options are needed for compiling each device library.
Expand All @@ -76,6 +76,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
"-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib")
set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false")
endif()
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
list(APPEND devicelib_arch amd)
set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
"-Xsycl-target-backend" "--offload-arch=gfx940")
set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false")
endif()


set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv)
set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir)
Expand Down Expand Up @@ -444,7 +451,7 @@ foreach(dtype IN ITEMS bf16 fp32 fp64)
endforeach()
endforeach()

# Add device fallback imf libraries for the CUDA target.
# Add device fallback imf libraries for the NVPTX and AMD targets.
# The output files are bitcode.
foreach(arch IN LISTS devicelib_arch)
foreach(dtype IN ITEMS bf16 fp32 fp64)
Expand All @@ -464,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch)
endforeach()
endforeach()

# Create one large bitcode file for the CUDA targets.
# Create one large bitcode file for the CUDA and AMD targets.
# Use all the files collected in the respective global properties.
foreach(arch IN LISTS devicelib_arch)
get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})
Expand Down
5 changes: 3 additions & 2 deletions libdevice/cmath_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

DEVICE_EXTERN_C_INLINE
int abs(int x) { return __devicelib_abs(x); }
Expand Down Expand Up @@ -199,4 +200,4 @@ DEVICE_EXTERN_C_INLINE
float rintf(float x) { return __nv_rintf(x); }
#endif // __NVPTX__

#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/cmath_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// All exported functions in math and complex device libraries are weak
// references. If users provide their own math or complex functions (with
Expand Down Expand Up @@ -496,4 +497,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1
}
}
#endif // defined(_WIN32)
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/crt_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
DeviceGlobal<uint64_t[RAND_NEXT_LEN]> RandNext;
#endif

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)
DEVICE_EXTERN_C_INLINE
void *memcpy(void *dest, const void *src, size_t n) {
return __devicelib_memcpy(dest, src, n);
Expand Down Expand Up @@ -126,4 +127,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line,
__spirv_LocalInvocationId_z());
}
#endif
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#define EXTERN_C
#endif // __cplusplus

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)
#ifdef __SYCL_DEVICE_ONLY__
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak))
#else // __SYCL_DEVICE_ONLY__
Expand All @@ -27,7 +28,7 @@
DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline))
#define DEVICE_EXTERN_C_NOINLINE \
DEVICE_EXTERNAL EXTERN_C __attribute__((noinline))
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__)
#define __LIBDEVICE_IMF_ENABLED__
Expand Down
5 changes: 3 additions & 2 deletions libdevice/device_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
#define __LIBDEVICE_DEVICE_MATH_H__

#include "device.h"
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)
#include <cstdint>

typedef struct {
Expand Down Expand Up @@ -367,5 +368,5 @@ float __devicelib_scalbnf(float x, int n);
DEVICE_EXTERN_C
double __devicelib_scalbn(double x, int exp);

#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
#endif // __LIBDEVICE_DEVICE_MATH_H__
4 changes: 2 additions & 2 deletions libdevice/fallback-cassert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file,
}
#endif // __SPIR__ || __SPIRV__

#ifdef __NVPTX__
#if defined(__NVPTX__) || defined(__AMDGCN__)

DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file,
unsigned __line, const char *__function,
Expand All @@ -119,4 +119,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File,
__assertfail(_Message, _File, _Line, 0, 1);
}

#endif
#endif // __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/fallback-cmath-fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// To support fallback device libraries on-demand loading, please update the
// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
Expand Down Expand Up @@ -193,4 +194,4 @@ DEVICE_EXTERN_C_INLINE
double __devicelib_scalbn(double x, int exp) {
return __spirv_ocl_ldexp(x, exp);
}
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/fallback-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// To support fallback device libraries on-demand loading, please update the
// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
Expand Down Expand Up @@ -209,4 +210,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); }
DEVICE_EXTERN_C_INLINE
float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); }

#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/fallback-cstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#include "wrapper.h"
#include <cstdint>

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src,
size_t n) {
Expand Down Expand Up @@ -202,4 +203,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) {

return head_cmp;
}
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 4 additions & 1 deletion libdevice/imf/imf_fp32_dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
/// overhead in these deep learning frameworks.
//===----------------------------------------------------------------------===//

#include "../device_imf.hpp"
#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) {
return (x >= 0) ? x : -x;
}
Expand Down
5 changes: 4 additions & 1 deletion libdevice/imf/imf_fp64_dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
/// overhead in these deep learning frameworks.
//===----------------------------------------------------------------------===//

#include "../device_imf.hpp"
#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) {
return __fabs(x);
}
Expand Down
5 changes: 4 additions & 1 deletion libdevice/imf/imf_inline_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../device_imf.hpp"
#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

DEVICE_EXTERN_C_INLINE
_iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a,
_iml_bf16_internal b,
Expand Down
6 changes: 5 additions & 1 deletion libdevice/imf/imf_inline_fp32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../device_imf.hpp"

#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16(
_iml_half_internal a, _iml_half_internal b, _iml_half_internal c) {
_iml_half ha(a), hb(b), hc(c);
Expand Down
Loading
Loading