Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][NVPTX] Create one bitcode library for NVPTX #15048

Merged
merged 5 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final {
++NumOfDeviceLibLinked;
Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibName));
if (TC->getTriple().isNVPTX() ||
(TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga)) {
if (TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga) {
auto *SYCLDeviceLibsInputAction =
C.MakeAction<InputAction>(*InputArg, types::TY_Object);
auto *SYCLDeviceLibsUnbundleAction =
Expand Down
55 changes: 43 additions & 12 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,31 +212,51 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
SmallVector<std::string, 8> LibraryList;
const llvm::opt::ArgList &Args = C.getArgs();

// For NVPTX we use a single bitcode library and ignore
// manually specified SYCL device libraries.
bool IgnoreSingleLibs = TargetTriple.isNVPTX();

// Pairs the name of a device library with an associated option string.
// NOTE(review): the uses of both fields lie outside this hunk; presumably
// DeviceLibOption is one of the DeviceLibLinkInfo keys (e.g. "libc") --
// confirm against the code that fills SYCLDeviceLibsList.
struct DeviceLibOptInfo {
StringRef DeviceLibName;
StringRef DeviceLibOption;
};

bool NoDeviceLibs = false;
// Currently, all SYCL device libraries will be linked by default. Linkage
// of "internal" libraries cannot be affected via -fno-sycl-device-lib.
// Currently, all SYCL device libraries will be linked by default.
llvm::StringMap<bool> DeviceLibLinkInfo = {
{"libc", true}, {"libm-fp32", true}, {"libm-fp64", true},
{"libimf-fp32", true}, {"libimf-fp64", true}, {"libimf-bf16", true},
{"libm-bfloat16", true}, {"internal", true}};

// If -fno-sycl-device-lib is specified, its values will be used to exclude
// linkage of libraries specified by DeviceLibLinkInfo. Linkage of "internal"
// libraries cannot be affected via -fno-sycl-device-lib.
bool ExcludeDeviceLibs = false;

if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ,
options::OPT_fno_sycl_device_lib_EQ)) {
if (A->getValues().size() == 0)
C.getDriver().Diag(diag::warn_drv_empty_joined_argument)
<< A->getAsString(Args);
else {
if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ))
NoDeviceLibs = true;
ExcludeDeviceLibs = true;

// When single libraries are ignored and a subset of library names
// not containing the value "all" is specified by -fno-sycl-device-lib,
// print an unused argument warning.
bool PrintUnusedExcludeWarning = false;

for (StringRef Val : A->getValues()) {
if (Val == "all") {
PrintUnusedExcludeWarning = false;

// Make sure that internal libraries are still linked against
// when -fno-sycl-device-lib contains "all" and single libraries
// should be ignored.
IgnoreSingleLibs = IgnoreSingleLibs && !ExcludeDeviceLibs;

for (const auto &K : DeviceLibLinkInfo.keys())
DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal");
DeviceLibLinkInfo[K] = (K == "internal") || !ExcludeDeviceLibs;
break;
}
auto LinkInfoIter = DeviceLibLinkInfo.find(Val);
Expand All @@ -247,10 +267,22 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
DeviceLibLinkInfo[Val] = true && !NoDeviceLibs;
DeviceLibLinkInfo[Val] = !ExcludeDeviceLibs;
PrintUnusedExcludeWarning = IgnoreSingleLibs && ExcludeDeviceLibs;
}
if (PrintUnusedExcludeWarning)
C.getDriver().Diag(diag::warn_drv_unused_argument) << A->getSpelling();
}
}

if (TargetTriple.isNVPTX() && IgnoreSingleLibs) {
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));
}

if (IgnoreSingleLibs) {
return LibraryList;
}
MartinWehking marked this conversation as resolved.
Show resolved Hide resolved

using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;

const SYCLDeviceLibsList SYCLDeviceWrapperLibs = {
Expand Down Expand Up @@ -304,10 +336,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver();
StringRef LibSuffix = ".bc";
if (TargetTriple.isNVPTX() ||
(TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga))
// For NVidia or FPGA, we are unbundling objects.
if (TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)
// For FPGA, we are unbundling objects.
LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o";
if (IsNewOffload)
// For new offload model, we use packaged .bc files.
Expand All @@ -323,7 +354,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
};

addLibraries(SYCLDeviceWrapperLibs);
if (IsSpirvAOT || TargetTriple.isNVPTX())
if (IsSpirvAOT)
addLibraries(SYCLDeviceFallbackLibs);

bool NativeBfloatLibs;
Expand Down Expand Up @@ -551,7 +582,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
this->getToolChain().getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga;
StringRef LibPostfix = ".bc";
if (IsNVPTX || IsFPGA) {
if (IsFPGA) {
LibPostfix = ".o";
if (HostTC->getTriple().isWindowsMSVCEnvironment() &&
C.getDriver().IsCLMode())
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// intrinsics. This allows the driver to link in the libdevice definitions for
// cosf etc. later in the driver flow.

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"
Expand Down
40 changes: 40 additions & 0 deletions clang/test/Driver/sycl-device-lib-nvptx.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Tests specific to `-fsycl-targets=nvptx64-nvidia-cuda`
// Verify that the correct devicelib linking actions are spawned by the driver.
// Check also if the correct warnings are generated.

// UNSUPPORTED: system-windows

// Check that internal libraries are still linked when linkage of all device libs is manually excluded.
// RUN: %clangxx -ccc-print-phases -std=c++11 \
MartinWehking marked this conversation as resolved.
Show resolved Hide resolved
// RUN: -fsycl -fno-sycl-device-lib=all \
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s

// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
MartinWehking marked this conversation as resolved.
Show resolved Hide resolved
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50)

// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
// RUN: %clangxx -ccc-print-phases -std=c++11 \
// RUN: -fsycl -fsycl-device-lib=all \
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ALL %s

// Check that the -fsycl-device-lib flag has no effect when subsets of libs are specified.
// RUN: %clangxx -ccc-print-phases -std=c++11 \
// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ALL %s

// Check that -fno-sycl-device-lib is ignored when it does not contain "all".
// A warning should be printed that the flag got ignored.
// RUN: %clangxx -ccc-print-phases -std=c++11 \
// RUN: -fsycl -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s

// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50)
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, sm_50)

76 changes: 32 additions & 44 deletions clang/test/Driver/sycl-offload-nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,28 +53,22 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object
// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object
// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object
// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl)
//
/// Check phases specifying a compute capability.
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
Expand All @@ -97,28 +91,22 @@
// CHK-PHASES: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object
// CHK-PHASES: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object
// CHK-PHASES: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object
// CHK-PHASES: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35)
// CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35)
// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35)
// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35)
// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35)
// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object
// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35)
// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35)
// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35)
// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35)
// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35)
// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object
// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl)

/// Check calling preprocessor only
// RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
Expand Down
2 changes: 1 addition & 1 deletion libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL )
foreach( file subnormal_use_default subnormal_disable )
link_bc(
TARGET ${file}
RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR}
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll
)
install( FILES $<TARGET_PROPERTY:${file},TARGET_FILE> ARCHIVE
Expand Down Expand Up @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Enable SPIR-V builtin function declarations, so they don't
# have to be explicitly declared in the source.
list( APPEND flags -Xclang -fdeclare-spirv-builtins)

set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} )

Expand Down
Loading
Loading