Skip to content

Commit

Permalink
[SYCL][NATIVECPU] Add device library and initial subgroup support (#1…
Browse files Browse the repository at this point in the history
…3979)

This PR implements SYCL NativeCPU runtime functions as C++ functions in
a new native_cpu device library instead of materializing them by LLVM
passes. This library also contains native_cpu implementations for many
SYCL builtins, including for subgroup support. The PR will make at least
the following e2e tests pass:
```
SubGroup/barrier.cpp
SubGroup/broadcast.cpp
SubGroup/broadcast_fp64.cpp
SubGroup/common.cpp
SubGroup/generic-shuffle.cpp
SubGroup/shuffle_fp64.cpp
SubGroup/sub_group_as.cpp
SubGroup/sub_group_as_vec.cpp
SubGroup/sub_group_by_value_semantics.cpp
SubGroup/sub_groups_sycl2020.cpp
```
Other tests are currently skipped because the NativeCPU UR adapter does
not yet report the new capabilities; the adapter will be updated in a
subsequent PR.

---------

Co-authored-by: pietro.ghiglio <pietro.ghiglio@codeplay.com>
  • Loading branch information
uwedolinsky and PietroGhg authored Jul 9, 2024
1 parent 00b9b6d commit 17ee3e2
Show file tree
Hide file tree
Showing 15 changed files with 651 additions and 418 deletions.
121 changes: 54 additions & 67 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5528,18 +5528,17 @@ class OffloadingActionBuilder final {
// device libraries are only needed when current toolchain is using
// AOT compilation.
bool SYCLDeviceLibLinked = false;
if (IsSPIR || IsNVPTX) {
Action *NativeCPULib = nullptr;
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
bool UseJitLink =
IsSPIR &&
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
options::OPT_fno_sycl_device_lib_jit_link, false);
bool UseAOTLink = IsSPIR && (IsSpirvAOT || !UseJitLink);
SYCLDeviceLibLinked = addSYCLDeviceLibs(
TC, SYCLDeviceLibs, UseAOTLink,
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment());
}
if (IsSYCLNativeCPU) {
SYCLDeviceLibLinked |= addSYCLNativeCPULibs(TC, SYCLDeviceLibs);
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(),
IsSYCLNativeCPU, NativeCPULib);
}
JobAction *LinkSYCLLibs =
C.MakeAction<LinkJobAction>(SYCLDeviceLibs, types::TY_LLVM_BC);
Expand Down Expand Up @@ -5622,6 +5621,15 @@ class OffloadingActionBuilder final {
};
Action *PostLinkAction = createPostLinkAction();
if (IsSYCLNativeCPU) {
if (NativeCPULib) {
// The native cpu device lib is linked without --only-needed
// as it contains builtins not referenced in source code but
// needed by the native cpu backend.
clang::driver::ActionList AllLibs = {FullDeviceLinkAction,
NativeCPULib};
FullDeviceLinkAction =
C.MakeAction<LinkJobAction>(AllLibs, types::TY_LLVM_BC);
}
// For SYCL Native CPU, we just take the linked device
// modules, lower them to an object file, and link it to the host
// object file.
Expand Down Expand Up @@ -5806,60 +5814,9 @@ class OffloadingActionBuilder final {
}
}

bool addSYCLNativeCPULibs(const ToolChain *TC,
ActionList &DeviceLinkObjects) {
std::string LibSpirvFile;
if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) {
auto ProvidedPath =
Args.getLastArgValue(options::OPT_fsycl_libspirv_path_EQ).str();
if (llvm::sys::fs::exists(ProvidedPath))
LibSpirvFile = ProvidedPath;
} else {
SmallVector<StringRef, 8> LibraryPaths;

// Expected path w/out install.
SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir);
llvm::sys::path::append(WithoutInstallPath, Twine("../../clc"));
LibraryPaths.emplace_back(WithoutInstallPath.c_str());

// Expected path w/ install.
SmallString<256> WithInstallPath(C.getDriver().ResourceDir);
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
LibraryPaths.emplace_back(WithInstallPath.c_str());

// Select libclc variant based on target triple.
// On Windows long is 32 bits, so we have to select the right remangled
// libclc version.
std::string LibSpirvTargetName =
(TC->getAuxTriple()->isOSWindows())
? "remangled-l32-signed_char.libspirv-"
: "remangled-l64-signed_char.libspirv-";
LibSpirvTargetName.append(TC->getTripleString() + ".bc");

for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibSpirvTargetFile(LibraryPath);
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
if (llvm::sys::fs::exists(LibSpirvTargetFile) ||
Args.hasArg(options::OPT__HASH_HASH_HASH)) {
LibSpirvFile = std::string(LibSpirvTargetFile.str());
break;
}
}
}

if (!LibSpirvFile.empty()) {
Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibSpirvFile));
auto *SYCLLibClcInputAction =
C.MakeAction<InputAction>(*LibClcInputArg, types::TY_LLVM_BC);
DeviceLinkObjects.push_back(SYCLLibClcInputAction);
return true;
}
return false;
}

bool addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects,
bool isSpirvAOT, bool isMSVCEnv) {
bool isSpirvAOT, bool isMSVCEnv, bool isNativeCPU,
Action *&NativeCPULib) {
int NumOfDeviceLibLinked = 0;
SmallVector<SmallString<128>, 4> LibLocCandidates;
SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates);
Expand All @@ -5876,6 +5833,14 @@ class OffloadingActionBuilder final {
SmallString<128> LibName(LLCandidate);
llvm::sys::path::append(LibName, DeviceLib);
if (llvm::sys::fs::exists(LibName)) {

// NativeCPU currently only needs libsycl-nativecpu_utils and
// libclc, so temporarily skip the other device libs in the invocation.
// TODO: remove once NativeCPU tests the other libraries.
if (isNativeCPU &&
!LibName.str().contains("libsycl-nativecpu_utils"))
continue;

++NumOfDeviceLibLinked;
Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibName));
Expand Down Expand Up @@ -5909,14 +5874,24 @@ class OffloadingActionBuilder final {
}
if (!LibLocSelected)
LibLocSelected = !LibLocSelected;

// The device link stage may remove symbols not referenced in the
// source code. Since libsycl-nativecpu_utils contains such symbols
// which are later needed by the NativeCPU backend passes we link
// that library separately afterwards without --only-needed.
if (isNativeCPU) {
assert(!NativeCPULib);
NativeCPULib = DeviceLinkObjects.back();
DeviceLinkObjects.pop_back();
}
}
}
}

// For NVPTX backend we need to also link libclc and CUDA libdevice
// at the same stage that we link all of the unbundled SYCL libdevice
// objects together.
if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) {
if ((TC->getTriple().isNVPTX() || isNativeCPU) && NumOfDeviceLibLinked) {
std::string LibSpirvFile;
if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) {
auto ProvidedPath =
Expand All @@ -5936,13 +5911,18 @@ class OffloadingActionBuilder final {
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
LibraryPaths.emplace_back(WithInstallPath.c_str());

// TODO: check if the isNVPTX() path can also use
// TC->getTripleString() so that the conditional could be removed
const std::string TrStr =
isNativeCPU ? TC->getTripleString() : "nvptx64-nvidia-cuda";

// Select remangled libclc variant
std::string LibSpirvTargetName =
(TC->getAuxTriple()->isOSWindows())
? "remangled-l32-signed_char.libspirv-nvptx64-nvidia-cuda."
"bc"
: "remangled-l64-signed_char.libspirv-nvptx64-nvidia-cuda."
"bc";
StringRef LibSpirvTargetNamePref =
TC->getAuxTriple()->isOSWindows()
? "remangled-l32-signed_char.libspirv-"
: "remangled-l64-signed_char.libspirv-";
llvm::Twine LibSpirvTargetNameTemp = LibSpirvTargetNamePref + TrStr;
llvm::Twine LibSpirvTargetName = LibSpirvTargetNameTemp + ".bc";

for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibSpirvTargetFile(LibraryPath);
Expand All @@ -5954,7 +5934,6 @@ class OffloadingActionBuilder final {
}
}
}

if (!LibSpirvFile.empty()) {
Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibSpirvFile));
Expand All @@ -5963,6 +5942,11 @@ class OffloadingActionBuilder final {
DeviceLinkObjects.push_back(SYCLLibClcInputAction);
}

if (isNativeCPU) {
// Return early so that no CUDA actions are generated.
return NumOfDeviceLibLinked != 0;
}

const toolchains::CudaToolChain *CudaTC =
static_cast<const toolchains::CudaToolChain *>(TC);
for (const auto &LinkInputEnum : enumerate(DeviceLinkerInputs)) {
Expand Down Expand Up @@ -9238,7 +9222,10 @@ InputInfoList Driver::BuildJobsForActionNoCache(
Action::OffloadKind DependentOffloadKind;
if (UI.DependentOffloadKind == Action::OFK_SYCL &&
TargetDeviceOffloadKind == Action::OFK_None &&
!(isSYCLNativeCPU(Args) && isSYCLNativeCPU(C.getDefaultToolChain().getTriple(), TC->getTriple())))
!(isSYCLNativeCPU(Args) &&
isSYCLNativeCPU(C.getDefaultToolChain().getTriple(),
TC->getTriple()) &&
UA->getDependentActionsInfo().size() > 1))
DependentOffloadKind = Action::OFK_Host;
else
DependentOffloadKind = UI.DependentOffloadKind;
Expand Down
4 changes: 0 additions & 4 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5623,10 +5623,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Let the FE know we are doing a SYCL offload compilation, but we are
// doing the host pass.
CmdArgs.push_back("-fsycl-is-host");
if (IsSYCLNativeCPU) {
CmdArgs.push_back("-D");
CmdArgs.push_back("__SYCL_NATIVE_CPU__");
}

if (!D.IsCLMode()) {
// SYCL library is guaranteed to work correctly only with dynamic
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,15 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
const SYCLDeviceLibsList SYCLDeviceSanitizerLibs = {
{"libsycl-sanitizer", "internal"}};
#endif

const SYCLDeviceLibsList SYCLNativeCpuDeviceLibs = {
{"libsycl-nativecpu_utils", "internal"}};

const bool isNativeCPU =
(driver::isSYCLNativeCPU(Args) &&
driver::isSYCLNativeCPU(C.getDefaultToolChain().getTriple(),
TargetTriple));

bool IsWindowsMSVCEnv =
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver();
Expand Down Expand Up @@ -368,6 +377,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
addLibraries(SYCLDeviceSanitizerLibs);
}
#endif

if (isNativeCPU)
addLibraries(SYCLNativeCpuDeviceLibs);

return LibraryList;
}

Expand Down
37 changes: 21 additions & 16 deletions clang/test/Driver/sycl-native-cpu-fsycl.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//UNSUPPORTED: (system-windows && !native_cpu)
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=CHECK_ACTIONS
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -ccc-print-bindings %s 2>&1 | FileCheck %s --check-prefix=CHECK_BINDINGS
//RUN: %clang -fsycl -fsycl-targets=native_cpu -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_INVO
Expand All @@ -19,24 +20,29 @@
//CHECK_ACTIONS: +- 7: compiler, {6}, ir, (host-sycl)
//CHECK_ACTIONS: +- 8: backend, {7}, assembler, (host-sycl)
//CHECK_ACTIONS: +- 9: assembler, {8}, object, (host-sycl)

//CHECK_ACTIONS:| +- 10: linker, {5}, ir, (device-sycl)
//CHECK_ACTIONS:| |- 11: input, "{{.*}}libspirv{{.*}}", ir, (device-sycl)
//CHECK_ACTIONS:| +- 12: linker, {10, 11}, ir, (device-sycl)
//CHECK_ACTIONS:| +- 13: backend, {12}, assembler, (device-sycl)
//CHECK_ACTIONS:| +- 14: assembler, {13}, object, (device-sycl)
//CHECK_ACTIONS:|- 15: offload, "device-sycl ({{.*}})" {14}, object
//CHECK_ACTIONS:| +- 16: sycl-post-link, {12}, tempfiletable, (device-sycl)
//CHECK_ACTIONS:| +- 17: clang-offload-wrapper, {16}, object, (device-sycl)
//CHECK_ACTIONS:|- 18: offload, "device-sycl ({{.*}})" {17}, object
//CHECK_ACTIONS:19: linker, {9, 15, 18}, image, (host-sycl)
//CHECK_ACTIONS: +- 10: linker, {5}, ir, (device-sycl)
//CHECK_ACTIONS: |- [[SPIRVLIB:.*]]: input, "{{.*}}libspirv{{.*}}", ir, (device-sycl)
//different libraries may be linked on different platforms, so just check the common stages
//CHECK_ACTIONS: +- [[LINKALL:.*]]: linker, {10, [[SPIRVLIB]]}, ir, (device-sycl)
//CHECK_ACTIONS: |- [[NCPUINP:.*]]: input, "{{.*}}nativecpu{{.*}}", ir, (device-sycl)
//CHECK_ACTIONS: +- [[NCPULINK:.*]]: linker, {[[LINKALL]], [[NCPUINP]]}, ir, (device-sycl)
//this is where we compile the device code to a shared lib, and we link the host shared lib and the device shared lib
//CHECK_ACTIONS:| +- [[VAL81:.*]]: backend, {[[NCPULINK]]}, assembler, (device-sycl)
//CHECK_ACTIONS:| +- [[VAL82:.*]]: assembler, {[[VAL81]]}, object, (device-sycl)
//CHECK_ACTIONS:|- [[VAL822:.*]]: offload, "device-sycl ({{.*}})" {[[VAL82]]}, object
//call sycl-post-link and clang-offload-wrapper
//CHECK_ACTIONS:| +- [[VAL83:.*]]: sycl-post-link, {[[LINKALL]]}, tempfiletable, (device-sycl)
//CHECK_ACTIONS:| +- [[VAL84:.*]]: clang-offload-wrapper, {[[VAL83]]}, object, (device-sycl)
//CHECK_ACTIONS:|- [[VAL85:.*]]: offload, "device-sycl ({{.*}})" {[[VAL84]]}, object
//CHECK_ACTIONS:[[VAL86:.*]]: linker, {9, [[VAL822]], [[VAL85]]}, image, (host-sycl)

//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["{{.*}}sycl-native-cpu-fsycl.cpp"], output: "[[KERNELIR:.*]].bc"
//CHECK_BINDINGS:# "{{.*}}" - "Append Footer to source", inputs: ["{{.*}}sycl-native-cpu-fsycl.cpp"], output: "[[SRCWFOOTER:.*]].cpp"
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[SRCWFOOTER]].cpp", "[[KERNELIR]].bc"], output: "[[HOSTOBJ:.*]].o"
//CHECK_BINDINGS:# "{{.*}}" - "SYCL::Linker", inputs: ["[[KERNELIR]].bc"], output: "[[KERNELLINK:.*]].bc"
//CHECK_BINDINGS:# "{{.*}}" - "SYCL::Linker", inputs: ["[[KERNELLINK]].bc", "{{.*}}.bc"], output: "[[KERNELLINKWLIB:.*]].bc"
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[KERNELLINKWLIB]].bc"], output: "[[KERNELOBJ:.*]].o"
//CHECK_BINDINGS:# "{{.*}}" - "SYCL::Linker", inputs: ["[[KERNELLINKWLIB]].bc", "[[UNBUNDLEDNCPU:.*]].bc"], output: "[[KERNELLINKWLIB12:.*]].bc"
//CHECK_BINDINGS:# "{{.*}}" - "clang", inputs: ["[[KERNELLINKWLIB12]].bc"], output: "[[KERNELOBJ:.*]].o"
//CHECK_BINDINGS:# "{{.*}}" - "SYCL post link", inputs: ["[[KERNELLINKWLIB]].bc"], output: "[[TABLEFILE:.*]].table"
//CHECK_BINDINGS:# "{{.*}}" - "offload wrapper", inputs: ["[[TABLEFILE]].table"], output: "[[WRAPPEROBJ:.*]].o"
//CHECK_BINDINGS:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[HOSTOBJ]].o", "[[KERNELOBJ]].o", "[[WRAPPEROBJ]].o"], output: "a.{{.*}}"
Expand All @@ -49,8 +55,8 @@

// checks that the device and host triple is correct in the generated actions when it is set explicitly
//CHECK_ACTIONS-AARCH64: +- 6: offload, "host-sycl (aarch64-unknown-linux-gnu)" {2}, "device-sycl (aarch64-unknown-linux-gnu)" {5}, c++-cpp-output
//CHECK_ACTIONS-AARCH64:|- 15: offload, "device-sycl (aarch64-unknown-linux-gnu)" {14}, object
//CHECK_ACTIONS-AARCH64:|- 18: offload, "device-sycl (aarch64-unknown-linux-gnu)" {17}, object
//CHECK_ACTIONS-AARCH64:|- 17: offload, "device-sycl (aarch64-unknown-linux-gnu)" {16}, object
//CHECK_ACTIONS-AARCH64:|- 20: offload, "device-sycl (aarch64-unknown-linux-gnu)" {19}, object

// checks that bindings are correct when linking together multiple TUs on native cpu
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload bundler", inputs: ["{{.*}}.o"], outputs: ["[[FILE1HOST:.*]].o", "[[FILE1DEV:.*]].o"]
Expand All @@ -59,8 +65,7 @@
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "Convert SPIR-V to LLVM-IR if needed", inputs: ["[[FILE2DEV]].o"], output: "[[FILE2SPV:.*]].bc"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL::Linker", inputs: ["[[FILE1SPV]].bc", "[[FILE2SPV]].bc"], output: "[[LINK1:.*]].bc"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL::Linker", inputs: ["[[LINK1]].bc", "{{.*}}.bc"], output: "[[LINK2:.*]].bc"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "clang", inputs: ["[[LINK2]].bc"], output: "[[KERNELO:.*]].o"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "clang", inputs: ["{{.*}}.bc"], output: "[[KERNELO:.*]].o"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "SYCL post link", inputs: ["[[LINK2]].bc"], output: "[[POSTL:.*]].table"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "offload wrapper", inputs: ["[[POSTL]].table"], output: "[[WRAP:.*]].o"
//CHECK_BINDINGS_MULTI_TU:# "{{.*}}" - "{{.*}}::Linker", inputs: ["[[FILE1HOST]].o", "[[FILE2HOST]].o", "[[KERNELO]].o", "[[WRAP]].o"], output: "{{.*}}"

10 changes: 10 additions & 0 deletions libdevice/cmake/modules/SYCLLibdevice.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,16 @@ if (NOT MSVC)
sycl-compiler)
endif()

# Build the NativeCPU utility device library only when "native_cpu" is listed
# in SYCL_ENABLE_PLUGINS.
if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS)
# NATIVE_CPU_DIR is used below as an include path; stop configuration with a
# fatal error if it is not defined.
if (NOT DEFINED NATIVE_CPU_DIR)
message( FATAL_ERROR "Undefined UR variable NATIVE_CPU_DIR. The name may have changed." )
endif()
# Add the NativeCPU UR adapter path (NATIVE_CPU_DIR) as an include directory so
# that the header file declaring the state struct can be found.
# libsycl-nativecpu_utils is only needed as a bitcode (BC) file by NativeCPU,
# and is built from nativecpu_utils.cpp with -fsycl-targets=native_cpu.
# TODO: add versions for other targets (for cross-compilation).
add_devicelib_bc(libsycl-nativecpu_utils SRC nativecpu_utils.cpp DEP ${itt_obj_deps} EXTRA_ARGS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu)
endif()

add_devicelib(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps})
add_devicelib(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps})
add_devicelib(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp DEP ${itt_obj_deps})
Expand Down
Loading

0 comments on commit 17ee3e2

Please sign in to comment.