Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Driver] Enable SYCL AOT compilation using --offload-arch #15353

Draft
wants to merge 16 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,8 @@ def err_drv_sycl_missing_amdgpu_arch : Error<
"missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend%select{|=%1}0 --offload-arch=<arch-name>'">;
def err_drv_sycl_thinlto_split_off: Error<
"'%0' is not supported when '%1' is set with '-fsycl'">;
// Reported when '--offload-arch' is combined with '-fsycl' but the new
// offloading driver ('--offload-new-driver') is not enabled.
def err_drv_sycl_offload_arch_new_driver: Error<
  "'--offload-arch' is only supported when '-fsycl' is set with '--offload-new-driver'">;
def warn_drv_sycl_offload_target_duplicate : Warning<
"SYCL offloading target '%0' is similar to target '%1' already specified; "
"will be ignored">, InGroup<SyclTarget>;
Expand Down
103 changes: 101 additions & 2 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1182,12 +1182,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
llvm::StringMap<StringRef> FoundNormalizedTriples;
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
llvm::StringSet<> SYCLTriples;
if (HasSYCLTargetsOption) {
// At this point, we know we have a valid combination
// of -fsycl*target options passed
Arg *SYCLTargetsValues = SYCLTargets;
if (SYCLTargetsValues) {
llvm::StringSet<> SYCLTriples;

if (SYCLTargetsValues->getNumValues()) {

// Multiple targets are currently not supported when using
Expand Down Expand Up @@ -1276,9 +1277,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
if (!Arch.empty())
DerivedArchs[DeviceTriple.getTriple()].insert(Arch);
}

if (!SYCLTriples.empty()) {
for (const auto &SYCLTriple : SYCLTriples) {
llvm::Triple Triple(SYCLTriple.getKey());
llvm::Triple Triple(MakeSYCLDeviceTriple(SYCLTriple.getKey()));
UniqueSYCLTriplesVec.push_back(Triple);
}
}
Expand All @@ -1287,6 +1289,87 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
Diag(clang::diag::warn_drv_empty_joined_argument)
<< SYCLTargetsValues->getAsString(C.getInputArgs());
}
}
// If the user specified --offload-arch, deduce the offloading
// target triple(s) from the set of architecture(s).
// Create a toolchain for each valid triple.
else if (HasValidSYCLRuntime &&
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP &&
!IsCuda) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
HostTC->getTriple());

// Attempt to deduce the offloading triple from the set of architectures.
// We need to temporarily create these toolchains so that we can access
// tools for inferring architectures.
llvm::DenseSet<StringRef> Archs;
if (NVPTXTriple) {
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
*this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None);
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (AMDTriple) {
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, *AMDTriple, *HostTC, C.getInputArgs());
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true))
Archs.insert(Arch);
}
if (!AMDTriple && !NVPTXTriple) {
for (StringRef Arch :
getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true))
Archs.insert(Arch);
}
for (StringRef Arch : Archs) {
if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
} else if (AMDTriple &&
IsAMDOffloadArch(StringToOffloadArch(
getProcessorFromTargetID(*AMDTriple, Arch)))) {
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
} else if (IsIntelCPUOffloadArch(StringToOffloadArchIntel(Arch))) {
DerivedArchs["spir64_x86_64"].insert(Arch);
} else if (IsIntelGPUOffloadArch(StringToOffloadArchIntel(Arch))) {
DerivedArchs["spir64_gen"].insert(Arch);
} else {
Diag(clang::diag::err_drv_invalid_sycl_target) << Arch;
return;
}
srividya-sundaram marked this conversation as resolved.
Show resolved Hide resolved
srividya-sundaram marked this conversation as resolved.
Show resolved Hide resolved
}
// If the set is empty then we failed to find a native architecture.
if (Archs.empty()) {
Diag(clang::diag::err_drv_invalid_sycl_target) << "native";
return;
}

for (const auto &TripleAndArchs : DerivedArchs)
SYCLTriples.insert(TripleAndArchs.first());

for (const auto &Val : SYCLTriples) {
llvm::Triple SYCLTargetTriple(MakeSYCLDeviceTriple(Val.getKey()));
std::string NormalizedName = SYCLTargetTriple.normalize();

// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
<< Val.getKey() << Duplicate->second;
continue;
}

// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val.getKey();
UniqueSYCLTriplesVec.push_back(SYCLTargetTriple);
}

addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);

} else {
// If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
// For -fsycl-device-only, we also setup the implied triple as needed.
Expand Down Expand Up @@ -7244,6 +7327,22 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,

handleArguments(C, Args, Inputs, Actions);

bool HasValidSYCLRuntime =
C.getInputArgs().hasFlag(options::OPT_fsycl, options::OPT_fno_sycl,
false) ||
hasSYCLDeviceOnly(C.getInputArgs());
bool IsSYCLOffloadArchEnabled =
HasValidSYCLRuntime &&
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ);
srividya-sundaram marked this conversation as resolved.
Show resolved Hide resolved
/*
if (IsSYCLOffloadArchEnabled &&
!C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false)) {
Diag(clang::diag::err_drv_sycl_offload_arch_new_driver);
return;
}
*/

// If '-fintelfpga' is passed, add '-fsycl' to the list of arguments
const llvm::opt::OptTable &Opts = getOpts();
Arg *SYCLFpgaArg = C.getInputArgs().getLastArg(options::OPT_fintelfpga);
Expand Down
76 changes: 76 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,82 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

// One row of the table that maps an Intel architecture name to its
// OffloadArchIntel enumerator. Rows are aggregate-initialized as
// {ArchName, IntelArch}, so the member order below must not change.
struct StringToOffloadArchIntelMap {
// Architecture name that StringToOffloadArchIntel compares its argument to.
const char *ArchName;
// Enumerator returned when ArchName matches.
OffloadArchIntel IntelArch;
};

// Lookup table from Intel architecture name to OffloadArchIntel enumerator.
// StringToOffloadArchIntel scans it front to back and returns the first row
// whose name matches exactly, so every name must appear only once (a repeated
// name would be dead data).
static const StringToOffloadArchIntelMap StringToArchNamesMap[] = {
    // Intel CPU mapping.
    {"skylake-avx512", OffloadArchIntel::SKYLAKEAVX512},
    {"core-avx2", OffloadArchIntel::COREAVX2},
    {"corei7-avx", OffloadArchIntel::COREI7AVX},
    {"corei7", OffloadArchIntel::COREI7},
    {"westmere", OffloadArchIntel::WESTMERE},
    {"sandybridge", OffloadArchIntel::SANDYBRIDGE},
    {"ivybridge", OffloadArchIntel::IVYBRIDGE},
    {"broadwell", OffloadArchIntel::BROADWELL},
    {"coffeelake", OffloadArchIntel::COFFEELAKE},
    {"alderlake", OffloadArchIntel::ALDERLAKE},
    {"skylake", OffloadArchIntel::SKYLAKE},
    {"skx", OffloadArchIntel::SKX},
    {"cascadelake", OffloadArchIntel::CASCADELAKE},
    {"icelake-client", OffloadArchIntel::ICELAKECLIENT},
    {"icelake-server", OffloadArchIntel::ICELAKESERVER},
    {"sapphirerapids", OffloadArchIntel::SAPPHIRERAPIDS},
    {"graniterapids", OffloadArchIntel::GRANITERAPIDS},
    // Intel GPU mapping.
    {"bdw", OffloadArchIntel::BDW},
    {"skl", OffloadArchIntel::SKL},
    {"kbl", OffloadArchIntel::KBL},
    {"cfl", OffloadArchIntel::CFL},
    {"apl", OffloadArchIntel::APL},
    {"bxt", OffloadArchIntel::BXT},
    {"glk", OffloadArchIntel::GLK},
    {"whl", OffloadArchIntel::WHL},
    {"aml", OffloadArchIntel::AML},
    {"cml", OffloadArchIntel::CML},
    {"icllp", OffloadArchIntel::ICLLP},
    {"icl", OffloadArchIntel::ICL},
    {"ehl", OffloadArchIntel::EHL},
    {"jsl", OffloadArchIntel::JSL},
    {"tgllp", OffloadArchIntel::TGLLP},
    {"tgl", OffloadArchIntel::TGL},
    {"rkl", OffloadArchIntel::RKL},
    {"adl_s", OffloadArchIntel::ADL_S},
    {"rpl_s", OffloadArchIntel::RPL_S},
    {"adl_p", OffloadArchIntel::ADL_P},
    {"adl_n", OffloadArchIntel::ADL_N},
    {"dg1", OffloadArchIntel::DG1},
    {"acm_g10", OffloadArchIntel::ACM_G10},
    {"dg2_g10", OffloadArchIntel::DG2_G10},
    {"acm_g11", OffloadArchIntel::ACM_G11},
    {"dg2_g11", OffloadArchIntel::DG2_G11},
    {"acm_g12", OffloadArchIntel::ACM_G12},
    {"dg2_g12", OffloadArchIntel::DG2_G12},
    {"pvc", OffloadArchIntel::PVC},
    {"pvc_vg", OffloadArchIntel::PVC_VG},
    {"mtl_u", OffloadArchIntel::MTL_U},
    {"mtl_s", OffloadArchIntel::MTL_S},
    {"arl_u", OffloadArchIntel::ARL_U},
    {"arl_s", OffloadArchIntel::ARL_S},
    {"mtl_h", OffloadArchIntel::MTL_H},
    {"arl_h", OffloadArchIntel::ARL_H},
    {"bmg_g21", OffloadArchIntel::BMG_G21},
    {"lnl_m", OffloadArchIntel::LNL_M}};

// Map an Intel architecture name to its OffloadArchIntel enumerator.
//
// Walks StringToArchNamesMap in order and returns the enumerator of the first
// row whose ArchName equals ArchNameAsString (exact, case-sensitive match).
// Returns OffloadArchIntel::UNKNOWN when no row matches.
OffloadArchIntel
clang::driver::StringToOffloadArchIntel(llvm::StringRef ArchNameAsString) {
  for (const StringToOffloadArchIntelMap &Entry : StringToArchNamesMap)
    if (ArchNameAsString == Entry.ArchName)
      return Entry.IntelArch;
  return OffloadArchIntel::UNKNOWN;
}

SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D)
: D(D), InstallationCandidates() {
InstallationCandidates.emplace_back(D.Dir + "/..");
Expand Down
76 changes: 76 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,82 @@
namespace clang {
namespace driver {

// Intel CPU and GPU architectures recognized for offloading.
//
// Enumerator order is significant: IsIntelCPUOffloadArch and
// IsIntelGPUOffloadArch classify a value by comparing it against
// SKYLAKEAVX512, BDW and LNL_M. CPU entries must therefore stay contiguous
// between UNKNOWN and BDW, and GPU entries contiguous from BDW through LNL_M.
enum class OffloadArchIntel {
// Value returned by StringToOffloadArchIntel for an unrecognized name; it
// falls outside both the CPU and the GPU range.
UNKNOWN,
// CPUs (SKYLAKEAVX512 is the first CPU entry).
SKYLAKEAVX512,
COREAVX2,
COREI7AVX,
COREI7,
WESTMERE,
SANDYBRIDGE,
IVYBRIDGE,
BROADWELL,
COFFEELAKE,
ALDERLAKE,
SKYLAKE,
SKX,
CASCADELAKE,
ICELAKECLIENT,
ICELAKESERVER,
SAPPHIRERAPIDS,
GRANITERAPIDS,
// GPUs (BDW is the first GPU entry, LNL_M the last).
BDW,
SKL,
KBL,
CFL,
APL,
BXT,
GLK,
WHL,
AML,
CML,
ICLLP,
ICL,
EHL,
JSL,
TGLLP,
TGL,
RKL,
ADL_S,
RPL_S,
ADL_P,
ADL_N,
DG1,
ACM_G10,
DG2_G10,
ACM_G11,
DG2_G11,
ACM_G12,
DG2_G12,
PVC,
PVC_VG,
MTL_U,
MTL_S,
ARL_U,
ARL_S,
MTL_H,
ARL_H,
BMG_G21,
LNL_M
};

// Returns true when Arch lies in the CPU portion of OffloadArchIntel, i.e. at
// or after SKYLAKEAVX512 and strictly before BDW (the first GPU entry).
// UNKNOWN precedes SKYLAKEAVX512 and is rejected.
static inline bool IsIntelCPUOffloadArch(OffloadArchIntel Arch) {
  if (Arch < OffloadArchIntel::SKYLAKEAVX512)
    return false;
  return Arch < OffloadArchIntel::BDW;
}

// Returns true when Arch lies in the GPU portion of OffloadArchIntel, i.e.
// anywhere from BDW through LNL_M inclusive.
static inline bool IsIntelGPUOffloadArch(OffloadArchIntel Arch) {
  const bool AtOrAfterFirstGPU = OffloadArchIntel::BDW <= Arch;
  const bool AtOrBeforeLastGPU = Arch <= OffloadArchIntel::LNL_M;
  return AtOrAfterFirstGPU && AtOrBeforeLastGPU;
}

// Convert an Intel CPU/GPU architecture name to its OffloadArchIntel value.
// Returns OffloadArchIntel::UNKNOWN if the name is not recognized.
OffloadArchIntel StringToOffloadArchIntel(llvm::StringRef ArchNameAsString);

class SYCLInstallationDetector {
public:
SYCLInstallationDetector(const Driver &D);
Expand Down
73 changes: 73 additions & 0 deletions clang/test/Driver/sycl-offload-arch-intelgpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/// Tests the behavior of -fsycl --offload-new-driver
/// --offload-arch=<intel-cpu/gpu-values>.

// SYCL AOT compilation to Intel CPUs using --offload-arch

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=broadwell %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=broadwell

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=coffeelake %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=coffeelake

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icelake-client %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=icelake-client

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skylake-avx512 %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=skylake-avx512

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=core-avx2 %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=core-avx2

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=corei7-avx %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=corei7-avx

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=corei7 %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=corei7

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=westmere %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=westmere

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=sandybridge %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=sandybridge

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=ivybridge %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=ivybridge

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=alderlake %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=alderlake

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skylake %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=skylake

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skx %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=skx

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cascadelake %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=cascadelake

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icelake-server %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=icelake-server

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=sapphirerapids %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=sapphirerapids

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=graniterapids %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=graniterapids

// TARGET-TRIPLE-CPU: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown"
// TARGET-TRIPLE-CPU: "-D__SYCL_TARGET_INTEL_X86_64__"
// CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_x86_64-unknown-unknown,arch=[[DEV_STR]],kind=sycl"


// SYCL AOT compilation to Intel GPUs using --offload-arch

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bdw %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=bdw -DMAC_STR=BDW

// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cfl %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=cfl -DMAC_STR=CFL

// TARGET-TRIPLE-GPU: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"
// TARGET-TRIPLE-GPU: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__"
// CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl"

Loading