Skip to content

Commit

Permalink
[SYCL] Introduce SYCL_JIT_TARGET_{CPU,FEATURES} env variables
Browse files Browse the repository at this point in the history
  • Loading branch information
jchlanda committed Jun 25, 2024
1 parent 5fcc02a commit 432ea9b
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 54 deletions.
8 changes: 4 additions & 4 deletions sycl-fusion/jit-compiler/include/KernelFusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ FusionResult fuseKernels(View<SYCLKernelInfo> KernelInformation,
View<ParameterInternalization> Internalization,
View<jit_compiler::JITConstant> JITConstants);

FusionResult
materializeSpecConstants(const char *KernelName,
jit_compiler::SYCLKernelBinaryInfo &BinInfo,
std::vector<unsigned char> &SpecConstBlob);
FusionResult materializeSpecConstants(
const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo,
std::vector<unsigned char> &SpecConstBlob, const std::string &TargetCPU,
const std::string &TargetFeatures);

/// Clear all previously set options.
void resetJITConfiguration();
Expand Down
11 changes: 6 additions & 5 deletions sycl-fusion/jit-compiler/lib/KernelFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ static bool isTargetFormatSupported(BinaryFormat TargetFormat) {
}
}

extern "C" FusionResult
materializeSpecConstants(const char *KernelName,
jit_compiler::SYCLKernelBinaryInfo &BinInfo,
std::vector<unsigned char> &SpecConstBlob) {
extern "C" FusionResult materializeSpecConstants(
const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo,
std::vector<unsigned char> &SpecConstBlob, const std::string &TargetCPU,
const std::string &TargetFeatures) {
auto &JITCtx = JITContext::getInstance();

TargetInfo TargetInfo = ConfigHelper::get<option::JITTargetInfo>();
Expand Down Expand Up @@ -105,7 +105,8 @@ materializeSpecConstants(const char *KernelName,

SYCLKernelInfo &MaterializerKernelInfo = *ModuleInfo.getKernelFor(KernelName);
if (auto Error = translation::KernelTranslator::translateKernel(
MaterializerKernelInfo, *NewMod, JITCtx, TargetFormat)) {
MaterializerKernelInfo, *NewMod, JITCtx, TargetFormat, TargetCPU,
TargetFeatures)) {
return errorToFusionResult(std::move(Error),
"Translation to output format failed");
}
Expand Down
86 changes: 50 additions & 36 deletions sycl-fusion/jit-compiler/lib/translation/KernelTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,11 @@ KernelTranslator::loadSPIRVKernel(llvm::LLVMContext &LLVMCtx,
return SPIRVLLVMTranslator::loadSPIRVKernel(LLVMCtx, Kernel);
}

llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
llvm::Module &Mod,
JITContext &JITCtx,
BinaryFormat Format) {
llvm::Error
KernelTranslator::translateKernel(SYCLKernelInfo &Kernel, llvm::Module &Mod,
JITContext &JITCtx, BinaryFormat Format,
const std::string &TargetCPU,
const std::string &TargetFeatures) {

KernelBinary *KernelBin = nullptr;
switch (Format) {
Expand All @@ -186,7 +187,7 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
}
case BinaryFormat::PTX: {
llvm::Expected<KernelBinary *> BinaryOrError =
translateToPTX(Kernel, Mod, JITCtx);
translateToPTX(Kernel, Mod, JITCtx, TargetCPU, TargetFeatures);
if (auto Error = BinaryOrError.takeError()) {
return Error;
}
Expand All @@ -195,7 +196,7 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
}
case BinaryFormat::AMDGCN: {
llvm::Expected<KernelBinary *> BinaryOrError =
translateToAMDGCN(Kernel, Mod, JITCtx);
translateToAMDGCN(Kernel, Mod, JITCtx, TargetCPU, TargetFeatures);
if (auto Error = BinaryOrError.takeError())
return Error;
KernelBin = *BinaryOrError;
Expand Down Expand Up @@ -226,9 +227,9 @@ KernelTranslator::translateToSPIRV(llvm::Module &Mod, JITContext &JITCtx) {
return SPIRVLLVMTranslator::translateLLVMtoSPIRV(Mod, JITCtx);
}

llvm::Expected<KernelBinary *>
KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
JITContext &JITCtx) {
llvm::Expected<KernelBinary *> KernelTranslator::translateToPTX(
SYCLKernelInfo &KernelInfo, llvm::Module &Mod, JITContext &JITCtx,
const std::string &TargetCPU, const std::string &TargetFeatures) {
#ifndef FUSION_JIT_SUPPORT_PTX
(void)KernelInfo;
(void)Mod;
Expand Down Expand Up @@ -257,23 +258,32 @@ KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
ErrorMessage.c_str());
}

llvm::StringRef TargetCPU{"sm_50"};
llvm::StringRef TargetFeatures{"+sm_50,+ptx76"};
if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name.c_str())) {
if (KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
TargetCPU =
KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
// Give priority to user specified values (through environment variables:
// SYCL_JIT_TARGET_CPU and SYCL_JIT_TARGET_FEATURES).
llvm::StringRef CPU{TargetCPU};
llvm::StringRef Features{TargetFeatures};

auto *KernelFunc = Mod.getFunction(KernelInfo.Name.c_str());
// If they were not set, use default and consult the module for alternatives
// (if present).
if (CPU.empty()) {
CPU = "sm_50";
if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
}
if (KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) {
TargetFeatures = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE)
.getValueAsString();
}
if (Features.empty()) {
Features = "+sm_50,+ptx76";
if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) {
Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE)
.getValueAsString();
}
}

// FIXME: Check whether we can provide more accurate target information here
auto *TargetMachine = Target->createTargetMachine(
TargetTriple, TargetCPU, TargetFeatures, {}, llvm::Reloc::PIC_,
std::nullopt, llvm::CodeGenOptLevel::Default);
TargetTriple, CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt,
llvm::CodeGenOptLevel::Default);

llvm::legacy::PassManager PM;

Expand All @@ -298,9 +308,9 @@ KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
#endif // FUSION_JIT_SUPPORT_PTX
}

llvm::Expected<KernelBinary *>
KernelTranslator::translateToAMDGCN(SYCLKernelInfo &KernelInfo,
llvm::Module &Mod, JITContext &JITCtx) {
llvm::Expected<KernelBinary *> KernelTranslator::translateToAMDGCN(
SYCLKernelInfo &KernelInfo, llvm::Module &Mod, JITContext &JITCtx,
const std::string &TargetCPU, const std::string &TargetFeatures) {
#ifndef FUSION_JIT_SUPPORT_AMDGCN
(void)KernelInfo;
(void)Mod;
Expand Down Expand Up @@ -329,25 +339,29 @@ KernelTranslator::translateToAMDGCN(SYCLKernelInfo &KernelInfo,
"Failed to load and translate AMDGCN LLVM IR module with error %s",
ErrorMessage.c_str());

// Set to the lowest tested target according to the GetStartedGuide, section
// "Build DPC++ toolchain with support for HIP AMD"
llvm::StringRef TargetCPU{"gfx906"};
llvm::StringRef TargetFeatures{""};
if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name.c_str())) {
if (KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
TargetCPU =
KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
llvm::StringRef CPU{TargetCPU};
llvm::StringRef Features{TargetFeatures};

auto *KernelFunc = Mod.getFunction(KernelInfo.Name.c_str());
if (CPU.empty()) {
// Set to the lowest tested target according to the GetStartedGuide, section
// "Build DPC++ toolchain with support for HIP AMD"
CPU = "gfx906";
if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
}
if (KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) {
TargetFeatures = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE)
.getValueAsString();
}
if (Features.empty()) {
if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) {
Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE)
.getValueAsString();
}
}

// FIXME: Check whether we can provide more accurate target information here
auto *TargetMachine = Target->createTargetMachine(
TargetTriple, TargetCPU, TargetFeatures, {}, llvm::Reloc::PIC_,
std::nullopt, llvm::CodeGenOptLevel::Default);
TargetTriple, CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt,
llvm::CodeGenOptLevel::Default);

std::string AMDObj;
{
Expand Down
11 changes: 8 additions & 3 deletions sycl-fusion/jit-compiler/lib/translation/KernelTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ class KernelTranslator {
loadKernels(llvm::LLVMContext &LLVMCtx, std::vector<SYCLKernelInfo> &Kernels);

static llvm::Error translateKernel(SYCLKernelInfo &Kernel, llvm::Module &Mod,
JITContext &JITCtx, BinaryFormat Format);
JITContext &JITCtx, BinaryFormat Format,
const std::string &TargetCPU = {},
const std::string &TargetFeatures = {});

private:
///
Expand All @@ -42,11 +44,14 @@ class KernelTranslator {
JITContext &JITCtx);

static llvm::Expected<KernelBinary *>
translateToPTX(SYCLKernelInfo &Kernel, llvm::Module &Mod, JITContext &JITCtx);
translateToPTX(SYCLKernelInfo &Kernel, llvm::Module &Mod, JITContext &JITCtx,
const std::string &TargetCPU = {},
const std::string &TargetFeatures = {});

static llvm::Expected<KernelBinary *>
translateToAMDGCN(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
JITContext &JITCtx);
JITContext &JITCtx, const std::string &TargetCPU = {},
const std::string &TargetFeatures = {});
};
} // namespace translation
} // namespace jit_compiler
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/config.def
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,5 @@ CONFIG(ONEAPI_DEVICE_SELECTOR, 1024, __ONEAPI_DEVICE_SELECTOR)
CONFIG(SYCL_ENABLE_FUSION_CACHING, 1, __SYCL_ENABLE_FUSION_CACHING)
CONFIG(SYCL_CACHE_IN_MEM, 1, __SYCL_CACHE_IN_MEM)
CONFIG(SYCL_JIT_KERNELS, 1, __SYCL_JIT_KERNELS)
CONFIG(SYCL_JIT_TARGET_CPU, 1024, __SYCL_JIT_TARGET_CPU)
CONFIG(SYCL_JIT_TARGET_FEATURES, 1024, __SYCL_JIT_TARGET_FEATURES)
57 changes: 57 additions & 0 deletions sycl/source/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,63 @@ template <> class SYCLConfig<SYCL_JIT_KERNELS> {
return ValStr;
}
};

template <> class SYCLConfig<SYCL_JIT_TARGET_CPU> {
using BaseT = SYCLConfigBase<SYCL_CACHE_IN_MEM>;

public:
static std::string get() {
const std::string DefaultValue{""};

const char *ValStr = getCachedValue();

if (!ValStr)
return DefaultValue;

return std::string{ValStr};
}

static void reset() { (void)getCachedValue(/*ResetCache=*/true); }

static const char *getName() { return BaseT::MConfigName; }

private:
static const char *getCachedValue(bool ResetCache = false) {
static const char *ValStr = BaseT::getRawValue();
if (ResetCache)
ValStr = BaseT::getRawValue();
return ValStr;
}
};

template <> class SYCLConfig<SYCL_JIT_TARGET_FEATURES> {
using BaseT = SYCLConfigBase<SYCL_CACHE_IN_MEM>;

public:
static std::string get() {
const std::string DefaultValue{""};

const char *ValStr = getCachedValue();

if (!ValStr)
return DefaultValue;

return std::string{ValStr};
}

static void reset() { (void)getCachedValue(/*ResetCache=*/true); }

static const char *getName() { return BaseT::MConfigName; }

private:
static const char *getCachedValue(bool ResetCache = false) {
static const char *ValStr = BaseT::getRawValue();
if (ResetCache)
ValStr = BaseT::getRawValue();
return ValStr;
}
};

#undef INVALID_CONFIG_EXCEPTION

} // namespace detail
Expand Down
10 changes: 7 additions & 3 deletions sycl/source/detail/jit_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,6 @@ sycl::detail::pi::PiKernel jit_compiler::materializeSpecConstants(
BinaryImageFormat, 0, RawDeviceImage.BinaryStart, DeviceImageSize};

::jit_compiler::TargetInfo TargetInfo = getTargetInfo(Queue);
::jit_compiler::BinaryFormat TargetFormat = TargetInfo.getFormat();
AddToConfigHandle(
::jit_compiler::option::JITTargetInfo::set(std::move(TargetInfo)));
bool DebugEnabled =
Expand All @@ -736,8 +735,13 @@ sycl::detail::pi::PiKernel jit_compiler::materializeSpecConstants(
AddToConfigHandle(::jit_compiler::option::JITEnableCaching::set(
detail::SYCLConfig<detail::SYCL_ENABLE_FUSION_CACHING>::get()));

auto MaterializerResult =
MaterializeSpecConstHandle(KernelName.c_str(), BinInfo, SpecConstBlob);
std::string TargetCPU =
detail::SYCLConfig<detail::SYCL_JIT_TARGET_CPU>::get();
std::string TargetFeatures =
detail::SYCLConfig<detail::SYCL_JIT_TARGET_FEATURES>::get();

auto MaterializerResult = MaterializeSpecConstHandle(
KernelName.c_str(), BinInfo, SpecConstBlob, TargetCPU, TargetFeatures);
if (MaterializerResult.failed()) {
std::string Message{"Compilation for kernel failed with message:\n"};
Message.append(MaterializerResult.getErrorMessage());
Expand Down
10 changes: 7 additions & 3 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2454,10 +2454,14 @@ sycl::detail::pi::PiKernel ProgramManager::getOrCreateMaterializedKernel(
auto &Plugin = DeviceImpl->getPlugin();
ProgramPtr ProgramManaged(
Program, Plugin->getPiPlugin().PiFunctionTable.piProgramRelease);
// TODO: JKB: Flags and zeros.

std::string CompileOpts;
std::string LinkOpts;
applyOptionsFromEnvironment(CompileOpts, LinkOpts);
auto BuildProgram =
build(std::move(ProgramManaged), detail::getSyclObjImpl(Context), "", "",
DeviceImpl->getHandleRef(), 0);
build(std::move(ProgramManaged), detail::getSyclObjImpl(Context),
CompileOpts, LinkOpts, DeviceImpl->getHandleRef(),
/*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0);
sycl::detail::pi::PiKernel PiKernel{nullptr};
Plugin->call<errc::kernel_not_supported, PiApiKind::piKernelCreate>(
BuildProgram.get(), KernelName.c_str(), &PiKernel);
Expand Down

0 comments on commit 432ea9b

Please sign in to comment.