Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPIR-V Extension] fpbuiltin-max-error support #2056

Merged
merged 19 commits into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/LLVMSPIRVExtensions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@ EXT(SPV_INTEL_tensor_float32_rounding)
EXT(SPV_EXT_relaxed_printf_string_address_space)
EXT(SPV_INTEL_fpga_argument_interfaces)
EXT(SPV_INTEL_fpga_latency_control)
EXT(SPV_INTEL_fp_max_error)
EXT(SPV_INTEL_cache_controls)
2 changes: 2 additions & 0 deletions lib/SPIRV/SPIRVBuiltinHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ Value *BuiltinCallMutator::doConversion() {
CallInst *NewCall =
Builder.Insert(addCallInst(CI->getModule(), FuncName, ReturnTy, Args,
&Attrs, nullptr, Mangler.get()));
NewCall->copyMetadata(*CI);
NewCall->setAttributes(CI->getAttributes());
Value *Result = MutateRet ? MutateRet(Builder, NewCall) : NewCall;
Result->takeName(CI);
if (!CI->getType()->isVoidTy())
Expand Down
41 changes: 41 additions & 0 deletions lib/SPIRV/SPIRVReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3893,7 +3893,48 @@ void SPIRVToLLVM::transDecorationsToMetadata(SPIRVValue *BV, Value *V) {
SetDecorationsMetadata(I);
}

namespace {

/// Reinterprets the bit pattern of the SPIR-V word \p Spir as a float.
///
/// The object representation is copied byte by byte through unsigned char
/// pointers. Unlike writing one union member and reading another, this is
/// well-defined C++ and needs no additional header.
///
/// \returns the float whose object representation equals that of \p Spir.
static float convertSPIRVWordToFloat(SPIRVWord Spir) {
  static_assert(sizeof(float) == sizeof(SPIRVWord),
                "float and SPIRVWord must have the same size");
  float F = 0.0f;
  auto *Dst = reinterpret_cast<unsigned char *>(&F);
  const auto *Src = reinterpret_cast<const unsigned char *>(&Spir);
  for (unsigned I = 0; I != sizeof(float); ++I)
    Dst[I] = Src[I];
  return F;
}

/// Translates an FPMaxErrorDecorationINTEL decoration found on \p BV onto the
/// LLVM value \p V.
///
/// Nothing is done unless \p V is an Instruction and \p BV carries the
/// decoration. The single decoration literal is reinterpreted as a float and
/// converted to text with std::to_string. That text is attached as the
/// "fpbuiltin-max-error" function attribute when \p V is a CallInst, and as
/// "fpbuiltin-max-error" metadata on any other instruction.
///
/// \returns true if the decoration was present and applied, false otherwise.
static bool transFPMaxErrorDecoration(SPIRVValue *BV, Value *V,
LLVMContext *Context) {
// Passed to hasDecorate() by address; it is not read again in this function.
SPIRVWord ID;
if (Instruction *I = dyn_cast<Instruction>(V))
if (BV->hasDecorate(DecorationFPMaxErrorDecorationINTEL, 0, &ID)) {
auto Literals =
BV->getDecorationLiterals(DecorationFPMaxErrorDecorationINTEL);
// NOTE(review): the operand count is only checked by this assert, so
// Literals[0] below is read unconditionally in builds without asserts.
assert(Literals.size() == 1 &&
"FP Max Error decoration shall have 1 operand");
auto F = convertSPIRVWordToFloat(Literals[0]);
// NOTE(review): std::to_string(float) prints with six fixed decimals, so
// bounds below 5e-7 would be emitted as "0.000000" - confirm acceptable.
if (CallInst *CI = dyn_cast<CallInst>(I)) {
// Add attribute
auto A = llvm::Attribute::get(*Context, "fpbuiltin-max-error",
std::to_string(F));
CI->addFnAttr(A);
} else {
// Add metadata
MDNode *N =
MDNode::get(*Context, MDString::get(*Context, std::to_string(F)));
I->setMetadata("fpbuiltin-max-error", N);
}
return true;
}
Comment on lines +3909 to +3929
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just minor suggestion to wrap this to a separate function - to have more consistent code in transDecoration()

return false;
}
} // namespace

bool SPIRVToLLVM::transDecoration(SPIRVValue *BV, Value *V) {
if (transFPMaxErrorDecoration(BV, V, Context))
return true;

if (!transAlign(BV, V))
return false;

Expand Down
154 changes: 153 additions & 1 deletion lib/SPIRV/SPIRVWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,19 @@ using namespace llvm;
using namespace SPIRV;
using namespace OCLUtil;

namespace {

/// Reinterprets the bit pattern of the float \p F as a SPIR-V word.
///
/// The object representation is copied byte by byte through unsigned char
/// pointers. Unlike writing one union member and reading another, this is
/// well-defined C++ and needs no additional header.
///
/// \returns the SPIRVWord whose object representation equals that of \p F.
static SPIRVWord convertFloatToSPIRVWord(float F) {
  static_assert(sizeof(float) == sizeof(SPIRVWord),
                "float and SPIRVWord must have the same size");
  SPIRVWord Word = 0;
  auto *Dst = reinterpret_cast<unsigned char *>(&Word);
  const auto *Src = reinterpret_cast<const unsigned char *>(&F);
  for (unsigned I = 0; I != sizeof(float); ++I)
    Dst[I] = Src[I];
  return Word;
}

} // namespace

namespace SPIRV {

static void foreachKernelArgMD(
Expand Down Expand Up @@ -3481,6 +3494,26 @@ bool LLVMToSPIRVBase::isKnownIntrinsic(Intrinsic::ID Id) {
}
}

/// Attaches an FPMaxErrorDecorationINTEL decoration to \p I when the
/// llvm.fpbuiltin.* call \p II carries an "fpbuiltin-max-error" attribute.
///
/// No decoration is added when the SPV_INTEL_fp_max_error extension is not
/// allowed for \p BM, when the attribute is absent, or when the attribute
/// value cannot be parsed as a floating-point number.
///
/// \param BM module queried for the extension permission.
/// \param II the llvm.fpbuiltin.* intrinsic call being translated.
/// \param I  the SPIR-V instruction produced for \p II.
/// \returns \p I, so the call can be used directly in a return statement.
SPIRVInstruction *addFPBuiltinDecoration(SPIRVModule *BM, IntrinsicInst *II,
                                         SPIRVInstruction *I) {
  const bool AllowFPMaxError =
      BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_fp_max_error);
  assert(II->getCalledFunction()->getName().startswith("llvm.fpbuiltin"));
  if (!AllowFPMaxError)
    return I;

  // Add a new decoration for llvm.fpbuiltin intrinsics, if needed.
  const auto Attrs = II->getAttributes();
  if (!Attrs.hasFnAttr("fpbuiltin-max-error"))
    return I;

  // StringRef::getAsDouble() returns true on failure. Skip the decoration in
  // that case rather than emitting a max error of 0.0, which would state a
  // bound the input never asked for.
  double MaxError = 0.0;
  if (Attrs.getFnAttr("fpbuiltin-max-error")
          .getValueAsString()
          .getAsDouble(MaxError))
    return I;

  // convertFloatToSPIRVWord() takes a float, so narrow explicitly.
  I->addDecorate(DecorationFPMaxErrorDecorationINTEL,
                 convertFloatToSPIRVWord(static_cast<float>(MaxError)));
  return I;
}

// Performs mapping of LLVM IR rounding mode to SPIR-V rounding mode
// Value *V is metadata <rounding mode> argument of
// llvm.experimental.constrained.* intrinsics
Expand Down Expand Up @@ -4424,8 +4457,9 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II,
}
return Result;
}

default:
if (auto *BVar = transFPBuiltinIntrinsicInst(II, BB))
return BVar;
if (BM->isUnknownIntrinsicAllowed(II))
return BM->addCallInst(
transFunctionDecl(II->getCalledFunction()),
Expand All @@ -4441,6 +4475,124 @@ SPIRVValue *LLVMToSPIRVBase::transIntrinsicInst(IntrinsicInst *II,
return nullptr;
}

/// Classifies the llvm.fpbuiltin.* intrinsic called by \p II.
///
/// If the callee name does not start with "llvm.fpbuiltin", \p OpName is left
/// untouched and UNKNOWN is returned. Otherwise the "llvm.fpbuiltin." prefix
/// is stripped when present, and \p OpName is set to the text up to the next
/// '.' in what remains.
///
/// \returns the category matching \p OpName, or UNKNOWN for any other name.
LLVMToSPIRVBase::FPBuiltinType
LLVMToSPIRVBase::getFPBuiltinType(IntrinsicInst *II, StringRef &OpName) {
  StringRef CalleeName = II->getCalledFunction()->getName();
  if (!CalleeName.startswith("llvm.fpbuiltin"))
    return FPBuiltinType::UNKNOWN;

  CalleeName.consume_front("llvm.fpbuiltin.");
  OpName = CalleeName.split('.').first;

  return StringSwitch<FPBuiltinType>(OpName)
      // Plain arithmetic.
      .Cases("fadd", "fsub", "fmul", "fdiv", "frem",
             FPBuiltinType::REGULAR_MATH)
      // One-operand functions.
      .Cases("sin", "cos", "tan", "asin", "acos", "atan",
             FPBuiltinType::EXT_1OPS)
      .Cases("sinh", "cosh", "tanh", "asinh", "acosh", "atanh",
             FPBuiltinType::EXT_1OPS)
      .Cases("exp", "exp2", "exp10", "expm1", FPBuiltinType::EXT_1OPS)
      .Cases("log", "log2", "log10", "log1p", FPBuiltinType::EXT_1OPS)
      .Cases("sqrt", "rsqrt", "erf", "erfc", FPBuiltinType::EXT_1OPS)
      // Two-operand functions.
      .Cases("atan2", "pow", "hypot", "ldexp", FPBuiltinType::EXT_2OPS)
      // Three-operand function.
      .Case("sincos", FPBuiltinType::EXT_3OPS)
      .Default(FPBuiltinType::UNKNOWN);
}

/// Lowers an llvm.fpbuiltin.* intrinsic call to a SPIR-V instruction.
///
/// fadd/fsub/fmul/fdiv/frem become the corresponding SPIR-V binary
/// instruction. Every other recognized operation becomes an OpenCL extended
/// instruction taking one, two or three operands. The created instruction is
/// passed through addFPBuiltinDecoration() before being returned.
///
/// \returns the translated instruction, or nullptr when getFPBuiltinType()
/// reports UNKNOWN for \p II or checkTypeForSPIRVExtendedInstLowering()
/// rejects it.
SPIRVValue *LLVMToSPIRVBase::transFPBuiltinIntrinsicInst(IntrinsicInst *II,
SPIRVBasicBlock *BB) {
StringRef OpName;
auto FPBuiltinTypeVal = getFPBuiltinType(II, OpName);
// Not a recognized fpbuiltin intrinsic: report that nothing was translated.
if (FPBuiltinTypeVal == FPBuiltinType::UNKNOWN)
return nullptr;
switch (FPBuiltinTypeVal) {
case FPBuiltinType::REGULAR_MATH: {
// getFPBuiltinType() only yields REGULAR_MATH for the five names below, so
// the OpUndef default is not expected to be selected.
auto BinOp = StringSwitch<Op>(OpName)
.Case("fadd", OpFAdd)
.Case("fsub", OpFSub)
.Case("fmul", OpFMul)
.Case("fdiv", OpFDiv)
.Case("frem", OpFRem)
.Default(OpUndef);
auto *BI = BM->addBinaryInst(BinOp, transType(II->getType()),
transValue(II->getArgOperand(0), BB),
transValue(II->getArgOperand(1), BB), BB);
return addFPBuiltinDecoration(BM, II, BI);
}
case FPBuiltinType::EXT_1OPS: {
// On a failed type check, leave the switch and return nullptr below.
if (!checkTypeForSPIRVExtendedInstLowering(II, BM))
break;
SPIRVType *STy = transType(II->getType());
std::vector<SPIRVValue *> Ops(1, transValue(II->getArgOperand(0), BB));
auto ExtOp = StringSwitch<SPIRVWord>(OpName)
Copy link
Contributor

@MrSidims MrSidims Jul 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it suppose to work for ESIMD SYCL programming model? I'm asking because not all backends support OpenCL ext instructions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do expect the backends to atleast support the subset of instructions that are shown here.

Thanks

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vmustya just checking your opinion

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently IGC VC backend only supports the native_* subset of OpenCL extended instructions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@asudarsa to consider changing math ext instructions to native_*

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andykaylor I'm talking about OpenCL builtins described here: https://registry.khronos.org/SPIR-V/specs/1.0/OpenCL.ExtendedInstructionSet.100.mobile.html native vs non-native. AFAIK IGC scalar support all of the builtins, while vector compiler support only 'native'. I just want to ensure, that we are on the same page and understand consequences of merging implementation going through non-native builtins.

Copy link
Contributor

@MrSidims MrSidims Aug 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance-wise: that's what the spec says:
The function may map to one or more native device instructions and will typically have better performance compared to the non native corresponding functions. Support for denormal values is implementation-defined for this function
I can neither confirm nor deny such statement for Intel and others hardware.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MrSidims My concern is for the case where we're trying to restrict accuracy beyond the normal SYCL requirements. For example, the cos() function normally only requires 4 ulp accuracy, but I might want to call it with a 1 ulp accuracy requirement. My understanding of the native_ OCL instructions is that native instructions may be used regardless of their accuracy. So if we're trying to require 1 ulp accuracy, using the native_ instructions isn't appropriate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andykaylor thanks for the explanation! I just wanted to ensure, that we understand that we sacrifice portability (at least temporary) and have a reasoning for it.
@asudarsa please resolve the conflict and I'll merge the PR.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MrSidims Yes, sacrificing portability when the accuracy controls are used is expected. I expect that the accuracy controls will only be used by advanced users who are trying to fine-tune their implementations. I hope that if the feature is successful more vendors will add support for it and the portability problem will be resolved.

.Case("sin", OpenCLLIB::Sin)
.Case("cos", OpenCLLIB::Cos)
.Case("tan", OpenCLLIB::Tan)
.Case("sinh", OpenCLLIB::Sinh)
.Case("cosh", OpenCLLIB::Cosh)
.Case("tanh", OpenCLLIB::Tanh)
.Case("asin", OpenCLLIB::Asin)
.Case("acos", OpenCLLIB::Acos)
.Case("atan", OpenCLLIB::Atan)
.Case("asinh", OpenCLLIB::Asinh)
.Case("acosh", OpenCLLIB::Acosh)
.Case("atanh", OpenCLLIB::Atanh)
.Case("exp", OpenCLLIB::Exp)
.Case("exp2", OpenCLLIB::Exp2)
.Case("exp10", OpenCLLIB::Exp10)
.Case("expm1", OpenCLLIB::Expm1)
.Case("log", OpenCLLIB::Log)
.Case("log2", OpenCLLIB::Log2)
.Case("log10", OpenCLLIB::Log10)
.Case("log1p", OpenCLLIB::Log1p)
.Case("sqrt", OpenCLLIB::Sqrt)
.Case("rsqrt", OpenCLLIB::Rsqrt)
.Case("erf", OpenCLLIB::Erf)
.Case("erfc", OpenCLLIB::Erfc)
.Default(SPIRVWORD_MAX);
asudarsa marked this conversation as resolved.
Show resolved Hide resolved
// Every EXT_1OPS name from getFPBuiltinType() has a Case above.
assert(ExtOp != SPIRVWORD_MAX);
auto *BI = BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp,
Ops, BB);
return addFPBuiltinDecoration(BM, II, BI);
}
case FPBuiltinType::EXT_2OPS: {
// On a failed type check, leave the switch and return nullptr below.
if (!checkTypeForSPIRVExtendedInstLowering(II, BM))
break;
SPIRVType *STy = transType(II->getType());
std::vector<SPIRVValue *> Ops{transValue(II->getArgOperand(0), BB),
transValue(II->getArgOperand(1), BB)};
auto ExtOp = StringSwitch<SPIRVWord>(OpName)
.Case("atan2", OpenCLLIB::Atan2)
.Case("hypot", OpenCLLIB::Hypot)
.Case("pow", OpenCLLIB::Pow)
.Case("ldexp", OpenCLLIB::Ldexp)
.Default(SPIRVWORD_MAX);
// Every EXT_2OPS name from getFPBuiltinType() has a Case above.
assert(ExtOp != SPIRVWORD_MAX);
auto *BI = BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp,
Ops, BB);
return addFPBuiltinDecoration(BM, II, BI);
}
case FPBuiltinType::EXT_3OPS: {
// On a failed type check, leave the switch and return nullptr below.
if (!checkTypeForSPIRVExtendedInstLowering(II, BM))
break;
SPIRVType *STy = transType(II->getType());
std::vector<SPIRVValue *> Ops{transValue(II->getArgOperand(0), BB),
transValue(II->getArgOperand(1), BB),
transValue(II->getArgOperand(2), BB)};
auto ExtOp = StringSwitch<SPIRVWord>(OpName)
.Case("sincos", OpenCLLIB::Sincos)
.Default(SPIRVWORD_MAX);
assert(ExtOp != SPIRVWORD_MAX);
auto *BI = BM->addExtInst(STy, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp,
Ops, BB);
return addFPBuiltinDecoration(BM, II, BI);
}
// UNKNOWN was already handled before the switch.
default:
return nullptr;
}
// Reached only through the break statements of the EXT_* cases.
return nullptr;
}

SPIRVValue *LLVMToSPIRVBase::transFenceInst(FenceInst *FI,
SPIRVBasicBlock *BB) {
SPIRVWord MemorySemantics;
Expand Down
10 changes: 10 additions & 0 deletions lib/SPIRV/SPIRVWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ class LLVMToSPIRVBase : protected BuiltinCallHelper {
bool transBuiltinSet();
bool isKnownIntrinsic(Intrinsic::ID Id);
SPIRVValue *transIntrinsicInst(IntrinsicInst *Intrinsic, SPIRVBasicBlock *BB);
/// Category of an llvm.fpbuiltin.* intrinsic, as reported by
/// getFPBuiltinType().
enum class FPBuiltinType {
// fadd, fsub, fmul, fdiv, frem.
REGULAR_MATH,
// Functions translated with a single operand (sin, exp, sqrt, ...).
EXT_1OPS,
// Functions translated with two operands (atan2, pow, hypot, ldexp).
EXT_2OPS,
// Functions translated with three operands (sincos).
EXT_3OPS,
// Not an llvm.fpbuiltin.* intrinsic, or an operation name not listed above.
UNKNOWN
};
/// Classifies the intrinsic called by \p II. The StringRef argument receives
/// the operation name extracted from the callee name when that name starts
/// with "llvm.fpbuiltin".
FPBuiltinType getFPBuiltinType(IntrinsicInst *II, StringRef &);
/// Translates an llvm.fpbuiltin.* intrinsic call \p II into \p BB.
/// Returns nullptr when \p II is not handled.
SPIRVValue *transFPBuiltinIntrinsicInst(IntrinsicInst *II,
SPIRVBasicBlock *BB);
SPIRVValue *transFenceInst(FenceInst *FI, SPIRVBasicBlock *BB);
SPIRVValue *transCallInst(CallInst *Call, SPIRVBasicBlock *BB);
SPIRVValue *transDirectCallInst(CallInst *Call, SPIRVBasicBlock *BB);
Expand Down
2 changes: 2 additions & 0 deletions lib/SPIRV/libSPIRV/SPIRVDecorate.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ class SPIRVDecorate : public SPIRVDecorateGeneric {
case DecorationLatencyControlLabelINTEL:
case DecorationLatencyControlConstraintINTEL:
return ExtensionID::SPV_INTEL_fpga_latency_control;
case DecorationFPMaxErrorDecorationINTEL:
return ExtensionID::SPV_INTEL_fp_max_error;
case internal::DecorationCacheControlLoadINTEL:
case internal::DecorationCacheControlStoreINTEL:
return ExtensionID::SPV_INTEL_cache_controls;
Expand Down
2 changes: 2 additions & 0 deletions lib/SPIRV/libSPIRV/SPIRVEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,8 @@ template <> inline void SPIRVMap<Decoration, SPIRVCapVec>::init() {
{CapabilityFPGALatencyControlINTEL});
ADD_VEC_INIT(DecorationLatencyControlConstraintINTEL,
{CapabilityFPGALatencyControlINTEL});
ADD_VEC_INIT(DecorationFPMaxErrorDecorationINTEL,
{CapabilityFPMaxErrorINTEL});
}

template <> inline void SPIRVMap<BuiltIn, SPIRVCapVec>::init() {
Expand Down
2 changes: 2 additions & 0 deletions lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ template <> inline void SPIRVMap<Decoration, std::string>::init() {
add(DecorationStableKernelArgumentINTEL, "StableKernelArgumentINTEL");
add(DecorationLatencyControlLabelINTEL, "LatencyControlLabelINTEL");
add(DecorationLatencyControlConstraintINTEL, "LatencyControlConstraintINTEL");
add(DecorationFPMaxErrorDecorationINTEL, "FPMaxErrorDecorationINTEL");

// From spirv_internal.hpp
add(internal::DecorationCallableFunctionINTEL, "CallableFunctionINTEL");
Expand Down Expand Up @@ -623,6 +624,7 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
add(CapabilityMax, "Max");
add(CapabilityFPGAArgumentInterfacesINTEL, "FPGAArgumentInterfacesINTEL");
add(CapabilityFPGALatencyControlINTEL, "FPGALatencyControlINTEL");
add(CapabilityFPMaxErrorINTEL, "FPMaxErrorINTEL");
// From spirv_internal.hpp
add(internal::CapabilityFastCompositeINTEL, "FastCompositeINTEL");
add(internal::CapabilityOptNoneINTEL, "OptNoneINTEL");
Expand Down
2 changes: 1 addition & 1 deletion spirv-headers-tag.conf
Original file line number Diff line number Diff line change
@@ -1 +1 @@
9b527c0fb60124936d0906d44803bec51a0200fb
51b106461707f46d962554efe1bf56dee28958a3
Loading
Loading