Skip to content

Commit

Permalink
[SYCL][SCLA] Add CodeGen capabilities for `sycl_ext_oneapi_private_al…
Browse files Browse the repository at this point in the history
…loca` (#12894)

The
[`sycl_ext_oneapi_private_alloca`](https://github.com/intel/llvm/blob/56e9067ba69809fb6ea1fd4328456ca3a009f984/sycl/doc/extensions/experimental/sycl_ext_oneapi_private_alloca.asciidoc)
adds new functions returning a pointer to a specialization constant
length SYCL private memory allocation. This commit adds codegen support
for these functions.

The `sycl::private_alloca` function is implemented as an alias to a new
`__builtin_intel_sycl_alloca` builtin. This is needed to guarantee the
call will lower to just an `alloca` instruction defining the private
memory allocation.

This builtin lowers to a SYCL builtin call to
`__builtin_sycl_unique_stable_id` and a call to a new `llvm.sycl.alloca`
intrinsic. This intrinsic receives three arguments encoding the
specialization constant used as array length, a type hint argument
encoding the allocation element type and the required alignment.

Note the `sycl_ext_oneapi_private_alloca` extension defines two
functions: `private_alloca` and `aligned_private_alloca`. This commit
adds codegen support only for the first signature, but already prepares
support for the aligned flavor by adding an argument encoding the memory
allocation alignment to the `llvm.sycl.alloca` intrinsic.

The intrinsic is needed as generating an `alloca` instruction right away
would lead to optimization passes converting the size argument, which
can be of any integral type, and thus difficulting lowering to a single
SPIR-V `OpVariable` later in the pipeline.

---------

Signed-off-by: Victor Perez <victor.perez@codeplay.com>
  • Loading branch information
victor-eds authored Mar 8, 2024
1 parent 03f61fc commit a105055
Show file tree
Hide file tree
Showing 20 changed files with 737 additions and 30 deletions.
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1450,12 +1450,12 @@ def SYCLType: InheritableAttr {
"specialization_id", "kernel_handler", "buffer_location",
"no_alias", "accessor_property_list", "group",
"private_memory", "aspect", "annotated_ptr", "annotated_arg",
"stream", "sampler", "host_pipe"],
"stream", "sampler", "host_pipe", "multi_ptr"],
["accessor", "local_accessor",
"specialization_id", "kernel_handler", "buffer_location",
"no_alias", "accessor_property_list", "group",
"private_memory", "aspect", "annotated_ptr", "annotated_arg",
"stream", "sampler", "host_pipe"]>];
"stream", "sampler", "host_pipe", "multi_ptr"]>];
// Only used internally by SYCL implementation
let Documentation = [InternalOnly];
}
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4635,6 +4635,12 @@ def IntelSYCLPtrAnnotation : Builtin {
let Prototype = "void(...)";
}

def IntelSYCLAlloca : Builtin {
let Spellings = ["__builtin_intel_sycl_alloca"];
let Attributes = [NoThrow, CustomTypeChecking];
let Prototype = "void *(void &)";
}

// Builtins for Intel FPGA
def IntelSYCLFPGAReg : Builtin {
let Spellings = ["__builtin_intel_fpga_reg"];
Expand Down
24 changes: 23 additions & 1 deletion clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,28 @@ def err_intel_sycl_ptr_annotation_mismatch
"a pointer"
"|a string literal or constexpr const char*}0">;

def err_intel_sycl_alloca_no_alias
: Error<"__builtin_intel_sycl_alloca cannot be used in source code. "
"Use the private_alloca alias instead">;
def err_intel_sycl_alloca_wrong_arg_count
: Error<"__builtin_intel_sycl_alloca expects to be passed a single "
"argument. Got %0">;
def err_intel_sycl_alloca_wrong_template_arg_count
: Error<"__builtin_intel_sycl_alloca expects to be passed three template "
"arguments. Got %0">;
def err_intel_sycl_alloca_wrong_arg
: Error<"__builtin_intel_sycl_alloca expects to be passed an argument of type "
"'sycl::kernel_handler &'. Got %0">;
def err_intel_sycl_alloca_wrong_type
: Error<"__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' "
"to a cv-unqualified object type. Got %0">;
def err_intel_sycl_alloca_wrong_size
: Error<"__builtin_intel_sycl_alloca must be passed a specialization "
"constant of integral value type as a template argument. Got %1 (%0)">;
def err_intel_sycl_alloca_no_size
: Error<"__builtin_intel_sycl_alloca must be passed a specialization "
"constant of integral value type as a template argument. Got %0">;

// C99 variable-length arrays
def ext_vla : Extension<"variable length arrays are a C99 feature">,
InGroup<VLAExtension>;
Expand Down Expand Up @@ -4470,7 +4492,7 @@ def err_attribute_preferred_name_arg_invalid : Error<
"argument %0 to 'preferred_name' attribute is not a typedef for "
"a specialization of %1">;
def err_attribute_builtin_alias : Error<
"%0 attribute can only be applied to a ARM, HLSL or RISC-V builtin">;
"%0 attribute can only be applied to a ARM, HLSL, SYCL or RISC-V builtin">;

// called-once attribute diagnostics.
def err_called_once_attribute_wrong_type : Error<
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -15193,6 +15193,9 @@ class Sema final {

bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *Call);
bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *Call);

private:
// We store SYCL Kernels here and handle separately -- which is a hack.
// FIXME: It would be best to refactor this.
Expand Down Expand Up @@ -15336,6 +15339,9 @@ class Sema final {
VDecl->hasGlobalStorage() &&
(VDecl->getType().getAddressSpace() == LangAS::sycl_private);
}

/// Check whether \p Ty corresponds to a SYCL type of name \p TypeName.
static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName);
};

DeductionFailureInfo
Expand Down
80 changes: 80 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5923,6 +5923,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return EmitIntelFPGAMemBuiltin(E);
case Builtin::BI__builtin_intel_sycl_ptr_annotation:
return EmitIntelSYCLPtrAnnotationBuiltin(E);
case Builtin::BI__builtin_intel_sycl_alloca:
return EmitIntelSYCLAllocaBuiltin(E, ReturnValue);
case Builtin::BI__builtin_get_device_side_mangled_name: {
auto Name = CGM.getCUDARuntime().getDeviceSideName(
cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
Expand Down Expand Up @@ -23655,6 +23657,84 @@ RValue CodeGenFunction::EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E) {
return RValue::get(Ann);
}

RValue
CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
ReturnValueSlot ReturnValue) {
const FunctionDecl *FD = E->getDirectCallee();
assert(FD && "Expecting direct call to builtin");

SourceLocation Loc = E->getExprLoc();

// Get specialization constant ID.
ValueDecl *SpecConst =
FD->getTemplateSpecializationArgs()->get(1).getAsDecl();
DeclRefExpr *Ref = DeclRefExpr::Create(
getContext(), NestedNameSpecifierLoc(), SourceLocation(), SpecConst,
/*RefersToEnclosingVariableOrCapture=*/false, E->getExprLoc(),
SpecConst->getType(), ExprValueKind::VK_LValue);
llvm::Value *UID = EmitScalarExpr(
SYCLUniqueStableIdExpr::Create(getContext(), Loc, Loc, Loc, Ref));

// Get specialization ID pointer.
llvm::Value *SpecConstPtr =
EmitLValue(Ref, clang::CodeGen::KnownNonNull).getPointer(*this);

// Get specialization constant buffer.
// TODO: When this extension supports more targets, get RTBufferPtr from input
// sycl::kernel_handler &.
llvm::Value *RTBufferPtr = llvm::ConstantPointerNull::get(
cast<llvm::PointerType>(SpecConstPtr->getType()));

// Get allocation type.
const TemplateArgumentList &TAL =
cast<ClassTemplateSpecializationDecl>(E->getType()->getAsCXXRecordDecl())
->getTemplateArgs();
QualType AllocaType = TAL.get(0).getAsType();
llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType);
unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace();
llvm::Type *AllocaTy = llvm::PointerType::get(Builder.getContext(), AllocaAS);

llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty);

llvm::Constant *Align = Builder.getInt64(
getContext().getTypeAlignInChars(AllocaType).getAsAlign().value());

llvm::Value *Allocation = [&]() {
// To implement automatic storage duration of the underlying memory object,
// insert intrinsic call before `AllocaInsertPt`. These will be lowered to
// an `alloca` or an equivalent construct in later compilation stages.
IRBuilderBase::InsertPointGuard IPG(Builder);
Builder.SetInsertPoint(AllocaInsertPt);
return Builder.CreateIntrinsic(
AllocaTy, Intrinsic::sycl_alloca,
{UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca");
}();

// Perform AS cast if needed.

constexpr int NoDecorated = 0;
llvm::APInt Decorated = TAL.get(2).getAsIntegral();
// Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private
// AS) pointer type. Perform cast if 'sycl::access::decorated::no'.
if (Decorated == NoDecorated) {
IRBuilderBase::InsertPointGuard IPG(Builder);
Builder.SetInsertPoint(getPostAllocaInsertPoint());
unsigned DestAddrSpace =
getContext().getTargetAddressSpace(LangAS::Default);
llvm::PointerType *DestTy =
llvm::PointerType::get(Builder.getContext(), DestAddrSpace);
Allocation = Builder.CreateAddrSpaceCast(Allocation, DestTy);
}

// If no slot is provided, simply return allocation.
if (ReturnValue.isNull())
return RValue::get(Allocation);

// If a slot is provided, store pointer there.
Builder.CreateStore(Allocation, ReturnValue.getValue());
return RValue::getAggregate(ReturnValue.getValue());
}

Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4446,6 +4446,8 @@ class CodeGenFunction : public CodeGenTypeCache {
RValue EmitIntelFPGAMemBuiltin(const CallExpr *E);

RValue EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E);
RValue EmitIntelSYCLAllocaBuiltin(const CallExpr *E,
ReturnValueSlot ReturnValue);

llvm::CallInst *
MaybeEmitFPBuiltinofFD(llvm::FunctionType *IRFuncTy,
Expand Down
101 changes: 101 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2773,6 +2773,16 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
if (CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
case Builtin::BI__builtin_intel_sycl_alloca:
if (!Context.getLangOpts().SYCLIsDevice) {
Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language)
<< "__builtin_intel_sycl_alloca"
<< "SYCL device";
return ExprError();
}
if (CheckIntelSYCLAllocaBuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
case Builtin::BI__builtin_intel_fpga_mem:
if (!Context.getLangOpts().SYCLIsDevice) {
Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language)
Expand Down Expand Up @@ -7487,6 +7497,97 @@ bool Sema::CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID,
return false;
}

bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) {
assert(getLangOpts().SYCLIsDevice &&
"Builtin can only be used in SYCL device code");

SourceLocation Loc = Call->getBeginLoc();

// This builtin cannot be called directly. As it needs to pass template
// arguments, this is always an alias.
const FunctionDecl *FD = Call->getDirectCallee();
assert(FD && "Builtin cannot be called from a function pointer");
if (!FD->hasAttr<BuiltinAliasAttr>()) {
Diag(Loc, diag::err_intel_sycl_alloca_no_alias);
return true;
}

// Check a single argument is passed
if (checkArgCount(*this, Call, 1))
return true;

// Check three template arguments are passed
if (const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs();
!TAL || TAL->size() != 3) {
Diag(Loc, diag::err_intel_sycl_alloca_wrong_template_arg_count)
<< (TAL ? TAL->size() : 0);
return true;
}

// Check the single argument is of type `sycl::kernel_handler &`
constexpr auto CheckArg = [](QualType Ty) {
if (!Ty->isLValueReferenceType())
return true;
Ty = Ty->getPointeeType();
return !(Ty.getQualifiers().empty() &&
isSyclType(Ty, SYCLTypeAttr::kernel_handler));
};
if (CheckArg(FD->getParamDecl(0)->getType())) {
Diag(Loc, diag::err_intel_sycl_alloca_wrong_arg)
<< FD->getParamDecl(0)->getType();
return true;
}

// Check the return type is `sycl::multi_ptr<ET,
// sycl::access::address_space::private_space, DecoratedAddress>`:
// - `ET`: non-const, non-volatile, non-void, non-function, non-reference type
constexpr auto CheckType = [](QualType RT) {
if (!isSyclType(RT, SYCLTypeAttr::multi_ptr))
return true;
// Check element type
const TemplateArgumentList &TAL =
cast<ClassTemplateSpecializationDecl>(RT->getAsRecordDecl())
->getTemplateArgs();
QualType ET = TAL.get(0).getAsType();
if (ET.isConstQualified() || ET.isVolatileQualified() || ET->isVoidType() ||
ET->isFunctionType() || ET->isReferenceType())
return true;
constexpr uint64_t PrivateAS = 0;
return TAL.get(1).getAsIntegral() != PrivateAS;
};
if (CheckType(FD->getReturnType())) {
Diag(Loc, diag::err_intel_sycl_alloca_wrong_type) << FD->getReturnType();
return true;
}

// Check size is passed as a specialization constant
constexpr auto CheckSize = [](const ASTContext &Ctx,
const TemplateArgumentList *CST) {
QualType Ty = CST->get(1).getNonTypeTemplateArgumentType();
if (Ty.isNull() || !Ty->isReferenceType())
return true;
Ty = Ty->getPointeeType();
if (!isSyclType(Ty, SYCLTypeAttr::specialization_id))
return true;
const TemplateArgumentList &TAL =
cast<ClassTemplateSpecializationDecl>(Ty->getAsCXXRecordDecl())
->getTemplateArgs();
return !TAL.get(0).getAsType()->isIntegralType(Ctx);
};
const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs();
if (CheckSize(getASTContext(), CST)) {
TemplateArgument TA = CST->get(1);
QualType Ty = TA.getNonTypeTemplateArgumentType();
if (Ty.isNull())
Diag(Loc, diag::err_intel_sycl_alloca_no_size) << TA;
else
Diag(Loc, diag::err_intel_sycl_alloca_wrong_size) << TA << Ty;
return true;
}

return false;
}

/// Given a FunctionDecl's FormatAttr, attempts to populate the FomatStringInfo
/// parameter with the FormatAttr's correct format_idx and firstDataArg.
/// Returns true when the format fits the function and the FormatStringInfo has
Expand Down
9 changes: 8 additions & 1 deletion clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8859,6 +8859,11 @@ static bool RISCVAliasValid(unsigned BuiltinID, StringRef AliasName) {
BuiltinID <= RISCV::LastRVVBuiltin;
}

static bool SYCLAliasValid(ASTContext &Context, unsigned BuiltinID) {
constexpr llvm::StringLiteral Prefix = "__builtin_intel_sycl";
return Context.BuiltinInfo.getName(BuiltinID).starts_with(Prefix);
}

static void handleBuiltinAliasAttr(Sema &S, Decl *D,
const ParsedAttr &AL) {
if (!AL.isArgIdent(0)) {
Expand All @@ -8875,11 +8880,13 @@ static void handleBuiltinAliasAttr(Sema &S, Decl *D,
bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
bool IsHLSL = S.Context.getLangOpts().HLSL;
bool IsSYCL = S.Context.getLangOpts().isSYCL();
if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
(IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
(IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||
(!IsAArch64 && !IsARM && !IsRISCV && !IsHLSL)) {
(IsSYCL && !SYCLAliasValid(S.Context, BuiltinID)) ||
(!IsAArch64 && !IsARM && !IsRISCV && !IsHLSL && !IsSYCL)) {
S.Diag(AL.getLoc(), diag::err_attribute_builtin_alias) << AL;
return;
}
Expand Down
Loading

0 comments on commit a105055

Please sign in to comment.