From 365e333dbad01382a27e16e09b1c8b581eceb0ba Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 4 Mar 2024 14:30:16 +0000 Subject: [PATCH 1/6] [SYCL][SCLA] Add CodeGen capabilities for `sycl_ext_oneapi_private_alloca` The [`sycl_ext_oneapi_private_alloca`](https://github.com/intel/llvm/blob/56e9067ba69809fb6ea1fd4328456ca3a009f984/sycl/doc/extensions/experimental/sycl_ext_oneapi_private_alloca.asciidoc) adds new functions returning a pointer to a specialization constant length SYCL private memory allocation. This commit adds codegen support for these functions. The `sycl::private_alloca` function is implemented as an alias to a new `__builtin_intel_sycl_alloca` builtin. This is needed to guarantee the call will lower to just an `alloca` instruction defining the private memory allocation. This builtin lowers to a SYCL builtin call to `__builtin_sycl_unique_stable_id` and a call to a new `llvm.sycl.alloca` intrinsic. This intrinsic receives three arguments encoding the specialization constant used as array length, a type hint argument encoding the allocation element type and the required alignment. Note the `sycl_ext_oneapi_private_alloca` extension defines two functions: `private_alloca` and `aligned_private_alloca`. This commit adds codegen support only for the first signature, but already prepares support for the aligned flavor by adding an argument encoding the memory allocation alignment to the `llvm.sycl.alloca` intrinsic. The intrinsic is needed as generating an `alloca` instruction right away would lead to optimization passes converting the size argument, which can be of any integral type, and thus difficulting lowering to a single SPIR-V `OpVariable` later in the pipeline. Signed-off-by: Victor Perez --- clang/include/clang/Basic/Attr.td | 4 +- clang/include/clang/Basic/Builtins.td | 6 + .../clang/Basic/DiagnosticSemaKinds.td | 24 +++- clang/include/clang/Sema/Sema.h | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 72 ++++++++++++ clang/lib/CodeGen/CodeGenFunction.h | 2 + clang/lib/Sema/SemaChecking.cpp | 102 ++++++++++++++++ clang/lib/Sema/SemaDeclAttr.cpp | 9 +- clang/lib/Sema/SemaSYCL.cpp | 44 +++---- .../CodeGenSYCL/Inputs/private_alloca.hpp | 33 ++++++ clang/test/CodeGenSYCL/Inputs/sycl.hpp | 106 ++++++++++++++++- clang/test/CodeGenSYCL/builtin-alloca.cpp | 86 ++++++++++++++ clang/test/SemaSYCL/Inputs/sycl.hpp | 94 ++++++++++++++- .../SemaSYCL/builtin-alloca-errors-device.cpp | 111 ++++++++++++++++++ .../SemaSYCL/builtin-alloca-errors-host.cpp | 18 +++ clang/test/SemaSYCL/builtin-alloca.cpp | 26 ++++ clang/test/SemaSYCL/sycl-type-attr-ast.cpp | 3 + llvm/docs/LangRef.rst | 45 +++++++ llvm/include/llvm/IR/Intrinsics.td | 11 ++ 19 files changed, 772 insertions(+), 30 deletions(-) create mode 100644 clang/test/CodeGenSYCL/Inputs/private_alloca.hpp create mode 100644 clang/test/CodeGenSYCL/builtin-alloca.cpp create mode 100644 clang/test/SemaSYCL/builtin-alloca-errors-device.cpp create mode 100644 clang/test/SemaSYCL/builtin-alloca-errors-host.cpp create mode 100644 clang/test/SemaSYCL/builtin-alloca.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 73b0e74a221c7..87cba9f4563ab 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1450,12 +1450,12 @@ def SYCLType: InheritableAttr { "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", - "stream", "sampler", "host_pipe"], + "stream", "sampler", "host_pipe", "multi_ptr"], ["accessor", "local_accessor", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", - "stream", "sampler", "host_pipe"]>]; + "stream", "sampler", "host_pipe", "multi_ptr"]>]; // Only used internally by SYCL implementation let Documentation = [InternalOnly]; } diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 18b377d0d579c..d0f71bd4c18c1 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4635,6 +4635,12 @@ def IntelSYCLPtrAnnotation : Builtin { let Prototype = "void(...)"; } +def IntelSYCLAlloca : Builtin { + let Spellings = ["__builtin_intel_sycl_alloca"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "void *(void &)"; +} + // Builtins for Intel FPGA def IntelSYCLFPGAReg : Builtin { let Spellings = ["__builtin_intel_fpga_reg"]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 12dca5d46ee15..1b1b5b7ea9347 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -177,6 +177,28 @@ def err_intel_sycl_ptr_annotation_mismatch "a pointer" "|a string literal or constexpr const char*}0">; +def err_intel_sycl_alloca_no_alias + : Error<"__builtin_intel_sycl_alloca cannot be used in source code. " + "Use the private_alloca alias instead.">; +def err_intel_sycl_alloca_wrong_arg_count + : Error<"__builtin_intel_sycl_alloca expects to be passed a single " + "argument. Got %0">; +def err_intel_sycl_alloca_wrong_template_arg_count + : Error<"__builtin_intel_sycl_alloca expects to be passed three template " + "arguments. Got %0">; +def err_intel_sycl_alloca_wrong_arg + : Error<"__builtin_intel_sycl_alloca expects to be passed an argument of type " + "'sycl::kernel_handler &'. Got %0">; +def err_intel_sycl_alloca_wrong_type + : Error<"__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' " + "to a cv-unqualified object type. Got %0">; +def err_intel_sycl_alloca_wrong_size + : Error<"__builtin_intel_sycl_alloca must be passed a specialization " + "constant of integral value type as a template argument. Got %1 (%0)">; +def err_intel_sycl_alloca_no_size + : Error<"__builtin_intel_sycl_alloca must be passed a specialization " + "constant of integral value type as a template argument. Got %0">; + // C99 variable-length arrays def ext_vla : Extension<"variable length arrays are a C99 feature">, InGroup; @@ -4470,7 +4492,7 @@ def err_attribute_preferred_name_arg_invalid : Error< "argument %0 to 'preferred_name' attribute is not a typedef for " "a specialization of %1">; def err_attribute_builtin_alias : Error< - "%0 attribute can only be applied to a ARM, HLSL or RISC-V builtin">; + "%0 attribute can only be applied to a ARM, HLSL, SYCL or RISC-V builtin">; // called-once attribute diagnostics. def err_called_once_attribute_wrong_type : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 247c5016451af..2707699228a75 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -15193,6 +15193,9 @@ class Sema final { bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, CallExpr *Call); + bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *Call); + private: // We store SYCL Kernels here and handle separately -- which is a hack. // FIXME: It would be best to refactor this. @@ -15336,6 +15339,9 @@ class Sema final { VDecl->hasGlobalStorage() && (VDecl->getType().getAddressSpace() == LangAS::sycl_private); } + + /// Check whether \p Ty corresponds to a SYCL type of name \p TypeName. + static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName); }; DeductionFailureInfo diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6b96c42a63b7d..b09d0edfb263a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5923,6 +5923,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return EmitIntelFPGAMemBuiltin(E); case Builtin::BI__builtin_intel_sycl_ptr_annotation: return EmitIntelSYCLPtrAnnotationBuiltin(E); + case Builtin::BI__builtin_intel_sycl_alloca: + return EmitIntelSYCLAllocaBuiltin(E, ReturnValue); case Builtin::BI__builtin_get_device_side_mangled_name: { auto Name = CGM.getCUDARuntime().getDeviceSideName( cast(E->getArg(0)->IgnoreImpCasts())->getDecl()); @@ -23655,6 +23657,76 @@ RValue CodeGenFunction::EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E) { return RValue::get(Ann); } +RValue +CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, + ReturnValueSlot ReturnValue) { + const FunctionDecl *FD = E->getDirectCallee(); + assert(FD && "Expecting direct call to builtin"); + + SourceLocation Loc = E->getExprLoc(); + + // Get specialization constant ID. + ValueDecl *SpecConst = + FD->getTemplateSpecializationArgs()->get(1).getAsDecl(); + DeclRefExpr *Ref = DeclRefExpr::Create( + getContext(), NestedNameSpecifierLoc(), SourceLocation(), SpecConst, + /*RefersToEnclosingVariableOrCapture=*/false, E->getExprLoc(), + SpecConst->getType(), ExprValueKind::VK_LValue); + llvm::Value *UID = EmitScalarExpr( + SYCLUniqueStableIdExpr::Create(getContext(), Loc, Loc, Loc, Ref)); + + // Get specialization ID pointer. + llvm::Value *SpecConstPtr = + EmitLValue(Ref, clang::CodeGen::KnownNonNull).getPointer(*this); + + // Get specialization constant buffer. + // TODO: When this extension supports more targets, get RTBufferPtr from input + // sycl::kernel_handler &. + llvm::Value *RTBufferPtr = llvm::ConstantPointerNull::get( + cast(SpecConstPtr->getType())); + + // Get allocation type. + const TemplateArgumentList &TAL = + cast(E->getType()->getAsCXXRecordDecl()) + ->getTemplateArgs(); + QualType AllocaType = TAL.get(0).getAsType(); + llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType); + unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace(); + llvm::Type *AllocaTy = Ty->getPointerTo(AllocaAS); + + llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty); + + llvm::Constant *Align = Builder.getInt64( + getContext().getTypeAlignInChars(AllocaType).getAsAlign().value()); + + llvm::Value *Allocation = Builder.CreateIntrinsic( + AllocaTy, Intrinsic::sycl_alloca, + {UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca"); + + // Perform AS cast if needed. + + constexpr int NoDecorated = 0; + llvm::APInt Decorated = TAL.get(2).getAsIntegral(); + // Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private + // AS) pointer type. Perform cast if 'sycl::access::decorated::no'. + if (Decorated == NoDecorated) { + unsigned DestAddrSpace = + getContext().getTargetAddressSpace(LangAS::Default); + llvm::PointerType *DestTy = + llvm::PointerType::get(Builder.getContext(), DestAddrSpace); + Allocation = Builder.CreateAddrSpaceCast(Allocation, DestTy); + } + + // If no slot is provided, simply return allocation. + if (ReturnValue.isNull()) { + return RValue::get(Allocation); + } + + // If a slot is provided, store pointer there. + Builder.CreateStore(Allocation, ReturnValue.getValue()); + return RValue::getAggregate(ReturnValue.getValue()); +} + Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 491d1bf8898dd..7bb3ded2e4273 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4446,6 +4446,8 @@ class CodeGenFunction : public CodeGenTypeCache { RValue EmitIntelFPGAMemBuiltin(const CallExpr *E); RValue EmitIntelSYCLPtrAnnotationBuiltin(const CallExpr *E); + RValue EmitIntelSYCLAllocaBuiltin(const CallExpr *E, + ReturnValueSlot ReturnValue); llvm::CallInst * MaybeEmitFPBuiltinofFD(llvm::FunctionType *IRFuncTy, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 268661ed61cc6..da965d40a9704 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2773,6 +2773,16 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case Builtin::BI__builtin_intel_sycl_alloca: + if (!Context.getLangOpts().SYCLIsDevice) { + Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language) + << "__builtin_intel_sycl_alloca" + << "SYCL device"; + return ExprError(); + } + if (CheckIntelSYCLAllocaBuiltinFunctionCall(BuiltinID, TheCall)) + return ExprError(); + break; case Builtin::BI__builtin_intel_fpga_mem: if (!Context.getLangOpts().SYCLIsDevice) { Diag(TheCall->getBeginLoc(), diag::err_builtin_requires_language) @@ -7487,6 +7497,98 @@ bool Sema::CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, return false; } +bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { + assert(getLangOpts().SYCLIsDevice && + "Builtin can only be used in SYCL device code"); + + SourceLocation Loc = Call->getBeginLoc(); + + // This builtin cannot be called directly. As it needs to pass template + // arguments, this is always an alias. + const FunctionDecl *FD = Call->getDirectCallee(); + assert(FD && "Builtin cannot be called from a function pointer"); + if (!FD->hasAttr()) { + Diag(Loc, diag::err_intel_sycl_alloca_no_alias); + return true; + } + + // Check a single argument is passed + if (checkArgCount(*this, Call, 1)) + return true; + + // Check three template arguments are passed + if (const TemplateArgumentList *TAL = FD->getTemplateSpecializationArgs(); + !TAL || TAL->size() != 3) { + Diag(Loc, diag::err_intel_sycl_alloca_wrong_template_arg_count) + << (TAL ? TAL->size() : 0); + return true; + } + + // Check the single argument is of type `sycl::kernel_handler &` + constexpr auto CheckArg = [](QualType Ty) { + if (!Ty->isLValueReferenceType()) + return true; + Ty = Ty->getPointeeType(); + return !(Ty.getQualifiers().empty() && + isSyclType(Ty, SYCLTypeAttr::kernel_handler)); + }; + if (CheckArg(FD->getParamDecl(0)->getType())) { + Diag(Loc, diag::err_intel_sycl_alloca_wrong_arg) + << FD->getParamDecl(0)->getType(); + return true; + } + + // Check the return type is `sycl::multi_ptr`: + // - `ET`: non-const, non-volatile, non-void, non-function, non-reference type + constexpr auto CheckType = [](QualType RT) { + if (!isSyclType(RT, SYCLTypeAttr::multi_ptr)) + return true; + // Check element type + const TemplateArgumentList &TAL = + cast(RT->getAsRecordDecl()) + ->getTemplateArgs(); + QualType ET = TAL.get(0).getAsType(); + if (ET.isConstQualified() || ET.isVolatileQualified() || ET->isVoidType() || + ET->isFunctionType() || ET->isReferenceType()) + return true; + constexpr uint64_t PrivateAS = 0; + return TAL.get(1).getAsIntegral() != PrivateAS; + }; + if (CheckType(FD->getReturnType())) { + Diag(Loc, diag::err_intel_sycl_alloca_wrong_type) << FD->getReturnType(); + return true; + } + + // Check size is passed as a specialization constant + constexpr auto CheckSize = [](const ASTContext &Ctx, + const TemplateArgumentList *CST) { + QualType Ty = CST->get(1).getNonTypeTemplateArgumentType(); + if (Ty.isNull() || !Ty->isReferenceType()) + return true; + Ty = Ty->getPointeeType(); + if (!isSyclType(Ty, SYCLTypeAttr::specialization_id)) + return true; + const TemplateArgumentList &TAL = + cast(Ty->getAsCXXRecordDecl()) + ->getTemplateArgs(); + return !TAL.get(0).getAsType()->isIntegralType(Ctx); + }; + const TemplateArgumentList *CST = FD->getTemplateSpecializationArgs(); + if (CheckSize(getASTContext(), CST)) { + TemplateArgument TA = CST->get(1); + QualType Ty = TA.getNonTypeTemplateArgumentType(); + if (Ty.isNull()) { + Diag(Loc, diag::err_intel_sycl_alloca_no_size) << TA; + } else { + Diag(Loc, diag::err_intel_sycl_alloca_wrong_size) << TA << Ty; + } + return true; + } + + return false; +} + /// Given a FunctionDecl's FormatAttr, attempts to populate the FomatStringInfo /// parameter with the FormatAttr's correct format_idx and firstDataArg. /// Returns true when the format fits the function and the FormatStringInfo has diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 9f40c14905059..a970b8c850abe 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8859,6 +8859,11 @@ static bool RISCVAliasValid(unsigned BuiltinID, StringRef AliasName) { BuiltinID <= RISCV::LastRVVBuiltin; } +static bool SYCLAliasValid(ASTContext &Context, unsigned BuiltinID) { + constexpr llvm::StringLiteral Prefix = "__builtin_intel_sycl"; + return Context.BuiltinInfo.getName(BuiltinID).starts_with(Prefix); +} + static void handleBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.isArgIdent(0)) { @@ -8875,11 +8880,13 @@ static void handleBuiltinAliasAttr(Sema &S, Decl *D, bool IsARM = S.Context.getTargetInfo().getTriple().isARM(); bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV(); bool IsHLSL = S.Context.getLangOpts().HLSL; + bool IsSYCL = S.Context.getLangOpts().isSYCL(); if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) || (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) && !ArmCdeAliasValid(BuiltinID, AliasName)) || (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) || - (!IsAArch64 && !IsARM && !IsRISCV && !IsHLSL)) { + (IsSYCL && !SYCLAliasValid(S.Context, BuiltinID)) || + (!IsAArch64 && !IsARM && !IsRISCV && !IsHLSL && !IsSYCL)) { S.Diag(AL.getLoc(), diag::err_attribute_builtin_alias) << AL; return; } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 929adf5398d84..89705ca50ab83 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -69,7 +69,7 @@ static constexpr llvm::StringLiteral LibstdcxxFailedAssertion = "__failed_assertion"; constexpr unsigned MaxKernelArgsSize = 2048; -static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName) { +bool Sema::isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName) { const auto *RD = Ty->getAsCXXRecordDecl(); if (!RD) return false; @@ -87,8 +87,8 @@ static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName) { } static bool isSyclAccessorType(QualType Ty) { - return isSyclType(Ty, SYCLTypeAttr::accessor) || - isSyclType(Ty, SYCLTypeAttr::local_accessor); + return Sema::isSyclType(Ty, SYCLTypeAttr::accessor) || + Sema::isSyclType(Ty, SYCLTypeAttr::local_accessor); } // FIXME: Accessor property lists should be modified to use compile-time @@ -948,7 +948,7 @@ class MarkWIScopeFnVisitor : public RecursiveASTVisitor { // not a direct call - continue search return true; QualType Ty = Ctx.getRecordType(Call->getRecordDecl()); - if (!isSyclType(Ty, SYCLTypeAttr::group)) + if (!Sema::isSyclType(Ty, SYCLTypeAttr::group)) // not a member of sycl::group - continue search return true; auto Name = Callee->getName(); @@ -967,7 +967,7 @@ class MarkWIScopeFnVisitor : public RecursiveASTVisitor { }; static bool isSYCLPrivateMemoryVar(VarDecl *VD) { - return isSyclType(VD->getType(), SYCLTypeAttr::private_memory); + return Sema::isSyclType(VD->getType(), SYCLTypeAttr::private_memory); } static void addScopeAttrToLocalVars(CXXMethodDecl &F) { @@ -1046,7 +1046,7 @@ static ParamDesc makeParamDesc(ASTContext &Ctx, StringRef Name, QualType Ty) { /// \return the target of given SYCL accessor type static target getAccessTarget(QualType FieldTy, const ClassTemplateSpecializationDecl *AccTy) { - if (isSyclType(FieldTy, SYCLTypeAttr::local_accessor)) + if (Sema::isSyclType(FieldTy, SYCLTypeAttr::local_accessor)) return local; return static_cast( @@ -1108,7 +1108,7 @@ static ParmVarDecl *getSyclKernelHandlerArg(FunctionDecl *KernelCallerFunc) { // Specialization constants in SYCL 2020 are not captured by lambda and // accessed through new optional lambda argument kernel_handler auto IsHandlerLambda = [](ParmVarDecl *PVD) { - return isSyclType(PVD->getType(), SYCLTypeAttr::kernel_handler); + return Sema::isSyclType(PVD->getType(), SYCLTypeAttr::kernel_handler); }; assert(llvm::count_if(KernelCallerFunc->parameters(), IsHandlerLambda) <= 1 && @@ -1629,7 +1629,7 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { Loc, diag::err_sycl_invalid_accessor_property_template_param); QualType PropListTy = PropList.getAsType(); - if (!isSyclType(PropListTy, SYCLTypeAttr::accessor_property_list)) + if (!Sema::isSyclType(PropListTy, SYCLTypeAttr::accessor_property_list)) return SemaRef.Diag( Loc, diag::err_sycl_invalid_accessor_property_template_param); @@ -1693,9 +1693,9 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { // Annotated pointers and annotated arguments must be captured // directly by the SYCL kernel. - if ((isSyclType(Ty, SYCLTypeAttr::annotated_ptr) || - isSyclType(Ty, SYCLTypeAttr::annotated_arg)) && - (StructFieldDepth > 0 || StructBaseDepth > 0)) + if ((Sema::isSyclType(Ty, SYCLTypeAttr::annotated_ptr) || + Sema::isSyclType(Ty, SYCLTypeAttr::annotated_arg)) && + (StructFieldDepth > 0 || StructBaseDepth > 0)) return SemaRef.Diag(Loc.getBegin(), diag::err_bad_kernel_param_data_members) << Ty << /*Struct*/ 1; @@ -2283,7 +2283,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { handleAccessorPropertyList(Params.back(), RecordDecl, Loc); // If "accessor" type check if read only - if (isSyclType(FieldTy, SYCLTypeAttr::accessor)) { + if (Sema::isSyclType(FieldTy, SYCLTypeAttr::accessor)) { // Get access mode of accessor. const auto *AccessorSpecializationDecl = cast(RecordDecl); @@ -3732,18 +3732,18 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, CurOffset + offsetOf(FD, FieldTy)); - } else if (isSyclType(FieldTy, SYCLTypeAttr::stream)) { + } else if (Sema::isSyclType(FieldTy, SYCLTypeAttr::stream)) { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); - } else if (isSyclType(FieldTy, SYCLTypeAttr::sampler) || - isSyclType(FieldTy, SYCLTypeAttr::annotated_ptr) || - isSyclType(FieldTy, SYCLTypeAttr::annotated_arg)) { + } else if (Sema::isSyclType(FieldTy, SYCLTypeAttr::sampler) || + Sema::isSyclType(FieldTy, SYCLTypeAttr::annotated_ptr) || + Sema::isSyclType(FieldTy, SYCLTypeAttr::annotated_arg)) { CXXMethodDecl *InitMethod = getMethodByName(ClassTy, InitMethodName); assert(InitMethod && "type must have __init method"); const ParmVarDecl *InitArg = InitMethod->getParamDecl(0); assert(InitArg && "Init method must have arguments"); QualType T = InitArg->getType(); SYCLIntegrationHeader::kernel_param_kind_t ParamKind = - isSyclType(FieldTy, SYCLTypeAttr::sampler) + Sema::isSyclType(FieldTy, SYCLTypeAttr::sampler) ? SYCLIntegrationHeader::kind_sampler : (T->isPointerType() ? SYCLIntegrationHeader::kind_pointer : SYCLIntegrationHeader::kind_std_layout); @@ -5447,8 +5447,8 @@ void SYCLIntegrationFooter::addVarDecl(const VarDecl *VD) { if (isa(VD)) return; // Step 1: ensure that this is of the correct type template specialization. - if (!isSyclType(VD->getType(), SYCLTypeAttr::specialization_id) && - !isSyclType(VD->getType(), SYCLTypeAttr::host_pipe) && + if (!Sema::isSyclType(VD->getType(), SYCLTypeAttr::specialization_id) && + !Sema::isSyclType(VD->getType(), SYCLTypeAttr::host_pipe) && !S.isTypeDecoratedWithDeclAttribute( VD->getType())) { // Handle the case where this could be a deduced type, such as a deduction @@ -5644,8 +5644,8 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { // Skip if this isn't a SpecIdType, DeviceGlobal, or HostPipe. This // can happen if it was a deduced type. - if (!isSyclType(VD->getType(), SYCLTypeAttr::specialization_id) && - !isSyclType(VD->getType(), SYCLTypeAttr::host_pipe) && + if (!Sema::isSyclType(VD->getType(), SYCLTypeAttr::specialization_id) && + !Sema::isSyclType(VD->getType(), SYCLTypeAttr::host_pipe) && !S.isTypeDecoratedWithDeclAttribute( VD->getType())) continue; @@ -5676,7 +5676,7 @@ bool SYCLIntegrationFooter::emit(raw_ostream &OS) { DeviceGlobOS << SYCLUniqueStableIdExpr::ComputeName(S.getASTContext(), VD); DeviceGlobOS << "\");\n"; - } else if (isSyclType(VD->getType(), SYCLTypeAttr::host_pipe)) { + } else if (Sema::isSyclType(VD->getType(), SYCLTypeAttr::host_pipe)) { HostPipesEmitted = true; HostPipesOS << "host_pipe_map::add("; HostPipesOS << "(void *)&"; diff --git a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp new file mode 100644 index 0000000000000..49e4b7cd65538 --- /dev/null +++ b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include "./sycl.hpp" + +namespace sycl { +inline namespace _V1 { +namespace ext { +namespace oneapi { +namespace experimental { + +#ifdef __SYCL_DEVICE_ONLY__ + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +multi_ptr private_alloca(kernel_handler &h); + +#else + +template +multi_ptr +private_alloca(kernel_handler &h) { + throw "sycl::ext::oneapi::experimental::private_alloca is not supported in " + "the host"; +} + +#endif + +} // namespace experimental +} // namesapce oneapi +} // namespace ext +} // namespace _V1 +} // namespace sycl diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 4149cacad30c1..9d05b0645778c 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -2,6 +2,18 @@ #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) #define __SYCL_TYPE(x) [[__sycl_detail__::sycl_type(x)]] +#define __SYCL_BUILTIN_ALIAS(X) [[clang::builtin_alias(X)]] + +#ifdef SYCL_EXTERNAL +#define __DPCPP_SYCL_EXTERNAL SYCL_EXTERNAL +#else +#ifdef __SYCL_DEVICE_ONLY__ +#define __DPCPP_SYCL_EXTERNAL __attribute__((sycl_device)) +#else +#define __DPCPP_SYCL_EXTERNAL +#define SYCL_EXTERNAL +#endif +#endif extern "C" int printf(const char* fmt, ...); @@ -69,7 +81,14 @@ enum class address_space : int { private_space = 0, global_space, constant_space, - local_space + local_space, + generic_space +}; + +enum class decorated : int { + no = 0, + yes, + legacy }; } // namespace access @@ -131,6 +150,91 @@ struct __SYCL_TYPE(buffer_location) buffer_location { } // namespace intel } // namespace ext +template +struct DecoratedType; + +template +struct DecoratedType { + using type = __attribute__((opencl_private)) ElementType; +}; + +template +struct DecoratedType { + using type = ElementType; +}; + +template +struct DecoratedType { + using type = __attribute__((opencl_global)) ElementType; +}; + +template +struct DecoratedType { +#if defined(RESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR) + using type = const __attribute__((opencl_global)) ElementType; +#else + using type = __attribute__((opencl_global)) ElementType; +#endif +}; + +// Equivalent to std::conditional +template +struct conditional { using type = T; }; + +template +struct conditional { using type = F; }; + +template +using conditional_t = typename conditional::type; + +template +class __SYCL_TYPE(multi_ptr) multi_ptr { + static constexpr bool is_decorated = + DecorateAddress == access::decorated::yes; + + using decorated_type = typename DecoratedType::type; + + static_assert(DecorateAddress != access::decorated::legacy); + static_assert(AS != access::address_space::constant_space); + +public: + using pointer = conditional_t; + + multi_ptr(typename multi_ptr::pointer Ptr) + : m_Pointer((pointer)(Ptr)) {} // #MultiPtrConstructor + pointer get() { return m_Pointer; } + + private: + pointer m_Pointer; +}; + +template +struct LegacyPointerType { + using pointer_t = typename multi_ptr::pointer; +}; + +template +struct LegacyPointerType { + using decorated_type = typename DecoratedType::type; + using pointer_t = decorated_type *; +}; + +// Legacy specialization +template +class __SYCL_TYPE(multi_ptr) multi_ptr { +public: + using pointer_t = typename LegacyPointerType::pointer_t; + + multi_ptr(typename multi_ptr::pointer Ptr) + : m_Pointer((pointer_t)(Ptr)) {} + multi_ptr(T *Ptr) : m_Pointer((pointer_t)(Ptr)) {} // #LegacyMultiPtrConstructor + pointer_t get() { return m_Pointer; } + + private: + pointer_t m_Pointer; +}; + namespace ext { namespace oneapi { namespace property { diff --git a/clang/test/CodeGenSYCL/builtin-alloca.cpp b/clang/test/CodeGenSYCL/builtin-alloca.cpp new file mode 100644 index 0000000000000..d2fe8b9be34ec --- /dev/null +++ b/clang/test/CodeGenSYCL/builtin-alloca.cpp @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-DEVICE +// RUN: %clang_cc1 -fsycl-is-host -triple x86_64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s -fcxx-exceptions\ +// RUN: | FileCheck %s --check-prefix=CHECK-HOST + +#include + +#include "Inputs/sycl.hpp" +#include "Inputs/private_alloca.hpp" + +// expected-no-diagnostics + +struct myStruct { + char a; + char b; +}; + +constexpr sycl::specialization_id size(1); +constexpr sycl::specialization_id intSize(-1); +constexpr sycl::specialization_id shortSize(1); + +// COM: On the device, for each call, we should generate a chain of: 'call @sycl.alloca.' + ('addrspacecast') + 'store'. +// COM: The 'addrspacecast' will only appear when the pointer is not decorated, i.e., `DecorateAddress == sycl::access::decorated::no`. + +// CHECK-DEVICE-LABEL: define dso_local spir_func void @_Z4testRN4sycl3_V114kernel_handlerE( +// CHECK-DEVICE-SAME: ptr addrspace(4) noundef align 1 dereferenceable(1) [[KH:%.*]]) +// CHECK-DEVICE-NEXT: entry: +// CHECK-DEVICE-NEXT: [[KH_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-DEVICE-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 +// CHECK-DEVICE-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 +// CHECK-DEVICE-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.2", align 8 +// CHECK-DEVICE-NEXT: [[KH_ADDR_ASCAST:%.*]] = addrspacecast ptr [[KH_ADDR]] to ptr addrspace(4) +// CHECK-DEVICE-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4) +// CHECK-DEVICE-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4) +// CHECK-DEVICE-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4) +// CHECK-DEVICE-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8 +// CHECK-DEVICE-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) +// CHECK-DEVICE-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8 +// CHECK-DEVICE-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 4) +// CHECK-DEVICE-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8 +// CHECK-DEVICE-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 1) +// CHECK-DEVICE-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) +// CHECK-DEVICE-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8 +// CHECK-DEVICE-NEXT: ret void + +// COM: On the host, each call should be materialized... + +// CHECK-HOST-LABEL: define dso_local void @_Z4testRN4sycl3_V114kernel_handlerE( +// CHECK-HOST-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[KH:%.*]]) +// CHECK-HOST-NEXT: entry: +// CHECK-HOST-NEXT: [[KH_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-HOST-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 +// CHECK-HOST-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 +// CHECK-HOST-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.1", align 8 +// CHECK-HOST-NEXT: store ptr [[KH]], ptr [[KH_ADDR]], align 8 +// CHECK-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[KH_ADDR]], align 8 +// CHECK-HOST-NEXT: [[CALL:%.*]] = call ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaIdTnRDaL_ZL4sizeELNS0_6access9decoratedE1EEENS0_9multi_ptrIT_LNS6_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) +// CHECK-HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds %"class.sycl::_V1::multi_ptr", ptr [[PTR0]], i32 0, i32 0 +// CHECK-HOST-NEXT: store ptr [[CALL]], ptr [[COERCE_DIVE]], align 8 +// CHECK-HOST-NEXT: [[TMP1:%.*]] = load ptr, ptr [[KH_ADDR]], align 8 +// CHECK-HOST-NEXT: [[CALL1:%.*]] = call ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaIiTnRDaL_ZL7intSizeELNS0_6access9decoratedE2EEENS0_9multi_ptrIT_LNS6_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE(ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]]) +// CHECK-HOST-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds %"class.sycl::_V1::multi_ptr.0", ptr [[PTR1]], i32 0, i32 0 +// CHECK-HOST-NEXT: store ptr [[CALL1]], ptr [[COERCE_DIVE2]], align 8 +// CHECK-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[KH_ADDR]], align 8 +// CHECK-HOST-NEXT: [[CALL3:%.*]] = call ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaI8myStructTnRDaL_ZL7intSizeELNS0_6access9decoratedE0EEENS0_9multi_ptrIT_LNS7_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE(ptr noundef nonnull align 1 dereferenceable(1) [[TMP2]]) +// CHECK-HOST-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds %"class.sycl::_V1::multi_ptr.1", ptr [[PTR2]], i32 0, i32 0 +// CHECK-HOST-NEXT: store ptr [[CALL3]], ptr [[COERCE_DIVE4]], align 8 +// CHECK-HOST-NEXT: ret void +// +SYCL_EXTERNAL void test(sycl::kernel_handler &kh) { + auto ptr0 = sycl::ext::oneapi::experimental::private_alloca(kh); + auto ptr1 = sycl::ext::oneapi::experimental::private_alloca(kh); + auto ptr2 = sycl::ext::oneapi::experimental::private_alloca(kh); +} + +// COM: And the body function should be simply a throw + +// CHECK-HOST-LABEL: define internal ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaIdTnRDaL_ZL4sizeELNS0_6access9decoratedE1EEENS0_9multi_ptrIT_LNS6_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE +// CHECK-HOST-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[H:%.*]]) +// CHECK-HOST-NEXT: entry: +// CHECK-HOST-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-HOST-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 +// CHECK-HOST-NEXT: [[EXCEPTION:%.*]] = call ptr @__cxa_allocate_exception(i64 8) +// CHECK-HOST-NEXT: store ptr @.str, ptr [[EXCEPTION]], align 16 +// CHECK-HOST-NEXT: call void @__cxa_throw(ptr [[EXCEPTION]], ptr @_ZTIPKc, ptr null) +// CHECK-HOST-NEXT: unreachable diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 3fc13d86201de..7d355a3848276 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -2,6 +2,7 @@ #define SYCL_HPP #define __SYCL_TYPE(x) [[__sycl_detail__::sycl_type(x)]] +#define __SYCL_BUILTIN_ALIAS(X) [[clang::builtin_alias(X)]] // Shared code for SYCL tests @@ -38,6 +39,12 @@ enum class address_space : int { local_space, generic_space }; + +enum class decorated : int { + no = 0, + yes, + legacy, +}; } // namespace access // Dummy aspect enum with limited enumerators @@ -272,6 +279,25 @@ class __SYCL_TYPE(kernel_handler) kernel_handler { void __init_specialization_constants_buffer(char *specialization_constants_buffer) {} }; +template class __SYCL_TYPE(specialization_id) specialization_id { +public: + using value_type = T; + + template + explicit constexpr specialization_id(Args &&...args) + : MDefaultValue(args...) {} + + specialization_id(const specialization_id &rhs) = delete; + specialization_id(specialization_id &&rhs) = delete; + specialization_id &operator=(const specialization_id &rhs) = delete; + specialization_id &operator=(specialization_id &&rhs) = delete; + + T getDefaultValue() const { return MDefaultValue; } + +private: + T MDefaultValue; +}; + // Used when parallel_for range is rounded-up. template class __pf_kernel_wrapper; @@ -383,13 +409,56 @@ struct DecoratedType { using type = __attribute__((opencl_global)) ElementType; }; -template class multi_ptr { - using pointer_t = typename DecoratedType::type *; - pointer_t m_Pointer; +// Equivalent to std::conditional +template +struct conditional { using type = T; }; + +template +struct conditional { using type = F; }; + +template +using conditional_t = typename conditional::type; + +template +class __SYCL_TYPE(multi_ptr) multi_ptr { + static constexpr bool is_decorated = + DecorateAddress == access::decorated::yes; + + using decorated_type = typename DecoratedType::type; + + static_assert(DecorateAddress != access::decorated::legacy); + static_assert(AS != access::address_space::constant_space); + +public: + using pointer = conditional_t; + multi_ptr(typename multi_ptr::pointer Ptr) + : m_Pointer((pointer)(Ptr)) {} + pointer get() { return m_Pointer; } + + private: + pointer m_Pointer; +}; + +template +struct LegacyPointerType { + using pointer_t = typename multi_ptr::pointer; +}; + +// Legacy specialization +template +class __SYCL_TYPE(multi_ptr) multi_ptr { public: + using pointer_t = typename LegacyPointerType::pointer_t; + + multi_ptr(typename multi_ptr::pointer Ptr) + : m_Pointer((pointer_t)(Ptr)) {} multi_ptr(T *Ptr) : m_Pointer((pointer_t)(Ptr)) {} // #MultiPtrConstructor pointer_t get() { return m_Pointer; } + +private: + pointer_t m_Pointer; }; namespace ext { @@ -411,6 +480,25 @@ class __attribute__((sycl_special_class)) __SYCL_TYPE(annotated_ptr) annotated_p #endif }; +#ifdef __SYCL_DEVICE_ONLY__ + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +multi_ptr +private_alloca(kernel_handler &h); + +#else + +template +multi_ptr +private_alloca(kernel_handler &h) { + throw "sycl::ext::oneapi::experimental::private_alloca is not supported in the host"; +} + +#endif + } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp b/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp new file mode 100644 index 0000000000000..148f691f46196 --- /dev/null +++ b/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp @@ -0,0 +1,111 @@ +// RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify -Wpedantic %s + +#include + +#include "Inputs/sycl.hpp" + +constexpr sycl::specialization_id size(1); +constexpr sycl::specialization_id badsize(1); + +struct wrapped_int { int a; }; + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_0(); + +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_1(sycl::kernel_handler &h); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_2(sycl::kernel_handler &h); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_3(const wrapped_int &); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_4(sycl::kernel_handler); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_5(const sycl::kernel_handler &); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_6(sycl::kernel_handler &); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_7(sycl::kernel_handler &); + +// expected-error@+4 {{cannot redeclare builtin function 'private_alloca'}} +// expected-note@+3 {{'private_alloca' is a builtin with type 'multi_ptr (kernel_handler &)'}} +template <> +sycl::multi_ptr +sycl::ext::oneapi::experimental::private_alloca(sycl::kernel_handler &h); + +void test(sycl::kernel_handler &h) { + // expected-error@+1 {{builtin functions must be directly called}} + auto funcPtr = sycl::ext::oneapi::experimental::private_alloca; + + // expected-error@+1 {{__builtin_intel_sycl_alloca cannot be used in source code. Use the private_alloca alias instead.}} + __builtin_intel_sycl_alloca(h); + + // expected-error@+1 {{too few arguments to function call, expected 1, have 0}} + private_alloca_bad_0(); + + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed three template arguments. Got 0}} + private_alloca_bad_1(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed three template arguments. Got 1}} + private_alloca_bad_2(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'const wrapped_int &'}} + private_alloca_bad_3(wrapped_int{10}); + + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'sycl::kernel_handler'}} + private_alloca_bad_4(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca expects to be passed an argument of type 'sycl::kernel_handler &'. Got 'const sycl::kernel_handler &'}} + private_alloca_bad_5(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' to a cv-unqualified object type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' to a cv-unqualified object type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' to a cv-unqualified object type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' to a cv-unqualified object type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' to a cv-unqualified object type. Got 'multi_ptr'}} + sycl::ext::oneapi::experimental::private_alloca(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca can only return 'sycl::private_ptr' to a cv-unqualified object type. Got 'sycl::multi_ptr'}} + private_alloca_bad_6(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca must be passed a specialization constant of integral value type as a template argument. Got 'int'}} + private_alloca_bad_7(h); + + // expected-error@+1 {{__builtin_intel_sycl_alloca must be passed a specialization constant of integral value type as a template argument. Got 'const sycl::specialization_id &'}} + sycl::ext::oneapi::experimental::private_alloca(h); +} diff --git a/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp b/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp new file mode 100644 index 0000000000000..cc4949f90a908 --- /dev/null +++ b/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -fsyntax-only -fsycl-is-host -triple x86_64-unknown-unknown -verify -Wpedantic -fcxx-exceptions %s + +#include + +#include "Inputs/sycl.hpp" + +constexpr sycl::specialization_id size(1); + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +sycl::multi_ptr +private_alloca_bad_0(sycl::kernel_handler &h); + +void test(sycl::kernel_handler &h) { + // expected-error@+1 {{'__builtin_intel_sycl_alloca' is only available in SYCL device}} + private_alloca_bad_0(h); +} diff --git a/clang/test/SemaSYCL/builtin-alloca.cpp b/clang/test/SemaSYCL/builtin-alloca.cpp new file mode 100644 index 0000000000000..8ce0a91ce107f --- /dev/null +++ b/clang/test/SemaSYCL/builtin-alloca.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify -Wpedantic %s +// RUN: %clang_cc1 -fsyntax-only -fsycl-is-host -triple x86_64-unknown-unknown -verify -Wpedantic -fcxx-exceptions %s + +#include + +#include "Inputs/sycl.hpp" + +// expected-no-diagnostics + +struct myStruct { + int a; + int b; +}; + +constexpr sycl::specialization_id size(1); +constexpr sycl::specialization_id intSize(-1); +constexpr sycl::specialization_id shortSize(1); + +void basic_test(sycl::kernel_handler &kh) { + sycl::ext::oneapi::experimental::private_alloca< + int, size, sycl::access::decorated::yes>(kh); + sycl::ext::oneapi::experimental::private_alloca< + float, intSize, sycl::access::decorated::no>(kh); + sycl::ext::oneapi::experimental::private_alloca< + myStruct, shortSize, sycl::access::decorated::legacy>(kh); +} diff --git a/clang/test/SemaSYCL/sycl-type-attr-ast.cpp b/clang/test/SemaSYCL/sycl-type-attr-ast.cpp index d292c61c14f08..677834f352057 100644 --- a/clang/test/SemaSYCL/sycl-type-attr-ast.cpp +++ b/clang/test/SemaSYCL/sycl-type-attr-ast.cpp @@ -25,3 +25,6 @@ class [[__sycl_detail__::sycl_type(local_accessor)]] local_accessor {}; // CHECK: ClassTemplateSpecializationDecl {{.*}} class local_accessor definition // CHECK: SYCLTypeAttr {{.*}} local_accessor +class [[__sycl_detail__::sycl_type(multi_ptr)]] multi_ptr {}; +// CHECK: CXXRecordDecl {{.*}} class multi_ptr definition +// CHECK: SYCLTypeAttr {{.*}} multi_ptr diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 49116e3d5049d..8415a4c840dcf 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -27870,6 +27870,51 @@ has a zeroing treatment of subnormal input values (such as indicated by the ``"denormal-fp-math"`` attribute), a subnormal value will be observed (will not be implicitly treated as zero). +SYCL Intrinsics +------------------ + +This class of intrinsics is designed to support SYCL-specific features. + +'``llvm.sycl.alloca``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare ptr @llvm.sycl.alloca(ptr , ptr , ptr , , align) + +Overview: +""""""""" + +The '``llvm.sycl.alloca``' intrinsic represents SYCL specialization constant +length array allocations. + +Arguments: +"""""""""" + +The first three arguments are used to encode the specialization constant length +of the allocated array. The first argument is a pointer to the specialization +constant unique ID and the second one, a pointer to the +``sycl::specialization_id`` itself. The third argument is a pointer to a buffer +holding specialization constant values for targets without native specialization +constant support. + +For the last two arguments, ``typehint`` is a helper value of the same type as +the array element type. The last argument, which must be an immediate value, +represents the allocation alignment. + +Semantics: +"""""""""" + +This intrinsic allocates an array of type ``type`` and length given by the input +specialization constants in SYCL private memory. Array alignment is given by +``align``. The allocated memory has the same duration as an ``alloca``-defined +pointer. The allocated memory is uninitialized. This intrinsic has undefined +behaviour if there is insufficient private memory for the allocation. Note the +return pointer type address space and the target private address space must +match. General Intrinsics ------------------ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index f0a5e7910ac15..358c5084e21fa 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2716,6 +2716,17 @@ def int_experimental_convergence_anchor def int_experimental_convergence_loop : DefaultAttrsIntrinsic<[llvm_token_ty], [], [IntrNoMem, IntrConvergent]>; +//===----------------------------------------------------------------------===// +// SYCL intrinsics +//===----------------------------------------------------------------------===// + +// Codegen for this intrinsic is handled by sycl-post-link. +def int_sycl_alloca + : DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty, llvm_anyptr_ty, + llvm_anyptr_ty, llvm_any_ty, llvm_i64_ty], + [IntrReadMem, ImmArg>]>; + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// From 47cff70599431df11c38ccb4b0a57861771e448e Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 4 Mar 2024 17:19:52 +0000 Subject: [PATCH 2/6] Insert intrinsic calls in alloca insertion points Signed-off-by: Victor Perez --- clang/lib/CodeGen/CGBuiltin.cpp | 13 ++++++++++--- clang/test/CodeGenSYCL/builtin-alloca.cpp | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b09d0edfb263a..e3914a98e4d86 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23699,9 +23699,16 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, llvm::Constant *Align = Builder.getInt64( getContext().getTypeAlignInChars(AllocaType).getAsAlign().value()); - llvm::Value *Allocation = Builder.CreateIntrinsic( - AllocaTy, Intrinsic::sycl_alloca, - {UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca"); + llvm::Value *Allocation = [&]() { + // To implement automatic storage duration of the underlying memory object, + // insert intrinsic call before `AllocaInsertPt`. These will be lowered to + // an `alloca` or an equivalent construct in later compilation stages. + IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(AllocaInsertPt); + return Builder.CreateIntrinsic( + AllocaTy, Intrinsic::sycl_alloca, + {UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca"); + }(); // Perform AS cast if needed. diff --git a/clang/test/CodeGenSYCL/builtin-alloca.cpp b/clang/test/CodeGenSYCL/builtin-alloca.cpp index d2fe8b9be34ec..e0c32815143fa 100644 --- a/clang/test/CodeGenSYCL/builtin-alloca.cpp +++ b/clang/test/CodeGenSYCL/builtin-alloca.cpp @@ -27,18 +27,18 @@ constexpr sycl::specialization_id shortSize(1); // CHECK-DEVICE-NEXT: entry: // CHECK-DEVICE-NEXT: [[KH_ADDR:%.*]] = alloca ptr addrspace(4), align 8 // CHECK-DEVICE-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 +// CHECK-DEVICE-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) // CHECK-DEVICE-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 +// CHECK-DEVICE-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 4) // CHECK-DEVICE-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.2", align 8 +// CHECK-DEVICE-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 1) // CHECK-DEVICE-NEXT: [[KH_ADDR_ASCAST:%.*]] = addrspacecast ptr [[KH_ADDR]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) // CHECK-DEVICE-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 4) // CHECK-DEVICE-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 1) // CHECK-DEVICE-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8 // CHECK-DEVICE-NEXT: ret void From fe558bfadcce7caff2b0281a63b4e20d5b99ff48 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Tue, 5 Mar 2024 12:34:29 +0000 Subject: [PATCH 3/6] Insert address space cast in post alloca IP --- clang/lib/CodeGen/CGBuiltin.cpp | 2 ++ clang/test/CodeGenSYCL/builtin-alloca.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e3914a98e4d86..8a6e871b4fd3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23717,6 +23717,8 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, // Both 'sycl::access::decorated::{yes and legacy}' lead to decorated (private // AS) pointer type. Perform cast if 'sycl::access::decorated::no'. if (Decorated == NoDecorated) { + IRBuilderBase::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(getPostAllocaInsertPoint()); unsigned DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); llvm::PointerType *DestTy = diff --git a/clang/test/CodeGenSYCL/builtin-alloca.cpp b/clang/test/CodeGenSYCL/builtin-alloca.cpp index e0c32815143fa..8016c9f8c0583 100644 --- a/clang/test/CodeGenSYCL/builtin-alloca.cpp +++ b/clang/test/CodeGenSYCL/builtin-alloca.cpp @@ -36,10 +36,10 @@ constexpr sycl::specialization_id shortSize(1); // CHECK-DEVICE-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4) +// CHECK-DEVICE-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8 // CHECK-DEVICE-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8 // CHECK-DEVICE-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) // CHECK-DEVICE-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8 // CHECK-DEVICE-NEXT: ret void From 606be48f218611b601d38cf8373355444f848cc1 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Tue, 5 Mar 2024 12:37:54 +0000 Subject: [PATCH 4/6] Do not use typed pointer-like API --- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8a6e871b4fd3f..e42bfdc462bdf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23692,7 +23692,7 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, QualType AllocaType = TAL.get(0).getAsType(); llvm::Type *Ty = CGM.getTypes().ConvertTypeForMem(AllocaType); unsigned AllocaAS = CGM.getDataLayout().getAllocaAddrSpace(); - llvm::Type *AllocaTy = Ty->getPointerTo(AllocaAS); + llvm::Type *AllocaTy = llvm::PointerType::get(Builder.getContext(), AllocaAS); llvm::Constant *EltTyConst = llvm::Constant::getNullValue(Ty); From a2998c563a3cbb7a8e78c2819e3e537672968c57 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Thu, 7 Mar 2024 11:37:55 +0000 Subject: [PATCH 5/6] Improve testing --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 3 +- clang/lib/Sema/SemaChecking.cpp | 5 +- clang/test/CodeGenSYCL/builtin-alloca.cpp | 90 ++++++------------- clang/test/SemaSYCL/Inputs/private_alloca.hpp | 20 +++++ clang/test/SemaSYCL/Inputs/sycl.hpp | 19 ---- .../SemaSYCL/builtin-alloca-errors-device.cpp | 7 +- .../SemaSYCL/builtin-alloca-errors-host.cpp | 5 +- clang/test/SemaSYCL/builtin-alloca.cpp | 4 +- 9 files changed, 62 insertions(+), 93 deletions(-) create mode 100644 clang/test/SemaSYCL/Inputs/private_alloca.hpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 1b1b5b7ea9347..a9a785b916d91 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -179,7 +179,7 @@ def err_intel_sycl_ptr_annotation_mismatch def err_intel_sycl_alloca_no_alias : Error<"__builtin_intel_sycl_alloca cannot be used in source code. " - "Use the private_alloca alias instead.">; + "Use the private_alloca alias instead">; def err_intel_sycl_alloca_wrong_arg_count : Error<"__builtin_intel_sycl_alloca expects to be passed a single " "argument. Got %0">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e42bfdc462bdf..907594114c867 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23727,9 +23727,8 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, } // If no slot is provided, simply return allocation. - if (ReturnValue.isNull()) { + if (ReturnValue.isNull()) return RValue::get(Allocation); - } // If a slot is provided, store pointer there. Builder.CreateStore(Allocation, ReturnValue.getValue()); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index da965d40a9704..c8218e46b5ec5 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -7578,11 +7578,10 @@ bool Sema::CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned, CallExpr *Call) { if (CheckSize(getASTContext(), CST)) { TemplateArgument TA = CST->get(1); QualType Ty = TA.getNonTypeTemplateArgumentType(); - if (Ty.isNull()) { + if (Ty.isNull()) Diag(Loc, diag::err_intel_sycl_alloca_no_size) << TA; - } else { + else Diag(Loc, diag::err_intel_sycl_alloca_wrong_size) << TA << Ty; - } return true; } diff --git a/clang/test/CodeGenSYCL/builtin-alloca.cpp b/clang/test/CodeGenSYCL/builtin-alloca.cpp index 8016c9f8c0583..30aa2df30f38c 100644 --- a/clang/test/CodeGenSYCL/builtin-alloca.cpp +++ b/clang/test/CodeGenSYCL/builtin-alloca.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=CHECK-DEVICE -// RUN: %clang_cc1 -fsycl-is-host -triple x86_64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s -fcxx-exceptions\ -// RUN: | FileCheck %s --check-prefix=CHECK-HOST +// RUN: | FileCheck %s + +// Test codegen for __builtin_intel_sycl_alloca. #include @@ -17,70 +17,32 @@ struct myStruct { constexpr sycl::specialization_id size(1); constexpr sycl::specialization_id intSize(-1); -constexpr sycl::specialization_id shortSize(1); - -// COM: On the device, for each call, we should generate a chain of: 'call @sycl.alloca.' + ('addrspacecast') + 'store'. -// COM: The 'addrspacecast' will only appear when the pointer is not decorated, i.e., `DecorateAddress == sycl::access::decorated::no`. - -// CHECK-DEVICE-LABEL: define dso_local spir_func void @_Z4testRN4sycl3_V114kernel_handlerE( -// CHECK-DEVICE-SAME: ptr addrspace(4) noundef align 1 dereferenceable(1) [[KH:%.*]]) -// CHECK-DEVICE-NEXT: entry: -// CHECK-DEVICE-NEXT: [[KH_ADDR:%.*]] = alloca ptr addrspace(4), align 8 -// CHECK-DEVICE-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 -// CHECK-DEVICE-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) -// CHECK-DEVICE-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 -// CHECK-DEVICE-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 4) -// CHECK-DEVICE-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.2", align 8 -// CHECK-DEVICE-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 1) -// CHECK-DEVICE-NEXT: [[KH_ADDR_ASCAST:%.*]] = addrspacecast ptr [[KH_ADDR]] to ptr addrspace(4) -// CHECK-DEVICE-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4) -// CHECK-DEVICE-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4) -// CHECK-DEVICE-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4) -// CHECK-DEVICE-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) -// CHECK-DEVICE-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8 -// CHECK-DEVICE-NEXT: ret void -// COM: On the host, each call should be materialized... - -// CHECK-HOST-LABEL: define dso_local void @_Z4testRN4sycl3_V114kernel_handlerE( -// CHECK-HOST-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[KH:%.*]]) -// CHECK-HOST-NEXT: entry: -// CHECK-HOST-NEXT: [[KH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-HOST-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 -// CHECK-HOST-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 -// CHECK-HOST-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.1", align 8 -// CHECK-HOST-NEXT: store ptr [[KH]], ptr [[KH_ADDR]], align 8 -// CHECK-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[KH_ADDR]], align 8 -// CHECK-HOST-NEXT: [[CALL:%.*]] = call ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaIdTnRDaL_ZL4sizeELNS0_6access9decoratedE1EEENS0_9multi_ptrIT_LNS6_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]) -// CHECK-HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds %"class.sycl::_V1::multi_ptr", ptr [[PTR0]], i32 0, i32 0 -// CHECK-HOST-NEXT: store ptr [[CALL]], ptr [[COERCE_DIVE]], align 8 -// CHECK-HOST-NEXT: [[TMP1:%.*]] = load ptr, ptr [[KH_ADDR]], align 8 -// CHECK-HOST-NEXT: [[CALL1:%.*]] = call ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaIiTnRDaL_ZL7intSizeELNS0_6access9decoratedE2EEENS0_9multi_ptrIT_LNS6_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE(ptr noundef nonnull align 1 dereferenceable(1) [[TMP1]]) -// CHECK-HOST-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds %"class.sycl::_V1::multi_ptr.0", ptr [[PTR1]], i32 0, i32 0 -// CHECK-HOST-NEXT: store ptr [[CALL1]], ptr [[COERCE_DIVE2]], align 8 -// CHECK-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[KH_ADDR]], align 8 -// CHECK-HOST-NEXT: [[CALL3:%.*]] = call ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaI8myStructTnRDaL_ZL7intSizeELNS0_6access9decoratedE0EEENS0_9multi_ptrIT_LNS7_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE(ptr noundef nonnull align 1 dereferenceable(1) [[TMP2]]) -// CHECK-HOST-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds %"class.sycl::_V1::multi_ptr.1", ptr [[PTR2]], i32 0, i32 0 -// CHECK-HOST-NEXT: store ptr [[CALL3]], ptr [[COERCE_DIVE4]], align 8 -// CHECK-HOST-NEXT: ret void -// +// For each call, we should generate a chain of: 'call @llvm.sycl.alloca.' + ('addrspacecast') + 'store'. +// The 'addrspacecast' will only appear when the pointer is not decorated, i.e., `DecorateAddress == sycl::access::decorated::no`. + +// CHECK-LABEL: define dso_local spir_func void @_Z4testRN4sycl3_V114kernel_handlerE( +// CHECK-SAME: ptr addrspace(4) noundef align 1 dereferenceable(1) [[KH:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[KH_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-NEXT: [[PTR0:%.*]] = alloca %"class.sycl::_V1::multi_ptr", align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) +// CHECK-NEXT: [[PTR1:%.*]] = alloca %"class.sycl::_V1::multi_ptr.0", align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, i32 0, i64 4) +// CHECK-NEXT: [[PTR2:%.*]] = alloca %"class.sycl::_V1::multi_ptr.2", align 8 +// CHECK-NEXT: [[TMP4:%.*]] = call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs(ptr addrspace(4) addrspacecast (ptr {{.*}} to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) {{.*}} to ptr addrspace(4)), ptr addrspace(4) null, %struct.myStruct zeroinitializer, i64 1) +// CHECK-NEXT: [[KH_ADDR_ASCAST:%.*]] = addrspacecast ptr [[KH_ADDR]] to ptr addrspace(4) +// CHECK-NEXT: [[PTR0_ASCAST:%.*]] = addrspacecast ptr [[PTR0]] to ptr addrspace(4) +// CHECK-NEXT: [[PTR1_ASCAST:%.*]] = addrspacecast ptr [[PTR1]] to ptr addrspace(4) +// CHECK-NEXT: [[PTR2_ASCAST:%.*]] = addrspacecast ptr [[PTR2]] to ptr addrspace(4) +// CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(4) +// CHECK-NEXT: store ptr addrspace(4) [[KH]], ptr addrspace(4) [[KH_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP0]], ptr addrspace(4) [[PTR0_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(4) [[PTR1_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(4) [[TMP5]], ptr addrspace(4) [[PTR2_ASCAST]], align 8 +// CHECK-NEXT: ret void SYCL_EXTERNAL void test(sycl::kernel_handler &kh) { auto ptr0 = sycl::ext::oneapi::experimental::private_alloca(kh); auto ptr1 = sycl::ext::oneapi::experimental::private_alloca(kh); auto ptr2 = sycl::ext::oneapi::experimental::private_alloca(kh); } - -// COM: And the body function should be simply a throw - -// CHECK-HOST-LABEL: define internal ptr @_ZN4sycl3_V13ext6oneapi12experimental14private_allocaIdTnRDaL_ZL4sizeELNS0_6access9decoratedE1EEENS0_9multi_ptrIT_LNS6_13address_spaceE0EXT1_EEERNS0_14kernel_handlerE -// CHECK-HOST-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[H:%.*]]) -// CHECK-HOST-NEXT: entry: -// CHECK-HOST-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-HOST-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 -// CHECK-HOST-NEXT: [[EXCEPTION:%.*]] = call ptr @__cxa_allocate_exception(i64 8) -// CHECK-HOST-NEXT: store ptr @.str, ptr [[EXCEPTION]], align 16 -// CHECK-HOST-NEXT: call void @__cxa_throw(ptr [[EXCEPTION]], ptr @_ZTIPKc, ptr null) -// CHECK-HOST-NEXT: unreachable diff --git a/clang/test/SemaSYCL/Inputs/private_alloca.hpp b/clang/test/SemaSYCL/Inputs/private_alloca.hpp new file mode 100644 index 0000000000000..94eb6c007a01d --- /dev/null +++ b/clang/test/SemaSYCL/Inputs/private_alloca.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include "./sycl.hpp" + +namespace sycl { +inline namespace _V1 { +namespace ext { +namespace oneapi { +namespace experimental { + +template +__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +multi_ptr private_alloca(kernel_handler &h); + +} // namespace experimental +} // namesapce oneapi +} // namespace ext +} // namespace _V1 +} // namespace sycl diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 7d355a3848276..3cd50770f6241 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -480,25 +480,6 @@ class __attribute__((sycl_special_class)) __SYCL_TYPE(annotated_ptr) annotated_p #endif }; -#ifdef __SYCL_DEVICE_ONLY__ - -template -__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) -multi_ptr -private_alloca(kernel_handler &h); - -#else - -template -multi_ptr -private_alloca(kernel_handler &h) { - throw "sycl::ext::oneapi::experimental::private_alloca is not supported in the host"; -} - -#endif - } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp b/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp index 148f691f46196..29660388e329f 100644 --- a/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp +++ b/clang/test/SemaSYCL/builtin-alloca-errors-device.cpp @@ -1,8 +1,11 @@ -// RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify -Wpedantic %s +// RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify %s + +// Test errors of __builtin_intel_sycl_alloca when used in SYCL device code. #include #include "Inputs/sycl.hpp" +#include "Inputs/private_alloca.hpp" constexpr sycl::specialization_id size(1); constexpr sycl::specialization_id badsize(1); @@ -64,7 +67,7 @@ void test(sycl::kernel_handler &h) { // expected-error@+1 {{builtin functions must be directly called}} auto funcPtr = sycl::ext::oneapi::experimental::private_alloca; - // expected-error@+1 {{__builtin_intel_sycl_alloca cannot be used in source code. Use the private_alloca alias instead.}} + // expected-error@+1 {{__builtin_intel_sycl_alloca cannot be used in source code. Use the private_alloca alias instead}} __builtin_intel_sycl_alloca(h); // expected-error@+1 {{too few arguments to function call, expected 1, have 0}} diff --git a/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp b/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp index cc4949f90a908..49ec04817c386 100644 --- a/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp +++ b/clang/test/SemaSYCL/builtin-alloca-errors-host.cpp @@ -1,8 +1,11 @@ -// RUN: %clang_cc1 -fsyntax-only -fsycl-is-host -triple x86_64-unknown-unknown -verify -Wpedantic -fcxx-exceptions %s +// RUN: %clang_cc1 -fsyntax-only -fsycl-is-host -triple x86_64-unknown-unknown -verify %s + +// Test errors of __builtin_intel_sycl_alloca when used in targets other than SYCL devices. #include #include "Inputs/sycl.hpp" +#include "Inputs/private_alloca.hpp" constexpr sycl::specialization_id size(1); diff --git a/clang/test/SemaSYCL/builtin-alloca.cpp b/clang/test/SemaSYCL/builtin-alloca.cpp index 8ce0a91ce107f..ec882a28ab24e 100644 --- a/clang/test/SemaSYCL/builtin-alloca.cpp +++ b/clang/test/SemaSYCL/builtin-alloca.cpp @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -fsyntax-only -fsycl-is-device -triple spir64-unknown-unknown -verify -Wpedantic %s -// RUN: %clang_cc1 -fsyntax-only -fsycl-is-host -triple x86_64-unknown-unknown -verify -Wpedantic -fcxx-exceptions %s + +// Test verification of __builtin_intel_sycl_alloca when used in different valid ways. #include #include "Inputs/sycl.hpp" +#include "Inputs/private_alloca.hpp" // expected-no-diagnostics From ce4d3cd28f4b90c0eb638cc2565d3dbaf4cb2a70 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Thu, 7 Mar 2024 11:39:05 +0000 Subject: [PATCH 6/6] Drop host definition of `private_alloca` --- clang/test/CodeGenSYCL/Inputs/private_alloca.hpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp index 49e4b7cd65538..94eb6c007a01d 100644 --- a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp +++ b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp @@ -8,24 +8,11 @@ namespace ext { namespace oneapi { namespace experimental { -#ifdef __SYCL_DEVICE_ONLY__ - template __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) multi_ptr private_alloca(kernel_handler &h); -#else - -template -multi_ptr -private_alloca(kernel_handler &h) { - throw "sycl::ext::oneapi::experimental::private_alloca is not supported in " - "the host"; -} - -#endif - } // namespace experimental } // namesapce oneapi } // namespace ext