diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index d72edd1cbfd15..a03a29602c7f9 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -3,6 +3,9 @@ on: pull_request_target: branches: - main + - sycl + - sycl-devops-pr/** + - sycl-rel-** permissions: pull-requests: write diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml index 6c64e0c56c409..b9718b390bd99 100644 --- a/.github/workflows/sycl-linux-build.yml +++ b/.github/workflows/sycl-linux-build.yml @@ -186,6 +186,12 @@ jobs: # TODO consider moving this to Dockerfile. export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH cmake --build $GITHUB_WORKSPACE/build --target check-sycl + - name: check-sycl-unittests + if: always() && !cancelled() && contains(inputs.changes, 'sycl') + run: | + # TODO consider moving this to Dockerfile. + export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH + cmake --build $GITHUB_WORKSPACE/build --target check-sycl-unittests - name: check-llvm-spirv if: always() && !cancelled() && contains(inputs.changes, 'llvm_spirv') run: | diff --git a/.github/workflows/sycl-windows-build.yml b/.github/workflows/sycl-windows-build.yml index 64e62b7ef58fd..47014318c16de 100644 --- a/.github/workflows/sycl-windows-build.yml +++ b/.github/workflows/sycl-windows-build.yml @@ -109,6 +109,10 @@ jobs: if: always() && !cancelled() && contains(inputs.changes, 'sycl') run: | cmake --build build --target check-sycl + - name: check-sycl-unittests + if: always() && !cancelled() && contains(inputs.changes, 'sycl') + run: | + cmake --build build --target check-sycl-unittests - name: check-llvm-spirv if: always() && !cancelled() && contains(inputs.changes, 'llvm_spirv') run: | diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 526498d1675e3..7fe80879f7664 100644 --- 
a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -298,7 +298,9 @@ LANGOPT( "SYCL compiler assumes value fits within MAX_INT for member function of " "get/operator[], get_id/operator[] and get_global_id/get_global_linear_id " "in SYCL class id, iterm and nd_iterm") -LANGOPT(SYCLDisableRangeRounding, 1, 0, "Disable parallel for range rounding") +ENUM_LANGOPT(SYCLRangeRounding, SYCLRangeRoundingPreference, 2, + SYCLRangeRoundingPreference::On, + "Preference for SYCL parallel_for range rounding") LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the " "SYCL integration header") LANGOPT(SYCLAllowVirtualFunctions, 1, 0, diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 2c508c32674c3..c8081a77d65c9 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -151,6 +151,12 @@ class LangOptionsBase { undefined }; + enum class SYCLRangeRoundingPreference { + On, + Disable, + Force, + }; + enum HLSLLangStd { HLSL_Unset = 0, HLSL_2015 = 2015, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0eb5701a3a455..f770a6edb66d1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3997,6 +3997,21 @@ def fsycl_host_compiler_options_EQ : Joined<["-"], "fsycl-host-compiler-options= Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"When performing the host compilation with " "-fsycl-host-compiler specified, use the given options during that compile. 
" "Options are expected to be a quoted list of space separated options.">; +def fsycl_range_rounding_EQ : Joined<["-"], "fsycl-range-rounding=">, + Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, + Values<"on,disable,force">, + NormalizedValuesScope<"LangOptions::SYCLRangeRoundingPreference">, + NormalizedValues<["On", "Disable", "Force"]>, + MarshallingInfoEnum, "On">, + HelpText<"Options for range rounding of SYCL range kernels: " + "disable (do not generate range rounded kernels) " + "force (only generate range rounded kernels) " + "on (generate range rounded kernels as well as unrounded kernels). Default is 'on'">; +def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">, + Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, + Alias, AliasArgs<["disable"]>, + HelpText<"Deprecated: please use -fsycl-range-rounding=disable instead.">, + Flags<[Deprecated]>; def fno_sycl_use_footer : Flag<["-"], "fno-sycl-use-footer">, Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"Disable usage of the integration footer during SYCL enabled " "compilations.">; @@ -8256,9 +8271,6 @@ defm sycl_allow_func_ptr: BoolFOption<"sycl-allow-func-ptr", def fenable_sycl_dae : Flag<["-"], "fenable-sycl-dae">, HelpText<"Enable Dead Argument Elimination in SPIR kernels">, MarshallingInfoFlag>; -def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">, - HelpText<"Disable parallel for range rounding.">, - MarshallingInfoFlag>; def fsycl_enable_int_header_diags: Flag<["-"], "fsycl-enable-int-header-diags">, HelpText<"Enable diagnostics that require the SYCL integration header.">, MarshallingInfoFlag>; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 81fc0ce4501bd..c600bcaa0089b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2167,32 +2167,6 @@ class Sema final { // // - SYCLIntelIVDepAttr * - BuildSYCLIntelIVDepAttr(const AttributeCommonInfo &CI, Expr 
*Expr1, - Expr *Expr2); - LoopUnrollHintAttr *BuildLoopUnrollHintAttr(const AttributeCommonInfo &A, - Expr *E); - OpenCLUnrollHintAttr * - BuildOpenCLLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E); - - SYCLIntelLoopCountAttr * - BuildSYCLIntelLoopCountAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelInitiationIntervalAttr * - BuildSYCLIntelInitiationIntervalAttr(const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelMaxConcurrencyAttr * - BuildSYCLIntelMaxConcurrencyAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMaxInterleavingAttr * - BuildSYCLIntelMaxInterleavingAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelSpeculatedIterationsAttr * - BuildSYCLIntelSpeculatedIterationsAttr(const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelLoopCoalesceAttr * - BuildSYCLIntelLoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMaxReinvocationDelayAttr * - BuildSYCLIntelMaxReinvocationDelayAttr(const AttributeCommonInfo &CI, - Expr *E); - /// \name Casts /// Implementations are in SemaCast.cpp ///@{ @@ -2422,22 +2396,6 @@ class Sema final { }; bool IsLayoutCompatible(QualType T1, QualType T2) const; - template - static bool isTypeDecoratedWithDeclAttribute(QualType Ty) { - const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); - if (!RecTy) - return false; - - if (RecTy->hasAttr()) - return true; - - if (auto *CTSD = dyn_cast(RecTy)) { - ClassTemplateDecl *Template = CTSD->getSpecializedTemplate(); - if (CXXRecordDecl *RD = Template->getTemplatedDecl()) - return RD->hasAttr(); - } - return false; - } bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); @@ -2687,6 +2645,13 @@ class Sema final { /// Adds an expression to the set of gathered misaligned members. 
void AddPotentialMisalignedMembers(Expr *E, RecordDecl *RD, ValueDecl *MD, CharUnits Alignment); + + bool CheckIntelFPGARegBuiltinFunctionCall(unsigned BuiltinID, CallExpr *Call); + bool CheckIntelFPGAMemBuiltinFunctionCall(CallExpr *Call); + bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *Call); + bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *Call); ///@} // @@ -3943,6 +3908,8 @@ class Sema final { // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); + DeviceDiagnosticReason getEmissionReason(const FunctionDecl *Decl); + private: /// Function or variable declarations to be checked for whether the deferred /// diagnostics should be emitted. @@ -4209,20 +4176,6 @@ class Sema final { void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr); - /// addSYCLIntelPipeIOAttr - Adds a pipe I/O attribute to a particular - /// declaration. - void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); - SYCLIntelPipeIOAttr *MergeSYCLIntelPipeIOAttr(Decl *D, - const SYCLIntelPipeIOAttr &A); - - /// AddSYCLIntelMaxConcurrencyAttr - Adds a max_concurrency attribute to a - /// particular declaration. 
- void AddSYCLIntelMaxConcurrencyAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - - bool checkAllowedSYCLInitializer(VarDecl *VD); - //===--------------------------------------------------------------------===// DLLImportAttr *mergeDLLImportAttr(Decl *D, const AttributeCommonInfo &CI); DLLExportAttr *mergeDLLExportAttr(Decl *D, const AttributeCommonInfo &CI); MSInheritanceAttr *mergeMSInheritanceAttr(Decl *D, @@ -4230,20 +4183,10 @@ class Sema final { bool BestCase, MSInheritanceModel Model); - bool CheckCountedByAttr(Scope *Scope, const FieldDecl *FD); - EnforceTCBAttr *mergeEnforceTCBAttr(Decl *D, const EnforceTCBAttr &AL); EnforceTCBLeafAttr *mergeEnforceTCBLeafAttr(Decl *D, const EnforceTCBLeafAttr &AL); -public: - - DeviceDiagnosticReason getEmissionReason(const FunctionDecl *Decl); - - //@} - - // More parsing and symbol table subroutines. - // Helper for delayed processing of attributes. void ProcessDeclAttributeDelayed(Decl *D, const ParsedAttributesView &AttrList); @@ -4300,8 +4243,173 @@ class Sema final { void redelayDiagnostics(sema::DelayedDiagnosticPool &pool); + void AddSYCLIntelBankBitsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr **Exprs, unsigned Size); + bool AnyWorkGroupSizesDiffer(const Expr *LHSXDim, const Expr *LHSYDim, + const Expr *LHSZDim, const Expr *RHSXDim, + const Expr *RHSYDim, const Expr *RHSZDim); + bool AllWorkGroupSizesSame(const Expr *LHSXDim, const Expr *LHSYDim, + const Expr *LHSZDim, const Expr *RHSXDim, + const Expr *RHSYDim, const Expr *RHSZDim); + void AddSYCLWorkGroupSizeHintAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XDim, Expr *YDim, Expr *ZDim); + SYCLWorkGroupSizeHintAttr * + MergeSYCLWorkGroupSizeHintAttr(Decl *D, const SYCLWorkGroupSizeHintAttr &A); + void AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + IntelReqdSubGroupSizeAttr * + MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A); + IntelNamedSubGroupSizeAttr * + 
MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A); + void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelNumSimdWorkItemsAttr * + MergeSYCLIntelNumSimdWorkItemsAttr(Decl *D, + const SYCLIntelNumSimdWorkItemsAttr &A); + void AddSYCLIntelESimdVectorizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelESimdVectorizeAttr * + MergeSYCLIntelESimdVectorizeAttr(Decl *D, + const SYCLIntelESimdVectorizeAttr &A); + void AddSYCLIntelSchedulerTargetFmaxMhzAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelSchedulerTargetFmaxMhzAttr *MergeSYCLIntelSchedulerTargetFmaxMhzAttr( + Decl *D, const SYCLIntelSchedulerTargetFmaxMhzAttr &A); + void AddSYCLIntelNoGlobalWorkOffsetAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelNoGlobalWorkOffsetAttr *MergeSYCLIntelNoGlobalWorkOffsetAttr( + Decl *D, const SYCLIntelNoGlobalWorkOffsetAttr &A); + void AddSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelLoopFuseAttr * + MergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &A); + void AddSYCLIntelPrivateCopiesAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + void AddSYCLIntelMaxReplicatesAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelMaxReplicatesAttr * + MergeSYCLIntelMaxReplicatesAttr(Decl *D, const SYCLIntelMaxReplicatesAttr &A); + void AddSYCLIntelForcePow2DepthAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelForcePow2DepthAttr * + MergeSYCLIntelForcePow2DepthAttr(Decl *D, + const SYCLIntelForcePow2DepthAttr &A); + void AddSYCLIntelInitiationIntervalAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelInitiationIntervalAttr *MergeSYCLIntelInitiationIntervalAttr( + Decl *D, const SYCLIntelInitiationIntervalAttr &A); + + SYCLIntelMaxConcurrencyAttr * + MergeSYCLIntelMaxConcurrencyAttr(Decl *D, + const SYCLIntelMaxConcurrencyAttr &A); + void 
AddSYCLIntelMaxGlobalWorkDimAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelMaxGlobalWorkDimAttr * + MergeSYCLIntelMaxGlobalWorkDimAttr(Decl *D, + const SYCLIntelMaxGlobalWorkDimAttr &A); + void AddSYCLIntelMinWorkGroupsPerComputeUnitAttr( + Decl *D, const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMinWorkGroupsPerComputeUnitAttr * + MergeSYCLIntelMinWorkGroupsPerComputeUnitAttr( + Decl *D, const SYCLIntelMinWorkGroupsPerComputeUnitAttr &A); + void AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( + Decl *D, const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxWorkGroupsPerMultiprocessorAttr * + MergeSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( + Decl *D, const SYCLIntelMaxWorkGroupsPerMultiprocessorAttr &A); + void AddSYCLIntelBankWidthAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelBankWidthAttr * + MergeSYCLIntelBankWidthAttr(Decl *D, const SYCLIntelBankWidthAttr &A); + void AddSYCLIntelNumBanksAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelNumBanksAttr * + MergeSYCLIntelNumBanksAttr(Decl *D, const SYCLIntelNumBanksAttr &A); + SYCLDeviceHasAttr *MergeSYCLDeviceHasAttr(Decl *D, + const SYCLDeviceHasAttr &A); + void AddSYCLDeviceHasAttr(Decl *D, const AttributeCommonInfo &CI, + Expr **Exprs, unsigned Size); + SYCLUsesAspectsAttr *MergeSYCLUsesAspectsAttr(Decl *D, + const SYCLUsesAspectsAttr &A); + void AddSYCLUsesAspectsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr **Exprs, unsigned Size); + bool CheckMaxAllowedWorkGroupSize(const Expr *RWGSXDim, const Expr *RWGSYDim, + const Expr *RWGSZDim, const Expr *MWGSXDim, + const Expr *MWGSYDim, const Expr *MWGSZDim); + void AddSYCLIntelMaxWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XDim, Expr *YDim, Expr *ZDim); + SYCLIntelMaxWorkGroupSizeAttr * + MergeSYCLIntelMaxWorkGroupSizeAttr(Decl *D, + const SYCLIntelMaxWorkGroupSizeAttr &A); + void CheckSYCLAddIRAttributesFunctionAttrConflicts(Decl *D); + SYCLAddIRAttributesFunctionAttr 
*MergeSYCLAddIRAttributesFunctionAttr( + Decl *D, const SYCLAddIRAttributesFunctionAttr &A); + void AddSYCLAddIRAttributesFunctionAttr(Decl *D, + const AttributeCommonInfo &CI, + MutableArrayRef Args); + SYCLAddIRAttributesKernelParameterAttr * + MergeSYCLAddIRAttributesKernelParameterAttr( + Decl *D, const SYCLAddIRAttributesKernelParameterAttr &A); + void AddSYCLAddIRAttributesKernelParameterAttr(Decl *D, + const AttributeCommonInfo &CI, + MutableArrayRef Args); + SYCLAddIRAttributesGlobalVariableAttr * + MergeSYCLAddIRAttributesGlobalVariableAttr( + Decl *D, const SYCLAddIRAttributesGlobalVariableAttr &A); + void AddSYCLAddIRAttributesGlobalVariableAttr(Decl *D, + const AttributeCommonInfo &CI, + MutableArrayRef Args); + SYCLAddIRAnnotationsMemberAttr * + MergeSYCLAddIRAnnotationsMemberAttr(Decl *D, + const SYCLAddIRAnnotationsMemberAttr &A); + void AddSYCLAddIRAnnotationsMemberAttr(Decl *D, const AttributeCommonInfo &CI, + MutableArrayRef Args); + void AddSYCLReqdWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XDim, Expr *YDim, Expr *ZDim); + SYCLReqdWorkGroupSizeAttr * + MergeSYCLReqdWorkGroupSizeAttr(Decl *D, const SYCLReqdWorkGroupSizeAttr &A); + + SYCLTypeAttr *MergeSYCLTypeAttr(Decl *D, const AttributeCommonInfo &CI, + SYCLTypeAttr::SYCLType TypeName); + + /// Emit a diagnostic about the given attribute having a deprecated name, and + /// also emit a fixit hint to generate the new attribute name. + void DiagnoseDeprecatedAttribute(const ParsedAttr &A, StringRef NewScope, + StringRef NewName); + + /// Diagnoses an attribute in the 'intelfpga' namespace and suggests using + /// the attribute in the 'intel' namespace instead. + void CheckDeprecatedSYCLAttributeSpelling(const ParsedAttr &A, + StringRef NewName = ""); + + /// addSYCLIntelPipeIOAttr - Adds a pipe I/O attribute to a particular + /// declaration. 
+ void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); + SYCLIntelPipeIOAttr *MergeSYCLIntelPipeIOAttr(Decl *D, + const SYCLIntelPipeIOAttr &A); + + /// AddSYCLIntelMaxConcurrencyAttr - Adds a max_concurrency attribute to a + /// particular declaration. + void AddSYCLIntelMaxConcurrencyAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + + bool CheckCountedByAttr(Scope *Scope, const FieldDecl *FD); + ///@} + + // + // + // ------------------------------------------------------------------------- + // // + + /// \name C++ Declarations + /// Implementations are in SemaDeclCXX.cpp + ///@{ + public: void CheckDelegatingCtorCycles(); @@ -5939,13 +6047,6 @@ class Sema final { SourceLocation RParen, ParsedType ParsedTy); - ExprResult BuildSYCLUniqueStableIdExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, Expr *E); - ExprResult ActOnSYCLUniqueStableIdExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, Expr *E); - bool CheckLoopHintExpr(Expr *E, SourceLocation Loc); ExprResult ActOnNumericConstant(const Token &Tok, Scope *UDLScope = nullptr); @@ -6997,6 +7098,14 @@ class Sema final { void CheckSubscriptAccessOfNoDeref(const ArraySubscriptExpr *E); void CheckAddressOfNoDeref(const Expr *E); +public: + ExprResult BuildSYCLUniqueStableIdExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, Expr *E); + ExprResult ActOnSYCLUniqueStableIdExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, Expr *E); + ///@} // @@ -9330,6 +9439,30 @@ class Sema final { const IdentifierInfo *AttrName, SourceRange Range); + SYCLIntelIVDepAttr *BuildSYCLIntelIVDepAttr(const AttributeCommonInfo &CI, + Expr *Expr1, Expr *Expr2); + LoopUnrollHintAttr *BuildLoopUnrollHintAttr(const AttributeCommonInfo &A, + Expr *E); + OpenCLUnrollHintAttr * + BuildOpenCLLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E); + + SYCLIntelLoopCountAttr * + 
BuildSYCLIntelLoopCountAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelInitiationIntervalAttr * + BuildSYCLIntelInitiationIntervalAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxConcurrencyAttr * + BuildSYCLIntelMaxConcurrencyAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxInterleavingAttr * + BuildSYCLIntelMaxInterleavingAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelSpeculatedIterationsAttr * + BuildSYCLIntelSpeculatedIterationsAttr(const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelLoopCoalesceAttr * + BuildSYCLIntelLoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxReinvocationDelayAttr * + BuildSYCLIntelMaxReinvocationDelayAttr(const AttributeCommonInfo &CI, + Expr *E); + ///@} // @@ -12701,138 +12834,6 @@ class Sema final { AddMethodToGlobalPool(Method, impl, /*instance*/ false); } - void AddSYCLIntelBankBitsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr **Exprs, unsigned Size); - bool AnyWorkGroupSizesDiffer(const Expr *LHSXDim, const Expr *LHSYDim, - const Expr *LHSZDim, const Expr *RHSXDim, - const Expr *RHSYDim, const Expr *RHSZDim); - bool AllWorkGroupSizesSame(const Expr *LHSXDim, const Expr *LHSYDim, - const Expr *LHSZDim, const Expr *RHSXDim, - const Expr *RHSYDim, const Expr *RHSZDim); - void AddSYCLWorkGroupSizeHintAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *XDim, Expr *YDim, Expr *ZDim); - SYCLWorkGroupSizeHintAttr * - MergeSYCLWorkGroupSizeHintAttr(Decl *D, const SYCLWorkGroupSizeHintAttr &A); - void AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - IntelReqdSubGroupSizeAttr * - MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A); - IntelNamedSubGroupSizeAttr * - MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A); - void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelNumSimdWorkItemsAttr * - MergeSYCLIntelNumSimdWorkItemsAttr(Decl *D, - 
const SYCLIntelNumSimdWorkItemsAttr &A); - void AddSYCLIntelESimdVectorizeAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelESimdVectorizeAttr * - MergeSYCLIntelESimdVectorizeAttr(Decl *D, - const SYCLIntelESimdVectorizeAttr &A); - void AddSYCLIntelSchedulerTargetFmaxMhzAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelSchedulerTargetFmaxMhzAttr *MergeSYCLIntelSchedulerTargetFmaxMhzAttr( - Decl *D, const SYCLIntelSchedulerTargetFmaxMhzAttr &A); - void AddSYCLIntelNoGlobalWorkOffsetAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelNoGlobalWorkOffsetAttr *MergeSYCLIntelNoGlobalWorkOffsetAttr( - Decl *D, const SYCLIntelNoGlobalWorkOffsetAttr &A); - void AddSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelLoopFuseAttr * - MergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &A); - void AddSYCLIntelPrivateCopiesAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - void AddSYCLIntelMaxReplicatesAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelMaxReplicatesAttr * - MergeSYCLIntelMaxReplicatesAttr(Decl *D, const SYCLIntelMaxReplicatesAttr &A); - void AddSYCLIntelForcePow2DepthAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelForcePow2DepthAttr * - MergeSYCLIntelForcePow2DepthAttr(Decl *D, - const SYCLIntelForcePow2DepthAttr &A); - void AddSYCLIntelInitiationIntervalAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelInitiationIntervalAttr *MergeSYCLIntelInitiationIntervalAttr( - Decl *D, const SYCLIntelInitiationIntervalAttr &A); - - SYCLIntelMaxConcurrencyAttr *MergeSYCLIntelMaxConcurrencyAttr( - Decl *D, const SYCLIntelMaxConcurrencyAttr &A); - void AddSYCLIntelMaxGlobalWorkDimAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelMaxGlobalWorkDimAttr * - MergeSYCLIntelMaxGlobalWorkDimAttr(Decl *D, - const SYCLIntelMaxGlobalWorkDimAttr &A); - void AddSYCLIntelMinWorkGroupsPerComputeUnitAttr( - 
Decl *D, const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMinWorkGroupsPerComputeUnitAttr * - MergeSYCLIntelMinWorkGroupsPerComputeUnitAttr( - Decl *D, const SYCLIntelMinWorkGroupsPerComputeUnitAttr &A); - void AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( - Decl *D, const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMaxWorkGroupsPerMultiprocessorAttr * - MergeSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( - Decl *D, const SYCLIntelMaxWorkGroupsPerMultiprocessorAttr &A); - void AddSYCLIntelBankWidthAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelBankWidthAttr * - MergeSYCLIntelBankWidthAttr(Decl *D, const SYCLIntelBankWidthAttr &A); - void AddSYCLIntelNumBanksAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelNumBanksAttr * - MergeSYCLIntelNumBanksAttr(Decl *D, const SYCLIntelNumBanksAttr &A); - SYCLDeviceHasAttr *MergeSYCLDeviceHasAttr(Decl *D, - const SYCLDeviceHasAttr &A); - void AddSYCLDeviceHasAttr(Decl *D, const AttributeCommonInfo &CI, - Expr **Exprs, unsigned Size); - SYCLUsesAspectsAttr *MergeSYCLUsesAspectsAttr(Decl *D, - const SYCLUsesAspectsAttr &A); - void AddSYCLUsesAspectsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr **Exprs, unsigned Size); - bool CheckMaxAllowedWorkGroupSize(const Expr *RWGSXDim, const Expr *RWGSYDim, - const Expr *RWGSZDim, const Expr *MWGSXDim, - const Expr *MWGSYDim, const Expr *MWGSZDim); - void AddSYCLIntelMaxWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *XDim, Expr *YDim, Expr *ZDim); - SYCLIntelMaxWorkGroupSizeAttr * - MergeSYCLIntelMaxWorkGroupSizeAttr(Decl *D, - const SYCLIntelMaxWorkGroupSizeAttr &A); - void CheckSYCLAddIRAttributesFunctionAttrConflicts(Decl *D); - SYCLAddIRAttributesFunctionAttr *MergeSYCLAddIRAttributesFunctionAttr( - Decl *D, const SYCLAddIRAttributesFunctionAttr &A); - void AddSYCLAddIRAttributesFunctionAttr(Decl *D, - const AttributeCommonInfo &CI, - MutableArrayRef Args); - SYCLAddIRAttributesKernelParameterAttr * - 
MergeSYCLAddIRAttributesKernelParameterAttr( - Decl *D, const SYCLAddIRAttributesKernelParameterAttr &A); - void AddSYCLAddIRAttributesKernelParameterAttr(Decl *D, - const AttributeCommonInfo &CI, - MutableArrayRef Args); - SYCLAddIRAttributesGlobalVariableAttr * - MergeSYCLAddIRAttributesGlobalVariableAttr( - Decl *D, const SYCLAddIRAttributesGlobalVariableAttr &A); - void AddSYCLAddIRAttributesGlobalVariableAttr(Decl *D, - const AttributeCommonInfo &CI, - MutableArrayRef Args); - SYCLAddIRAnnotationsMemberAttr * - MergeSYCLAddIRAnnotationsMemberAttr(Decl *D, - const SYCLAddIRAnnotationsMemberAttr &A); - void AddSYCLAddIRAnnotationsMemberAttr(Decl *D, const AttributeCommonInfo &CI, - MutableArrayRef Args); - void AddSYCLReqdWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *XDim, Expr *YDim, Expr *ZDim); - SYCLReqdWorkGroupSizeAttr * - MergeSYCLReqdWorkGroupSizeAttr(Decl *D, const SYCLReqdWorkGroupSizeAttr &A); - - SYCLTypeAttr *MergeSYCLTypeAttr(Decl *D, const AttributeCommonInfo &CI, - SYCLTypeAttr::SYCLType TypeName); - - private: /// AddMethodToGlobalPool - Add an instance or factory method to the global /// pool. See descriptoin of AddInstanceMethodToGlobalPool. @@ -15220,16 +15221,6 @@ class Sema final { void CheckSYCLKernelCall(FunctionDecl *CallerFunc, ArrayRef Args); - - bool CheckIntelFPGARegBuiltinFunctionCall(unsigned BuiltinID, CallExpr *Call); - bool CheckIntelFPGAMemBuiltinFunctionCall(CallExpr *Call); - - bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, - CallExpr *Call); - bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID, - CallExpr *Call); - -private: // We store SYCL Kernels here and handle separately -- which is a hack. // FIXME: It would be best to refactor this. 
llvm::SetVector SyclDeviceDecls; @@ -15321,15 +15312,7 @@ class Sema final { ExprResult BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, QualType SourceTy, Expr *Idx); - /// Emit a diagnostic about the given attribute having a deprecated name, and - /// also emit a fixit hint to generate the new attribute name. - void DiagnoseDeprecatedAttribute(const ParsedAttr &A, StringRef NewScope, - StringRef NewName); - - /// Diagnoses an attribute in the 'intelfpga' namespace and suggests using - /// the attribute in the 'intel' namespace instead. - void CheckDeprecatedSYCLAttributeSpelling(const ParsedAttr &A, - StringRef NewName = ""); + bool checkAllowedSYCLInitializer(VarDecl *VD); /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current /// context is "used as device code". @@ -15373,8 +15356,27 @@ class Sema final { (VDecl->getType().getAddressSpace() == LangAS::sycl_private); } + template + static bool isTypeDecoratedWithDeclAttribute(QualType Ty) { + const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); + if (!RecTy) + return false; + + if (RecTy->hasAttr()) + return true; + + if (auto *CTSD = dyn_cast(RecTy)) { + ClassTemplateDecl *Template = CTSD->getSpecializedTemplate(); + if (CXXRecordDecl *RD = Template->getTemplatedDecl()) + return RD->hasAttr(); + } + return false; + } + /// Check whether \p Ty corresponds to a SYCL type of name \p TypeName. static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName); + + ///@} }; DeductionFailureInfo diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index d5d2bd4aab75e..bfeb4a763da84 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -19,7 +19,7 @@ endif() # This must be in sync with llvm/sycl/CMakeLists.txt. 
SET_SOURCE_FILES_PROPERTIES( ToolChains/MSVC.cpp ToolChains/Clang.cpp - PROPERTIES COMPILE_DEFINITIONS SYCL_MAJOR_VERSION="7" ) + PROPERTIES COMPILE_DEFINITIONS SYCL_MAJOR_VERSION="8" ) add_clang_library(clangDriver Action.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index eb9c374741d81..13dbf17fad334 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1158,6 +1158,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, checkSingleArgValidity(DeviceCodeSplit, {"per_kernel", "per_source", "auto", "off"}); + Arg *RangeRoundingPreference = + C.getInputArgs().getLastArg(options::OPT_fsycl_range_rounding_EQ); + checkSingleArgValidity(RangeRoundingPreference, {"disable", "force", "on"}); + Arg *SYCLForceTarget = getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ); if (SYCLForceTarget) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9b94b761a0897..ea652823950cc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5427,6 +5427,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_sycl_esimd_force_stateless_mem, true)) CmdArgs.push_back("-fno-sycl-esimd-force-stateless-mem"); + if (Arg *A = Args.getLastArg(options::OPT_fsycl_range_rounding_EQ)) + A->render(Args, CmdArgs); + // Add the Unique ID prefix StringRef UniqueID = D.getSYCLUniqueID(Input.getBaseInput()); if (!UniqueID.empty()) @@ -5451,10 +5454,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, bool DisableRangeRounding = false; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O0)) - DisableRangeRounding = true; + // If the user has set some range rounding preference then let that + // override not range rounding at -O0 + if (!Args.getLastArg(options::OPT_fsycl_range_rounding_EQ)) + DisableRangeRounding = true; } if (DisableRangeRounding || HasFPGA) - 
CmdArgs.push_back("-fsycl-disable-range-rounding"); + CmdArgs.push_back("-fsycl-range-rounding=disable"); if (HasFPGA) { // Pass -fintelfpga to both the host and device SYCL compilations if set. diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 6ea2be70b6d3d..0dde49bde21a0 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -579,8 +579,16 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, // Set __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ macro for // both host and device compilations if -fsycl-disable-range-rounding // flag is used. - if (LangOpts.SYCLDisableRangeRounding) + switch (LangOpts.getSYCLRangeRounding()) { + case LangOptions::SYCLRangeRoundingPreference::Disable: Builder.defineMacro("__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__"); + break; + case LangOptions::SYCLRangeRoundingPreference::Force: + Builder.defineMacro("__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__"); + break; + default: + break; + } } if (LangOpts.DeclareSPIRVBuiltins) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 89705ca50ab83..0d07d673f3880 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -5172,10 +5172,19 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "#endif //" << Macro.first << "\n\n"; } - if (S.getLangOpts().SYCLDisableRangeRounding) { + switch (S.getLangOpts().getSYCLRangeRounding()) { + case LangOptions::SYCLRangeRoundingPreference::Disable: O << "#ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ \n"; O << "#define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n"; O << "#endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__\n\n"; + break; + case LangOptions::SYCLRangeRoundingPreference::Force: + O << "#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ \n"; + O << "#define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n"; + O << "#endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__\n\n"; + 
break; + default: + break; } if (SpecConsts.size() > 0) { diff --git a/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp b/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp index 0cd39fd53fee2..752189ca53847 100644 --- a/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp +++ b/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp @@ -2,8 +2,10 @@ // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2020 // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -sycl-std=2017 -fsycl-int-header=%t.h %s // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2017 -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-disable-range-rounding -fsycl-int-header=%t.h %s +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=disable -fsycl-int-header=%t.h %s // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-RANGE +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=force -fsycl-int-header=%t.h %s +// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-FORCE-RANGE // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-int-header=%t.h %s // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-NO-RANGE @@ -33,4 +35,10 @@ int main() { // CHECK-RANGE: #ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ // CHECK-RANGE-NEXT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 // CHECK-RANGE-NEXT: #endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ + +// CHECK-FORCE-RANGE: #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ +// CHECK-FORCE-RANGE-NEXT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-FORCE-RANGE-NEXT: #endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp 
b/clang/test/Driver/sycl-offload-intelfpga.cpp index 92fddf11cc877..f86f2cc7e4f7e 100644 --- a/clang/test/Driver/sycl-offload-intelfpga.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga.cpp @@ -26,13 +26,13 @@ // CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-fintelfpga" // CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fintelfpga"{{.*}} "-fsycl-is-host" -/// FPGA target implies -fsycl-disable-range-rounding +/// FPGA target implies -fsycl-range-rounding=disable // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s -// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding" -// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host" +// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable" +// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host" /// FPGA target implies -emit-only-kernels-as-entry-points in sycl-post-link // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \ @@ -41,12 +41,12 @@ // RUN: | FileCheck -check-prefix=CHK-NON-KERNEL-ENTRY-POINTS %s // CHK-NON-KERNEL-ENTRY-POINTS: sycl-post-link{{.*}} "-emit-only-kernels-as-entry-points" -/// -fsycl-disable-range-rounding is applied to all compilations if fpga is used +/// -fsycl-range-rounding=disable is applied to all compilations if fpga is used // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING-MULTI %s -// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding" 
-// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host" -// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding" +// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable" +// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host" +// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable" /// -fintelfpga with -reuse-exe= // RUN: touch %t.cpp diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 636d9e89b8092..f67ca70bbb717 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -508,13 +508,33 @@ // RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s // CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2" -/// -fsycl-disable-range-rounding settings +/// -fsycl-range-rounding settings +/// +/// // Check that driver flag is passed to cc1 +// RUN: %clang -### -fsycl -fsycl-range-rounding=disable %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-DISABLE %s +// RUN: %clang -### -fsycl -fsycl-range-rounding=force %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-FORCE %s +// RUN: %clang -### -fsycl -fsycl-range-rounding=on %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-ON %s +// CHK-DRIVER-RANGE-ROUNDING-DISABLE: "-cc1{{.*}}-fsycl-range-rounding=disable" +// CHK-DRIVER-RANGE-ROUNDING-FORCE: "-cc1{{.*}}-fsycl-range-rounding=force" +// CHK-DRIVER-RANGE-ROUNDING-ON: "-cc1{{.*}}-fsycl-range-rounding=on" +/// +/// // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=spir64 -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s // RUN: %clang_cl 
-### -fsycl -fsycl-targets=spir64 -Od %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s -// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-disable-range-rounding" +// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: -O0 -fsycl-range-rounding=force %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s +// RUN: %clang_cl -### -fsycl -Od -fsycl-range-rounding=force %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s +// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-range-rounding=disable" +// CHK-OVERRIDE-RANGE-ROUNDING: "-fsycl-range-rounding=force" +// CHK-OVERRIDE-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable" // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=spir64 -O2 %s 2>&1 \ @@ -527,6 +547,8 @@ // RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s // CHK-RANGE-ROUNDING-NOT: "-fsycl-disable-range-rounding" +// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable" +// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=force" /// ########################################################################### diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c index 119350ebf3fab..199fd03bee63b 100644 --- a/clang/test/Preprocessor/predefined-macros.c +++ b/clang/test/Preprocessor/predefined-macros.c @@ -284,32 +284,60 @@ // CHECK-RDC: #define __CLANG_RDC__ 1 // RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ -// RUN: -triple spir64-unknown-unknown -fsycl-disable-range-rounding -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE +// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=disable -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ // RUN: -triple spir64_fpga-unknown-unknown -o - \ -// RUN: | FileCheck -match-full-lines %s
--check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE -// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-disable-range-rounding \ +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=disable \ // RUN: -triple spir64_fpga-unknown-unknown -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE // RUN: %clang_cc1 %s -E -dM -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-host \ -// RUN: -triple x86_64-unknown-linux-gnu -fsycl-disable-range-rounding -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE +// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=disable -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE -// CHECK-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 -// CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-DISABLE-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-DISABLE-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ +// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=force -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ +// RUN: -triple 
spir64_fpga-unknown-unknown -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=force \ +// RUN: -triple spir64_fpga-unknown-unknown -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// RUN: %clang_cc1 %s -E -dM -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-host \ +// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=force -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// CHECK-FORCE-RANGE: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-FORCE-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple x86_64-unknown-linux-gnu \ // RUN: -fgpu-default-stream=per-thread \ @@ -334,4 +362,4 @@ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device | FileCheck -match-full-lines \ // RUN: %s --check-prefix=CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG // CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG: #define __HIPSTDPAR__ 1 -// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1 \ No newline at end of file +// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1 diff --git a/devops/cts_exclude_filter b/devops/cts_exclude_filter index a03214a6e2314..e14c4e0eb1af9 100644 --- a/devops/cts_exclude_filter +++ b/devops/cts_exclude_filter @@ -1,8 +1,5 @@ -reduction -accessor -vector_swizzles +# These two take too much time kernel_bundle -spec_constants marray -group_functions -atomic_ref +# https://github.com/intel/llvm/issues/12926 +h_item diff --git 
a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 0c7a49b3f9dcd..6d3d1502d9d24 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -400,6 +400,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) endif() message( " DEVICE: ${d} ( ${${d}_aliases} )" ) + set ( supports_generic_addrspace TRUE ) if ( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) if( ${ARCH} STREQUAL "spirv" ) set( t "spir--" ) @@ -416,6 +417,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) elseif( ${ARCH} STREQUAL "nvptx" OR ${ARCH} STREQUAL "nvptx64" ) set( build_flags ) set( opt_flags -O3 "--nvvm-reflect-enable=false" ) + # Note: when declaring builtins, we don't consider NVIDIA as supporting + # the generic address space. This is because it maps to the same target + # address space as the private address space, resulting in a mangling + # clash. + # Since we can't declare builtins overloaded on both address spaces + # simultaneously, we choose to declare the builtins using the private space, + # which will also work for the generic address space.
+ set( supports_generic_addrspace FALSE ) elseif( ${ARCH} STREQUAL "clspv64" ) set( t "spir64--" ) set( build_flags "-Wno-unknown-assumption") @@ -437,8 +446,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) "+cl_khr_fp16," "+__opencl_c_3d_image_writes," "+__opencl_c_images," - "+cl_khr_3d_image_writes," - "+__opencl_c_generic_address_space") + "+cl_khr_3d_image_writes") + if(supports_generic_addrspace) + string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" ) + endif() list( APPEND flags ${CL_3_0_EXTENSIONS}) # Add platform specific flags diff --git a/libclc/generic/include/clc/math/fract.inc b/libclc/generic/include/clc/math/fract.inc index 71e6e8a921121..a85b30ce28d9f 100644 --- a/libclc/generic/include/clc/math/fract.inc +++ b/libclc/generic/include/clc/math/fract.inc @@ -23,3 +23,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, generic __CLC_GENTYPE *iptr); +#endif diff --git a/libclc/generic/include/clc/math/frexp.inc b/libclc/generic/include/clc/math/frexp.inc index 2a6f7f5823969..f68206e67df76 100644 --- a/libclc/generic/include/clc/math/frexp.inc +++ b/libclc/generic/include/clc/math/frexp.inc @@ -1,3 +1,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD 
_CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, generic __CLC_INTN *iptr); +#endif diff --git a/libclc/generic/include/clc/math/modf.inc b/libclc/generic/include/clc/math/modf.inc index 42bcf625686d2..558376570812d 100644 --- a/libclc/generic/include/clc/math/modf.inc +++ b/libclc/generic/include/clc/math/modf.inc @@ -23,3 +23,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, generic __CLC_GENTYPE *iptr); +#endif diff --git a/libclc/generic/include/clc/math/remquo.h b/libclc/generic/include/clc/math/remquo.h index 7daf82fc34b33..a367208894fe6 100644 --- a/libclc/generic/include/clc/math/remquo.h +++ b/libclc/generic/include/clc/math/remquo.h @@ -15,4 +15,13 @@ #include <clc/math/gentype.inc> #undef __CLC_ADDRESS_SPACE +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY <clc/math/remquo.inc> +#define __CLC_ADDRESS_SPACE generic +#include <clc/math/gentype.inc> +#undef __CLC_ADDRESS_SPACE +#endif + #undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/math/sincos.inc b/libclc/generic/include/clc/math/sincos.inc index 423b25fb7534b..4a1b74cc7ac2c 100644 --- a/libclc/generic/include/clc/math/sincos.inc +++ b/libclc/generic/include/clc/math/sincos.inc @@ -1,3 +1,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, global __CLC_GENTYPE * cosval); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, local __CLC_GENTYPE * cosval); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, private __CLC_GENTYPE * cosval); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ +
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) + _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, generic __CLC_GENTYPE * cosval); +#endif diff --git a/libclc/generic/include/spirv/spirv_builtins.h b/libclc/generic/include/spirv/spirv_builtins.h index ca15fdf3c6547..2de6f72cfb077 100644 --- a/libclc/generic/include/spirv/spirv_builtins.h +++ b/libclc/generic/include/spirv/spirv_builtins.h @@ -14361,76 +14361,76 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_fmod(__clc_vec16_fp16_t, __clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_fract(__clc_fp32_t, - __clc_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t *); +__spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t *); +__spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t *); +__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t 
__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t *); +__spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t *); +__spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_fract(__clc_fp64_t, - __clc_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t *); +__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t *); +__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t 
__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t *); +__spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t *); +__spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t *); +__spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -14438,114 +14438,162 @@ __spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_fract(__clc_fp16_t, - __clc_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t *); +__spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL 
__clc_vec2_fp16_t __spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t *); +__spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t *); +__spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t *); +__spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t *); +__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __global *); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_frexp(__clc_fp32_t, - __clc_int32_t *); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + 
defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __generic *); + +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __generic *); +_CLC_OVERLOAD 
_CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __generic *); +#endif +#endif + +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t *); +__spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t *); +__spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t *); +__spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t *); +__spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __global *); 
_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t *); +__spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_frexp(__clc_fp64_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t *); +__spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t *); +__spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t *); +__spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __global *); 
_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t *); +__spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t *); +__spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -14553,44 +14601,91 @@ __spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_frexp(__clc_fp16_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t *); +__spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t *); +__spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_frexp(__clc_vec3_fp16_t, 
__clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t *); +__spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t *); +__spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t *); +__spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t 
+__spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_half_cos(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t @@ -15012,76 +15107,76 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_lgamma(__clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_lgamma_r(__clc_fp32_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t
__spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL 
__clc_vec16_fp32_t __spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_lgamma_r(__clc_fp64_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec8_fp64_t, 
__clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -15089,44 +15184,90 @@ __spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_lgamma_r(__clc_fp16_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t 
-__spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_lgamma_r(__clc_vec16_fp32_t, 
__clc_vec16_int32_t __generic *); +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_log(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t __spirv_ocl_log(__clc_vec2_fp32_t); @@ -15518,37 +15659,37 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t #endif _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, - __clc_fp32_t *); + __clc_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, __clc_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t 
__spirv_ocl_modf(__clc_fp32_t, __clc_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_modf(__clc_vec2_fp32_t, - __clc_vec2_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_modf(__clc_vec3_fp32_t, - __clc_vec3_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_modf(__clc_vec4_fp32_t, - __clc_vec4_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_modf(__clc_vec8_fp32_t, - __clc_vec8_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t *); +__spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL 
__clc_vec16_fp32_t __spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t @@ -15556,37 +15697,37 @@ __spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __global *); #ifdef cl_khr_fp64 _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, - __clc_fp64_t *); + __clc_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, __clc_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, __clc_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_modf(__clc_vec2_fp64_t, - __clc_vec2_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_modf(__clc_vec3_fp64_t, - __clc_vec3_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_modf(__clc_vec4_fp64_t, - __clc_vec4_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_modf(__clc_vec8_fp64_t, - __clc_vec8_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t 
+__spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t *); +__spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -15595,43 +15736,90 @@ __spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __global *); #ifdef cl_khr_fp16 _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, - __clc_fp16_t *); + __clc_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, __clc_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, __clc_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_modf(__clc_vec2_fp16_t, - __clc_vec2_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_modf(__clc_vec3_fp16_t, - __clc_vec3_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_modf(__clc_vec4_fp16_t, - __clc_vec4_fp16_t *); 
+_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_modf(__clc_vec8_fp16_t, - __clc_vec8_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t *); +__spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, + __clc_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __generic *); + +#ifdef cl_khr_fp64 
+_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, + __clc_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, + __clc_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_nan(__clc_int32_t); _CLC_OVERLOAD @@ -16457,78 +16645,76 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_remainder(__clc_vec16_fp16_t, __clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_remquo(__clc_fp32_t, - __clc_fp32_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t 
__spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_remquo(__clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( + __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_remquo(__clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( + __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_remquo(__clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( + __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_remquo(__clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( + __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( __clc_vec8_fp32_t, 
__clc_vec8_fp32_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( - __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t *); + __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_remquo(__clc_fp64_t, - __clc_fp64_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_remquo(__clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( + __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_remquo(__clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( + __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( __clc_vec3_fp64_t, 
__clc_vec3_fp64_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_remquo(__clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( + __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_remquo(__clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( + __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( - __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t *); + __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( @@ -16536,45 +16722,91 @@ _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_remquo(__clc_fp16_t, - __clc_fp16_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL 
__clc_fp16_t __spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_remquo(__clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( + __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_remquo(__clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( + __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_remquo(__clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( + __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_remquo(__clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( + __clc_vec8_fp16_t, __clc_vec8_fp16_t, 
__clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( - __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t *); + __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( + __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( + __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( + __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( + __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( + __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( + __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __generic *); 
+_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( + __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( + __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( + __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( + __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( + __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( + __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( + __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( + __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( + __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_rint(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t @@ -18679,76 +18911,76 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_sin(__clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_sincos(__clc_fp32_t, - __clc_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t 
__spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t *); +__spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t *); +__spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t *); +__spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t *); +__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t *); +__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t 
__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_sincos(__clc_fp64_t, - __clc_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t *); +__spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t *); +__spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t *); +__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t *); +__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t 
__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t *); +__spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -18756,44 +18988,91 @@ __spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_sincos(__clc_fp16_t, - __clc_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t *); +__spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t *); +__spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t *); +__spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __local *); _CLC_OVERLOAD 
_CLC_DECL __clc_vec4_fp16_t __spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t *); +__spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t *); +__spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t 
+__spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_sinh(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t diff --git a/libclc/generic/lib/math/fract.inc b/libclc/generic/lib/math/fract.inc index 9db5657bb45c5..84db0971b452c 100644 --- a/libclc/generic/lib/math/fract.inc +++ b/libclc/generic/lib/math/fract.inc @@ -31,18 +31,19 @@ #define ZERO 0.0h #endif -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { - return __spirv_ocl_fract(x, iptr); -} - - #define FRACT_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ return __spirv_ocl_fract(x, iptr); \ } +FRACT_DEF(private); FRACT_DEF(local); FRACT_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + 
defined(__opencl_c_generic_address_space)) +FRACT_DEF(generic); +#endif #undef MIN_CONSTANT #undef ZERO diff --git a/libclc/generic/lib/math/frexp.cl b/libclc/generic/lib/math/frexp.cl index cd2c717815112..7a1651d270ec4 100644 --- a/libclc/generic/lib/math/frexp.cl +++ b/libclc/generic/lib/math/frexp.cl @@ -15,3 +15,12 @@ #define __CLC_ADDRESS_SPACE local #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/lib/math/modf.inc b/libclc/generic/lib/math/modf.inc index 1ffc6d9e851bd..4a055a2998aca 100644 --- a/libclc/generic/lib/math/modf.inc +++ b/libclc/generic/lib/math/modf.inc @@ -28,14 +28,14 @@ #define ZERO 0.0h #endif -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { *iptr = trunc(x); return copysign(isinf(x) ? 
ZERO : x - *iptr, x); } #define MODF_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __CLC_GENTYPE private_iptr; \ + private __CLC_GENTYPE private_iptr; \ __CLC_GENTYPE ret = modf(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ @@ -43,5 +43,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) MODF_DEF(local); MODF_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +MODF_DEF(generic); +#endif #undef ZERO diff --git a/libclc/generic/lib/math/remquo.cl b/libclc/generic/lib/math/remquo.cl index fc29b366b36ed..e022b737630d5 100644 --- a/libclc/generic/lib/math/remquo.cl +++ b/libclc/generic/lib/math/remquo.cl @@ -15,3 +15,12 @@ #define __CLC_ADDRESS_SPACE private #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc index c33b5ddab3112..32bd41da37ddc 100644 --- a/libclc/generic/lib/math/remquo.inc +++ b/libclc/generic/lib/math/remquo.inc @@ -1,9 +1,9 @@ // TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { - __CLC_INTN local_q; - __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); - *q = local_q; + private __CLC_INTN private_q; + __CLC_GENTYPE ret = __clc_remquo(x, y, &private_q); + *q = private_q; return ret; } #endif diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc index 05135d1b3290b..e56378ba0dff7 100644 --- a/libclc/generic/lib/math/sincos.inc +++ 
b/libclc/generic/lib/math/sincos.inc @@ -8,6 +8,11 @@ __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE) +#endif #undef __CLC_DECLARE_SINCOS #endif diff --git a/libclc/generic/libspirv/math/fract.inc b/libclc/generic/libspirv/math/fract.inc index 55fbe4edaad39..8da1c2fe39627 100644 --- a/libclc/generic/libspirv/math/fract.inc +++ b/libclc/generic/libspirv/math/fract.inc @@ -38,7 +38,7 @@ __spirv_ocl_fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { #define FRACT_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fract( \ __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __CLC_GENTYPE private_iptr; \ + private __CLC_GENTYPE private_iptr; \ __CLC_GENTYPE ret = __spirv_ocl_fract(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ @@ -46,6 +46,11 @@ __spirv_ocl_fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { FRACT_DEF(local); FRACT_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +FRACT_DEF(generic); +#endif #undef MIN_CONSTANT #undef ZERO diff --git a/libclc/generic/libspirv/math/frexp.cl b/libclc/generic/libspirv/math/frexp.cl index 314befe6d4f76..51bee881af259 100644 --- a/libclc/generic/libspirv/math/frexp.cl +++ b/libclc/generic/libspirv/math/frexp.cl @@ -23,3 +23,12 @@ #define __CLC_ADDRESS_SPACE local #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/libspirv/math/lgamma_r.cl 
b/libclc/generic/libspirv/math/lgamma_r.cl index 6f23c50c0607a..fbff9ddf3191e 100644 --- a/libclc/generic/libspirv/math/lgamma_r.cl +++ b/libclc/generic/libspirv/math/lgamma_r.cl @@ -658,3 +658,12 @@ _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_lgamma_r, half, #define __CLC_BODY #include #undef __CLC_ADDRSPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_ADDRSPACE generic +#define __CLC_BODY +#include +#undef __CLC_ADDRSPACE +#endif diff --git a/libclc/generic/libspirv/math/lgamma_r.inc b/libclc/generic/libspirv/math/lgamma_r.inc index 3d697814f221f..4a90b8812b3c5 100644 --- a/libclc/generic/libspirv/math/lgamma_r.inc +++ b/libclc/generic/libspirv/math/lgamma_r.inc @@ -8,7 +8,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { - __CLC_INTN private_iptr; + private __CLC_INTN private_iptr; __CLC_GENTYPE ret = __spirv_ocl_lgamma_r(x, &private_iptr); *iptr = private_iptr; return ret; diff --git a/libclc/generic/libspirv/math/modf.inc b/libclc/generic/libspirv/math/modf.inc index edd4171ac859c..8b276da7cb1b9 100644 --- a/libclc/generic/libspirv/math/modf.inc +++ b/libclc/generic/libspirv/math/modf.inc @@ -23,7 +23,7 @@ #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x, - __CLC_GENTYPE *iptr) { + private __CLC_GENTYPE *iptr) { *iptr = __spirv_ocl_trunc(x); return __spirv_ocl_copysign( __CLC_CONVERT_NATN(__spirv_IsInf(x)) ? 
ZERO : x - *iptr, x); @@ -32,7 +32,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x, #define MODF_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf( \ __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __CLC_GENTYPE private_iptr; \ + private __CLC_GENTYPE private_iptr; \ __CLC_GENTYPE ret = __spirv_ocl_modf(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ @@ -41,5 +41,11 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x, MODF_DEF(local); MODF_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +MODF_DEF(generic); +#endif + #undef __CLC_CONVERT_NATN #undef ZERO diff --git a/libclc/generic/libspirv/math/remquo.cl b/libclc/generic/libspirv/math/remquo.cl index 3c12d082b4614..5475604ac4ab2 100644 --- a/libclc/generic/libspirv/math/remquo.cl +++ b/libclc/generic/libspirv/math/remquo.cl @@ -24,3 +24,12 @@ #define __CLC_ADDRESS_SPACE private #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/libspirv/math/remquo.inc b/libclc/generic/libspirv/math/remquo.inc index f94fb6586b621..074a4d8c6faeb 100644 --- a/libclc/generic/libspirv/math/remquo.inc +++ b/libclc/generic/libspirv/math/remquo.inc @@ -8,8 +8,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_remquo( __CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { - __CLC_INTN local_q; - __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); - *q = local_q; + private __CLC_INTN private_q; + __CLC_GENTYPE ret = __clc_remquo(x, y, &private_q); + *q = private_q; return ret; } diff --git a/libclc/generic/libspirv/math/sincos.inc b/libclc/generic/libspirv/math/sincos.inc index 
33f964be33768..f92f32e42c3f6 100644 --- a/libclc/generic/libspirv/math/sincos.inc +++ b/libclc/generic/libspirv/math/sincos.inc @@ -16,4 +16,10 @@ __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE) +#endif + #undef __CLC_DECLARE_SINCOS diff --git a/libclc/utils/libclc-remangler/LibclcRemangler.cpp b/libclc/utils/libclc-remangler/LibclcRemangler.cpp index c9e92730c3e31..cdc0540221d31 100644 --- a/libclc/utils/libclc-remangler/LibclcRemangler.cpp +++ b/libclc/utils/libclc-remangler/LibclcRemangler.cpp @@ -777,12 +777,17 @@ class LibCLCRemangler : public ASTConsumer { void Initialize(ASTContext &C) override { ASTCtx = &C; - SMDiagnostic Err; std::unique_ptr const Buff = ExitOnErr( errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputIRFilename))); + + SMDiagnostic Err; std::unique_ptr const M = - ExitOnErr(Expected>( - parseIR(Buff.get()->getMemBufferRef(), Err, LLVMCtx))); + parseIR(Buff.get()->getMemBufferRef(), Err, LLVMCtx); + + if (!M) { + Err.print("libclc-remangler", errs()); + exit(1); + } handleModule(M.get()); } @@ -840,7 +845,7 @@ class LibCLCRemangler : public ASTConsumer { } bool remangleFunction(Function &Func, llvm::Module *M) { - if (!Func.getName().startswith("_Z")) + if (!Func.getName().starts_with("_Z")) return true; std::string const MangledName = Func.getName().str(); @@ -958,7 +963,7 @@ int main(int argc, const char **argv) { // Use a default Compilation DB instead of the build one, as it might contain // toolchain specific options, not compatible with clang. 
- FixedCompilationDatabase Compilations("/", std::vector()); + FixedCompilationDatabase Compilations(".", std::vector()); ClangTool Tool(Compilations, ExpectedParser->getSourcePathList()); LibCLCRemanglerActionFactory LRAF{}; diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 00ce045f43c3c..623a587c1dc6c 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -32,11 +32,11 @@ include(SYCLUtils) # # See doc/developer/ABIPolicyGuide.md for the meaning when in the middle of # development cycle. -set(SYCL_MAJOR_VERSION 7) -set(SYCL_MINOR_VERSION 2) +set(SYCL_MAJOR_VERSION 8) +set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 8) +set(SYCL_DEV_ABI_VERSION 0) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index ea8135be0f09e..9571d43cc07c9 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -37,16 +37,34 @@ macro(add_sycl_unittest test_dirname link_variant) ) endif() - add_custom_target(check-sycl-${test_dirname} - ${CMAKE_COMMAND} -E env - LLVM_PROFILE_FILE="${SYCL_COVERAGE_PATH}/${test_dirname}.profraw" - env SYCL_CONFIG_FILE_NAME=null.cfg - env SYCL_DEVICELIB_NO_FALLBACK=1 - env SYCL_CACHE_DIR="${CMAKE_BINARY_DIR}/sycl_cache" - ${CMAKE_CURRENT_BINARY_DIR}/${test_dirname} - DEPENDS - ${test_dirname} - ) + # check-sycl-unittests was using an old sycl library. So, to get + # around this problem, we add the new sycl library to the PATH and + # LD_LIBRARY_PATH on Windows and Linux respectively. 
+ if (WIN32) + add_custom_target(check-sycl-${test_dirname} + ${CMAKE_COMMAND} -E env + LLVM_PROFILE_FILE="${SYCL_COVERAGE_PATH}/${test_dirname}.profraw" + SYCL_CONFIG_FILE_NAME=null.cfg + SYCL_DEVICELIB_NO_FALLBACK=1 + SYCL_CACHE_DIR="${CMAKE_BINARY_DIR}/sycl_cache" + "PATH=${CMAKE_BINARY_DIR}/bin;$ENV{PATH}" + ${CMAKE_CURRENT_BINARY_DIR}/${test_dirname} + DEPENDS + ${test_dirname} + ) + else() + add_custom_target(check-sycl-${test_dirname} + ${CMAKE_COMMAND} -E env + LLVM_PROFILE_FILE="${SYCL_COVERAGE_PATH}/${test_dirname}.profraw" + SYCL_CONFIG_FILE_NAME=null.cfg + SYCL_DEVICELIB_NO_FALLBACK=1 + SYCL_CACHE_DIR="${CMAKE_BINARY_DIR}/sycl_cache" + "LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH}" + ${CMAKE_CURRENT_BINARY_DIR}/${test_dirname} + DEPENDS + ${test_dirname} + ) + endif() add_dependencies(check-sycl-unittests check-sycl-${test_dirname}) @@ -60,7 +78,7 @@ macro(add_sycl_unittest test_dirname link_variant) if(SYCL_ENABLE_KERNEL_FUSION) target_link_libraries(${test_dirname} PRIVATE sycl-fusion) endif(SYCL_ENABLE_KERNEL_FUSION) - + target_include_directories(${test_dirname} PRIVATE SYSTEM ${sycl_inc_dir} diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 838b2568ae980..01ae7b7463749 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -447,8 +447,10 @@ The HIP backend offers a Graph managemenet API very similar to CUDA Graph feature for batching series of operations. The SYCL Graph HIP backend implementation is therefore very similar to that of CUDA. +The minimum version of ROCm required to support `sycl_ext_oneapi_graph` is 5.5.1. + UR commands (e.g. kernels) are mapped as graph nodes using the -[HIP Management API](https://docs.amd.com/projects/HIP/en/docs-5.5.0/doxygen/html/group___graph.html). +[HIP Management API](https://rocm.docs.amd.com/projects/HIP/en/docs-5.5.1/doxygen/html/group___graph.html). 
Synchronization between commands (UR sync-points) is implemented using graph dependencies. Executable HIP Graphs can be submitted to a HIP stream diff --git a/sycl/doc/design/ParallelForRangeRounding.md b/sycl/doc/design/ParallelForRangeRounding.md index a4199aed8e800..7f43cafe6e96e 100644 --- a/sycl/doc/design/ParallelForRangeRounding.md +++ b/sycl/doc/design/ParallelForRangeRounding.md @@ -42,5 +42,8 @@ rounding will only be used if the SYCL runtime X dimension exceeds some minimum value, which can be configured using the `SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS` environment variable. -Generation of range rounded kernels can be disabled by using the compiler flag -`-fsycl-disable-range-rounding`. +In order to reduce binary size, the user can tell the compiler to generate only +the range rounded kernel using the flag `-fsycl-range-rounding=force`. The user +can also tell the SYCL implementation to produce only the unrounded kernel using +the flag `-fsycl-range-rounding=disable`. By default both kernels are generated, +which is equivalent to `-fsycl-range-rounding=on`. diff --git a/sycl/include/sycl/detail/core.hpp b/sycl/include/sycl/detail/core.hpp new file mode 100644 index 0000000000000..c542ee7e8254b --- /dev/null +++ b/sycl/include/sycl/detail/core.hpp @@ -0,0 +1,23 @@ +//==------------ core.hpp - experimental core SYCL header ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// This is an ongoing experimental activity in its early stage. No code outside +// this project must rely on the behavior of this header file - keep using +// . +// +// Short-term plan/action items (in no particular order): +// * Update more tests to use this instead of full . 
+// * Refactor includes so that transitive dependencies don't bring as much as +// they currently do. +// * Determine what else should be included here. + +#include +#include +#include diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp index 9b6fd04a7e5a9..c41ee9c486a0c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include #include #include diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index b441f2cfe6a6d..fa6ee8f1edc49 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1211,7 +1211,6 @@ class __SYCL_EXPORT handler { // non-32-bit global range, we wrap the old kernel in a new kernel // that has each work item peform multiple invocations the old // kernel in a 32-bit global range. - auto Dev = detail::getSyclObjImpl(detail::getDeviceFromHandler(*this)); id MaxNWGs = [&] { auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2(); if (!HasMaxWGs) { @@ -1254,6 +1253,11 @@ class __SYCL_EXPORT handler { // will yield a rounded-up value for the total range. 
Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor); } +#ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // If we are forcing range rounding kernels to be used, we always want the + // rounded range kernel to be generated, even if rounding isn't needed + DidAdjust = true; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ for (int i = 0; i < Dims; ++i) if (RoundedRange[i] > MaxRange[i]) @@ -1361,6 +1365,9 @@ class __SYCL_EXPORT handler { { (void)UserRange; (void)Props; +#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // If parallel_for range rounding is forced then only range rounded + // kernel is generated kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ @@ -1372,6 +1379,9 @@ class __SYCL_EXPORT handler { setType(detail::CG::Kernel); setNDRangeUsed(false); #endif +#else + (void)KernelFunc; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ } } diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 73a37673d70b8..0903bce7dd23e 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -8,7 +8,8 @@ #pragma once -#include +#include + #include #include #include @@ -17,7 +18,6 @@ #if SYCL_BACKEND_OPENCL #include #endif -#include #include #include #include @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp index 6d039e4314fa5..52d419c93ee05 100644 --- a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp +++ b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp @@ -224,8 +224,9 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module if (PrintPiTrace) std::cout << "---> DLL_PROCESS_DETACH pi_win_proxy_loader.dll\n" << std::endl; - + break; case DLL_THREAD_ATTACH: + break; case DLL_THREAD_DETACH: break; } diff --git a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp 
index c473da9cd5cbb..3a2ca6185f775 100644 --- a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp +++ b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp @@ -33,7 +33,9 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module if (PrintPiTrace) std::cout << "---> DLL_PROCESS_ATTACH " << __SYCL_PLUGIN_DLL_NAME << "\n" << std::endl; + break; case DLL_THREAD_ATTACH: + break; case DLL_THREAD_DETACH: break; } diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0eebfa5a02014..b4f01966d5418 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -82,13 +82,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit ed1f8bf618c88eaabea6bde0f6c06fc265f3b49f - # Merge: ca5c3421 69c43b45 - # Author: Kenneth Benzie (Benie) - # Date: Tue Mar 19 21:00:20 2024 +0000 - # Merge pull request #1326 from hdelan/refactor-guess-local-worksize - # [CUDA][HIP] Fix bug in guess local worksize funcs and improve local worksize guessing in HIP adapter - set(UNIFIED_RUNTIME_TAG ed1f8bf618c88eaabea6bde0f6c06fc265f3b49f) + # commit 5f4dd113824e90522d813420932c14072dc3049d + # Merge: ed1f8bf b551c77 + # Author: Ewan Crawford + # Date: Fri Mar 15 10:22:39 2024 +0000 + # Merge pull request #1447 from Bensuo/ewan/rocm_5_5_1 + # [HIP][CMDBUF] Require ROCm 5.5.1 for HIP command-buffers + set(UNIFIED_RUNTIME_TAG 5f4dd113824e90522d813420932c14072dc3049d) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 2dbbfe6767c25..d57b6e5a50354 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -357,7 +357,9 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, case DLL_PROCESS_ATTACH: 
if (PrintPiTrace) std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; + break; case DLL_THREAD_ATTACH: + break; case DLL_THREAD_DETACH: break; } diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 62483acd24506..308fa742d2047 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -455,12 +455,12 @@ graph_impl::add(node_type NodeType, } // Look through the graph for nodes which share this requirement for (auto &Node : MNodeStorage) { - if (Node->hasRequirement(Req)) { + if (Node->hasRequirementDependency(Req)) { bool ShouldAddDep = true; // If any of this node's successors have this requirement then we skip // adding the current node as a dependency. for (auto &Succ : Node->MSuccessors) { - if (Succ.lock()->hasRequirement(Req)) { + if (Succ.lock()->hasRequirementDependency(Req)) { ShouldAddDep = false; break; } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 9eeea62b5c21b..ae6fedbfd12a0 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -175,17 +175,38 @@ class node_impl { } return *this; } + /// Checks if this node should be a dependency of another node based on + /// accessor requirements. This is calculated using access modes if a + /// requirement to the same buffer is found inside this node. + /// @param IncomingReq Incoming requirement. + /// @return True if a dependency is needed, false if not. + bool hasRequirementDependency(sycl::detail::AccessorImplHost *IncomingReq) { + access_mode InMode = IncomingReq->MAccessMode; + switch (InMode) { + case access_mode::read: + case access_mode::read_write: + case access_mode::atomic: + break; + // These access modes don't care about existing buffer data, so we don't + // need a dependency. 
+ case access_mode::write: + case access_mode::discard_read_write: + case access_mode::discard_write: + return false; + } - /// Checks if this node has a given requirement. - /// @param Requirement Requirement to lookup. - /// @return True if \p Requirement is present in node, false otherwise. - bool hasRequirement(sycl::detail::AccessorImplHost *IncomingReq) { for (sycl::detail::AccessorImplHost *CurrentReq : MCommandGroup->getRequirements()) { if (IncomingReq->MSYCLMemObj == CurrentReq->MSYCLMemObj) { - return true; + access_mode CurrentMode = CurrentReq->MAccessMode; + // Since we have an incoming read requirement, we only care + // about requirements on this node if they are write + if (CurrentMode != access_mode::read) { + return true; + } } } + // No dependency necessary return false; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 64a1fb5e888ac..321cc48b29769 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -113,6 +113,9 @@ event queue_impl::memset(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + // Before we notify the subscribers, we broadcast the 'queue_id', which was a + // metadata entry to TLS for use by callback handlers + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memset submission PrepareNotify.notify(); // Emit a begin/end scope for this call @@ -159,6 +162,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memset submission PrepareNotify.notify(); // Emit a begin/end scope for this call diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7109555b05ecc..890891644bbac 100644 --- 
a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -92,7 +92,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -176,13 +176,16 @@ class queue_impl { // This section is the second part of the instrumentation that uses the // tracepoint information and notifies } + // We enable XPTI tracing events using the TLS mechanism; if the code // location data is available, then the tracing data will be rich. #if XPTI_ENABLE_INSTRUMENTATION constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); + // Using the instance override constructor for use with queues as queues + // maintain instance IDs in the object XPTIScope PrepareNotify((void *)this, NotificationTraceType, - SYCL_STREAM_NAME, "queue_create"); + SYCL_STREAM_NAME, MQueueID, "queue_create"); // Cache the trace event, stream id and instance IDs for the destructor if (xptiCheckTraceEnabled(PrepareNotify.streamID(), NotificationTraceType)) { @@ -207,6 +210,8 @@ class queue_impl { xpti::addMetadata(TEvent, "queue_handle", reinterpret_cast(getHandleRef())); }); + // Also publish to TLS + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); PrepareNotify.notify(); } #endif @@ -244,7 +249,7 @@ class queue_impl { constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); XPTIScope PrepareNotify((void *)this, NotificationTraceType, - SYCL_STREAM_NAME, "queue_create"); + SYCL_STREAM_NAME, MQueueID, "queue_create"); if (xptiCheckTraceEnabled(PrepareNotify.streamID(), NotificationTraceType)) { // 
Cache the trace event, stream id and instance IDs for the destructor @@ -269,6 +274,8 @@ class queue_impl { if (!MHostQueue) xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); }); + // Also publish to TLS before notification + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); PrepareNotify.notify(); } #endif diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3da716ee1d8e8..7781fb7e1cd1e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1005,7 +1005,10 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); } #endif } @@ -1124,7 +1127,8 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1202,8 +1206,10 @@ void ReleaseCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "allocation_type", commandToName(MAllocaCmd->getType())); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); 
makeTraceEventEpilog(); } #endif @@ -1323,8 +1329,10 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1386,8 +1394,10 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1489,8 +1499,10 @@ void MemCpyCommand::emitInstrumentationData() { xpti::addMetadata( CmdTraceEvent, "copy_to", reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1665,8 +1677,10 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata( CmdTraceEvent, "copy_to", reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, 
we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1756,8 +1770,10 @@ void EmptyCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1828,8 +1844,10 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -2063,7 +2081,9 @@ void instrumentationFillCommonData(const std::string &KernelName, xpti::addMetadata(CmdTraceEvent, "sym_column_no", static_cast(Column)); } - xpti::addMetadata(CmdTraceEvent, "queue_id", Queue->getQueueID()); + // We no longer set the 'queue_id' in the metadata structure as it is a + // mutable value and multiple threads using the same queue created at the + // same location will overwrite the metadata values creating inconsistencies } } #endif @@ -2096,6 +2116,10 @@ std::pair emitKernelInstrumentationData( FromSource, InstanceID, CmdTraceEvent); if (CmdTraceEvent) { + // Stash the queue_id mutable metadata in TLS + 
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + Queue->getQueueID()); + instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, SyclKernel, Queue, CGArgs); @@ -2139,6 +2163,8 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -3356,10 +3382,12 @@ void KernelFusionCommand::emitInstrumentationData() { deviceToString(MQueue->get_device())); xpti::addMetadata(CmdTraceEvent, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); } - if (MFirstInstance) { + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index be546e4e27905..681e2841c027b 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -42,6 +42,9 @@ extern uint8_t GMemAllocStreamID; extern xpti::trace_event_data_t *GMemAllocEvent; extern xpti::trace_event_data_t *GSYCLGraphEvent; +// We will pick a global constant so that the pointer in TLS never goes stale +inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id"; + #define STR(x) #x #define SYCL_VERSION_STR \ "sycl " STR(__LIBSYCL_MAJOR_VERSION) "." 
STR(__LIBSYCL_MINOR_VERSION) @@ -165,6 +168,45 @@ class XPTIRegistry { class XPTIScope { public: using TracePoint = xpti::framework::tracepoint_t; + /// @brief Scoped class for XPTI instrumentation using TLS data + /// @param CodePtr The address of the class/function to help differentiate + /// actions in case the code location information is not available + /// @param TraceType The type of trace event being created + /// @param StreamName The stream which will emit these notifications + /// @param InstanceID The instance ID associated with an object, otherwise 0 + /// will auto-generate + /// @param UserData String value that provides metadata about the + /// instrumentation + XPTIScope(void *CodePtr, uint16_t TraceType, const char *StreamName, + uint64_t InstanceID, const char *UserData) + : MUserData(UserData), MStreamID(0), MInstanceID(InstanceID), + MScopedNotify(false), MTraceType(0) { + detail::tls_code_loc_t Tls; + auto TData = Tls.query(); + // If TLS is not set, we can still generate universal IDs with user data + // and CodePtr information + const char *FuncName = TData.functionName(); + if (!TData.functionName() && !TData.fileName()) + FuncName = UserData; + // Create a tracepoint object that has a lifetime of this class + MTP = new TracePoint(TData.fileName(), FuncName, TData.lineNumber(), + TData.columnNumber(), CodePtr); + if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create || + TraceType == (uint16_t)xpti::trace_point_type_t::node_create || + TraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + TraceType == (uint16_t)xpti::trace_point_type_t::queue_create) + MTP->parent_event(GSYCLGraphEvent); + // Now if tracing is enabled, create trace events and notify + if (xptiTraceEnabled() && MTP) { + MTP->stream(StreamName).trace_type((xpti::trace_point_type_t)TraceType); + MTraceEvent = const_cast(MTP->trace_event()); + MStreamID = MTP->stream_id(); + // This constructor uses a manual override for the instance ID as some + // 
objects such as queues keep track of instance IDs + MTP->override_instance_id(MInstanceID); + } + } + /// @brief Scoped class for XPTI instrumentation using TLS data /// @param CodePtr The address of the class/function to help differentiate /// actions in case the code location information is not available @@ -188,7 +230,8 @@ class XPTIScope { TData.columnNumber(), CodePtr); if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create || TraceType == (uint16_t)xpti::trace_point_type_t::node_create || - TraceType == (uint16_t)xpti::trace_point_type_t::edge_create) + TraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + TraceType == (uint16_t)xpti::trace_point_type_t::queue_create) MTP->parent_event(GSYCLGraphEvent); // Now if tracing is enabled, create trace events and notify if (xptiTraceEnabled() && MTP) { @@ -243,6 +286,8 @@ class XPTIScope { MTraceType == (uint16_t)xpti::trace_point_type_t::graph_create || MTraceType == (uint16_t)xpti::trace_point_type_t::node_create || MTraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + MTraceType == (uint16_t)xpti::trace_point_type_t::queue_create || + MTraceType == (uint16_t)xpti::trace_point_type_t::queue_destroy || MTraceType == (uint16_t)xpti::trace_point_type_t::diagnostics) return; diff --git a/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp b/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp index 763c1b49386c9..1a69f7005090f 100644 --- a/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp +++ b/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp @@ -31,7 +31,7 @@ #include "Inputs/kernels_in_file2.hpp" #include #include -#include +#include #include #ifdef DEFINE_NDEBUG_INFILE1 diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index e90f72f6cc48b..7e6a333e305d7 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ 
b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp index 85abf3303ec7c..907faf0b5292a 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp @@ -17,7 +17,7 @@ //===---------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/BFloat16/bfloat_hw.cpp b/sycl/test-e2e/BFloat16/bfloat_hw.cpp index 952768ee74c11..0154d21156fd1 100644 --- a/sycl/test-e2e/BFloat16/bfloat_hw.cpp +++ b/sycl/test-e2e/BFloat16/bfloat_hw.cpp @@ -3,7 +3,7 @@ // "Hello world" bfloat16 test which checks conversion algorithms on host. -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/AMX_aspect.cpp b/sycl/test-e2e/Basic/AMX_aspect.cpp index 09bcb89111061..ff3225c3670f5 100644 --- a/sycl/test-e2e/Basic/AMX_aspect.cpp +++ b/sycl/test-e2e/Basic/AMX_aspect.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; using arch = sycl::ext::oneapi::experimental::architecture; diff --git a/sycl/test-e2e/Basic/access_to_subset.cpp b/sycl/test-e2e/Basic/access_to_subset.cpp index f2002df8116fe..3fc81f477c0de 100644 --- a/sycl/test-e2e/Basic/access_to_subset.cpp +++ b/sycl/test-e2e/Basic/access_to_subset.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; using acc_w = accessor; diff --git a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp index afee48c3cf081..b8e91547b83ca 100644 --- 
a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp +++ b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp b/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp index 62f61136f024f..abd5a1fb643f9 100644 --- a/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp +++ b/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp @@ -3,7 +3,7 @@ // Tests the size and iterator members of an empty zero-dimensional accessor. -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp index 8b6e847c01597..943cced16aa3e 100644 --- a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp +++ b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp @@ -4,7 +4,7 @@ // RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t2.out %if level_zero %{ 2>&1 | FileCheck %s %} // RUN: %{run} %t2.out -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/aspects.cpp b/sycl/test-e2e/Basic/aspects.cpp index d14e4764f49e9..75daf263e638e 100644 --- a/sycl/test-e2e/Basic/aspects.cpp +++ b/sycl/test-e2e/Basic/aspects.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp b/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp index 250b8edb6b170..84e29a4f4d014 100644 --- a/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp +++ b/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp @@ -3,7 +3,7 @@ // RUN: %if windows %{ %clangxx -fsycl -fsycl-host-compiler=cl -fsycl-host-compiler-options='/std:c++17 /Zc:__cplusplus' -o %t2.out %s %} // RUN: %if windows %{ %{run} %t2.out %} -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/boolean.cpp b/sycl/test-e2e/Basic/boolean.cpp 
index 6c376c6dcf8b9..c19d614268a83 100644 --- a/sycl/test-e2e/Basic/boolean.cpp +++ b/sycl/test-e2e/Basic/boolean.cpp @@ -2,7 +2,10 @@ // RUN: %{run} %t.out #include #include -#include + +#include + +#include using namespace sycl; namespace s = sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer.cpp b/sycl/test-e2e/Basic/buffer/buffer.cpp index 8f3e243e86436..92c4f12cdfd6c 100644 --- a/sycl/test-e2e/Basic/buffer/buffer.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp b/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp index 90784e72249bb..f0a0ba3105ace 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp @@ -14,7 +14,7 @@ // device. #include -#include +#include constexpr size_t NumElems = 67; diff --git a/sycl/test-e2e/Basic/buffer/buffer_container.cpp b/sycl/test-e2e/Basic/buffer/buffer_container.cpp index 63395ad529169..0a263bb199a6c 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_container.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_container.cpp @@ -1,7 +1,7 @@ // RUN: %{build} %cxx_std_optionc++17 -o %t2.out // RUN: %{run} %t2.out -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_create.cpp b/sycl/test-e2e/Basic/buffer/buffer_create.cpp index eda7f81d5f764..15955d96b5ebc 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_create.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_create.cpp @@ -4,7 +4,7 @@ // UNSUPPORTED: ze_debug #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp b/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp index ef66cb97f3b65..e44f2a628dbc0 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include using namespace sycl; diff --git 
a/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp b/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp index b72cc4650cbac..20206f19b03eb 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include void check_copy_device_to_host(sycl::queue &Queue) { constexpr int size = 6, offset = 2; diff --git a/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp b/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp index 6096ee41d9918..4af49f8ad9bef 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp @@ -7,7 +7,7 @@ // #include -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Basic/buffer/reinterpret.cpp b/sycl/test-e2e/Basic/buffer/reinterpret.cpp index 6dc5c40ff9e01..e61cd760a78c8 100644 --- a/sycl/test-e2e/Basic/buffer/reinterpret.cpp +++ b/sycl/test-e2e/Basic/buffer/reinterpret.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include // This tests verifies basic cases of using sycl::buffer::reinterpret // functionality - changing buffer type and range. 
This test checks that diff --git a/sycl/test-e2e/Basic/buffer/subbuffer.cpp b/sycl/test-e2e/Basic/buffer/subbuffer.cpp index 6a57883062f95..f34c86ab2be6f 100644 --- a/sycl/test-e2e/Basic/buffer/subbuffer.cpp +++ b/sycl/test-e2e/Basic/buffer/subbuffer.cpp @@ -16,7 +16,7 @@ // 1) Correct results after usage of different type of accessors to sub buffer // 2) Exceptions if we trying to create sub buffer not according to spec -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp index c925eebd0a8c9..af296fc3b56f9 100644 --- a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp +++ b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/built-ins/helpers.hpp b/sycl/test-e2e/Basic/built-ins/helpers.hpp index 203caefbe63a0..03a7c720e9afd 100644 --- a/sycl/test-e2e/Basic/built-ins/helpers.hpp +++ b/sycl/test-e2e/Basic/built-ins/helpers.hpp @@ -1,4 +1,6 @@ -#include +#include + +#include template bool equal(T x, T y, D delta) { // Maybe should be C++20's std::equality_comparable. 
diff --git a/sycl/test-e2e/Basic/built-ins/marray_math.cpp b/sycl/test-e2e/Basic/built-ins/marray_math.cpp index 46636a6ccd93d..f40de39214916 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_math.cpp @@ -6,7 +6,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t_preview.out%} #include -#include +#include + +#include // Reference // https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-2020/util/accuracy.h diff --git a/sycl/test-e2e/Basic/built-ins/vec_common.cpp b/sycl/test-e2e/Basic/built-ins/vec_common.cpp index 090852ea14e78..ea62bdb7c72d4 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_common.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_common.cpp @@ -8,7 +8,9 @@ #endif #include -#include +#include + +#include #define TEST(FUNC, VEC_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ { \ diff --git a/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp b/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp index eceeadb40f2e2..432057a171f02 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp @@ -3,7 +3,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #define TEST(FUNC, VEC_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ { \ diff --git a/sycl/test-e2e/Basic/built-ins/vec_math.cpp b/sycl/test-e2e/Basic/built-ins/vec_math.cpp index ea6ab81431bc2..61ef24547ec94 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_math.cpp @@ -5,7 +5,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes %{mathflags} -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #define TEST(FUNC, VEC_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) 
\ { \ diff --git a/sycl/test-e2e/Basic/built-ins/vec_relational.cpp b/sycl/test-e2e/Basic/built-ins/vec_relational.cpp index 45f12f22952fa..48b347143c824 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_relational.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_relational.cpp @@ -3,7 +3,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #define TEST(FUNC, TYPE, EXPECTED, N, ...) \ { \ diff --git a/sycl/test-e2e/Basic/compare_exchange_strong.cpp b/sycl/test-e2e/Basic/compare_exchange_strong.cpp index 8b749119f97a9..c95b7797f0749 100644 --- a/sycl/test-e2e/Basic/compare_exchange_strong.cpp +++ b/sycl/test-e2e/Basic/compare_exchange_strong.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Basic/context_platforms.cpp b/sycl/test-e2e/Basic/context_platforms.cpp index 28e1656101e89..9e81b8480e7a0 100644 --- a/sycl/test-e2e/Basic/context_platforms.cpp +++ b/sycl/test-e2e/Basic/context_platforms.cpp @@ -2,7 +2,7 @@ // RUN: %{run-unfiltered-devices} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/default_device.cpp b/sycl/test-e2e/Basic/default_device.cpp index 7cb44321e3150..f56ab6ae47a5a 100644 --- a/sycl/test-e2e/Basic/default_device.cpp +++ b/sycl/test-e2e/Basic/default_device.cpp @@ -9,7 +9,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::device Dev; diff --git a/sycl/test-e2e/Basic/default_platform.cpp b/sycl/test-e2e/Basic/default_platform.cpp index 5b7b7b9a68e7a..5fd2c324a7df1 100644 --- a/sycl/test-e2e/Basic/default_platform.cpp +++ b/sycl/test-e2e/Basic/default_platform.cpp @@ -9,7 +9,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::platform Plt; diff --git 
a/sycl/test-e2e/Basic/device-selectors.cpp b/sycl/test-e2e/Basic/device-selectors.cpp index 00c97d98f09f7..2c50e71138fd9 100644 --- a/sycl/test-e2e/Basic/device-selectors.cpp +++ b/sycl/test-e2e/Basic/device-selectors.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out -DSYCL_DISABLE_IMAGE_ASPECT_WARNING // RUN: %{run-unfiltered-devices} %t.out -#include +#include using namespace sycl; auto exception_handler_lambda = [](exception_list elist) { diff --git a/sycl/test-e2e/Basic/device.cpp b/sycl/test-e2e/Basic/device.cpp index 76978470ced08..be97706f3bc92 100644 --- a/sycl/test-e2e/Basic/device.cpp +++ b/sycl/test-e2e/Basic/device.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/device_equality.cpp b/sycl/test-e2e/Basic/device_equality.cpp index d4719d99661d6..1655476b16373 100644 --- a/sycl/test-e2e/Basic/device_equality.cpp +++ b/sycl/test-e2e/Basic/device_equality.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/diagnostics/device-check.cpp b/sycl/test-e2e/Basic/diagnostics/device-check.cpp index 25682a723db59..52e10db655dc5 100644 --- a/sycl/test-e2e/Basic/diagnostics/device-check.cpp +++ b/sycl/test-e2e/Basic/diagnostics/device-check.cpp @@ -18,7 +18,7 @@ //==---------------------------------------------------------------==// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/diagnostics/handler.cpp b/sycl/test-e2e/Basic/diagnostics/handler.cpp index d704335a6e5b9..70b5dbba1472c 100644 --- a/sycl/test-e2e/Basic/diagnostics/handler.cpp +++ b/sycl/test-e2e/Basic/diagnostics/handler.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include int main() { sycl::queue Queue([](sycl::exception_list ExceptionList) { diff --git a/sycl/test-e2e/Basic/enqueue_barrier.cpp b/sycl/test-e2e/Basic/enqueue_barrier.cpp index 6a2a118c2ff55..0eae48b74f28f 100644 --- a/sycl/test-e2e/Basic/enqueue_barrier.cpp 
+++ b/sycl/test-e2e/Basic/enqueue_barrier.cpp @@ -6,7 +6,7 @@ // UNSUPPORTED: windows #include -#include +#include int main() { sycl::context Context; diff --git a/sycl/test-e2e/Basic/event.cpp b/sycl/test-e2e/Basic/event.cpp index df4b44bed626b..000149bdc6807 100644 --- a/sycl/test-e2e/Basic/event.cpp +++ b/sycl/test-e2e/Basic/event.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include int main() { { diff --git a/sycl/test-e2e/Basic/event_async_exception.cpp b/sycl/test-e2e/Basic/event_async_exception.cpp index 9a9ef88a36896..20b7d5e71bc2f 100644 --- a/sycl/test-e2e/Basic/event_async_exception.cpp +++ b/sycl/test-e2e/Basic/event_async_exception.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include // This test checks that if there is a submit failure, the asynchronous // exception is associated with the returned event. diff --git a/sycl/test-e2e/Basic/fill_accessor.cpp b/sycl/test-e2e/Basic/fill_accessor.cpp index 0c17e02aa2ca0..7c3996b2fc4c2 100644 --- a/sycl/test-e2e/Basic/fill_accessor.cpp +++ b/sycl/test-e2e/Basic/fill_accessor.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/fill_accessor_pi.cpp b/sycl/test-e2e/Basic/fill_accessor_pi.cpp index a92cc44c0fa51..820c2fdeb66d7 100644 --- a/sycl/test-e2e/Basic/fill_accessor_pi.cpp +++ b/sycl/test-e2e/Basic/fill_accessor_pi.cpp @@ -4,7 +4,7 @@ // This test merely checks the use of the correct PI call. Its sister test // fill_accessor.cpp thoroughly checks the workings of the .fill() call. 
-#include +#include constexpr int width = 32; constexpr int height = 16; constexpr int depth = 8; diff --git a/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp b/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp index 5d42f4b37b715..aa54e5d6f7162 100644 --- a/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include int main() { sycl::queue Queue; diff --git a/sycl/test-e2e/Basic/get_backend.cpp b/sycl/test-e2e/Basic/get_backend.cpp index c3930b607b78a..374bcc0fba8f7 100644 --- a/sycl/test-e2e/Basic/get_backend.cpp +++ b/sycl/test-e2e/Basic/get_backend.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/get_info_aspect.cpp b/sycl/test-e2e/Basic/get_info_aspect.cpp index cea05384cc5a8..7576e0f98d11b 100644 --- a/sycl/test-e2e/Basic/get_info_aspect.cpp +++ b/sycl/test-e2e/Basic/get_info_aspect.cpp @@ -5,7 +5,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; bool failed = true; diff --git a/sycl/test-e2e/Basic/half_builtins.cpp b/sycl/test-e2e/Basic/half_builtins.cpp index 9637742cb67bb..e05b14ccb5555 100644 --- a/sycl/test-e2e/Basic/half_builtins.cpp +++ b/sycl/test-e2e/Basic/half_builtins.cpp @@ -2,7 +2,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/Basic/half_type.cpp b/sycl/test-e2e/Basic/half_type.cpp index b5769089abf43..110d428f82ee0 100644 --- a/sycl/test-e2e/Basic/half_type.cpp +++ b/sycl/test-e2e/Basic/half_type.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp b/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp index cd3a936475d5e..97810b92d570a 100644 --- 
a/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp +++ b/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp b/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp index 27883afc064c1..b68022ff45141 100644 --- a/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp +++ b/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp @@ -7,7 +7,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/handler/handler_mem_op.cpp b/sycl/test-e2e/Basic/handler/handler_mem_op.cpp index c3c8fd625e760..8fe99aa149951 100644 --- a/sycl/test-e2e/Basic/handler/handler_mem_op.cpp +++ b/sycl/test-e2e/Basic/handler/handler_mem_op.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 1e5903f76173f..c142b92e22587 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -7,7 +7,7 @@ #define SYCL2020_DISABLE_DEPRECATION_WARNINGS -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/host_defer_copy.cpp b/sycl/test-e2e/Basic/host_defer_copy.cpp index 468f748212375..f4711d6339920 100644 --- a/sycl/test-e2e/Basic/host_defer_copy.cpp +++ b/sycl/test-e2e/Basic/host_defer_copy.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include constexpr int N = 10 * 1024 * 1024; diff --git a/sycl/test-e2e/Basic/host_write_back.cpp b/sycl/test-e2e/Basic/host_write_back.cpp index 44291bbc039bd..ed21bdb61e098 100644 --- 
a/sycl/test-e2e/Basic/host_write_back.cpp +++ b/sycl/test-e2e/Basic/host_write_back.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/image/image_constructors.cpp b/sycl/test-e2e/Basic/image/image_constructors.cpp index 260a31a0378d9..585e0ff65a562 100644 --- a/sycl/test-e2e/Basic/image/image_constructors.cpp +++ b/sycl/test-e2e/Basic/image/image_constructors.cpp @@ -10,7 +10,7 @@ // Tests the constructors, size and get_range APIs. #include -#include +#include void no_delete(void *) {} diff --git a/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp b/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp index 3715ef536de7c..5c892d716fcc5 100644 --- a/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp +++ b/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/info.cpp b/sycl/test-e2e/Basic/info.cpp index 398fd161728c4..b6e9546a62d84 100644 --- a/sycl/test-e2e/Basic/info.cpp +++ b/sycl/test-e2e/Basic/info.cpp @@ -8,7 +8,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/interop/construction_ze.cpp b/sycl/test-e2e/Basic/interop/construction_ze.cpp index 7ef4c89c7695a..57f89f7d41059 100644 --- a/sycl/test-e2e/Basic/interop/construction_ze.cpp +++ b/sycl/test-e2e/Basic/interop/construction_ze.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include constexpr auto BE = sycl::backend::ext_oneapi_level_zero; diff --git a/sycl/test-e2e/Basic/interop/ze_context_device.cpp b/sycl/test-e2e/Basic/interop/ze_context_device.cpp index 104205858a339..9dd34e93ba54d 100644 --- a/sycl/test-e2e/Basic/interop/ze_context_device.cpp +++ b/sycl/test-e2e/Basic/interop/ze_context_device.cpp @@ -5,7 +5,7 @@ // This test 
checks that an interop Level Zero device is properly handled during // interop context construction. #include -#include +#include #include diff --git a/sycl/test-e2e/Basic/library_loading.cpp b/sycl/test-e2e/Basic/library_loading.cpp index 8a3244e6599af..59f0434c012da 100644 --- a/sycl/test-e2e/Basic/library_loading.cpp +++ b/sycl/test-e2e/Basic/library_loading.cpp @@ -4,7 +4,7 @@ // RUN: FileCheck --input-file=%t_trace_no_filter.txt --check-prefix=CHECK-NO-FILTER %s -dump-input=fail // Checks pi traces on library loading -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/linear-sub_group.cpp b/sycl/test-e2e/Basic/linear-sub_group.cpp index 169876874896d..df99664c8f9b9 100644 --- a/sycl/test-e2e/Basic/linear-sub_group.cpp +++ b/sycl/test-e2e/Basic/linear-sub_group.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/local_mem_type.cpp b/sycl/test-e2e/Basic/local_mem_type.cpp index 78aa2d9073fb0..7ae5b64ab8afa 100644 --- a/sycl/test-e2e/Basic/local_mem_type.cpp +++ b/sycl/test-e2e/Basic/local_mem_type.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Basic/multisource.cpp b/sycl/test-e2e/Basic/multisource.cpp index 056e9e0631b25..778fc7007e0a8 100644 --- a/sycl/test-e2e/Basic/multisource.cpp +++ b/sycl/test-e2e/Basic/multisource.cpp @@ -19,7 +19,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -o %t.fat // RUN: %{run} %t.fat -#include +#include #include diff --git a/sycl/test-e2e/Basic/multisource_spv_obj.cpp b/sycl/test-e2e/Basic/multisource_spv_obj.cpp index b05db159473a2..46bbfceac0fb9 100644 --- a/sycl/test-e2e/Basic/multisource_spv_obj.cpp +++ b/sycl/test-e2e/Basic/multisource_spv_obj.cpp @@ -28,7 +28,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -o 
%t.fat // RUN: %{run} %t.fat -#include +#include #include diff --git a/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp b/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp index c588874d3b6b6..097405ef5763b 100644 --- a/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp +++ b/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp @@ -11,7 +11,7 @@ // accessors in the past. Not relevant for images, which do not support offset // accessors. -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp b/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp index 3a7fd80f249f1..2b747d5a7cbc2 100644 --- a/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp +++ b/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp @@ -10,7 +10,7 @@ // RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s --check-prefix CHECK-ENABLED #include -#include +#include using namespace sycl; range<1> Range1 = {0}; diff --git a/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp b/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp index 35380ae45a428..74eae3ec9e3af 100644 --- a/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp +++ b/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp @@ -3,7 +3,7 @@ // RUN: %{build} -DLAMBDA_KERNEL=0 -DSYCL2020_DISABLE_DEPRECATION_WARNINGS -o %t2.out // RUN: %{run} %t2.out -#include +#include template class func { AccT acc; diff --git a/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp b/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp index 0a456ef02ea00..9e6d74dcb3f85 100644 --- a/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp +++ b/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp @@ -1,152 +1,164 @@ // REQUIRES: gpu // RUN: %{build} -o %t.out -// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s - +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s 
--check-prefix=CHECK-DEFAULT + +// RUN: %{build} -fsycl-range-rounding=force -o %t.out +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s --check-prefix=CHECK-DEFAULT + +// These tests test 3 things: +// +// 1. The user range is the same as the in kernel range (using BufRange) as +// reported by get_range(). +// 2. That the effective range is the same as the reported range (using +// BufCounter). i.e. check that the mapping of effective range to user range +// is "onto". +// 3. That every index in a 1, 2, or 3 dimension range is active the execution +// (using BufIndexes). i.e. check that the mapping of effective range to user +// range is "one-to-one". +// #include #include + using namespace sycl; +constexpr size_t MagicY = 33, MagicZ = 64; + range<1> Range1 = {0}; range<2> Range2 = {0, 0}; range<3> Range3 = {0, 0, 0}; +template class Kernel1; +template class Kernel2; +template class Kernel3; + void check(const char *msg, size_t v, size_t ref) { std::cout << msg << v << std::endl; assert(v == ref); } -int try_item1(size_t size) { - range<1> Size{size}; - int Counter = 0; - { - buffer, 1> BufRange(&Range1, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<1> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range1.get(0), size); - check("Counter = ", Counter, size); - return 0; +template void checkVec(vec a, vec b) { + static_assert(Dims == 1 || Dims == 2 || Dims == 3, + "Should only be use for 1, 2 or 3 dimensional vectors"); + assert(a[0] == b[0]); + if constexpr (Dims > 1) + assert(a[1] == b[1]); + if constexpr (Dims > 2) + assert(a[2] == b[2]); } -void try_item2(size_t size) { - range<2> Size{size, 10}; - int Counter = 0; - { - buffer, 1> BufRange(&Range2, 1); - buffer 
BufCounter(&Counter, 1); - queue myQueue; - - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<2> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0][0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range2.get(0), size); - check("Counter = ", Counter, size * 10); -} - -void try_item3(size_t size) { - range<3> Size{size, 10, 10}; - int Counter = 0; - { - buffer, 1> BufRange(&Range3, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<3> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0][0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range3.get(0), size); - check("Counter = ", Counter, size * 10 * 10); -} - -void try_id1(size_t size) { +template void try_1d_range(size_t size) { + using IndexCheckT = int; range<1> Size{size}; int Counter = 0; + std::vector ItemIndexes(Size[0]); { buffer, 1> BufRange(&Range1, 1); buffer BufCounter(&Counter, 1); + buffer BufIndexes(ItemIndexes); queue myQueue; myQueue.submit([&](handler &cgh) { auto AccRange = BufRange.get_access(cgh); auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](id<1> ID) { + auto AccIndexes = BufIndexes.get_access(cgh); + cgh.parallel_for>(Size, [=](KernelIdT I) { AccCounter[0].fetch_add(1); - AccRange[0] = ID[0]; + if constexpr (std::is_same_v>) + AccRange[0] = sycl::range<1>(I.get_range(0)); + int Idx = I[0]; + AccIndexes[Idx] = IndexCheckT(I[0]); }); }); myQueue.wait(); } + if constexpr (std::is_same_v>) { + check("Size seen by user at Dim 0 = ", Range1.get(0), size); + } check("Counter = ", Counter, size); + for (auto i = 0; i < Size[0]; ++i) { + checkVec<1>(vec(ItemIndexes[i]), vec(i)); + } + std::cout << "Correct kernel 
indexes used\n"; } -void try_id2(size_t size) { - range<2> Size{size, 10}; +template void try_2d_range(size_t size) { + using IndexCheckT = int2; + range<2> Size{size, MagicY}; int Counter = 0; + std::vector ItemIndexes(Size[0] * Size[1]); { buffer, 1> BufRange(&Range2, 1); buffer BufCounter(&Counter, 1); + buffer BufIndexes(ItemIndexes); queue myQueue; myQueue.submit([&](handler &cgh) { auto AccRange = BufRange.get_access(cgh); auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](id<2> ID) { + auto AccIndexes = BufIndexes.get_access(cgh); + cgh.parallel_for>(Size, [=](KernelIdT I) { AccCounter[0].fetch_add(1); - AccRange[0][0] = ID[0]; + if constexpr (std::is_same_v>) + AccRange[0] = sycl::range<2>(I.get_range(0), I.get_range(1)); + int Idx = I[0] * Size[1] + I[1]; + AccIndexes[Idx] = IndexCheckT(I[0], I[1]); }); }); myQueue.wait(); } - check("Counter = ", Counter, size * 10); + if constexpr (std::is_same_v>) { + check("Size seen by user at Dim 0 = ", Range2.get(0), Size[0]); + check("Size seen by user at Dim 1 = ", Range2.get(1), Size[1]); + } + check("Counter = ", Counter, size * MagicY); + for (auto i = 0; i < Size[0]; ++i) + for (auto j = 0; j < Size[1]; ++j) + checkVec<2>(ItemIndexes[i * Size[1] + j], IndexCheckT(i, j)); + std::cout << "Correct kernel indexes used\n"; } -void try_id3(size_t size) { - range<3> Size{size, 10, 10}; +template void try_3d_range(size_t size) { + using IndexCheckT = int3; + range<3> Size{size, MagicY, MagicZ}; int Counter = 0; + std::vector ItemIndexes(Size[0] * Size[1] * Size[2]); { buffer, 1> BufRange(&Range3, 1); buffer BufCounter(&Counter, 1); + buffer BufIndexes(ItemIndexes); queue myQueue; myQueue.submit([&](handler &cgh) { auto AccRange = BufRange.get_access(cgh); auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](id<3> ID) { + auto AccIndexes = BufIndexes.get_access(cgh); + cgh.parallel_for>(Size, [=](KernelIdT I) { AccCounter[0].fetch_add(1); - AccRange[0][0] = ID[0]; + if 
constexpr (std::is_same_v>) + AccRange[0] = + sycl::range<3>(I.get_range(0), I.get_range(1), I.get_range(2)); + int Idx = I[0] * Size[1] * Size[2] + I[1] * Size[2] + I[2]; + AccIndexes[Idx] = IndexCheckT(I[0], I[1], I[2]); }); }); myQueue.wait(); } - check("Counter = ", Counter, size * 10 * 10); + if constexpr (std::is_same_v>) { + check("Size seen by user at Dim 0 = ", Range3.get(0), Size[0]); + check("Size seen by user at Dim 1 = ", Range3.get(1), Size[1]); + check("Size seen by user at Dim 2 = ", Range3.get(2), Size[2]); + } + check("Counter = ", Counter, size * MagicY * MagicZ); + for (auto i = 0; i < Size[0]; ++i) + for (auto j = 0; j < Size[1]; ++j) + for (auto k = 0; k < Size[2]; ++k) + checkVec<3>(ItemIndexes[i * Size[1] * Size[2] + j * Size[2] + k], + IndexCheckT(i, j, k)); + std::cout << "Correct kernel indexes used\n"; } void try_unnamed_lambda(size_t size) { - range<3> Size{size, 10, 10}; + range<3> Size{size, MagicY, MagicZ}; int Counter = 0; { buffer, 1> BufRange(&Range3, 1); @@ -163,57 +175,71 @@ void try_unnamed_lambda(size_t size) { }); myQueue.wait(); } - check("Counter = ", Counter, size * 10 * 10); + check("Counter = ", Counter, size * MagicY * MagicZ); } int main() { - int x; - - x = 1500; - try_item1(x); - try_item2(x); - try_item3(x); - try_id1(x); - try_id2(x); - try_id3(x); + int x = 1500; + try_1d_range>(x); + try_1d_range>(x); + try_2d_range>(x); + try_2d_range>(x); + try_3d_range>(x); + try_3d_range>(x); try_unnamed_lambda(x); x = 256; - try_item1(x); - try_item2(x); - try_item3(x); - try_id1(x); - try_id2(x); - try_id3(x); + try_1d_range>(x); + try_1d_range>(x); + try_2d_range>(x); + try_2d_range>(x); + try_3d_range>(x); + try_3d_range>(x); try_unnamed_lambda(x); - - return 0; } -// CHECK: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Size seen by user = 1500 -// CHECK-NEXT: Counter = 1500 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Size seen by user = 1500 -// 
CHECK-NEXT: Counter = 15000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Size seen by user = 1500 -// CHECK-NEXT: Counter = 150000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 1500 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 15000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 150000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 150000 -// CHECK-NEXT: Size seen by user = 256 -// CHECK-NEXT: Counter = 256 -// CHECK-NEXT: Size seen by user = 256 -// CHECK-NEXT: Counter = 2560 -// CHECK-NEXT: Size seen by user = 256 -// CHECK-NEXT: Counter = 25600 -// CHECK-NEXT: Counter = 256 -// CHECK-NEXT: Counter = 2560 -// CHECK-NEXT: Counter = 25600 -// CHECK-NEXT: Counter = 25600 +// CHECK-DEFAULT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 1500 +// CHECK-DEFAULT-NEXT: Counter = 1500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 1500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 1500 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Counter = 49500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 49500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 1500 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Size seen 
by user at Dim 2 = 64 +// CHECK-DEFAULT-NEXT: Counter = 3168000 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 3168000 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 3168000 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 256 +// CHECK-DEFAULT-NEXT: Counter = 256 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 256 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 256 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Counter = 8448 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 8448 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 256 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 2 = 64 +// CHECK-DEFAULT-NEXT: Counter = 540672 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 540672 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 540672 diff --git a/sycl/test-e2e/Basic/partition_supported.cpp b/sycl/test-e2e/Basic/partition_supported.cpp index f75a983754112..1dfcec0eae158 100644 --- a/sycl/test-e2e/Basic/partition_supported.cpp +++ b/sycl/test-e2e/Basic/partition_supported.cpp @@ -14,7 +14,7 @@ not support the info::partition_affinity_domain provided, an exception with the */ #include -#include +#include /** returns true if the device supports a particular affinity domain */ static bool diff --git a/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp b/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp index 30f68dac1b558..418b8891956fe 100644 --- 
a/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp +++ b/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp @@ -4,7 +4,7 @@ // This test checks that the queue constructor throws a sycl::exception if the // device selected by the provided selector is not in the specified context. -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp b/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp index 14beee2134dd3..3bbf9cc74daf2 100644 --- a/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp +++ b/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp @@ -9,7 +9,7 @@ // //==------------------------------------------------------------------------==// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/queue/release.cpp b/sycl/test-e2e/Basic/queue/release.cpp index 87f3dc0ae8dbb..b56be05dc5308 100644 --- a/sycl/test-e2e/Basic/queue/release.cpp +++ b/sycl/test-e2e/Basic/queue/release.cpp @@ -3,7 +3,7 @@ // // XFAIL: hip_nvidia -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp b/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp index e9cfe42a458c0..76a5739e9ee35 100644 --- a/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp +++ b/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp @@ -3,7 +3,7 @@ // UNSUPPORTED: hip -#include +#include #define CHECK_INVALID_REQD_WORK_GROUP_SIZE(Dim, ...) \ { \ diff --git a/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp b/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp index b4bd89e6ee1fc..4c6f371814b69 100644 --- a/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp +++ b/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp @@ -5,7 +5,7 @@ // This test verifies SPIR-V based fat objects. 
-#include +#include int main() { sycl::buffer Buffer(4); diff --git a/sycl/test-e2e/Basic/submit_barrier.cpp b/sycl/test-e2e/Basic/submit_barrier.cpp index 763ed9f642547..0b15314d05246 100644 --- a/sycl/test-e2e/Basic/submit_barrier.cpp +++ b/sycl/test-e2e/Basic/submit_barrier.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include int main() { diff --git a/sycl/test-e2e/Basic/swizzle_op.cpp b/sycl/test-e2e/Basic/swizzle_op.cpp index d87fdf26c486e..a81b9cb16e54d 100644 --- a/sycl/test-e2e/Basic/swizzle_op.cpp +++ b/sycl/test-e2e/Basic/swizzle_op.cpp @@ -10,7 +10,7 @@ #define SYCL_SIMPLE_SWIZZLES #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/sycl-namespace.cpp b/sycl/test-e2e/Basic/sycl-namespace.cpp index f1f34a8ab57af..e4c8a64e611cf 100644 --- a/sycl/test-e2e/Basic/sycl-namespace.cpp +++ b/sycl/test-e2e/Basic/sycl-namespace.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { ::sycl::queue q; diff --git a/sycl/test-e2e/Basic/unused_pointer.cpp b/sycl/test-e2e/Basic/unused_pointer.cpp index 08701954a7ea4..89e24720ff738 100644 --- a/sycl/test-e2e/Basic/unused_pointer.cpp +++ b/sycl/test-e2e/Basic/unused_pointer.cpp @@ -10,7 +10,7 @@ //==----------------------------------------------------------------------==// #include -#include +#include using namespace std; diff --git a/sycl/test-e2e/Basic/vector/bool.cpp b/sycl/test-e2e/Basic/vector/bool.cpp index 00257baebe969..424bf0e5d83a0 100644 --- a/sycl/test-e2e/Basic/vector/bool.cpp +++ b/sycl/test-e2e/Basic/vector/bool.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include +#include constexpr int size = 2; diff --git a/sycl/test-e2e/Basic/vector/byte.cpp b/sycl/test-e2e/Basic/vector/byte.cpp index 36b16737140cd..a2d70d1a0ba31 100644 --- a/sycl/test-e2e/Basic/vector/byte.cpp +++ b/sycl/test-e2e/Basic/vector/byte.cpp @@ -12,7 +12,7 @@ // 
//===----------------------------------------------------------------------===// -#include +#include #include // std::byte #include // std::ignore diff --git a/sycl/test-e2e/Basic/vector/int-convert.cpp b/sycl/test-e2e/Basic/vector/int-convert.cpp index e09390d9c14e3..c9ff8acfaab0e 100644 --- a/sycl/test-e2e/Basic/vector/int-convert.cpp +++ b/sycl/test-e2e/Basic/vector/int-convert.cpp @@ -10,7 +10,7 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -DSYCL2020_DISABLE_DEPRECATION_WARNINGS -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/vector/operators.cpp b/sycl/test-e2e/Basic/vector/operators.cpp index 2648fd939e166..ddd1bdb2a2b66 100644 --- a/sycl/test-e2e/Basic/vector/operators.cpp +++ b/sycl/test-e2e/Basic/vector/operators.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #define SYCL_SIMPLE_SWIZZLES -#include +#include namespace s = sycl; template diff --git a/sycl/test-e2e/Complex/sycl_complex_include_order.cpp b/sycl/test-e2e/Complex/sycl_complex_include_order.cpp index 245d222c65209..92c4d341e7f3f 100644 --- a/sycl/test-e2e/Complex/sycl_complex_include_order.cpp +++ b/sycl/test-e2e/Complex/sycl_complex_include_order.cpp @@ -18,7 +18,7 @@ #include #endif -#include +#include #ifndef INCLUDE_BEFORE #include diff --git a/sycl/test-e2e/Config/config.cpp b/sycl/test-e2e/Config/config.cpp index d369d72f0cfe6..701fd8c2961f0 100644 --- a/sycl/test-e2e/Config/config.cpp +++ b/sycl/test-e2e/Config/config.cpp @@ -16,7 +16,7 @@ // RUN: %t.out // RUN: ls | not grep dot -#include +#include int main() { sycl::buffer Buf(sycl::range<1>{1}); diff --git a/sycl/test-e2e/Config/default-context.cpp b/sycl/test-e2e/Config/default-context.cpp index 53148feab12f5..11e3c2e42302c 100644 --- a/sycl/test-e2e/Config/default-context.cpp +++ b/sycl/test-e2e/Config/default-context.cpp @@ -3,7 
+3,7 @@ // RUN: env SYCL_ENABLE_DEFAULT_CONTEXTS=1 %t.out // RUN: env SYCL_ENABLE_DEFAULT_CONTEXTS=0 %t.out 1 -#include +#include // when not using the environment variable, we use the "default context" on both // Lin and Win. This test asserts it defaults correctly, and that the diff --git a/sycl/test-e2e/Config/device_selector.cpp b/sycl/test-e2e/Config/device_selector.cpp index 74ad5db48dafe..6ae5213b5810f 100644 --- a/sycl/test-e2e/Config/device_selector.cpp +++ b/sycl/test-e2e/Config/device_selector.cpp @@ -4,7 +4,7 @@ // Checks that no device is selected when no device of desired type is // available. -#include +#include #include diff --git a/sycl/test-e2e/Config/env_vars.cpp b/sycl/test-e2e/Config/env_vars.cpp index 2cf3f2efc2fcb..97ed4ef884d0f 100644 --- a/sycl/test-e2e/Config/env_vars.cpp +++ b/sycl/test-e2e/Config/env_vars.cpp @@ -15,7 +15,7 @@ #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index f890036511772..aaaaa27e11bb1 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -18,7 +18,7 @@ // CHECK: Passed #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Config/select_device.cpp b/sycl/test-e2e/Config/select_device.cpp index 2d62c1bf5e1e0..8cab3a707ad44 100644 --- a/sycl/test-e2e/Config/select_device.cpp +++ b/sycl/test-e2e/Config/select_device.cpp @@ -57,7 +57,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp b/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp index 6b82cd1a8adeb..aa185afc3da2e 100644 --- a/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include using namespace sycl; diff --git 
a/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp b/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp index 9710d58050447..12fa070e86507 100644 --- a/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp @@ -18,7 +18,7 @@ // This test is to check if a warning message is displayed when using the // enviroment variable SYCL_DEVICE_FILTER // TODO: Remove test when SYCL_DEVICE_FILTER is removed -#include +#include int main() { using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/platform.cpp b/sycl/test-e2e/DeprecatedFeatures/platform.cpp index 9f1829c91229a..528309688caa3 100644 --- a/sycl/test-e2e/DeprecatedFeatures/platform.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/platform.cpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp index a31df4ff8b8b3..79e8e04d20134 100644 --- a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp b/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp index 046ee7de6985c..d717030d368d9 100644 --- a/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp @@ -4,7 +4,7 @@ // RUN: %if any-device-is-acc %{ env SYCL_DEVICE_FILTER=acc %{run-unfiltered-devices} %t.out %} // TODO: Remove this test once SYCL_DEVICE_FILTER is removed. 
-#include +#include int main() { namespace dev_info = sycl::info::device; diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp index 4f13d16b4a694..61ca876b64d01 100644 --- a/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp +++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp @@ -3,7 +3,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp index e5fa01bd012bd..b58e1ce59899a 100644 --- a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp @@ -4,7 +4,7 @@ // // XFAIL: hip_nvidia -#include +#include class Kern1; class Kern2; diff --git a/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp b/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp index 26934041d0a87..59e02c7a37add 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp @@ -2,7 +2,9 @@ // RUN: %{run} %t.out #include -#include +#include + +#include using namespace sycl; diff --git a/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp b/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp index 37917e103c7a6..8c68c15d34352 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp @@ -4,9 +4,12 @@ // RUN: %{run} %t.out #include -#include #include +#include + +#include + using namespace sycl; constexpr float eps = 1e-6; diff --git a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp index c4eb526f23c04..31f3aeef3c565 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} 
%t.out -#include +#include + +#include #define TEST(FUNC, MARRAY_RET_TYPE, DIM, EXPECTED, ...) \ { \ diff --git a/sycl/test-e2e/DeviceLib/built-ins/nan.cpp b/sycl/test-e2e/DeviceLib/built-ins/nan.cpp index 0e60689a6b74f..4d723acbc415d 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/nan.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/nan.cpp @@ -5,10 +5,12 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} #include -#include - #include +#include + +#include + namespace s = sycl; using namespace std; diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp index 0bb04b5dc336a..5e12037006e91 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp index 81c60ed484806..634706c578ccf 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp index 314b6fd5515cc..bcd640d37ef54 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp @@ -4,7 +4,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp index aafb32379e479..ffd405ef5b21b 100644 --- 
a/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp @@ -3,7 +3,9 @@ // RUN: %{build} %{mathflags} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp index 3bce5c5dc2ee2..919e6c8d13dae 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp index 7a62735397e57..0eab2036a98b8 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp @@ -2,7 +2,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out %{mathflags} // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp index 11feb90d71d19..d817eacf61079 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp index 86c83fa6d956b..6fa9948e77c5c 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp 
b/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp index 739f7b020db55..f7d1aa1b55c33 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp index bbd80cc2599ce..5b6075f06f11d 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp index f895c85c1c90a..d5938a08e31a4 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp @@ -4,7 +4,9 @@ // RUN: %{build} -o %t.out %{mathflags} // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 77aeb312571d2..6391154feb820 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp b/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp index 7683c2eb97add..43a90d8d518bc 100644 --- a/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp +++ b/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp @@ -11,7 +11,7 @@ #include "imf_utils.hpp" #include -#include +#include int main() { sycl::queue device_queue(sycl::default_selector_v); diff --git 
a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp b/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp index ecd4561e98c9d..493cb2a0b1f7e 100644 --- a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 355d3f1ab2d55..94b91255a5f1b 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DeviceLib/math_override_test.cpp b/sycl/test-e2e/DeviceLib/math_override_test.cpp index c0feaf37f3094..491f21d76a486 100644 --- a/sycl/test-e2e/DeviceLib/math_override_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_override_test.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx -fsycl %s -o %t.out -fno-builtin #include #include -#include +#include #include "math_utils.hpp" namespace s = sycl; diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index d2e44f51cb48b..ccc368a21e668 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DotProduct/dot_product_int_test.cpp b/sycl/test-e2e/DotProduct/dot_product_int_test.cpp index fdd32c0127468..41749efd13055 100644 --- a/sycl/test-e2e/DotProduct/dot_product_int_test.cpp +++ b/sycl/test-e2e/DotProduct/dot_product_int_test.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include // Change if tests are added/removed static int testCount = 4; diff --git 
a/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp b/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp index c22fbcd8274c1..a83774d712e40 100644 --- a/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp +++ b/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include // Change if tests are added/removed static int testCount = 4; diff --git a/sycl/test-e2e/ESIMD/regression/double_conversion.cpp b/sycl/test-e2e/ESIMD/regression/double_conversion.cpp index 66bd9ee478658..654629e73e39c 100644 --- a/sycl/test-e2e/ESIMD/regression/double_conversion.cpp +++ b/sycl/test-e2e/ESIMD/regression/double_conversion.cpp @@ -12,7 +12,7 @@ // RUN: %{run} %t.out #include -#include +#include #include #include diff --git a/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp b/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp index 5f7a64a6b79b7..d449c11858f59 100644 --- a/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp +++ b/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include constexpr auto sycl_write = sycl::access::mode::write; diff --git a/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp b/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp index c6f3eb4d9ec52..5a2a26a5ad485 100644 --- a/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp +++ b/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp b/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp index f34caf146bc2f..bff75087186a6 100644 --- a/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp +++ b/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include using namespace ::sycl; diff --git 
a/sycl/test-e2e/ESIMD/regression/unused_load.cpp b/sycl/test-e2e/ESIMD/regression/unused_load.cpp index 2442ea3634588..1400cf5d31744 100644 --- a/sycl/test-e2e/ESIMD/regression/unused_load.cpp +++ b/sycl/test-e2e/ESIMD/regression/unused_load.cpp @@ -12,7 +12,7 @@ // copy_from invocation. #include -#include +#include #include diff --git a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp index 80963c5168a46..da3d34df48f37 100644 --- a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp +++ b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp @@ -9,7 +9,7 @@ // REQUIRES: level_zero,gpu #include -#include +#include using namespace sycl; using namespace std; diff --git a/sycl/test-e2e/Functor/functor_inheritance.cpp b/sycl/test-e2e/Functor/functor_inheritance.cpp index 926fcc2859955..bcb8f2d9394cc 100644 --- a/sycl/test-e2e/Functor/functor_inheritance.cpp +++ b/sycl/test-e2e/Functor/functor_inheritance.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include constexpr auto sycl_read_write = sycl::access::mode::read_write; constexpr auto sycl_device = sycl::access::target::device; diff --git a/sycl/test-e2e/Functor/kernel_functor.cpp b/sycl/test-e2e/Functor/kernel_functor.cpp index 09840ff6705db..de6cc865c5ea9 100644 --- a/sycl/test-e2e/Functor/kernel_functor.cpp +++ b/sycl/test-e2e/Functor/kernel_functor.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp b/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp index 92fda2c837dc6..b7a5c689de1ed 100644 --- a/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp +++ b/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp @@ -12,22 +12,22 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = 
_ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE0_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]]" // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \n"]; // CHECK-DAG: 
"0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp b/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp index e06deb61a205c..ddc8d9818370b 100644 --- a/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp +++ b/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp @@ -14,28 +14,28 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE0_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR4:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR5:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR6:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR4]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR7:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR8:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR9:]]\n7) Type: STD_Layout Ptr: 
0x[[#%x,ADDR10:]]\n"]; -// CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" -// CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE2]]" +// CHECK-DAG: "0x[[#%x,NODE3:]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR11:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR12:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR13:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR11]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR14:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR15:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR16:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR17:]]\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" -// CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n -// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR18:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR19:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR20:]]\n -// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR18]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR21:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR22:]]\n6) Type: STD_Layout Ptr: 
0x[[#%x,ADDR23:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE4:]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR18:]] Dst: 0x[[#%x,ADDR19:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]] // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR25:]] Dst: 0x[[#%x,ADDR26:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR20:]] Dst: 0x[[#%x,ADDR21:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR27:]] Dst: 0x[[#%x,ADDR28:]]\n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR22:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n +// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR22]]\n4) Type: Accessor Ptr: 
0x[[#%x,ADDR25:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR26:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR27:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR28:]]\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp index 6ef999f7d41a7..5729c36160acc 100644 --- a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp @@ -12,22 +12,22 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE0_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy 
Device-to-Device \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]]" // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp index c1697fc755ef2..313678b1b3932 100644 --- a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp @@ -17,26 +17,26 @@ // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR4]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR7:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR8:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR9:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR10:]]\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, 
label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR11:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR12:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR13:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR11]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR14:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR15:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR16:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR17:]]\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n -// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR18:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR19:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR20:]]\n -// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR18]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR21:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR22:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR18:]] Dst: 0x[[#%x,ADDR19:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> 
"0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]] // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR25:]] Dst: 0x[[#%x,ADDR26:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR20:]] Dst: 0x[[#%x,ADDR21:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR27:]] Dst: 0x[[#%x,ADDR28:]]\n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR22:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n +// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR22]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR25:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR26:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR27:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR28:]]\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/HierPar/hier_par_basic.cpp b/sycl/test-e2e/HierPar/hier_par_basic.cpp index 523a26d6e8ee6..a51a582b2e00f 100644 --- a/sycl/test-e2e/HierPar/hier_par_basic.cpp +++ b/sycl/test-e2e/HierPar/hier_par_basic.cpp @@ -14,7 +14,7 @@ 
#include #include -#include +#include using namespace sycl; @@ -59,7 +59,7 @@ struct PFWIFunctor { if (id >= wg_chunk) return; size_t wi_offset = wg_offset + id * wi_chunk; - size_t ub = sycl::min(wi_offset + wi_chunk, range_length); + size_t ub = std::min(wi_offset + wi_chunk, range_length); for (size_t ind = wi_offset; ind < ub; ind++) dev_ptr[ind] += v; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp index 8399cf410a094..1f60d9a95f2b8 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out 10 #include -#include +#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp index a1b1c778a8003..bac3f777c7be1 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp index 1a8c8faf1a674..bb315329195bf 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out -#include +#include sycl::event submit(sycl::queue &Q, sycl::buffer &B) { return Q.submit([&](sycl::handler &CGH) { diff --git a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp index 9afdd9ba43907..eef7cf70b21bb 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp @@ -4,7 +4,7 @@ // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows -#include +#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task.cpp b/sycl/test-e2e/HostInteropTask/host-task.cpp index b1e588b147e2a..6bae8850cc6c2 100644 --- a/sycl/test-e2e/HostInteropTask/host-task.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp index a16b903c09a84..ecec5787c1e1a 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp index ea7434dd31498..2cd70a2051dc0 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp index 58978e203d8a8..773a596fbe549 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp index 98d9b1ff1520d..715df0c00bc14 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp +++ 
b/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp index 47f0fd98311ae..6dfbe70e92030 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp index 395eb4af68c03..3ace63d902bb4 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp index e36a15cf1cbf4..7372765faf3fa 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp index 53b64cf2ba2fd..f1ce6e2f729c2 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp index 
0ce42082fe2ef..286e7ee9b860a 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp index 86151cdc7cab7..20e62c61813b2 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/asm_16_empty.cpp b/sycl/test-e2e/InlineAsm/asm_16_empty.cpp index 78321716777ac..a1d2ec9220763 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_empty.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_empty.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp b/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp index 00c3eb3830f38..f92912919c786 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp b/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp index 00c3eb3830f38..f92912919c786 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp b/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp index 812e30a7c9f56..07286b34b8d4d 100644 --- 
a/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp b/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp index 739feed41d6c9..9097a40131bbb 100644 --- a/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp +++ b/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp b/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp index 60d200d1c99df..a6a754289e533 100644 --- a/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp +++ b/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_float_add.cpp b/sycl/test-e2e/InlineAsm/asm_float_add.cpp index b6374c96d2010..f1d4d681b8edc 100644 --- a/sycl/test-e2e/InlineAsm/asm_float_add.cpp +++ b/sycl/test-e2e/InlineAsm/asm_float_add.cpp @@ -6,7 +6,7 @@ #include "include/asmhelper.h" #include #include -#include +#include #include using dataType = sycl::opencl::cl_float; diff --git a/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp b/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp index 948d983554c4e..b10aec8e47278 100644 --- a/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp +++ b/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp @@ -6,7 +6,7 @@ #include "include/asmhelper.h" #include #include -#include +#include #include constexpr float IMM_ARGUMENT = 0.5; diff --git a/sycl/test-e2e/InlineAsm/asm_float_neg.cpp b/sycl/test-e2e/InlineAsm/asm_float_neg.cpp index 43ad56e41222d..307a853fa407f 100644 --- a/sycl/test-e2e/InlineAsm/asm_float_neg.cpp +++ b/sycl/test-e2e/InlineAsm/asm_float_neg.cpp @@ -5,7 
+5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_float; diff --git a/sycl/test-e2e/InlineAsm/asm_if.cpp b/sycl/test-e2e/InlineAsm/asm_if.cpp index 679980a62aaf7..54a679a0509ea 100644 --- a/sycl/test-e2e/InlineAsm/asm_if.cpp +++ b/sycl/test-e2e/InlineAsm/asm_if.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include "include/asmhelper.h" -#include +#include using DataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp b/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp index d2a53f14691da..2506938a1bef8 100644 --- a/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp +++ b/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include constexpr int CONST_ARGUMENT = 0xabc; diff --git a/sycl/test-e2e/InlineAsm/asm_loop.cpp b/sycl/test-e2e/InlineAsm/asm_loop.cpp index 56518dffbf4e3..eccc02ae1cab7 100644 --- a/sycl/test-e2e/InlineAsm/asm_loop.cpp +++ b/sycl/test-e2e/InlineAsm/asm_loop.cpp @@ -6,7 +6,7 @@ #include "include/asmhelper.h" #include #include -#include +#include #include using DataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_mul.cpp b/sycl/test-e2e/InlineAsm/asm_mul.cpp index b038a79f8abb1..df759b75d2a05 100644 --- a/sycl/test-e2e/InlineAsm/asm_mul.cpp +++ b/sycl/test-e2e/InlineAsm/asm_mul.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_no_operands.cpp b/sycl/test-e2e/InlineAsm/asm_no_operands.cpp index 5b770bb5baa0d..bf9df8ca40ae2 100644 --- a/sycl/test-e2e/InlineAsm/asm_no_operands.cpp +++ b/sycl/test-e2e/InlineAsm/asm_no_operands.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include "include/asmhelper.h" -#include +#include class no_operands_kernel; int main() { diff --git a/sycl/test-e2e/InlineAsm/asm_no_output.cpp b/sycl/test-e2e/InlineAsm/asm_no_output.cpp index 
0622f4f8edb38..3a130f1e4b819 100644 --- a/sycl/test-e2e/InlineAsm/asm_no_output.cpp +++ b/sycl/test-e2e/InlineAsm/asm_no_output.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp b/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp index db2659d8e7077..077446cf72859 100644 --- a/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp +++ b/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_switch.cpp b/sycl/test-e2e/InlineAsm/asm_switch.cpp index ccd81fd34af45..4f96a55b554d2 100644 --- a/sycl/test-e2e/InlineAsm/asm_switch.cpp +++ b/sycl/test-e2e/InlineAsm/asm_switch.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include "include/asmhelper.h" -#include +#include using DataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp index 71343f736b370..e8a1c45ecf903 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp index c84eaa0f4ce0a..840b21d9140c1 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp index 1911e9b4d0e47..dc42670d4781d 100644 --- 
a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp index 415260686e36a..93cbabf425188 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/KernelAndProgram/build-log.cpp b/sycl/test-e2e/KernelAndProgram/build-log.cpp index cd57692a8160a..89ec9960d197f 100644 --- a/sycl/test-e2e/KernelAndProgram/build-log.cpp +++ b/sycl/test-e2e/KernelAndProgram/build-log.cpp @@ -14,7 +14,7 @@ //===--------------------------------------------------------------===// #include -#include +#include SYCL_EXTERNAL void symbol_that_does_not_exist(); diff --git a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp index 90561a48c9edb..8b84140a28d77 100644 --- a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp @@ -5,7 +5,7 @@ // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -DGPU -o %t_gpu.out // RUN: env SYCL_CACHE_PERSISTENT=1 %{run} %if gpu %{ %t_gpu.out %} %else %{ %t.out %} -#include +#include SYCL_EXTERNAL void undefined(); diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp index 96b6be8f3df2a..23a6acfcdf863 100644 --- a/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp @@ -5,7 +5,7 @@ // kernel_bundle::get_kernel() is the same as a Kernel // object retrieved via other methods. 
-#include +#include class KernelA; diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp index dad3872079b5d..9a2a8b5c5d046 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp @@ -12,7 +12,7 @@ // //===--------------------------------------------------------------===// -#include +#include class MyKernel; diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp index f904f62b0d825..8be17e24d229d 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp @@ -25,7 +25,7 @@ // CHECK: ---> piProgramLink // CHECK: ZE ---> zeModuleCreate -#include +#include class MyKernel; diff --git a/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp b/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp index f800f79144ad8..fd8b77de0846e 100644 --- a/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp +++ b/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp @@ -14,7 +14,7 @@ // RUN: %{run} %t_off.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp b/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp index ffb48e30bbbbe..70c5ce2bc64a5 100644 --- a/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp +++ b/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp @@ -11,7 +11,7 @@ // CHECK-WITHOUT-NOT: -igc_opts // CHECK-WITHOUT: ) ---> pi_result : PI_SUCCESS -#include +#include int main() { sycl::buffer Buffer(4); diff --git a/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp b/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp index 856d1510edfc5..7b1cd268bd0d6 100644 --- a/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp +++ 
b/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp @@ -71,7 +71,7 @@ // RESULT2: Result (1): 2 // RESULT2: Result (2): 2 -#include +#include int main() { for (int i = 0; i < 3; ++i) { diff --git a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp index 2a5c72161e46c..047f5bfbb970b 100644 --- a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp +++ b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp @@ -12,7 +12,7 @@ // //===--------------------------------------------------------------===// -#include +#include SYCL_EXTERNAL void symbol_that_does_not_exist(); diff --git a/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp b/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp index d8c0124a7f9cb..4285353e018b4 100644 --- a/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp +++ b/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp b/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp index 0234519fd48c8..6559532bd6c1d 100644 --- a/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp +++ b/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/non-standard-layout.cpp b/sycl/test-e2e/KernelParams/non-standard-layout.cpp index ac28aa1ea7635..39d6640b97c4f 100644 --- a/sycl/test-e2e/KernelParams/non-standard-layout.cpp +++ b/sycl/test-e2e/KernelParams/non-standard-layout.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/struct_kernel_param.cpp b/sycl/test-e2e/KernelParams/struct_kernel_param.cpp index d6585602774c5..f204ee0ccaa8c 100644 --- a/sycl/test-e2e/KernelParams/struct_kernel_param.cpp +++ 
b/sycl/test-e2e/KernelParams/struct_kernel_param.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/union_kernel_param.cpp b/sycl/test-e2e/KernelParams/union_kernel_param.cpp index d6ee0b503ae6a..863be92971e27 100644 --- a/sycl/test-e2e/KernelParams/union_kernel_param.cpp +++ b/sycl/test-e2e/KernelParams/union_kernel_param.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include -#include +#include union TestUnion { public: diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp new file mode 100644 index 0000000000000..239b018a9f30f --- /dev/null +++ b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp @@ -0,0 +1,236 @@ +// Test that llvm.bitreverse is lowered correctly by llvm-spirv. + +// UNSUPPORTED: hip || cuda + +// Make dump directory. +// RUN: rm -rf %t.spvdir && mkdir %t.spvdir + +// Ensure that SPV_KHR_bit_instructions is disabled so that translator +// will lower llvm.bitreverse.* intrinsics instead of relying on SPIRV +// BitReverse instruction. +// Also build executable with SPV dump. +// RUN: %{build} -o %t.out -O2 -Xspirv-translator --spirv-ext=-SPV_KHR_bit_instructions -fsycl-dump-device-code=%t.spvdir + +// Rename SPV file to explictly known filename. +// RUN: mv %t.spvdir/*.spv %t.spvdir/dump.spv + +// Convert to text. +// RUN: llvm-spirv -to-text %t.spvdir/dump.spv + +// Check that all lowerings are done by llvm-spirv. +// RUN: cat %t.spvdir/dump.spt | FileCheck %s --check-prefix CHECK-SPV --implicit-check-not=BitReverse + +// Execute to ensure lowering has correct functionality. +// RUN: %{run} %t.out + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// TODO FIXME Change NOT_READY to RUN when llvm.bitreverse.* is supported. + +// Build without lowering explicitly disabled. 
+// NOT_READY: %{build} -o %t.bitinstructions.out + +// Execution should still be correct. +// NOT_READY: %{run} %t.bitinstructions.out + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i32" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i64" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i32" + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i32" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i64" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i16" Export +// CHECK-SPV: LinkageAttributes 
"llvm_bitreverse_v4i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i32" Export + +#include +#include +#include +#include "common.hpp" + +using namespace sycl; + +template +__attribute__((optnone, noinline)) TYPE reference_reverse(TYPE a, const int bitlength) { + TYPE ret = 0; + for (auto i = 0; i>=1; + } + return ret; +} + +template +__attribute__((noinline)) TYPE reverse(TYPE a, int bitlength) { + if (bitlength==8) { + // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 8-bit types. + a = ((0x55 & a) << 1) | (0x55 & (a >> 1)); + a = ((0x33 & a) << 2) | (0x33 & (a >> 2)); + return (a << 4) | (a >> 4); + } else if (bitlength==16) { + // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 16-bit types. 
+ a = ((0x5555 & a) << 1) | (0x5555 & (a >> 1)); + a = ((0x3333 & a) << 2) | (0x3333 & (a >> 2)); + a = ((0x0F0F & a) << 4) | (0x0F0F & (a >> 4)); + return (a << 8) | (a >> 8); + } else + return __builtin_elementwise_bitreverse(a); +} + +template class BitreverseTest; + +#define NUM_TESTS 1024 + +template +void do_scalar_bitreverse_test() { + queue q; + + TYPE *Input = (TYPE *) malloc_shared(sizeof(TYPE) * NUM_TESTS, q.get_device(), q.get_context()); + TYPE *Output = (TYPE *) malloc_shared(sizeof(TYPE) * NUM_TESTS, q.get_device(), q.get_context()); + + for (unsigned i=0; i(); + q.submit([=](handler &cgh) { + cgh.single_task> ([=]() { + for (unsigned i=0; i +void do_vector_bitreverse_test() { + queue q; + + VTYPE *Input = (VTYPE *) malloc_shared(sizeof(VTYPE) * NUM_TESTS, q.get_device(), q.get_context()); + VTYPE *Output = (VTYPE *) malloc_shared(sizeof(VTYPE) * NUM_TESTS, q.get_device(), q.get_context()); + + for (unsigned i=0; i::type>(); + + q.submit([=](handler &cgh) { + cgh.single_task> ([=]() { + for (unsigned i=0; i(); + do_scalar_bitreverse_test(); + do_scalar_bitreverse_test(); + do_scalar_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + return 0; +} + diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/common.hpp b/sycl/test-e2e/LLVMIntrinsicLowering/common.hpp new file mode 100644 index 0000000000000..45c0a99840d93 --- /dev/null +++ b/sycl/test-e2e/LLVMIntrinsicLowering/common.hpp @@ -0,0 +1,27 @@ +//==------- common.hpp - DPC++ ESIMD on-device test ------------------------==// +// +// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +template class KernelID; + +template T get_rand() { + using Tuint = std::conditional_t< + sizeof(T) == 1, uint8_t, + std::conditional_t< + sizeof(T) == 2, uint16_t, + std::conditional_t>>>; + Tuint v = rand(); + if constexpr (sizeof(Tuint) > 4) + v = (v << 32) | rand(); + return sycl::bit_cast(v); +} diff --git a/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp b/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp index f602b4c8ec071..e8183ce41d3d2 100644 --- a/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp +++ b/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -fsyntax-only -o %t.out -#include +#include namespace syclex = sycl::ext::oneapi::experimental; #ifdef SYCL_EXT_ONEAPI_ROOT_GROUP diff --git a/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp b/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp index e7e42db4e4b3b..161726d40f6e6 100644 --- a/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp +++ b/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp @@ -9,7 +9,7 @@ // devices, not sub-devices. 
#include -#include +#include using namespace sycl; using namespace std; diff --git a/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp b/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp index f102fbb9fdb13..758584c8b5008 100644 --- a/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp +++ b/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp @@ -18,7 +18,7 @@ // RUN: env ONEAPI_DEVICE_SELECTOR="*:gpu" %{run-unfiltered-devices} %t.out 1 // RUN: %{run-unfiltered-devices} %t.out 1 -#include +#include using namespace sycl; int main(int Argc, const char *Argv[]) { diff --git a/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp b/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp index 50521e77b3577..ad2fe4ce2371a 100644 --- a/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp +++ b/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp @@ -8,7 +8,7 @@ // re-used by other backends is kept in online_compiler_common.hpp file. #include -#include +#include #include diff --git a/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp b/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp index 2bd800460028f..4cc583b4c691a 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include int main() { sycl::queue Queue; diff --git a/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp b/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp index f6708382b40e1..969095bf5f552 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp @@ -5,7 +5,7 @@ // Tests that aspect::fp64 requirements are affected by optimizations. 
-#include +#include int main() { sycl::queue Q; diff --git a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp index a0cd2e5a4447d..535c3647793c7 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp @@ -4,7 +4,7 @@ // RUN: %{build} -O0 -o %t.out // RUN: %{run} %t.out -#include +#include [[sycl::device_has(sycl::aspect::cpu)]] void foo(){}; [[sycl::device_has(sycl::aspect::gpu)]] void bar(){}; diff --git a/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp b/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp index 2284d1a6758fc..3cc05cd101bda 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp @@ -5,7 +5,7 @@ // i.e. there are no exceptions thrown about aspects fp16 or fp64 being // unsuppored on device. 
-#include +#include void foo(sycl::half &value) { value += sycl::half(1.0f); } diff --git a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp index 15ebc62bfa792..3f38a4df3023d 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp @@ -5,7 +5,7 @@ #ifdef SOURCE1 #include -#include +#include using accT = sycl::accessor; constexpr int value = 42; @@ -34,7 +34,7 @@ int main() { #endif // SOURCE1 #ifdef SOURCE2 -#include +#include constexpr int value = 42; diff --git a/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp b/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp index 49a891105ea0c..aec4cc5297c8f 100644 --- a/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp +++ b/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s #include -#include -#include + +#include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp b/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp index 2ae7d43c46665..8b66c121522e9 100644 --- a/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp +++ b/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp @@ -13,7 +13,7 @@ // RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s #include -#include + #include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp b/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp index aee595729ce5c..e6824a5fc0ecb 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp @@ -5,7 +5,7 @@ // Test 2D and 3D interoperability buffers for the Level Zero backend. 
#include "interop-level-zero-buffer-helpers.hpp" -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp b/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp index 877c54870f3ad..7fa9c9895a38e 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp @@ -29,7 +29,7 @@ // CHECK-NOT: zeMemFree #include "interop-level-zero-buffer-helpers.hpp" -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp b/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp index 4829f8f651633..9f097992de68d 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp @@ -8,7 +8,7 @@ // Test interoperability buffer for the Level Zer backend #include -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp index 2cb4a9ee481e5..f769052dd3d00 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp @@ -13,7 +13,7 @@ // SYCL #include "interop-level-zero-buffer-helpers.hpp" #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-image.cpp b/sycl/test-e2e/Plugin/interop-level-zero-image.cpp index 2b0b36112d2a1..cce3f9d4e9980 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-image.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-image.cpp @@ -3,7 +3,8 @@ // RUN: %{run} %t.out // spir-v gen for legacy images at O0 not working -// UNSUPPORTED: O0 +// UNSUPPORTED: gpu-intel-dg2 +// This test is currently broken see https://github.com/intel/llvm/issues/13090 // This test verifies that make_image is working for 1D, 2D and 3D images. 
// We instantiate an image with L0, set its body, then use a host accessor to diff --git a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp index dcedc9b1bfcfc..befdb27f439e2 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp @@ -5,7 +5,7 @@ // Test for Level Zero interop API where SYCL RT doesn't take ownership #include -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero.cpp b/sycl/test-e2e/Plugin/interop-level-zero.cpp index dc796c02aebf6..fe9ce76da6a0d 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero.cpp @@ -6,7 +6,7 @@ // Test for Level Zero interop API #include -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp b/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp index 59bb49348f0a7..dea18c8e3330d 100644 --- a/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp +++ b/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp @@ -3,7 +3,7 @@ // RUN: %{run} %t.out 2>&1 | FileCheck %s #include -#include +#include // Check for queries of USM capabilities. 
// All supported L0 devices have these capabilities currently: diff --git a/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp b/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp index 76890edaea93d..d4792c9177a28 100644 --- a/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp +++ b/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include int main(void) { diff --git a/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp b/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp index bb31d169bcb24..86518fde9343d 100644 --- a/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp +++ b/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp @@ -10,7 +10,7 @@ // CHECK: Memory bus width #include -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp b/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp index c7603c038645f..b1aa94d280682 100644 --- a/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp +++ b/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp @@ -30,7 +30,7 @@ // clang-format on #include -#include +#include int main(int argc, char **argv) { sycl::queue queue(sycl::gpu_selector_v); diff --git a/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp b/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp index 25a97c4e26238..9c19691fd872a 100644 --- a/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp +++ b/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp @@ -3,18 +3,25 @@ // RUN: %{build} -o %t.out -// RUN: env ZEX_NUMBER_OF_CCS=0:4 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// TODO - at this time ZEX_NUMBER_OF_CCS is not working with FLAT hierarchy, +// which is the new default on PVC. Once it is supported, we'll test on both. 
+// In the interim, these are the environment vars that must be set to get cslice +// or the extra level of partition_by_affinity_domain with the "EXPOSE_" env +// var. +// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 -// RUN: env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ -// RUN: ZEX_NUMBER_OF_CCS=0:4 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// RUN: %{setup_env} env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC + +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ +// RUN: UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC // Same, but without using immediate commandlists: -// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 ZEX_NUMBER_OF_CCS=0:4 \ +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 \ // RUN: UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC -// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ -// RUN: ZEX_NUMBER_OF_CCS=0:4 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ +// RUN: UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC #include diff --git a/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp b/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp index e04f280a8e986..0e3752733291a 100644 --- a/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp +++ b/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp @@ -1,10 +1,18 @@ // REQUIRES: aspect-ext_intel_device_id // REQUIRES: level_zero // RUN: %{build} -o %t.out -// RUN: env ZEX_NUMBER_OF_CCS=0:4 env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s 
--check-prefixes=CHECK-PVC + +// TODO - at this time ZEX_NUMBER_OF_CCS is not working with FLAT hierarchy, +// which is the new default on PVC. Once it is supported, we'll test on both. +// In the interim, these are the environment vars that must be set to get cslice +// or the extra level of partition_by_affinity_domain with the "EXPOSE_" env +// var. +// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 + +// RUN: %{setup_env} env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC // // Same with Immediate CommandLists -// RUN: env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 env ZEX_NUMBER_OF_CCS=0:4 env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC #include diff --git a/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp b/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp index 51e692c9329b2..153af5c5a1f9d 100644 --- a/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp +++ b/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp @@ -13,7 +13,7 @@ // CHECK-ONE-CMDLIST: zeCommandListCreateImmediate = 2 // CHECK-PER-THREAD-CMDLIST: zeCommandListCreateImmediate = 4 -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/level_zero_queue_create.cpp b/sycl/test-e2e/Plugin/level_zero_queue_create.cpp index 598ec12e8f9e0..a0330c4d5e56d 100644 --- a/sycl/test-e2e/Plugin/level_zero_queue_create.cpp +++ b/sycl/test-e2e/Plugin/level_zero_queue_create.cpp @@ -8,7 +8,7 @@ // with the embedded UR_L0_LEAKS_DEBUG=1 testing capability. 
// -#include +#include int main(int argc, char **argv) { sycl::queue Q; diff --git a/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp b/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp index 60287e71cee85..d9333e176acd7 100644 --- a/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp +++ b/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp @@ -1,8 +1,15 @@ // REQUIRES: gpu-intel-pvc, level_zero // RUN: %{build} %level_zero_options -o %t.out -// RUN: env UR_L0_DEBUG=1 env ZEX_NUMBER_OF_CCS=0:4 %{run} %t.out 2>&1 | FileCheck %s -// RUN: env ZEX_NUMBER_OF_CCS=0:4 %{run} %t.out + +// TODO - at this time ZEX_NUMBER_OF_CCS is not working with FLAT hierarchy, +// which is the new default on PVC. Once it is supported, we'll test on both. +// In the interim, these are the environment vars that must be used in +// conjunction with ZEX_NUMBER_OF_CCS +// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 + +// RUN: %{setup_env} env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: %{setup_env} %{run} %t.out // Check that queues created on sub-sub-devices are going to specific compute // engines: diff --git a/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp b/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp index 1171b252f2ff3..40cfee5b66c0e 100644 --- a/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp +++ b/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp @@ -30,7 +30,7 @@ #define LENGTH 10 -#include +#include using namespace sycl; void update_d2_data(queue &q) { diff --git a/sycl/test-e2e/Plugin/sycl-partition-info.cpp b/sycl/test-e2e/Plugin/sycl-partition-info.cpp index c52fb2c1ffc44..e6f911f541d3e 100644 --- a/sycl/test-e2e/Plugin/sycl-partition-info.cpp +++ b/sycl/test-e2e/Plugin/sycl-partition-info.cpp @@ -5,7 +5,7 @@ // supports ensure we are only returning SYCL standard partition properties. 
#include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp b/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp index e724855d296c3..7a14c4067b898 100644 --- a/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp +++ b/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp @@ -6,7 +6,7 @@ // Test to help identify that E2E testing correctly detects and uses the preview // library. -#include +#include namespace sycl { inline namespace _V1 { diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp index e454a85151c26..d7b5d55161107 100644 --- a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp @@ -19,7 +19,7 @@ // -O2 | -ze-opt-level=2 // -O3 | -ze-opt-level=2 -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp index d0c49dc4fc698..c5c9fc6074dab 100644 --- a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp @@ -22,7 +22,7 @@ // -O2 | /* no option */ // -O3 | /* no option */ -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/README.md b/sycl/test-e2e/README.md index 2564d7172a5a6..0bc6b8f1e7896 100644 --- a/sycl/test-e2e/README.md +++ b/sycl/test-e2e/README.md @@ -8,6 +8,7 @@ * [Creating or modifying tests](#creating-or-modifying-tests) * [LIT feature checks](#lit-feature-checks) * [llvm-lit parameters](#llvm-lit-parameters) + * [sycl/detail/core.hpp header file](#sycl/detail/core.hpp) # Overview This directory contains SYCL-related tests distributed in subdirectories based @@ -282,3 +283,15 @@ llvm-lit --param dpcpp_compiler=path/to/clang++ 
--param dump_ir=True \ SYCL/External/RSBench ``` +## sycl/detail/core.hpp + +While the SYCL specification dictates that the only user-visible interface is +the `<sycl/sycl.hpp>` header file, we found out that as the implementation and +multiple extensions grew, the compile time was getting worse and worse, +negatively affecting our CI turnaround time. We are just starting some efforts +to create a much smaller set of basic features needed for every SYCL end-to-end +test/program so that this issue could be somewhat mitigated. This activity is in +its early stage and NO production code should rely on it. It WILL be changed as +we go with our experiments. For any code outside of this project, only the +`<sycl/sycl.hpp>` header must be used until we feel confident to propose an extension +that can provide an alternative. diff --git a/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp b/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp index 55d7b7e8387e6..b25ff9c8de49b 100644 --- a/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp +++ b/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp @@ -1,6 +1,6 @@ // RUN: not %{build} -fsyntax-only -o %t.out -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/DAE-separate-compile.cpp b/sycl/test-e2e/Regression/DAE-separate-compile.cpp index f05095f36cc7e..a6c8ae885cb39 100644 --- a/sycl/test-e2e/Regression/DAE-separate-compile.cpp +++ b/sycl/test-e2e/Regression/DAE-separate-compile.cpp @@ -13,7 +13,7 @@ #include -#include +#include int main() { constexpr int THE_ANSWER = 42; diff --git a/sycl/test-e2e/Regression/atomic_load.cpp b/sycl/test-e2e/Regression/atomic_load.cpp index 15aaefd4a6ea8..8771db61db935 100644 --- a/sycl/test-e2e/Regression/atomic_load.cpp +++ b/sycl/test-e2e/Regression/atomic_load.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl; template class foo; diff --git a/sycl/test-e2e/Regression/cache_test.cpp b/sycl/test-e2e/Regression/cache_test.cpp index 
1d2255ee8885f..3d25323c88026 100644 --- a/sycl/test-e2e/Regression/cache_test.cpp +++ b/sycl/test-e2e/Regression/cache_test.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include using namespace sycl::ext::oneapi; diff --git a/sycl/test-e2e/Regression/device_num.cpp b/sycl/test-e2e/Regression/device_num.cpp index 4bb1bda94d114..efa7a82ef25f7 100644 --- a/sycl/test-e2e/Regression/device_num.cpp +++ b/sycl/test-e2e/Regression/device_num.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include using namespace sycl; using namespace std; diff --git a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp index 2fcd96e8ad903..79ef8c29caa1e 100644 --- a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp +++ b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Regression/empty_accessor_use.cpp b/sycl/test-e2e/Regression/empty_accessor_use.cpp index 977d687a3d7a5..869a5526c389e 100644 --- a/sycl/test-e2e/Regression/empty_accessor_use.cpp +++ b/sycl/test-e2e/Regression/empty_accessor_use.cpp @@ -4,7 +4,7 @@ // Tests that 3D accessors with 0 elements are allowed to be captured in a // kernel. 
-#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp b/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp index 8a24469b298b5..580d99cfc15f1 100644 --- a/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp +++ b/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-fp16 // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/Regression/fsycl-host-compiler.cpp b/sycl/test-e2e/Regression/fsycl-host-compiler.cpp index 92375484109bc..bb08f7955da1f 100644 --- a/sycl/test-e2e/Regression/fsycl-host-compiler.cpp +++ b/sycl/test-e2e/Regression/fsycl-host-compiler.cpp @@ -11,7 +11,7 @@ // // Uses -fsycl-host-compiler= on a simple test, requires 'g++' -#include +#include #ifndef DEFINE_CHECK #error predefined macro not set diff --git a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp index 4e5635a89eb9a..eb910425ea8d4 100644 --- a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp +++ b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/global_queue.cpp b/sycl/test-e2e/Regression/global_queue.cpp index b443b88de3c8f..169b5ce6872c8 100644 --- a/sycl/test-e2e/Regression/global_queue.cpp +++ b/sycl/test-e2e/Regression/global_queue.cpp @@ -9,7 +9,7 @@ // use reverse order to call destructors, and low level runtime's objects are // destroyed before global queue in user code. 
-#include +#include sycl::queue Queue; diff --git a/sycl/test-e2e/Regression/host_unified_memory.cpp b/sycl/test-e2e/Regression/host_unified_memory.cpp index 77a6fecb6c38f..a679111611d29 100644 --- a/sycl/test-e2e/Regression/host_unified_memory.cpp +++ b/sycl/test-e2e/Regression/host_unified_memory.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp b/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp index 0d444e07ba487..ee89b993eedce 100644 --- a/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp +++ b/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/isordered.cpp b/sycl/test-e2e/Regression/isordered.cpp index 8d8fa28919a4b..7d3d0d32b688c 100644 --- a/sycl/test-e2e/Regression/isordered.cpp +++ b/sycl/test-e2e/Regression/isordered.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUNx: %{run} %t.out -#include +#include + +#include int main() { sycl::range<1> ndRng(3); diff --git a/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp b/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp index 21d20fd719228..799a69854d4e9 100644 --- a/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp +++ b/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp @@ -7,7 +7,7 @@ // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows -#include +#include class KernelName; diff --git a/sycl/test-e2e/Regression/kernel_name_class.cpp b/sycl/test-e2e/Regression/kernel_name_class.cpp index a6a9b0d6da084..e777a22309e73 100644 --- a/sycl/test-e2e/Regression/kernel_name_class.cpp +++ b/sycl/test-e2e/Regression/kernel_name_class.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #define GOLD 10 diff --git a/sycl/test-e2e/Regression/kernel_unnamed.cpp b/sycl/test-e2e/Regression/kernel_unnamed.cpp index 994e2ff266dc2..bff1a8b82cc70 100644 --- a/sycl/test-e2e/Regression/kernel_unnamed.cpp +++ b/sycl/test-e2e/Regression/kernel_unnamed.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #define GOLD 10 static int NumTestCases = 0; diff --git a/sycl/test-e2e/Regression/mad_sat.cpp b/sycl/test-e2e/Regression/mad_sat.cpp index 117a7104ba06c..570b2307a3ae4 100644 --- a/sycl/test-e2e/Regression/mad_sat.cpp +++ b/sycl/test-e2e/Regression/mad_sat.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUNx: %{run} %t.out -#include +#include + +#include int main() { sycl::queue testQueue; diff --git a/sycl/test-e2e/Regression/multiple-targets.cpp b/sycl/test-e2e/Regression/multiple-targets.cpp index 13f136f8aa64a..d21f7c458aec2 100644 --- a/sycl/test-e2e/Regression/multiple-targets.cpp +++ b/sycl/test-e2e/Regression/multiple-targets.cpp @@ -2,17 +2,17 @@ // It tests if the target triples can be specified with any order. // The test is repeated for per_kernel device code splitting. 
// -// REQUIRES: CUDA || HIP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spirv64 -o %t.out %s +// REQUIRES: cuda || hip || native_cpu +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 -o %t.out %s // RUN: %{run} %t.out // -// RUN: %clangxx -fsycl -fsycl-targets=spirv64,%{sycl_triple} -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -o %t.out %s // RUN: %{run} %t.out // -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spirv64 -fsycl-device-code-split=per_kernel -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 -fsycl-device-code-split=per_kernel -o %t.out %s // RUN: %{run} %t.out // -// RUN: %clangxx -fsycl -fsycl-targets=spirv64,%{sycl_triple} -fsycl-device-code-split=per_kernel -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -fsycl-device-code-split=per_kernel -o %t.out %s // RUN: %{run} %t.out // // XFAIL: hip_nvidia diff --git a/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp b/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp index f4408139625e4..6c8c41292c2b8 100644 --- a/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp +++ b/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp @@ -4,7 +4,7 @@ // Note: Tests that non-trivially copyable types marked as device-copyable are // copied and used correctly on the device. -#include +#include #include diff --git a/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp b/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp index 07cb1f655d7b0..106460629b242 100644 --- a/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp +++ b/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp @@ -6,7 +6,7 @@ // NOTE: Tests that debugging information can be generated for all integral // optimization levels. 
-#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp b/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp index 5856e6f084c29..f7c9aad155b43 100644 --- a/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp +++ b/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp @@ -7,7 +7,7 @@ // NOTE: Tests that debugging information can be generated for all special-name // optimization levels. -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Regression/pi_release.cpp b/sycl/test-e2e/Regression/pi_release.cpp index 3b4d79d13caed..84fbee6417b8e 100644 --- a/sycl/test-e2e/Regression/pi_release.cpp +++ b/sycl/test-e2e/Regression/pi_release.cpp @@ -2,7 +2,7 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Regression/private_array_init_test.cpp b/sycl/test-e2e/Regression/private_array_init_test.cpp index b822c8aea684d..286204bf8e12f 100644 --- a/sycl/test-e2e/Regression/private_array_init_test.cpp +++ b/sycl/test-e2e/Regression/private_array_init_test.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include namespace s = sycl; diff --git a/sycl/test-e2e/Regression/range-rounding-this-id.cpp b/sycl/test-e2e/Regression/range-rounding-this-id.cpp index a5970adf46e41..03111edc87f1f 100644 --- a/sycl/test-e2e/Regression/range-rounding-this-id.cpp +++ b/sycl/test-e2e/Regression/range-rounding-this-id.cpp @@ -4,7 +4,7 @@ // RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:0 \ // RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 \ // RUN: %{run} %t.out | FileCheck %s -#include +#include constexpr int N = 3; diff --git a/sycl/test-e2e/Regression/same_unnamed_kernels.cpp b/sycl/test-e2e/Regression/same_unnamed_kernels.cpp index dff1cb6d7d1ae..13460332777ce 100644 
--- a/sycl/test-e2e/Regression/same_unnamed_kernels.cpp +++ b/sycl/test-e2e/Regression/same_unnamed_kernels.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include template void run(sycl::queue &q, B &buf, const F &func) { diff --git a/sycl/test-e2e/Regression/static-buffer-dtor.cpp b/sycl/test-e2e/Regression/static-buffer-dtor.cpp index b2e49467af7d0..8ff9328d6535d 100644 --- a/sycl/test-e2e/Regression/static-buffer-dtor.cpp +++ b/sycl/test-e2e/Regression/static-buffer-dtor.cpp @@ -18,7 +18,7 @@ // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows -#include +#include int main() { uint8_t *h_A = (uint8_t *)malloc(256); diff --git a/sycl/test-e2e/Regression/subalign_no_alloc.cpp b/sycl/test-e2e/Regression/subalign_no_alloc.cpp index b9661a120b734..a2348c1662f08 100644 --- a/sycl/test-e2e/Regression/subalign_no_alloc.cpp +++ b/sycl/test-e2e/Regression/subalign_no_alloc.cpp @@ -4,7 +4,7 @@ // Tests that a type with a different alignment from its size does not cause // the runtime to reallocate memory. 
-#include +#include #include diff --git a/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp b/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp index 14669f2dda1ef..c7f2bd282f527 100644 --- a/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp +++ b/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp @@ -5,7 +5,7 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} #include -#include +#include template bool testAndOperator(const std::string &typeName) { diff --git a/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp b/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp index 987625577dc95..a644c9c9a95ae 100644 --- a/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp +++ b/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp b/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp index a67f3371ba16d..36c16320196a6 100644 --- a/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp +++ b/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp @@ -2,7 +2,7 @@ // RUN: %{build} -o %t.out -lpthread // RUN: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/Scheduler/DataMovement.cpp b/sycl/test-e2e/Scheduler/DataMovement.cpp index 557a3130d1e62..afe7790ab7601 100644 --- a/sycl/test-e2e/Scheduler/DataMovement.cpp +++ b/sycl/test-e2e/Scheduler/DataMovement.cpp @@ -12,7 +12,7 @@ // The test checks that no additional host allocation is performed by the SYCL // RT if host ptr is used -#include +#include #include diff --git a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp index 5406108554918..9c87e903c6b1f 100644 --- a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp +++ b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp @@ -13,7 +13,7 @@ 
//===----------------------------------------------------------------------===// #include -#include +#include int main() { size_t size = 3; diff --git a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp index 3c3c40c71c86d..9d56822ff4d69 100644 --- a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp +++ b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp @@ -4,7 +4,7 @@ // XFAIL: hip_nvidia #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Scheduler/MultipleDevices.cpp b/sycl/test-e2e/Scheduler/MultipleDevices.cpp index bc5bb1b0172f5..fb7982c678520 100644 --- a/sycl/test-e2e/Scheduler/MultipleDevices.cpp +++ b/sycl/test-e2e/Scheduler/MultipleDevices.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp index 77e69a340b8f1..eb8fd2c9f5eba 100644 --- a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp +++ b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include "../helpers.hpp" diff --git a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp index 65e52c6ca44c2..22e742e0d6c25 100644 --- a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp +++ b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp @@ -20,7 +20,7 @@ // CHECK-NEXT: : // CHECK-NEXT: : 3 -#include +#include int main(int argc, const char **argv) { diff --git a/sycl/test-e2e/SeparateCompile/same-kernel.cpp b/sycl/test-e2e/SeparateCompile/same-kernel.cpp index baad7c5bb55f3..15a2bfc9bc5c5 100644 --- a/sycl/test-e2e/SeparateCompile/same-kernel.cpp +++ b/sycl/test-e2e/SeparateCompile/same-kernel.cpp @@ -15,7 +15,7 @@ // RUN: %clangxx %t-same-kernel-a.o %t-same-kernel-b.o -o %t-same-kernel.exe -fsycl 
-fsycl-targets=%{sycl_triple} // RUN: %{run} %t-same-kernel.exe -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/SeparateCompile/sycl-external.cpp b/sycl/test-e2e/SeparateCompile/sycl-external.cpp index 008c200f3a63a..25a95a2f32288 100644 --- a/sycl/test-e2e/SeparateCompile/sycl-external.cpp +++ b/sycl/test-e2e/SeparateCompile/sycl-external.cpp @@ -13,7 +13,7 @@ // RUN: %{run} %t.exe #include -#include +#include #ifdef SOURCE1 int bar(int b); diff --git a/sycl/test-e2e/SeparateCompile/test.cpp b/sycl/test-e2e/SeparateCompile/test.cpp index 6d6e7f6bfaf52..5c7c34c204cec 100644 --- a/sycl/test-e2e/SeparateCompile/test.cpp +++ b/sycl/test-e2e/SeparateCompile/test.cpp @@ -65,7 +65,7 @@ #ifdef B_CPP // ----------------------------------------------------------------------------- #include -#include +#include int run_test_b(int v) { int arr[] = {v}; @@ -84,7 +84,7 @@ int run_test_b(int v) { // ----------------------------------------------------------------------------- #include -#include +#include using namespace std; diff --git a/sycl/test-e2e/SharedLib/use_when_link.cpp b/sycl/test-e2e/SharedLib/use_when_link.cpp index 3cedcafe0cdc6..6c74149e60c89 100644 --- a/sycl/test-e2e/SharedLib/use_when_link.cpp +++ b/sycl/test-e2e/SharedLib/use_when_link.cpp @@ -8,7 +8,7 @@ // RUN: %{build} -L%T -o %t.out -l%basename_t -Wl,-rpath=%T // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp index 39c7e5cdeb76b..04f67fe9fad3c 100644 --- a/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp @@ -8,7 +8,7 @@ // RUN: %{build} -L%T -o %t.out -l%basename_t -Wl,-rpath=%T // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild -#include +#include #include diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp 
b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp index e98b8545b28fc..d5fd6f8fd5150 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp @@ -17,7 +17,7 @@ // RUNx: %{compile} -DRUN_LAST // RUNx: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp index 1247ac0bed6eb..de7dc3dd3897b 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp @@ -19,7 +19,7 @@ // RUNx: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild // clang-format on -#include +#include #include #include diff --git a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp index 8d0d424d2fa8a..feb4579ea5526 100644 --- a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp +++ b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl, level-zero, gpu, ocloc +// REQUIRES: (opencl || level-zero) && gpu && ocloc // Check the case when -fsycl-add-default-spec-consts-image option is used which // results in generation of two types of images: where specialization constants diff --git a/sycl/test-e2e/TaskSequence/concurrent-loops.cpp b/sycl/test-e2e/TaskSequence/concurrent-loops.cpp index 2c4b56b7c0e67..6253655574dce 100644 --- a/sycl/test-e2e/TaskSequence/concurrent-loops.cpp +++ b/sycl/test-e2e/TaskSequence/concurrent-loops.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/in-order-async-get.cpp 
b/sycl/test-e2e/TaskSequence/in-order-async-get.cpp index f8c42a05ab324..826cb2766fede 100644 --- a/sycl/test-e2e/TaskSequence/in-order-async-get.cpp +++ b/sycl/test-e2e/TaskSequence/in-order-async-get.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/mult-and-add.cpp b/sycl/test-e2e/TaskSequence/mult-and-add.cpp index 3b8119a44223b..303f7a5376b15 100644 --- a/sycl/test-e2e/TaskSequence/mult-and-add.cpp +++ b/sycl/test-e2e/TaskSequence/mult-and-add.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp b/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp index cb3abf85a1ead..98aebb6aab057 100644 --- a/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp +++ b/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/producer-consumer.cpp b/sycl/test-e2e/TaskSequence/producer-consumer.cpp index 2b5e46ba0e386..dbd1c5d06878c 100644 --- a/sycl/test-e2e/TaskSequence/producer-consumer.cpp +++ b/sycl/test-e2e/TaskSequence/producer-consumer.cpp @@ -6,6 +6,9 @@ // 
//===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp b/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp index 94a630591b132..e1ffc6d603a3a 100644 --- a/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp +++ b/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Tracing/buffer_printers.cpp b/sycl/test-e2e/Tracing/buffer_printers.cpp index 6854d506cb582..3f3a3c9aea858 100644 --- a/sycl/test-e2e/Tracing/buffer_printers.cpp +++ b/sycl/test-e2e/Tracing/buffer_printers.cpp @@ -3,7 +3,7 @@ // // XFAIL: hip_nvidia -#include +#include #include // Test image-specific printers of the Plugin Interace diff --git a/sycl/test-e2e/Tracing/pi_tracing_test.cpp b/sycl/test-e2e/Tracing/pi_tracing_test.cpp index 635980517e6c6..78bb0c31c1d9d 100644 --- a/sycl/test-e2e/Tracing/pi_tracing_test.cpp +++ b/sycl/test-e2e/Tracing/pi_tracing_test.cpp @@ -42,7 +42,7 @@ // CHECK-SAME: [ {{0[xX]?[0-9a-fA-F]*}} ... 
] // CHECK-NEXT: ) ---> pi_result : PI_SUCCESS -#include +#include int main() { sycl::queue Queue; sycl::buffer Buf(10); diff --git a/sycl/test-e2e/USM/math.cpp b/sycl/test-e2e/USM/math.cpp index 4baa2560b4b46..c9e063cba7248 100644 --- a/sycl/test-e2e/USM/math.cpp +++ b/sycl/test-e2e/USM/math.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: hip // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp index a7c00dffdf1cd..be75f61137ea3 100644 --- a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp +++ b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp @@ -62,6 +62,10 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, xpti::trace_event_data_t *, xpti::trace_event_data_t *Event, uint64_t, const void *UserData) { + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); std::lock_guard Lock{GMutex}; auto Type = static_cast(TraceType); switch (Type) { @@ -99,6 +103,9 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, std::cout << "Unknown tracepoint\n"; } + if (HaveKeyValue) { + std::cout << " " << Key << " : " << Value << "\n"; + } xpti::metadata_t *Metadata = xptiQueryMetadata(Event); for (auto &Item : *Metadata) { std::cout << " " << xptiLookupString(Item.first) << " : " diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 61b53feed0622..5a895b67d0097 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -28,6 +28,7 @@ // CHECK-NEXT: PI Call Begin : piPlatformGetInfo // CHECK-NEXT: PI Call Begin : piKernelSetExecInfo // CHECK: Node create +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for 
main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -35,10 +36,14 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Node create -// CHECK-NEXT: kernel_name : virtual_node[{{.*}}] +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: kernel_name : virtual_node[{{.*}}] // CHECK-NEXT: Edge create -// CHECK-NEXT: event : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: event : {{.*}} +// CHECK-DAG: kernel_name : virtual_node[{{.*}}] // CHECK-NEXT: Task begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -51,6 +56,7 @@ // CHECK-NEXT: PI Call Begin : piKernelRelease // CHECK-NEXT: PI Call Begin : piProgramRelease // CHECK-NEXT: Signal +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -58,6 +64,7 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Task end +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -65,27 +72,34 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Wait begin +// CHECK-DAG: queue_id : 
{{.*}} // CHECK-NEXT: PI Call Begin : piEventsWait // CHECK-NEXT: Wait end +// CHECK-DAG: queue_id : {{.*}} // CHECK-NEXT: Node create +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Task begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: PI Call Begin : piextUSMEnqueueMemcpy // CHECK-NEXT: Task end +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: PI Call Begin : piEventRelease // CHECK-NEXT: Wait begin -// CHECK: sycl_device_type : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: sycl_device_type : {{.*}} // CHECK: PI Call Begin : piQueueFinish // CHECK-NEXT: Wait end -// CHECK: sycl_device_type : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: sycl_device_type : {{.*}} diff --git a/sycl/test-e2e/XPTI/buffer/host_array.cpp b/sycl/test-e2e/XPTI/buffer/host_array.cpp index c7fc506961b47..2574f93ae53f6 100644 --- a/sycl/test-e2e/XPTI/buffer/host_array.cpp +++ b/sycl/test-e2e/XPTI/buffer/host_array.cpp @@ -9,7 +9,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; diff --git a/sycl/test-e2e/XPTI/buffer/in_cycle.cpp b/sycl/test-e2e/XPTI/buffer/in_cycle.cpp index 0021a97b7cfb9..75fd3b3b96e85 100644 --- a/sycl/test-e2e/XPTI/buffer/in_cycle.cpp +++ b/sycl/test-e2e/XPTI/buffer/in_cycle.cpp @@ -10,7 +10,7 @@ #else #include -#include +#include bool func(sycl::queue &Queue, int depth = 0) { bool MismatchFound = false; // Create a buffer of 4 ints to be used inside the kernel code. 
diff --git a/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp b/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp index 9e8e21e6ea0f4..874f81c11b481 100644 --- a/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp +++ b/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp @@ -9,7 +9,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; diff --git a/sycl/test-e2e/XPTI/buffer/recursion.cpp b/sycl/test-e2e/XPTI/buffer/recursion.cpp index 0086dfb586810..a8a4f4ab65a2c 100644 --- a/sycl/test-e2e/XPTI/buffer/recursion.cpp +++ b/sycl/test-e2e/XPTI/buffer/recursion.cpp @@ -10,7 +10,7 @@ #else #include -#include +#include bool func(sycl::queue &Queue, int depth = 0) { bool MismatchFound = false; // Create a buffer of 4 ints to be used inside the kernel code. diff --git a/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp b/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp index f4a7e2001b18b..7bc46f33baa8e 100644 --- a/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp +++ b/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp @@ -9,7 +9,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; diff --git a/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp b/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp index e2f4b997afc79..23e0f2e44ff80 100644 --- a/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp +++ b/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp @@ -10,7 +10,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; sycl::queue Queue{}; diff --git a/sycl/test-e2e/XPTI/image/accessors.cpp b/sycl/test-e2e/XPTI/image/accessors.cpp index d428c4095b779..70a11fdf9f767 100644 --- a/sycl/test-e2e/XPTI/image/accessors.cpp +++ b/sycl/test-e2e/XPTI/image/accessors.cpp @@ -9,7 +9,7 @@ #else -#include +#include using namespace sycl::access; diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 62bd5a05ce080..bddcfded0f614 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -340,7 +340,7 @@ ( "%sycl_options", " " - + 
os.path.normpath(os.path.join(config.sycl_libs_dir + "/../lib/sycl7.lib")) + + os.path.normpath(os.path.join(config.sycl_libs_dir + "/../lib/sycl8.lib")) + " /I" + config.sycl_include + " /I" @@ -356,7 +356,7 @@ config.substitutions.append( ( "%sycl_options", - (" -lsycl7" if platform.system() == "Windows" else " -lsycl") + (" -lsycl8" if platform.system() == "Windows" else " -lsycl") + " -I" + config.sycl_include + " -I" diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp index bbd406a35e69e..30a8853aed05e 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp index f212701c10572..544eb09ba8d82 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp @@ -40,7 +40,7 @@ #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp index b315816034a51..ce66db044ee8a 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp @@ -39,7 +39,7 @@ #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp index 4ccc67fbff53e..2acaa85022f0b 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp @@ -38,7 +38,7 @@ #include #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp index 4dd5cd3a634a1..a7fe71ce4c995 100644 --- 
a/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp @@ -37,7 +37,7 @@ #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/dim.cpp b/sycl/test-e2e/syclcompat/dim.cpp index 69d4d487d49a4..b4f1a1595bde5 100644 --- a/sycl/test-e2e/syclcompat/dim.cpp +++ b/sycl/test-e2e/syclcompat/dim.cpp @@ -24,7 +24,7 @@ // RUN: %{run} %t.out #include -#include +#include #include int main() { diff --git a/sycl/test-e2e/syclcompat/math/math_length_test.cpp b/sycl/test-e2e/syclcompat/math/math_length_test.cpp index 63fe324fd3a16..929395d01e309 100644 --- a/sycl/test-e2e/syclcompat/math/math_length_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_length_test.cpp @@ -35,7 +35,7 @@ #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp b/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp index e5b5ec5202a28..15f6372a97539 100644 --- a/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_kernel_vect_is_greater_1(unsigned int vect_count, diff --git a/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp b/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp index b5c30c4d6441a..326bf8b8ccfc1 100644 --- a/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_kernel_vect_max(unsigned int vect_count, unsigned int *input_1, diff --git a/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp b/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp index 
6a83ea8c3e92a..d303337633173 100644 --- a/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_kernel_vect_min(unsigned int vect_count, unsigned int *input_1, diff --git a/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp b/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp index 2a78ca8fdf1fe..5c6fb48a40e3b 100644 --- a/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp +++ b/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp @@ -35,7 +35,7 @@ #include #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp b/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp index 24b3039194344..0d15e042bd307 100644 --- a/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp +++ b/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp @@ -34,7 +34,7 @@ #include #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memory_async.cpp b/sycl/test-e2e/syclcompat/memory/memory_async.cpp index 7396fcb872953..3eb4123014497 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_async.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_async.cpp @@ -37,7 +37,7 @@ #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp b/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp index 008cdb14ec36c..3b3b54914e999 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp @@ -23,7 +23,7 @@ // RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp b/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp index 7d808306d9f01..0d35a0721bbc7 100644 --- 
a/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp b/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp index eb6ab964ebf3f..c3d5b23ddd0ba 100644 --- a/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp @@ -35,7 +35,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp b/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp index f3416fa8f5ce4..8a38c0dfdce56 100644 --- a/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp +++ b/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void find_first_set_test(int *test_result) { diff --git a/sycl/test-e2e/syclcompat/util/util_logical_group.cpp b/sycl/test-e2e/syclcompat/util/util_logical_group.cpp index 1de54cf5eb6ec..9e31ca24ba499 100644 --- a/sycl/test-e2e/syclcompat/util/util_logical_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_logical_group.cpp @@ -36,7 +36,7 @@ #include -#include +#include #include // work-item: diff --git a/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp b/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp index 7e606cfb6b7ec..7bec92ccb9ae3 100644 --- a/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp @@ -34,7 +34,7 @@ // RUN: %{run} %t.out #include -#include +#include #include #define M 3 diff --git a/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp 
b/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp index b7a19f1df7da5..9a4ebe441352c 100644 --- a/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp @@ -37,7 +37,7 @@ #include #include -#include +#include #include void kernel_1( diff --git a/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp b/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp index b4578e3c4ef5b..7e3f0e3b523fe 100644 --- a/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp @@ -35,7 +35,7 @@ #include #include -#include +#include #include void byte_perm_ref(unsigned int *d_data) { diff --git a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp index f544b9d1f277f..75c0382053e69 100644 --- a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp +++ b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define WARP_SIZE 32 diff --git a/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp b/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp index a9a339874a9ca..32d0594f821dc 100644 --- a/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_reverse_bits() { diff --git a/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp b/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp index fc40ede7357e0..a5ee1c3277552 100644 --- a/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp @@ -34,7 +34,7 @@ // RUN: 
%clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define WARP_SIZE 32 diff --git a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp index 3a3d66265b4f2..061b21b61bd53 100644 --- a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp +++ b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define DATA_NUM 128 diff --git a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp index 1eb65d4302224..e80c16ed9fa00 100644 --- a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp +++ b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define DATA_NUM 128 diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 9d609646c50da..daf42c02be9ec 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3,7 +3,7 @@ # DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. 
################################################################################ -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %llvm_build_bin_dir/sycl7.dll +# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %llvm_build_bin_dir/sycl8.dll # REQUIRES: windows # UNSUPPORTED: libcxx diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 104a4bd6c9d22..1fc5ef99e6f5a 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -133,7 +133,7 @@ config.substitutions.append(("%fsycl-host-only", sycl_host_only_options)) config.substitutions.append( - ("%sycl_lib", " -lsycl7" if platform.system() == "Windows" else "-lsycl") + ("%sycl_lib", " -lsycl8" if platform.system() == "Windows" else "-lsycl") ) llvm_config.add_tool_substitutions(["llvm-spirv"], [config.sycl_tools_dir]) diff --git a/sycl/test/native_cpu/multi-devices-swap.cpp b/sycl/test/native_cpu/multi-devices-swap.cpp index 282b89ba23e41..9b2971cf94fe1 100644 --- a/sycl/test/native_cpu/multi-devices-swap.cpp +++ b/sycl/test/native_cpu/multi-devices-swap.cpp @@ -2,7 +2,6 @@ // REQUIRES: opencl_be // RUN: %clangxx -fsycl -fsycl-targets=native_cpu,spir64 %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t -// RUN: env ONEAPI_DEVICE_SELECTOR="opencl:cpu" %t #include diff --git a/sycl/test/native_cpu/multi-devices.cpp b/sycl/test/native_cpu/multi-devices.cpp index 6a3cba9ec5307..1e74020c63730 100644 --- a/sycl/test/native_cpu/multi-devices.cpp +++ b/sycl/test/native_cpu/multi-devices.cpp @@ -2,7 +2,6 @@ // REQUIRES: opencl_be // RUN: %clangxx -fsycl -fsycl-targets=spir64,native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t -// RUN: env ONEAPI_DEVICE_SELECTOR="opencl:cpu" %t #include diff --git a/sycl/test/native_cpu/vector-add-pointers.cpp b/sycl/test/native_cpu/vector-add-pointers.cpp old mode 100755 new mode 100644 diff --git 
a/sycl/tools/sycl-trace/sycl_trace_collector.cpp b/sycl/tools/sycl-trace/sycl_trace_collector.cpp index 55075c5437879..5cf5b3bc5f5b9 100644 --- a/sycl/tools/sycl-trace/sycl_trace_collector.cpp +++ b/sycl/tools/sycl-trace/sycl_trace_collector.cpp @@ -57,6 +57,11 @@ void TraceTaskExecutionSignals(xpti::trace_event_data_t * /*Parent*/, if (!Event) return; + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); + std::cout << "[SYCL] Task " << (IsBegin ? "begin" : "end ") << " (event=" << Event << ",instanceID=" << InstanceID << ")" << std::endl; @@ -67,6 +72,10 @@ void TraceTaskExecutionSignals(xpti::trace_event_data_t * /*Parent*/, if (!IsBegin || !PrintSyclVerbose) return; + if (HaveKeyValue) { + std::cout << "\t " << Key << " : " << Value << std::endl; + } + xpti::metadata_t *Metadata = xptiQueryMetadata(Event); for (auto &Item : *Metadata) { std::cout << "\t " << xptiLookupString(Item.first) << " : " diff --git a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt index 7fb1849268236..b7213d2eaeae6 100644 --- a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt +++ b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt @@ -7,6 +7,7 @@ add_sycl_unittest(CommandGraphExtensionTests OBJECT InOrderQueue.cpp MultiThreaded.cpp Queries.cpp + Regressions.cpp Subgraph.cpp Update.cpp ) diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 07fa7434cca00..63b5b2a04de05 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -81,7 +81,7 @@ TEST_F(CommandGraphTest, Finalize) { // Add a node that depends on Node1 due to the accessor auto Node3 = Graph.add([&](sycl::handler &cgh) { - sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); + sycl::accessor A(Buf, cgh, 
sycl::read_write); cgh.single_task>([]() {}); }); @@ -510,3 +510,121 @@ TEST_F(CommandGraphTest, FillMemsetNodes) { sycl::free(USMPtr, Queue); } } + +// Test that the expected dependencies are created when recording a graph node +// containing an accessor with mode FirstMode, followed by one containing an +// accessor with mode SecondMode +template +void testAccessorModeCombo(sycl::queue Queue) { + buffer Buffer{range<1>{16}}; + + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + // Create the first node with a write mode + auto EventFirst = Queue.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.single_task>([]() {}); + }); + + auto EventSecond = Queue.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.single_task>([]() {}); + }); + Graph.end_recording(Queue); + + EXPECT_EQ(Graph.get_root_nodes().size(), ShouldCreateDep ? 1ul : 2ul); + + experimental::node NodeFirst = + experimental::node::get_node_from_event(EventFirst); + EXPECT_EQ(NodeFirst.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeFirst.get_successors().size(), ShouldCreateDep ? 1ul : 0ul); + + experimental::node NodeSecond = + experimental::node::get_node_from_event(EventSecond); + EXPECT_EQ(NodeSecond.get_predecessors().size(), ShouldCreateDep ? 
1ul : 0ul); + EXPECT_EQ(NodeSecond.get_successors().size(), 0ul); +} + +// Tests that access modes are correctly respected when recording graph nodes +TEST_F(CommandGraphTest, AccessorModeEdges) { + + // Testing access_mode::write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::read and others + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::read_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + + // Testing access_mode::discard_read_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::discard_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + + // Testing access_mode::atomic and others + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); +} diff --git a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp new file mode 100644 index 0000000000000..17b58f542d760 --- /dev/null +++ b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp @@ -0,0 +1,60 @@ +//==------------------------ 
Regressions.cpp -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "Common.hpp" + +using namespace sycl; +using namespace sycl::ext::oneapi; + +// Tests in this file are based on specific error reports + +// Regression test example based on a reported issue with accessor modes not +// being respected in graphs. The test records 3 kernel nodes which all have +// read only dependencies on the same two buffers, with a write dependency on a +// buffer which is different per kernel. This should result in no edges being +// created between these nodes because the accessor mode combinations do not +// indicate a need for dependencies. +// Originally reported here: https://github.com/intel/llvm/issues/12473 +TEST_F(CommandGraphTest, AccessorModeRegression) { + buffer BufferA{range<1>{16}}; + buffer BufferB{range<1>{16}}; + buffer BufferC{range<1>{16}}; + buffer BufferD{range<1>{16}}; + buffer BufferE{range<1>{16}}; + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.single_task>([]() {}); + }); + auto EventB = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccD = BufferD.get_access(CGH); + CGH.single_task>([]() {}); + }); + auto EventC = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccE = BufferE.get_access(CGH); + CGH.single_task>([]() {}); + }); + + Graph.end_recording(Queue); + + experimental::node NodeA = experimental::node::get_node_from_event(EventA); + 
EXPECT_EQ(NodeA.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeA.get_successors().size(), 0ul); + experimental::node NodeB = experimental::node::get_node_from_event(EventB); + EXPECT_EQ(NodeB.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeB.get_successors().size(), 0ul); + experimental::node NodeC = experimental::node::get_node_from_event(EventC); + EXPECT_EQ(NodeC.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeC.get_successors().size(), 0ul); +} diff --git a/sycl/unittests/xpti_trace/QueueIDCheck.cpp b/sycl/unittests/xpti_trace/QueueIDCheck.cpp index c57c7091c1fe9..1baf72b87a59a 100644 --- a/sycl/unittests/xpti_trace/QueueIDCheck.cpp +++ b/sycl/unittests/xpti_trace/QueueIDCheck.cpp @@ -136,7 +136,9 @@ TEST_F(QueueID, QueueCreationAndKernelWithDeps) { checkTaskBeginEnd(QueueIDSTr); } -TEST_F(QueueID, QueueCreationUSMOperations) { +// Re-enable this test after fixing +// https://github.com/intel/llvm/issues/12963 +TEST_F(QueueID, DISABLED_QueueCreationUSMOperations) { sycl::queue Q0; auto Queue0ImplPtr = sycl::detail::getSyclObjImpl(Q0); auto QueueIDSTr = std::to_string(Queue0ImplPtr->getQueueID()); diff --git a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp index fcbbb02126a62..2c79f76269c11 100644 --- a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp +++ b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp @@ -35,6 +35,13 @@ XPTI_CALLBACK_API void testCallback(uint16_t TraceType, if (GAnalyzedTraceTypes.find(TraceType) == GAnalyzedTraceTypes.end()) return; + // Since "queue_id" is no longer a metadata item, we have to retrieve it from + // TLS using new XPTI API + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); + if (TraceType == xpti::trace_diagnostics) { std::string AggregatedData; if (Event && Event->reserved.payload && Event->reserved.payload->name && @@ 
-111,30 +118,22 @@ XPTI_CALLBACK_API void testCallback(uint16_t TraceType, } else if (TraceType == xpti::trace_task_begin) { if (Event) { std::string Message; - xpti::metadata_t *Metadata = xptiQueryMetadata(Event); - for (const auto &Item : *Metadata) { - std::string_view Key{xptiLookupString(Item.first)}; - if (Key == "queue_id") { - Message.append( - std::string("task_begin:") + Key.data() + std::string(":") + - std::to_string( - xpti::getMetadata(Item).second)); - } + // Since we have changed how we send the "queue_id" information, we no longer + // have to check the metadata for the instance ID + if (HaveKeyValue) { + Message.append(std::string("task_begin:") + Key + std::string(":") + + std::to_string(Value)); } GReceivedNotifications.push_back(std::make_pair(TraceType, Message)); } } else if (TraceType == xpti::trace_task_end) { if (Event) { std::string Message; - xpti::metadata_t *Metadata = xptiQueryMetadata(Event); - for (const auto &Item : *Metadata) { - std::string_view Key{xptiLookupString(Item.first)}; - if (Key == "queue_id") { - Message.append( - std::string("task_end:") + Key.data() + std::string(":") + - std::to_string( - xpti::getMetadata(Item).second)); - } + // Since we have changed how we send the "queue_id" information, we no longer + // have to check the metadata for the instance ID + if (HaveKeyValue) { + Message.append(std::string("task_end:") + Key + std::string(":") + + std::to_string(Value)); } GReceivedNotifications.push_back(std::make_pair(TraceType, Message)); } diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h index 90a0e57047b2e..58fa9a117b6d5 100644 --- a/xpti/include/xpti/xpti_trace_framework.h +++ b/xpti/include/xpti/xpti_trace_framework.h @@ -90,6 +90,41 @@ XPTI_EXPORT_API uint64_t xptiGetUniversalId(); /// @param uid Unique 64 bit identifier.
XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid); +/// @brief Returns stashed tuple +/// @details The XPTI Framework allows the notification mechanism to stash a +/// key-value tuple before a notification that can be accessed in the callback +/// handler for the notification. This value is guaranteed to be valid for the +/// duration of the notification. +/// @param key The Key of the stashed tuple is contained in this parameter after +/// the call +/// @param value The value that corresponds to key +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_NOTFOUND if there is nothing stashed. Also returns error if +/// 'key' argument is invalid (XPTI_RESULT_INVALIDARG) +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, uint64_t &value); + +/// @brief Stash a key-value tuple +/// @details Certain notifications in XPTI may want to provide mutable values +/// associated with Universal IDs that can be captured in the notification +/// handler. The framework currently allows one such tuple to be provided and +/// stashed. +/// @param key The Key of the tuple that is being stashed and needs to be +/// available for the duration of the notification call. +/// @param value The value that corresponds to key +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_FAIL if key is invalid +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value); + +/// @brief Un-Stash a key-value tuple or pop it from a stack, if one exists +/// @details Certain notifications in XPTI may want to provide mutable values +/// associated with Universal IDs that can be captured in the notification +/// handler. The framework currently allows such values to be provided and +/// stashed. This function pops the top of the stack tuple value when it is no +/// longer needed; Currently a stack depth of 1 is supported.
+/// @note The function returns no value; calling it when no tuple is stashed +/// has no effect. +XPTI_EXPORT_API void xptiUnstashTuple(); + /// @brief Generates a unique ID /// @details When a tool is subscribing to the event stream and wants to /// generate task IDs that do not collide with unique IDs currently being @@ -498,6 +533,9 @@ typedef void (*xpti_finalize_t)(const char *); typedef uint64_t (*xpti_get_universal_id_t)(); typedef void (*xpti_set_universal_id_t)(uint64_t uid); typedef uint64_t (*xpti_get_unique_id_t)(); +typedef xpti::result_t (*xpti_stash_tuple_t)(const char *key, uint64_t value); +typedef xpti::result_t (*xpti_get_stashed_tuple_t)(char **key, uint64_t &value); +typedef void (*xpti_unstash_tuple_t)(); typedef xpti::string_id_t (*xpti_register_string_t)(const char *, char **); typedef const char *(*xpti_lookup_string_t)(xpti::string_id_t); typedef xpti::string_id_t (*xpti_register_object_t)(const char *, size_t, diff --git a/xpti/include/xpti/xpti_trace_framework.hpp b/xpti/include/xpti/xpti_trace_framework.hpp index c38a149fa401c..55f6c69760cdb 100644 --- a/xpti/include/xpti/xpti_trace_framework.hpp +++ b/xpti/include/xpti/xpti_trace_framework.hpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include "xpti/xpti_data_types.h" @@ -539,6 +541,24 @@ class scoped_notify { uint64_t m_instance; }; +// Scoped class that assists in stashing a tuple and clearing it when it goes out +// of scope +class stash_tuple { +public: + stash_tuple(const char *key, uint64_t value) : m_stashed(false) { + m_stashed = + (xptiStashTuple(key, value) == xpti::result_t::XPTI_RESULT_SUCCESS); + } + ~stash_tuple() { + if (m_stashed) { + xptiUnstashTuple(); + } + } + +private: + bool m_stashed; +}; + // --------------- Commented section of the code ------------- // // github.com/bombela/backward-cpp/blob/master/backward.hpp @@ -759,9 +779,12 @@ class tracepoint_t { // Method to extract the stream used by the
current tracepoint type uint8_t stream_id() { return m_default_stream; } - // Method to extract the stream used by the current tracepoint type + // Method to extract the instance ID used by the current tracepoint type uint64_t instance_id() { return m_instID; } + // Method to override the instance ID generated by the xptiMakeEvent() call + void override_instance_id(uint64_t instance) { m_instID = instance; } + uint64_t universal_id() { if (m_payload && (m_payload->flags & diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index 2d17517ee3089..a09b970060033 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -43,6 +43,9 @@ enum functions_t { XPTI_FORCE_SET_TRACE_ENABLED, XPTI_CHECK_TRACE_ENABLED, XPTI_RELEASE_EVENT, + XPTI_STASH_TUPLE, + XPTI_GET_STASHED_TUPLE, + XPTI_UNSTASH_TUPLE, // All additional functions need to appear before // the XPTI_FW_API_COUNT enum XPTI_FW_API_COUNT ///< This enum must always be the last one in the list @@ -79,6 +82,9 @@ class ProxyLoader { {XPTI_TRACE_ENABLED, "xptiTraceEnabled"}, {XPTI_CHECK_TRACE_ENABLED, "xptiCheckTraceEnabled"}, {XPTI_FORCE_SET_TRACE_ENABLED, "xptiForceSetTraceEnabled"}, + {XPTI_STASH_TUPLE, "xptiStashTuple"}, + {XPTI_GET_STASHED_TUPLE, "xptiGetStashedTuple"}, + {XPTI_UNSTASH_TUPLE, "xptiUnstashTuple"}, {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}}; public: @@ -250,6 +256,37 @@ XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { } } +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_STASH_TUPLE); + if (f) { + return (*reinterpret_cast(f))(key, value); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, + uint64_t &value) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = + xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_STASHED_TUPLE); + if (f) { + return
(*reinterpret_cast(f))(key, value); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API void xptiUnstashTuple() { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_UNSTASH_TUPLE); + if (f) { + return (*reinterpret_cast(f))(); + } + } +} + XPTI_EXPORT_API uint64_t xptiGetUniqueId() { if (xpti::ProxyLoader::instance().noErrors()) { auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_UNIQUE_ID); diff --git a/xptifw/CMakeLists.txt b/xptifw/CMakeLists.txt index ccdabf46c9810..4cbf597513772 100644 --- a/xptifw/CMakeLists.txt +++ b/xptifw/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.8) -set(XPTI_VERSION 0.4.1) +set(XPTI_VERSION 0.6.0) project (xptifw VERSION "${XPTI_VERSION}" LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 41ccaf6a7e27b..93d151094aba4 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,9 @@ static_assert( std::is_trivially_destructible::value, "PlatformHelper is not trivial"); +// TLS variables to support stashing tuples and universal IDs +using stash_tuple_t = std::tuple; +static thread_local stash_tuple_t g_tls_stash_tuple = stash_tuple_t(nullptr, 0); static thread_local uint64_t g_tls_uid = xpti::invalid_uid; namespace xpti { @@ -359,12 +363,16 @@ class Tracepoints { // Protect simultaneous insert operations on the metadata tables { + xpti::result_t res; std::lock_guard HashLock(MMetadataMutex); if (Event->reserved.metadata.count(KeyID)) { - return xpti::result_t::XPTI_RESULT_DUPLICATE; + // One already existed, but we overwrote it + res = xpti::result_t::XPTI_RESULT_DUPLICATE; + } else { + res = xpti::result_t::XPTI_RESULT_SUCCESS; } Event->reserved.metadata[KeyID] = ValueID; - return xpti::result_t::XPTI_RESULT_SUCCESS; + return
res; } } @@ -818,6 +826,38 @@ class Framework { void setUniversalID(uint64_t uid) noexcept { g_tls_uid = uid; } + xpti::result_t stashTuple(const char *key, uint64_t value) { + if (!key) + return xpti::result_t::XPTI_RESULT_FAIL; + + std::get<0>(g_tls_stash_tuple) = key; + std::get<1>(g_tls_stash_tuple) = value; + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + xpti::result_t getStashedTuple(char **key, uint64_t &value) { + if (!key) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + + const char *tls_key = std::get<0>(g_tls_stash_tuple); + if (!tls_key) + return xpti::result_t::XPTI_RESULT_NOTFOUND; + + (*key) = const_cast(tls_key); + value = std::get<1>(g_tls_stash_tuple); + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + void unstashTuple() { + if (!std::get<0>(g_tls_stash_tuple)) + return; + + // std::get<0>(g_tls_stash_tuple) = nullptr; + // std::get<1>(g_tls_stash_tuple) = 0; + // We will use the actual unstash code when we implement a stack to allow + // multiple stashes/thread + } + bool checkTraceEnabled(uint16_t stream, uint16_t type) { if (MTraceEnabled) { return MNotifier.checkSubscribed(stream, type); @@ -1086,6 +1126,19 @@ XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { xpti::Framework::instance().setUniversalID(uid); } +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value) { + return xpti::Framework::instance().stashTuple(key, value); +} + +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, + uint64_t &value) { + return xpti::Framework::instance().getStashedTuple(key, value); +} + +XPTI_EXPORT_API void xptiUnstashTuple() { + xpti::Framework::instance().unstashTuple(); +} + XPTI_EXPORT_API uint16_t xptiRegisterUserDefinedTracePoint(const char *ToolName, uint8_t UserDefinedTP) { uint8_t ToolID = xpti::Framework::instance().registerVendor(ToolName);