diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 7e643b89971c17..fa191c7378dba4 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -284,12 +284,15 @@ class DefaultIntArgument : IntArgument { // This argument is more complex, it includes the enumerator type // name, whether the enum type is externally defined, a list of -// strings to accept, and a list of enumerators to map them to. -class EnumArgument values, +// possible values, and a list of enumerators to map them to. +class EnumArgument values, list enums, bit opt = 0, bit fake = 0, bit isExternalType = 0> : Argument { string Type = type; + // When true, the argument will be parsed as an unevaluated string literal + // and otherwise as an identifier. + bit IsString = is_string; list Values = values; list Enums = enums; bit IsExternalType = isExternalType; @@ -297,10 +300,14 @@ class EnumArgument values, // FIXME: There should be a VariadicArgument type that takes any other type // of argument and generates the appropriate type. -class VariadicEnumArgument values, - list enums, bit isExternalType = 0> +class VariadicEnumArgument values, list enums, + bit isExternalType = 0> : Argument { string Type = type; + // When true, the argument will be parsed as an unevaluated string literal + // and otherwise as an identifier. + bit IsString = is_string; list Values = values; list Enums = enums; bit IsExternalType = isExternalType; @@ -907,7 +914,7 @@ def ARMInterrupt : InheritableAttr, TargetSpecificAttr { // MSP430Interrupt's, MipsInterrupt's and AnyX86Interrupt's spellings // must match. 
let Spellings = [GCC<"interrupt">]; - let Args = [EnumArgument<"Interrupt", "InterruptType", + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", ""], ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", "Generic"], 1>]; @@ -1032,7 +1039,8 @@ def ExternalSourceSymbol : InheritableAttr { def Blocks : InheritableAttr { let Spellings = [Clang<"blocks">]; - let Args = [EnumArgument<"Type", "BlockType", ["byref"], ["ByRef"]>]; + let Args = [EnumArgument<"Type", "BlockType", /*is_string=*/true, + ["byref"], ["ByRef"]>]; let Documentation = [Undocumented]; } @@ -1614,7 +1622,7 @@ def FlagEnum : InheritableAttr { def EnumExtensibility : InheritableAttr { let Spellings = [Clang<"enum_extensibility">]; let Subjects = SubjectList<[Enum]>; - let Args = [EnumArgument<"Extensibility", "Kind", + let Args = [EnumArgument<"Extensibility", "Kind", /*is_string=*/false, ["closed", "open"], ["Closed", "Open"]>]; let Documentation = [EnumExtensibilityDocs]; } @@ -1780,7 +1788,7 @@ def MipsInterrupt : InheritableAttr, TargetSpecificAttr { // must match. 
let Spellings = [GCC<"interrupt">]; let Subjects = SubjectList<[Function]>; - let Args = [EnumArgument<"Interrupt", "InterruptType", + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, ["vector=sw0", "vector=sw1", "vector=hw0", "vector=hw1", "vector=hw2", "vector=hw3", "vector=hw4", "vector=hw5", "eic", ""], @@ -1968,7 +1976,7 @@ def NoMicroMips : InheritableAttr, TargetSpecificAttr { def RISCVInterrupt : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"interrupt">]; let Subjects = SubjectList<[Function]>; - let Args = [EnumArgument<"Interrupt", "InterruptType", + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, ["supervisor", "machine"], ["supervisor", "machine"], 1>]; @@ -2339,7 +2347,7 @@ def ObjCException : InheritableAttr { def ObjCMethodFamily : InheritableAttr { let Spellings = [Clang<"objc_method_family">]; let Subjects = SubjectList<[ObjCMethod], ErrorDiag>; - let Args = [EnumArgument<"Family", "FamilyKind", + let Args = [EnumArgument<"Family", "FamilyKind", /*is_string=*/false, ["none", "alloc", "copy", "init", "mutableCopy", "new"], ["OMF_None", "OMF_alloc", "OMF_copy", "OMF_init", "OMF_mutableCopy", "OMF_new"]>]; @@ -2513,7 +2521,7 @@ def IntelOclBicc : DeclOrTypeAttr { def Pcs : DeclOrTypeAttr { let Spellings = [GCC<"pcs">]; - let Args = [EnumArgument<"PCS", "PCSType", + let Args = [EnumArgument<"PCS", "PCSType", /*is_string=*/true, ["aapcs", "aapcs-vfp"], ["AAPCS", "AAPCS_VFP"]>]; // let Subjects = [Function, ObjCMethod]; @@ -2646,7 +2654,7 @@ def SwiftObjCMembers : Attr { def SwiftError : InheritableAttr { let Spellings = [GNU<"swift_error">]; let Args = [ - EnumArgument<"Convention", "ConventionKind", + EnumArgument<"Convention", "ConventionKind", /*is_string=*/false, ["none", "nonnull_error", "null_result", "zero_result", "nonzero_result"], ["None", "NonNullError", "NullResult", "ZeroResult", "NonZeroResult"]> ]; @@ -2678,7 +2686,7 @@ def SwiftName : InheritableAttr { def SwiftNewType : 
InheritableAttr { let Spellings = [GNU<"swift_newtype">, GNU<"swift_wrapper">]; - let Args = [EnumArgument<"NewtypeKind", "NewtypeKind", + let Args = [EnumArgument<"NewtypeKind", "NewtypeKind", /*is_string=*/false, ["struct", "enum"], ["NK_Struct", "NK_Enum"]>]; let Subjects = SubjectList<[TypedefName], ErrorDiag>; let Documentation = [SwiftNewTypeDocs]; @@ -2814,7 +2822,7 @@ def PragmaClangTextSection : InheritableAttr { def CodeModel : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"model">]; - let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", + let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", /*is_string=*/1, ["normal", "medium", "extreme"], ["Small", "Medium", "Large"], /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>]; let Subjects = SubjectList<[NonTLSGlobalVar], ErrorDiag>; @@ -2870,7 +2878,7 @@ def SwiftIndirectResult : ParameterABIAttr { def SwiftAsync : InheritableAttr { let Spellings = [Clang<"swift_async">]; let Subjects = SubjectList<[Function, ObjCMethod]>; - let Args = [EnumArgument<"Kind", "Kind", + let Args = [EnumArgument<"Kind", "Kind", /*is_string=*/false, ["none", "swift_private", "not_swift_private"], ["None", "SwiftPrivate", "NotSwiftPrivate"]>, ParamIdxArgument<"CompletionHandlerIndex", /*opt=*/1>]; @@ -2880,7 +2888,7 @@ def SwiftAsync : InheritableAttr { def SwiftAsyncError : InheritableAttr { let Spellings = [Clang<"swift_async_error">]; let Subjects = SubjectList<[Function, ObjCMethod]>; - let Args = [EnumArgument<"Convention", "ConventionKind", + let Args = [EnumArgument<"Convention", "ConventionKind", /*is_string=*/false, ["none", "nonnull_error", "zero_argument", "nonzero_argument"], ["None", "NonNullError", "ZeroArgument", "NonZeroArgument"]>, UnsignedArgument<"HandlerParamIdx", /*opt=*/1>]; @@ -2925,7 +2933,7 @@ def ZeroCallUsedRegs : InheritableAttr { let Spellings = [GCC<"zero_call_used_regs">]; let Subjects = SubjectList<[Function], ErrorDiag>; let Args = [ - EnumArgument<"ZeroCallUsedRegs", 
"ZeroCallUsedRegsKind", + EnumArgument<"ZeroCallUsedRegs", "ZeroCallUsedRegsKind", /*is_string=*/true, ["skip", "used-gpr-arg", "used-gpr", "used-arg", "used", "all-gpr-arg", "all-gpr", "all-arg", "all"], ["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used", @@ -3097,7 +3105,7 @@ def TransparentUnion : InheritableAttr { def Unavailable : InheritableAttr { let Spellings = [Clang<"unavailable">]; let Args = [StringArgument<"Message", 1>, - EnumArgument<"ImplicitReason", "ImplicitReason", + EnumArgument<"ImplicitReason", "ImplicitReason", /*is_string=*/0, // FIXME ["", "", "", ""], ["IR_None", "IR_ARCForbiddenType", @@ -3117,8 +3125,8 @@ def DiagnoseIf : InheritableAttr { let Spellings = [GNU<"diagnose_if">]; let Subjects = SubjectList<[Function, ObjCMethod, ObjCProperty]>; let Args = [ExprArgument<"Cond">, StringArgument<"Message">, - EnumArgument<"DiagnosticType", - "DiagnosticType", + EnumArgument<"DiagnosticType", "DiagnosticType", + /*is_string=*/true, ["error", "warning"], ["DT_Error", "DT_Warning"]>, BoolArgument<"ArgDependent", 0, /*fake*/ 1>, @@ -3220,7 +3228,7 @@ def MatrixType : TypeAttr { def Visibility : InheritableAttr { let Clone = 0; let Spellings = [GCC<"visibility">]; - let Args = [EnumArgument<"Visibility", "VisibilityType", + let Args = [EnumArgument<"Visibility", "VisibilityType", /*is_string=*/true, ["default", "hidden", "internal", "protected"], ["Default", "Hidden", "Hidden", "Protected"]>]; let MeaningfulToClassTemplateDefinition = 1; @@ -3230,7 +3238,7 @@ def Visibility : InheritableAttr { def TypeVisibility : InheritableAttr { let Clone = 0; let Spellings = [Clang<"type_visibility">]; - let Args = [EnumArgument<"Visibility", "VisibilityType", + let Args = [EnumArgument<"Visibility", "VisibilityType", /*is_string=*/true, ["default", "hidden", "internal", "protected"], ["Default", "Hidden", "Hidden", "Protected"]>]; // let Subjects = SubjectList<[Tag, ObjCInterface, Namespace], ErrorDiag>; @@ -3628,7 +3636,7 @@ def Consumable : InheritableAttr 
{ // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"consumable", 0>]; let Subjects = SubjectList<[CXXRecord]>; - let Args = [EnumArgument<"DefaultState", "ConsumedState", + let Args = [EnumArgument<"DefaultState", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [ConsumableDocs]; @@ -3661,6 +3669,7 @@ def CallableWhen : InheritableAttr { let Spellings = [Clang<"callable_when", 0>]; let Subjects = SubjectList<[CXXMethod]>; let Args = [VariadicEnumArgument<"CallableStates", "ConsumedState", + /*is_string=*/true, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [CallableWhenDocs]; @@ -3672,7 +3681,7 @@ def ParamTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"param_typestate", 0>]; let Subjects = SubjectList<[ParmVar]>; - let Args = [EnumArgument<"ParamState", "ConsumedState", + let Args = [EnumArgument<"ParamState", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [ParamTypestateDocs]; @@ -3684,7 +3693,7 @@ def ReturnTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"return_typestate", 0>]; let Subjects = SubjectList<[Function, ParmVar]>; - let Args = [EnumArgument<"State", "ConsumedState", + let Args = [EnumArgument<"State", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [ReturnTypestateDocs]; @@ -3696,7 +3705,7 @@ def SetTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? 
let Spellings = [Clang<"set_typestate", 0>]; let Subjects = SubjectList<[CXXMethod]>; - let Args = [EnumArgument<"NewState", "ConsumedState", + let Args = [EnumArgument<"NewState", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [SetTypestateDocs]; @@ -3708,7 +3717,7 @@ def TestTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"test_typestate", 0>]; let Subjects = SubjectList<[CXXMethod]>; - let Args = [EnumArgument<"TestState", "ConsumedState", + let Args = [EnumArgument<"TestState", "ConsumedState", /*is_string=*/false, ["consumed", "unconsumed"], ["Consumed", "Unconsumed"]>]; let Documentation = [TestTypestateDocs]; @@ -3785,7 +3794,8 @@ def CFGuard : InheritableAttr, TargetSpecificAttr { // we might also want to support __declspec(guard(suppress)). let Spellings = [Declspec<"guard">, Clang<"guard">]; let Subjects = SubjectList<[Function]>; - let Args = [EnumArgument<"Guard", "GuardArg", ["nocf"], ["nocf"]>]; + let Args = [EnumArgument<"Guard", "GuardArg", /*is_string=*/false, + ["nocf"], ["nocf"]>]; let Documentation = [CFGuardDocs]; } @@ -3941,7 +3951,7 @@ def LoopHint : Attr { Pragma<"", "nounroll_and_jam">]; /// State of the loop optimization specified by the spelling. 
- let Args = [EnumArgument<"Option", "OptionType", + let Args = [EnumArgument<"Option", "OptionType", /*is_string=*/false, ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", @@ -3950,7 +3960,7 @@ def LoopHint : Attr { "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", "VectorizePredicate"]>, - EnumArgument<"State", "LoopHintState", + EnumArgument<"State", "LoopHintState", /*is_string=*/false, ["enable", "disable", "numeric", "fixed_width", "scalable_width", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "FixedWidth", @@ -4039,7 +4049,7 @@ def OMPDeclareSimdDecl : Attr { let HasCustomParsing = 1; let Documentation = [OMPDeclareSimdDocs]; let Args = [ - EnumArgument<"BranchState", "BranchStateTy", + EnumArgument<"BranchState", "BranchStateTy", /*is_string=*/false, [ "", "inbranch", "notinbranch" ], [ "BS_Undefined", "BS_Inbranch", "BS_Notinbranch" ]>, ExprArgument<"Simdlen">, VariadicExprArgument<"Uniforms">, @@ -4059,10 +4069,10 @@ def OMPDeclareTargetDecl : InheritableAttr { let Subjects = SubjectList<[Function, SharedVar]>; let Documentation = [OMPDeclareTargetDocs]; let Args = [ - EnumArgument<"MapType", "MapTypeTy", + EnumArgument<"MapType", "MapTypeTy", /*is_string=*/false, [ "to", "enter", "link" ], [ "MT_To", "MT_Enter", "MT_Link" ]>, - EnumArgument<"DevType", "DevTypeTy", + EnumArgument<"DevType", "DevTypeTy", /*is_string=*/false, [ "host", "nohost", "any" ], [ "DT_Host", "DT_NoHost", "DT_Any" ]>, ExprArgument<"IndirectExpr">, @@ -4084,7 +4094,7 @@ def OMPAllocateDecl : InheritableAttr { let Spellings = []; let SemaHandler = 0; let Args = [ - EnumArgument<"AllocatorType", "AllocatorTypeTy", + EnumArgument<"AllocatorType", "AllocatorTypeTy", /*is_string=*/false, [ "omp_null_allocator", "omp_default_mem_alloc", 
"omp_large_cap_mem_alloc", "omp_const_mem_alloc", @@ -4333,7 +4343,7 @@ def HLSLShader : InheritableAttr { let Subjects = SubjectList<[HLSLEntry]>; let LangOpts = [HLSL]; let Args = [ - EnumArgument<"Type", "ShaderType", + EnumArgument<"Type", "ShaderType", /*is_string=*/true, ["pixel", "vertex", "geometry", "hull", "domain", "compute", "raygeneration", "intersection", "anyhit", "closesthit", "miss", "callable", "mesh", "amplification"], @@ -4349,10 +4359,12 @@ def HLSLResource : InheritableAttr { let Subjects = SubjectList<[Struct]>; let LangOpts = [HLSL]; let Args = [EnumArgument<"ResourceClass", "llvm::hlsl::ResourceClass", + /*is_string=*/0, ["SRV", "UAV", "CBuffer", "Sampler"], ["SRV", "UAV", "CBuffer", "Sampler"], /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>, EnumArgument<"ResourceKind", "llvm::hlsl::ResourceKind", + /*is_string=*/0, ["Texture1D", "Texture2D", "Texture2DMS", "Texture3D", "TextureCube", "Texture1DArray", "Texture2DArray", "Texture2DMSArray", @@ -4409,7 +4421,7 @@ def : MutualExclusions<[RandomizeLayout, NoRandomizeLayout]>; def FunctionReturnThunks : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"function_return">]; - let Args = [EnumArgument<"ThunkType", "Kind", + let Args = [EnumArgument<"ThunkType", "Kind", /*is_string=*/true, ["keep", "thunk-extern"], ["Keep", "Extern"] >]; diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 72789d9d348144..6ad75d4e034a9d 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1468,7 +1468,7 @@ bool ByteCodeExprGen::VisitCompoundAssignOperator( std::optional LHSComputationT = classify(E->getComputationLHSType()); std::optional LT = classify(LHS->getType()); - std::optional RT = classify(E->getComputationResultType()); + std::optional RT = classify(RHS->getType()); std::optional ResultT = classify(E->getType()); if (!LT || !RT || !ResultT || !LHSComputationT) diff --git 
a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index f75a9fc4b46650..ce7ed9cec3db3f 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -139,7 +139,7 @@ static void moveArrayDesc(Block *B, const std::byte *Src, std::byte *Dst, static void ctorRecord(Block *B, std::byte *Ptr, bool IsConst, bool IsMutable, bool IsActive, const Descriptor *D) { const bool IsUnion = D->ElemRecord->isUnion(); - auto CtorSub = [=](unsigned SubOff, Descriptor *F, bool IsBase) { + auto CtorSub = [=](unsigned SubOff, const Descriptor *F, bool IsBase) { auto *Desc = reinterpret_cast(Ptr + SubOff) - 1; Desc->Offset = SubOff; Desc->Desc = F; @@ -161,7 +161,7 @@ static void ctorRecord(Block *B, std::byte *Ptr, bool IsConst, bool IsMutable, } static void dtorRecord(Block *B, std::byte *Ptr, const Descriptor *D) { - auto DtorSub = [=](unsigned SubOff, Descriptor *F) { + auto DtorSub = [=](unsigned SubOff, const Descriptor *F) { if (auto Fn = F->DtorFn) Fn(B, Ptr + SubOff, F); }; diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index 964c0377c6dc1f..ec6cdebcd820fa 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -247,7 +247,8 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { unsigned VirtSize = 0; // Helper to get a base descriptor. - auto GetBaseDesc = [this](const RecordDecl *BD, Record *BR) -> Descriptor * { + auto GetBaseDesc = [this](const RecordDecl *BD, + const Record *BR) -> const Descriptor * { if (!BR) return nullptr; return allocateDescriptor(BD, BR, std::nullopt, /*isConst=*/false, @@ -258,31 +259,39 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { // Reserve space for base classes. 
Record::BaseList Bases; Record::VirtualBaseList VirtBases; - if (auto *CD = dyn_cast(RD)) { + if (const auto *CD = dyn_cast(RD)) { + for (const CXXBaseSpecifier &Spec : CD->bases()) { if (Spec.isVirtual()) continue; - const RecordDecl *BD = Spec.getType()->castAs()->getDecl(); - Record *BR = getOrCreateRecord(BD); - if (Descriptor *Desc = GetBaseDesc(BD, BR)) { - BaseSize += align(sizeof(InlineDescriptor)); - Bases.push_back({BD, BaseSize, Desc, BR}); - BaseSize += align(BR->getSize()); - continue; + // In error cases, the base might not be a RecordType. + if (const auto *RT = Spec.getType()->getAs()) { + const RecordDecl *BD = RT->getDecl(); + const Record *BR = getOrCreateRecord(BD); + + if (const Descriptor *Desc = GetBaseDesc(BD, BR)) { + BaseSize += align(sizeof(InlineDescriptor)); + Bases.push_back({BD, BaseSize, Desc, BR}); + BaseSize += align(BR->getSize()); + continue; + } } return nullptr; } for (const CXXBaseSpecifier &Spec : CD->vbases()) { - const RecordDecl *BD = Spec.getType()->castAs()->getDecl(); - Record *BR = getOrCreateRecord(BD); - if (Descriptor *Desc = GetBaseDesc(BD, BR)) { - VirtSize += align(sizeof(InlineDescriptor)); - VirtBases.push_back({BD, VirtSize, Desc, BR}); - VirtSize += align(BR->getSize()); - continue; + if (const auto *RT = Spec.getType()->getAs()) { + const RecordDecl *BD = RT->getDecl(); + const Record *BR = getOrCreateRecord(BD); + + if (const Descriptor *Desc = GetBaseDesc(BD, BR)) { + VirtSize += align(sizeof(InlineDescriptor)); + VirtBases.push_back({BD, VirtSize, Desc, BR}); + VirtSize += align(BR->getSize()); + continue; + } } return nullptr; } @@ -298,7 +307,7 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { QualType FT = FD->getType(); const bool IsConst = FT.isConstQualified(); const bool IsMutable = FD->isMutable(); - Descriptor *Desc; + const Descriptor *Desc; if (std::optional T = Ctx.classify(FT)) { Desc = createDescriptor(FD, *T, std::nullopt, IsConst, /*isTemporary=*/false, IsMutable); diff --git 
a/clang/lib/AST/Interp/Record.h b/clang/lib/AST/Interp/Record.h index b0952af2d1ac6c..284bb468d6af47 100644 --- a/clang/lib/AST/Interp/Record.h +++ b/clang/lib/AST/Interp/Record.h @@ -28,7 +28,7 @@ class Record final { struct Field { const FieldDecl *Decl; unsigned Offset; - Descriptor *Desc; + const Descriptor *Desc; bool isBitField() const { return Decl->isBitField(); } }; @@ -36,8 +36,8 @@ class Record final { struct Base { const RecordDecl *Decl; unsigned Offset; - Descriptor *Desc; - Record *R; + const Descriptor *Desc; + const Record *R; }; /// Mapping from identifiers to field descriptors. diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 0728113ba7c936..edfab11c37cf0f 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -291,7 +291,7 @@ static bool attributeHasIdentifierArg(const IdentifierInfo &II) { /// Determine whether the given attribute has an identifier argument. static ParsedAttributeArgumentsProperties -attributeStringLiteralListArg(const IdentifierInfo &II) { +attributeStringLiteralListArg(const llvm::Triple &T, const IdentifierInfo &II) { #define CLANG_ATTR_STRING_LITERAL_ARG_LIST return llvm::StringSwitch(normalizeAttrName(II.getName())) #include "clang/Parse/AttrParserStringSwitches.inc" @@ -550,7 +550,7 @@ unsigned Parser::ParseAttributeArgsCommon( ExprVector ParsedExprs; ParsedAttributeArgumentsProperties ArgProperties = - attributeStringLiteralListArg(*AttrName); + attributeStringLiteralListArg(getTargetInfo().getTriple(), *AttrName); if (ParseAttributeArgumentList(*AttrName, ParsedExprs, ArgProperties)) { SkipUntil(tok::r_paren, StopAtSemi); return 0; diff --git a/clang/test/AST/Interp/crash-GH49103-2.cpp b/clang/test/AST/Interp/crash-GH49103-2.cpp new file mode 100644 index 00000000000000..82d78e2aeab0cc --- /dev/null +++ b/clang/test/AST/Interp/crash-GH49103-2.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -verify -std=c++98 %s -fexperimental-new-constant-interpreter +// RUN: 
%clang_cc1 -verify -std=c++11 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++14 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++17 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++20 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++23 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++2c %s -fexperimental-new-constant-interpreter + +// https://github.com/llvm/llvm-project/issues/49103 + +template struct A; // expected-note 0+ {{}} +struct S : __make_integer_seq { }; // expected-error 0+ {{}} +S s; diff --git a/clang/test/AST/Interp/shifts.cpp b/clang/test/AST/Interp/shifts.cpp index cf71e7145c2742..e5201b3f8bbef7 100644 --- a/clang/test/AST/Interp/shifts.cpp +++ b/clang/test/AST/Interp/shifts.cpp @@ -188,3 +188,12 @@ namespace shifts { // ref-cxx17-error {{not an integral constant expression}} \ // ref-cxx17-note {{in call to 'foo(2)'}} }; + +namespace LongInt { + constexpr int f() { + int a = 1; + a <<= (long)0; + return 1; + } + static_assert(f() == 1, ""); +}; diff --git a/clang/test/Sema/attr-function-return.c b/clang/test/Sema/attr-function-return.c index c6fe88b821e35f..d2c9156da7ab61 100644 --- a/clang/test/Sema/attr-function-return.c +++ b/clang/test/Sema/attr-function-return.c @@ -13,7 +13,7 @@ __attribute__((function_return("thunk-extern"))) void w(void) {} // expected-warning@+1 {{'function_return' attribute argument not supported: invalid}} __attribute__((function_return("invalid"))) void v(void) {} -// expected-error@+1 {{'function_return' attribute requires a string}} +// expected-error@+1 {{expected string literal as argument of 'function_return' attribute}} __attribute__((function_return(5))) void a(void) {} // expected-error@+1 {{'function_return' attribute takes one argument}} diff --git a/clang/test/Sema/callingconv-iamcu.c b/clang/test/Sema/callingconv-iamcu.c index 2874a8164545aa..d6b8f8f011d0f3 
100644 --- a/clang/test/Sema/callingconv-iamcu.c +++ b/clang/test/Sema/callingconv-iamcu.c @@ -36,7 +36,7 @@ int __attribute__((pcs("aapcs", "aapcs"))) pcs1(void); // expected-error {{'pcs' int __attribute__((pcs())) pcs2(void); // expected-error {{'pcs' attribute takes one argument}} int __attribute__((pcs(pcs1))) pcs3(void); // expected-error {{'pcs' attribute requires a string}} \ // expected-error {{invalid PCS type}} -int __attribute__((pcs(0))) pcs4(void); // expected-error {{'pcs' attribute requires a string}} +int __attribute__((pcs(0))) pcs4(void); // expected-error {{expected string literal as argument of 'pcs' attribute}} /* These are ignored because the target is i386 and not ARM */ int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention is not supported for this target}} int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention is not supported for this target}} diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c index fd009b8973bfc2..f0b8b80a329749 100644 --- a/clang/test/Sema/callingconv.c +++ b/clang/test/Sema/callingconv.c @@ -45,7 +45,7 @@ int __attribute__((pcs("aapcs", "aapcs"))) pcs1(void); // expected-error {{'pcs' int __attribute__((pcs())) pcs2(void); // expected-error {{'pcs' attribute takes one argument}} int __attribute__((pcs(pcs1))) pcs3(void); // expected-error {{'pcs' attribute requires a string}} \ // expected-error {{invalid PCS type}} -int __attribute__((pcs(0))) pcs4(void); // expected-error {{'pcs' attribute requires a string}} +int __attribute__((pcs(0))) pcs4(void); // expected-error {{expected string literal as argument of 'pcs' attribute}} /* These are ignored because the target is i386 and not ARM */ int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention is not supported for this target}} int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention is not 
supported for this target}} diff --git a/clang/test/Sema/mips-interrupt-attr.c b/clang/test/Sema/mips-interrupt-attr.c index 7f8958341b8f0f..733f899ecf239e 100644 --- a/clang/test/Sema/mips-interrupt-attr.c +++ b/clang/test/Sema/mips-interrupt-attr.c @@ -3,6 +3,7 @@ struct a { int b; }; struct a test __attribute__((interrupt)); // expected-warning {{'interrupt' attribute only applies to functions and methods}} +__attribute((interrupt(42))) void foo0(void) {} // expected-error {{expected string literal as argument of 'interrupt' attribute}} __attribute__((interrupt("EIC"))) void foo1(void) {} // expected-warning {{'interrupt' attribute argument not supported: 'EIC'}} __attribute__((interrupt("eic", 1))) void foo2(void) {} // expected-error {{'interrupt' attribute takes no more than 1 argument}} diff --git a/clang/test/Sema/riscv-interrupt-attr.c b/clang/test/Sema/riscv-interrupt-attr.c index e66a5799bb3945..756bfa0582de7b 100644 --- a/clang/test/Sema/riscv-interrupt-attr.c +++ b/clang/test/Sema/riscv-interrupt-attr.c @@ -25,6 +25,7 @@ struct a { int b; }; struct a test __attribute__((interrupt)); // expected-warning {{'interrupt' attribute only applies to functions}} +__attribute__((interrupt(42))) void foo0(void) {} // expected-error {{expected string literal as argument of 'interrupt' attribute}} __attribute__((interrupt("USER"))) void foo1(void) {} // expected-warning {{'interrupt' attribute argument not supported: USER}} __attribute__((interrupt("user"))) void foo1b(void) {} // expected-warning {{'interrupt' attribute argument not supported: user}} __attribute__((interrupt("MACHINE"))) void foo1c(void) {} // expected-warning {{'interrupt' attribute argument not supported: MACHINE}} diff --git a/clang/test/Sema/zero_call_used_regs.c b/clang/test/Sema/zero_call_used_regs.c index 3313707f97cf9c..6de18fa3d7d0dd 100644 --- a/clang/test/Sema/zero_call_used_regs.c +++ b/clang/test/Sema/zero_call_used_regs.c @@ -4,7 +4,7 @@ void failure1(void) _zero_call_used_regs(); // 
expected-error {{takes one argument}} void failure2(void) _zero_call_used_regs("used", "used-gpr"); // expected-error {{takes one argument}} -void failure3(void) _zero_call_used_regs(0); // expected-error {{requires a string}} +void failure3(void) _zero_call_used_regs(0); // expected-error {{expected string literal}} void failure4(void) _zero_call_used_regs("hello"); // expected-warning {{argument not supported: hello}} void success1(void) _zero_call_used_regs("skip"); diff --git a/clang/test/SemaCXX/warn-consumed-parsing.cpp b/clang/test/SemaCXX/warn-consumed-parsing.cpp index 722a60bf98632f..63f4135d0d2654 100644 --- a/clang/test/SemaCXX/warn-consumed-parsing.cpp +++ b/clang/test/SemaCXX/warn-consumed-parsing.cpp @@ -35,7 +35,7 @@ void function3() CONSUMABLE(consumed); // expected-warning {{'consumable' attrib class CONSUMABLE(unknown) AttrTester1 { void callableWhen0() CALLABLE_WHEN("unconsumed"); - void callableWhen1() CALLABLE_WHEN(42); // expected-error {{'callable_when' attribute requires a string}} + void callableWhen1() CALLABLE_WHEN(42); // expected-error {{expected string literal as argument of 'callable_when' attribute}} void callableWhen2() CALLABLE_WHEN("foo"); // expected-warning {{'callable_when' attribute argument not supported: foo}} void callableWhen3() CALLABLE_WHEN(unconsumed); void consumes() SET_TYPESTATE(consumed); diff --git a/clang/test/SemaHLSL/shader_type_attr.hlsl b/clang/test/SemaHLSL/shader_type_attr.hlsl index d497b0582d1a72..52d3b1c9d012f1 100644 --- a/clang/test/SemaHLSL/shader_type_attr.hlsl +++ b/clang/test/SemaHLSL/shader_type_attr.hlsl @@ -47,9 +47,9 @@ int forwardDecl() { // expected-error@+1 {{'shader' attribute takes one argument}} [shader()] -// expected-error@+1 {{'shader' attribute takes one argument}} +// expected-error@+1 {{expected string literal as argument of 'shader' attribute}} [shader(1, 2)] -// expected-error@+1 {{'shader' attribute requires a string}} +// expected-error@+1 {{expected string literal as argument 
of 'shader' attribute}} [shader(1)] // expected-warning@+1 {{'shader' attribute argument not supported: cs}} [shader("cs")] diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 89b88e386f2572..935b9846990ee5 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -171,12 +171,13 @@ static StringRef NormalizeGNUAttrSpelling(StringRef AttrSpelling) { typedef std::vector> ParsedAttrMap; static ParsedAttrMap getParsedAttrList(const RecordKeeper &Records, - ParsedAttrMap *Dupes = nullptr) { + ParsedAttrMap *Dupes = nullptr, + bool SemaOnly = true) { std::vector Attrs = Records.getAllDerivedDefinitions("Attr"); std::set Seen; ParsedAttrMap R; for (const auto *Attr : Attrs) { - if (Attr->getValueAsBit("SemaHandler")) { + if (!SemaOnly || Attr->getValueAsBit("SemaHandler")) { std::string AN; if (Attr->isSubClassOf("TargetSpecificAttr") && !Attr->isValueUnset("ParseKind")) { @@ -2358,19 +2359,21 @@ static bool isVariadicExprArgument(const Record *Arg) { } static bool isStringLiteralArgument(const Record *Arg) { - return !Arg->getSuperClasses().empty() && - llvm::StringSwitch( - Arg->getSuperClasses().back().first->getName()) - .Case("StringArgument", true) - .Default(false); + if (Arg->getSuperClasses().empty()) + return false; + StringRef ArgKind = Arg->getSuperClasses().back().first->getName(); + if (ArgKind == "EnumArgument") + return Arg->getValueAsBit("IsString"); + return ArgKind == "StringArgument"; } static bool isVariadicStringLiteralArgument(const Record *Arg) { - return !Arg->getSuperClasses().empty() && - llvm::StringSwitch( - Arg->getSuperClasses().back().first->getName()) - .Case("VariadicStringArgument", true) - .Default(false); + if (Arg->getSuperClasses().empty()) + return false; + StringRef ArgKind = Arg->getSuperClasses().back().first->getName(); + if (ArgKind == "VariadicEnumArgument") + return Arg->getValueAsBit("IsString"); + return ArgKind == 
"VariadicStringArgument"; } static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records, @@ -2393,14 +2396,18 @@ static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records, OS << "#endif // CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST\n\n"; } +static bool GenerateTargetSpecificAttrChecks(const Record *R, + std::vector &Arches, + std::string &Test, + std::string *FnName); + // Emits the list of arguments that should be parsed as unevaluated string // literals for each attribute. static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_STRING_LITERAL_ARG_LIST)\n"; - std::vector Attrs = Records.getAllDerivedDefinitions("Attr"); - for (const auto *Attr : Attrs) { - std::vector Args = Attr->getValueAsListOfDefs("Args"); + + auto MakeMask = [](ArrayRef Args) { uint32_t Bits = 0; assert(Args.size() <= 32 && "unsupported number of arguments in attribute"); for (uint32_t N = 0; N < Args.size(); ++N) { @@ -2411,11 +2418,46 @@ static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records, break; } } - if (!Bits) + return Bits; + }; + + auto AddMaskWithTargetCheck = [](const Record *Attr, uint32_t Mask, + std::string &MaskStr) { + const Record *T = Attr->getValueAsDef("Target"); + std::vector Arches = T->getValueAsListOfStrings("Arches"); + std::string Test; + GenerateTargetSpecificAttrChecks(T, Arches, Test, nullptr); + MaskStr.append(Test + " ? 
" + std::to_string(Mask) + " : "); + }; + + ParsedAttrMap Dupes; + ParsedAttrMap Attrs = getParsedAttrList(Records, &Dupes, /*SemaOnly=*/false); + for (const auto &[AttrName, Attr] : Attrs) { + std::string MaskStr; + if (Attr->isSubClassOf("TargetSpecificAttr") && + !Attr->isValueUnset("ParseKind")) { + if (uint32_t Mask = MakeMask(Attr->getValueAsListOfDefs("Args"))) + AddMaskWithTargetCheck(Attr, Mask, MaskStr); + StringRef ParseKind = Attr->getValueAsString("ParseKind"); + for (const auto &[DupeParseKind, DupAttr] : Dupes) { + if (DupeParseKind != ParseKind) + continue; + if (uint32_t Mask = MakeMask(DupAttr->getValueAsListOfDefs("Args"))) + AddMaskWithTargetCheck(DupAttr, Mask, MaskStr); + } + if (!MaskStr.empty()) + MaskStr.append("0"); + } else { + if (uint32_t Mask = MakeMask(Attr->getValueAsListOfDefs("Args"))) + MaskStr = std::to_string(Mask); + } + + if (MaskStr.empty()) continue; + // All these spellings have at least one string literal has argument. forEachUniqueSpelling(*Attr, [&](const FlattenedSpelling &S) { - OS << ".Case(\"" << S.name() << "\", " << Bits << ")\n"; + OS << ".Case(\"" << S.name() << "\", " << MaskStr << ")\n"; }); } OS << "#endif // CLANG_ATTR_STRING_LITERAL_ARG_LIST\n\n"; @@ -3404,6 +3446,8 @@ void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) { OS << " }\n"; } +} // namespace clang + // Helper function for GenerateTargetSpecificAttrChecks that alters the 'Test' // parameter with only a single check type, if applicable. static bool GenerateTargetSpecificAttrCheck(const Record *R, std::string &Test, @@ -3570,6 +3614,8 @@ static void GenerateHasAttrSpellingStringSwitch( OS << " .Default(0);\n"; } +namespace clang { + // Emits list of regular keyword attributes with info about their arguments. 
void EmitClangRegularKeywordAttributeInfo(RecordKeeper &Records, raw_ostream &OS) { diff --git a/libcxx/include/istream b/libcxx/include/istream index 0d05a26091a4b3..7975a9e599a5b6 100644 --- a/libcxx/include/istream +++ b/libcxx/include/istream @@ -1010,17 +1010,18 @@ basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::unget() { template int basic_istream<_CharT, _Traits>::sync() { ios_base::iostate __state = ios_base::goodbit; - int __r = 0; sentry __sen(*this, true); + if (this->rdbuf() == nullptr) + return -1; + + int __r = 0; if (__sen) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { #endif // _LIBCPP_HAS_NO_EXCEPTIONS - if (this->rdbuf() == nullptr) - return -1; if (this->rdbuf()->pubsync() == -1) { __state |= ios_base::badbit; - return -1; + __r = -1; } #ifndef _LIBCPP_HAS_NO_EXCEPTIONS } catch (...) { diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp index ec1195d6b32bd9..4fa58c0abfbaa2 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp @@ -10,6 +10,12 @@ // int sync(); +// The fix for bug 51497 and bug 51499 requires an updated dylib due to +// explicit instantiations. That means Apple backdeployment targets remain +// broken. +// TODO(#82107) Enable XFAIL. 
+// UNSUPPORTED: using-built-library-before-llvm-19 + #include #include @@ -48,6 +54,18 @@ struct testbuf } }; +template +struct testbuf_pubsync_error + : public std::basic_streambuf +{ +public: + + testbuf_pubsync_error() {} +protected: + virtual int sync() { return -1; } +}; + + #ifndef TEST_HAS_NO_EXCEPTIONS struct testbuf_exception { }; @@ -85,21 +103,62 @@ struct throwing_testbuf int main(int, char**) { + { + std::istream is(nullptr); + assert(is.sync() == -1); + } { testbuf sb(" 123456789"); std::istream is(&sb); assert(is.sync() == 0); assert(sync_called == 1); } + { + testbuf_pubsync_error sb; + std::istream is(&sb); + is.exceptions(std::ios_base::failbit | std::ios_base::eofbit); + assert(is.sync() == -1); + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + } #ifndef TEST_HAS_NO_WIDE_CHARACTERS + { + std::wistream is(nullptr); + assert(is.sync() == -1); + } { testbuf sb(L" 123456789"); std::wistream is(&sb); assert(is.sync() == 0); assert(sync_called == 2); } + { + testbuf_pubsync_error sb; + std::wistream is(&sb); + is.exceptions(std::ios_base::failbit | std::ios_base::eofbit); + assert(is.sync() == -1); + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + } #endif #ifndef TEST_HAS_NO_EXCEPTIONS + { + testbuf_pubsync_error sb; + std::istream is(&sb); + is.exceptions(std::ios_base::badbit); + bool threw = false; + try { + is.sync(); + } catch (std::ios_base::failure const&) { + threw = true; + } + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + assert(threw); + } { throwing_testbuf sb(" 123456789"); std::basic_istream is(&sb); @@ -116,6 +175,21 @@ int main(int, char**) assert(threw); } #ifndef TEST_HAS_NO_WIDE_CHARACTERS + { + testbuf_pubsync_error sb; + std::wistream is(&sb); + is.exceptions(std::ios_base::badbit); + bool threw = false; + try { + is.sync(); + } catch (std::ios_base::failure const&) { + threw = true; + } + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + assert(threw); + } { 
throwing_testbuf sb(L" 123456789"); std::basic_istream is(&sb); @@ -131,7 +205,7 @@ int main(int, char**) assert( is.fail()); assert(threw); } -#endif +#endif // TEST_HAS_NO_WIDE_CHARACTERS #endif // TEST_HAS_NO_EXCEPTIONS return 0; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f647c67ef99a22..073052d599dbd6 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6306,7 +6306,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op, return expandFMINNUM_FMAXNUM(Op.getNode(), DAG); if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 || - VT == MVT::v16f16) + VT == MVT::v32f16) return splitBinaryVectorOp(Op, DAG); return Op; } @@ -14570,7 +14570,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N->getValueType(0); // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x)) - if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2f16) { + if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2bf16) { SDLoc SL(N); SDValue Src = N->getOperand(0); EVT EltVT = Src.getValueType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 13e1859ad6b250..42e3b4a003511b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1177,9 +1177,6 @@ class VPInstruction : public VPRecipeWithIRFlags { bool isFPMathOp() const; #endif -protected: - void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } - public: VPInstruction(unsigned Opcode, ArrayRef Operands, DebugLoc DL, const Twine &Name = "") diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp index fbcadba33e6768..98ccf216946357 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -461,7 +461,6 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef Values) { 
assert(CombinedOperands.size() > 0 && "Need more some operands"); auto *Inst = cast(Values[0])->getUnderlyingInstr(); auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc()); - VPI->setUnderlyingInstr(Inst); LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " << *cast(Values[0]) << "\n"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index 6a971b37cad7c5..5ceb85e7d9903b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -470,6 +470,88 @@ struct BubbleUpPackOpThroughGenericOpPattern ControlPropagationFn controlFn; }; +/// Propagate a tensor.pack operation up through a tensor.pad. The idea is to +/// add as many zero padding dimensions in `high` and `low` based on the number +/// of point loops. +class BubbleUpPackThroughPadOp final : public OpRewritePattern { +public: + BubbleUpPackThroughPadOp(MLIRContext *context, ControlPropagationFn fun) + : OpRewritePattern(context), controlFn(std::move(fun)) {} + + LogicalResult matchAndRewrite(tensor::PackOp packOp, + PatternRewriter &rewriter) const override { + auto padOp = packOp.getSource().getDefiningOp(); + if (!padOp) + return failure(); + + // User controlled propagation function. + if (!controlFn(padOp)) + return failure(); + + if (!padOp.getResult().hasOneUse()) + return failure(); + + // TODO: Enable padding when the padding values are the same. + if (packOp.getPaddingValue()) + return failure(); + + // Fail for non-constant padding values. The body of the pad could + // depend on the padding indices and/or properties of the padded + // tensor so for now we fail. + // TODO: Support non-constant padding values. 
+ Value paddingVal = padOp.getConstantPaddingValue(); + if (!paddingVal) + return failure(); + + if (!packOp.getDest().getDefiningOp()) + return failure(); + + ArrayRef innerDimsPos = packOp.getInnerDimsPos(); + ArrayRef outerDimsPerm = packOp.getOuterDimsPerm(); + + // Bail out if one of the padded dimension is a tiled one. + llvm::SmallBitVector paddedDims = padOp.getPaddedDims(); + llvm::SmallBitVector innerDims(paddedDims.size()); + for (int64_t dim : innerDimsPos) + innerDims.flip(dim); + if (paddedDims.anyCommon(innerDims)) + return failure(); + + Location loc = padOp->getLoc(); + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(padOp); + + auto empty = tensor::PackOp::createDestinationTensor( + rewriter, loc, padOp.getSource(), packOp.getMixedTiles(), innerDimsPos, + outerDimsPerm); + Value packedSource = rewriter.create( + loc, padOp.getSource(), empty, innerDimsPos, packOp.getMixedTiles(), + /*padding=*/std::nullopt, outerDimsPerm); + + // If we have `outer_dims_perms` we need to adjust the padded dimensions. + SmallVector lowPad = padOp.getMixedLowPad(); + SmallVector highPad = padOp.getMixedHighPad(); + if (!outerDimsPerm.empty()) { + applyPermutationToVector(lowPad, outerDimsPerm); + applyPermutationToVector(highPad, outerDimsPerm); + } + // The tiled dimensions were verified to be unpadded above, so here we + // just append 0 for the inner tile dimensions. + size_t pointLoopsSize = innerDimsPos.size(); + lowPad.append(pointLoopsSize, rewriter.getIndexAttr(0)); + highPad.append(pointLoopsSize, rewriter.getIndexAttr(0)); + + auto newPadOp = rewriter.create( + loc, /*result=*/Type(), packedSource, lowPad, highPad, paddingVal, + padOp.getNofold()); + rewriter.replaceOp(packOp, newPadOp.getResult()); + return success(); + } + +private: + ControlPropagationFn controlFn; +}; + // TODO: Relax this restriction. We should unpack a generic op also // in the presence of multiple unpack ops as producers. 
/// Return the unpacked operand, if present, for the current generic op. @@ -690,7 +772,8 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern { void mlir::linalg::populateDataLayoutPropagationPatterns( RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation) { - patterns.insert( - patterns.getContext(), controlPackUnPackPropagation); + patterns + .insert( + patterns.getContext(), controlPackUnPackPropagation); } diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index 4c59c97aecc251..e036695a2ac9fd 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -21,28 +21,28 @@ func.func @dynamic_elem_pack(%arg0: tensor, %dest: tensor) into %dest : tensor -> tensor return %4 : tensor } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @dynamic_elem_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] -// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]] -// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: 
ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @dynamic_elem_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] +// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] +// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP2]], #[[$MAP2]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor // ----- @@ -62,20 +62,20 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> return %pack : tensor<4x16x16x32xi32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_inner_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// 
CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor<4x16x16x32xi32> +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_inner_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor<4x16x16x32xi32> // ----- @@ -96,20 +96,20 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten into %dest : tensor<128x256xi32> -> tensor<16x4x32x16xi32> return %pack : tensor<16x4x32x16xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] -// CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32> -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : 
tensor<16x4x32x16xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32> +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor<16x4x32x16xi32> // ----- @@ -130,20 +130,20 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>, into %dest : tensor<128x256xi32> -> tensor<16x4x16x32xi32> return %pack : tensor<16x4x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_inner_and_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor<16x4x16x32xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_inner_and_outer_dims +// CHECK-SAME: 
%[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor<16x4x16x32xi32> // ----- @@ -169,34 +169,34 @@ func.func @dynamic_broadcast_pack(%arg0: tensor, %arg1: tensor, %d into %dest : tensor -> tensor return %4 : tensor } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @dynamic_broadcast_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]] -// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]] -// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] -// CHECK-SAME: into 
%[[ARG1_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]], #[[MAP4]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]], %[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> +// CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @dynamic_broadcast_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]] +// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] +// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor +// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] +// CHECK-SAME: into %[[ARG1_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP4]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]], %[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor // ----- @@ -215,19 +215,19 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: 
tensor<64xf32>, %des %2 = tensor.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32> return %2 : tensor<1x2x56x57x32xf32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @elem_pack_transpose_inner_and_outer_dims2 -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @elem_pack_transpose_inner_and_outer_dims2 +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -253,27 +253,27 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> return %4 : tensor<100x200x4x16x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0)> -// 
CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> -// CHECK: func.func @transpose_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> +// CHECK-LABEL: func.func @transpose_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// 
CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]], #[[$MAP3]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -299,27 +299,27 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> return %4 : tensor<100x200x4x16x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, 0, 0, 0)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (0, d1, 0, 0, d5)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> -// CHECK: func.func @affine_constant_expr_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, 0, 0, 0)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (0, d1, 0, 0, d5)> +// 
CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> +// CHECK-LABEL: func.func @affine_constant_expr_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]], #[[$MAP3]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -347,26 +347,26 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a return %4 : tensor<200x4x16x100x16x32xi32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> -// CHECK: func.func @transpose_pack_with_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: 
inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> +// CHECK-LABEL: func.func @transpose_pack_with_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]], #[[$MAP]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -388,22 +388,22 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten return %pack : tensor<16x4x32x16xi32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] 
inner_tiles = [32, 16] -// CHECK-SAME: into %[[ARG1_EMPTY]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[PACKED_ARG1]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> +// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[ARG1_EMPTY]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK-SAME: outs(%[[PACKED_ARG1]] // ----- @@ -420,23 +420,23 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 return %2 : tensor<12x56x56x64xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @unpack_on_output -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] -// CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : 
tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY_PACK]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]]] -// CHECK-SAME: outs(%[[PACKED_ARG0]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @unpack_on_output +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] +// CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY_PACK]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]]] +// CHECK-SAME: outs(%[[PACKED_ARG0]] +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] // ----- @@ -453,29 +453,29 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 return %2 : tensor<12x56x56x64xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @unpack_on_input -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = 
tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] -// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] -// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @unpack_on_input +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] 
+// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[ARG0_PACK]] +// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // ----- @@ -492,30 +492,30 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t return %2 : tensor<12x56x56x64xf16> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @unpack_element_type_change -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] -// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] -// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[ARG0_NEW_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf16> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_NEW_EMPTY_UNPACK]] +// CHECK: 
#[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @unpack_element_type_change +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> +// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[ARG0_PACK]] +// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK: %[[ARG0_NEW_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf16> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_NEW_EMPTY_UNPACK]] // ----- @@ -533,29 +533,29 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5 return %2 : tensor<12x56x56x64xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @forward_tensor_empty -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: 
into %[[ARG0_UNPACK_EMPTY]] -// CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @forward_tensor_empty +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // ----- -func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> { +func.func @pad_valid_unpack_propagation(%arg0: 
tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> @@ -566,18 +566,18 @@ func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58 return %padded : tensor<1x58x58x64xf32> } -// CHECK: func.func @pad_valid_propagation( -// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) -// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32> +// CHECK-LABEL: func.func @pad_valid_unpack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32> // ----- -func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> { +func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> @@ -588,14 
+588,14 @@ func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58 return %padded : tensor<2x58x58x64xf32> } -// CHECK: func.func @pad_valid_propagation( -// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) -// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32> +// CHECK-LABEL: func.func @pad_valid_unpack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32> // ----- @@ -610,14 +610,80 @@ func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x5 return %padded : tensor<1x58x58x66xf32> } -// CHECK: func.func @pad_along_unpacked_dim( -// CHECK: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) -// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> -// CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1] +// CHECK-LABEL: func.func @pad_along_unpacked_dim( +// CHECK: %[[ARG0:.+]]: 
tensor<1x2x56x56x32xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> +// CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1] + +// ----- + +func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1x2x58x58x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %arg0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> + %0 = tensor.empty() : tensor<1x2x58x58x32xf32> + %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + return %1 : tensor<1x2x58x58x32xf32> +} + +// CHECK-LABEL: func.func @pad_valid_pack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32> +// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32> +// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] +// CHECK: return %[[PADDED]] + +// ----- + +func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1x58x58x2x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %arg0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> + %0 = tensor.empty() : 
tensor<1x58x58x2x32xf32> + %1 = tensor.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32> + return %1 : tensor<1x58x58x2x32xf32> +} + +// CHECK-LABEL: func.func @pad_valid_outer_dims_pack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x2x32xf32> +// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x56x56x2x32xf32> +// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 1, 1, 0, 0] high[0, 1, 1, 0, 0] +// CHECK: return %[[PADDED]] + +// ----- + +func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x58x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %arg0 low[0, 2, 1, 1] high[0, 2, 1, 1] { + ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x60x56x56xf32> to tensor<1x64x58x58xf32> + %0 = tensor.empty() : tensor<1x2x58x58x32xf32> + %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + return %1 : tensor<1x2x58x58x32xf32> +} + +// CHECK-LABEL: func.func @pad_along_packed_dim( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x60x56x56xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 2, 1, 1] high[0, 2, 1, 1] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x58x58x32xf32> +// CHECK: tensor.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> // ----- @@ -639,16 +705,16 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3 return %pack 
: tensor<4x16x16x32xi32> } -// CHECK: func.func @would_break_dominance( -// CHECK-SAME: %[[ARG0:.+]]: tensor<128x256xi32>) -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x256xi32> -// CHECK-NEXT: %[[GEN:.+]] = linalg.generic -// CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] -// CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> -// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ALLOC]] +// CHECK-LABEL: func.func @would_break_dominance( +// CHECK-SAME: %[[ARG0:.+]]: tensor<128x256xi32>) +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x256xi32> +// CHECK-NEXT: %[[GEN:.+]] = linalg.generic +// CHECK-SAME: ins(%[[ARG0]] +// CHECK-SAME: outs(%[[EMPTY]] +// CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> +// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ALLOC]] // ----- @@ -666,16 +732,16 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { return %pack : tensor<1x32x7x7x32xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> ()> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @scalar_tensor -// CHECK-SAME: %[[ARG0:.+]]: tensor) -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x32x7x7x32xf32> -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> ()> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @scalar_tensor +// CHECK-SAME: %[[ARG0:.+]]: tensor) +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x32x7x7x32xf32> +// CHECK: 
linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[ARG0]] +// CHECK-SAME: outs(%[[EMPTY]] // ----- @@ -692,15 +758,15 @@ func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x return %2 : tensor<12x56x56x64xf32> } -// CHECK: func.func @unpack_empty_inner_dims -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] +// CHECK-LABEL: func.func @unpack_empty_inner_dims +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] // ----- @@ -722,25 +788,25 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>, into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> return %pack : tensor<4x16x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)> -// CHECK: func.func @reduction_pack_transpose_inner_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: 
%[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] -// CHECK-SME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG1_EMPTY]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[RED:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[PACK_ARG1]] -// CHECK: return %[[RED]] : tensor<4x16x16x32xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)> +// CHECK-LABEL: func.func @reduction_pack_transpose_inner_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> +// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG1_EMPTY]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[RED:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[PACK_ARG1]] +// CHECK: return %[[RED]] : tensor<4x16x16x32xi32> // ----- @@ -770,31 +836,31 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a return %4 : tensor<4x16x100x16x32xi32> } -// 
CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d5)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4, d5)> -// CHECK: func.func @reduction_pack_with_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] -// CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]] -// CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG3_EMPTY]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[PACKED_ARG3]] +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d5)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4, d5)> +// CHECK-LABEL: func.func @reduction_pack_with_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: 
%[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] +// CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32> +// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]] +// CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG3_EMPTY]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]], #[[$MAP3]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[PACKED_ARG3]] // ----- @@ -818,24 +884,24 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 } -> tensor<16x540x960xi32> return %pool : tensor<16x540x960xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2 * 2 + d4, d3 * 2 + d5, d6)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1, d2, d3, d6)> -// CHECK: func.func @unpack_different_destination_shape -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32> -// CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack -// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16] -// CHECK-SAME: into %[[PACK_EMPTY]] -// CHECK: %[[POOL:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], 
#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]] -// CHECK-SAME: outs(%[[INIT]] -// CHECK: %[[UNPACK_NEW_DEST:.+]] = tensor.empty() : tensor<16x540x960xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] -// CHECK-SAME: into %[[UNPACK_NEW_DEST]] -// CHECK: return %[[UNPACK]] : tensor<16x540x960xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2 * 2 + d4, d3 * 2 + d5, d6)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1, d2, d3, d6)> +// CHECK-LABEL: func.func @unpack_different_destination_shape +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32> +// CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack +// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16] +// CHECK-SAME: into %[[PACK_EMPTY]] +// CHECK: %[[POOL:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]] +// CHECK-SAME: outs(%[[INIT]] +// CHECK: %[[UNPACK_NEW_DEST:.+]] = tensor.empty() : tensor<16x540x960xi32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] +// CHECK-SAME: into %[[UNPACK_NEW_DEST]] +// CHECK: return %[[UNPACK]] : tensor<16x540x960xi32>