diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 7e643b89971c17..fa191c7378dba4 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -284,12 +284,15 @@ class DefaultIntArgument : IntArgument { // This argument is more complex, it includes the enumerator type // name, whether the enum type is externally defined, a list of -// strings to accept, and a list of enumerators to map them to. -class EnumArgument values, +// possible values, and a list of enumerators to map them to. +class EnumArgument values, list enums, bit opt = 0, bit fake = 0, bit isExternalType = 0> : Argument { string Type = type; + // When true, the argument will be parsed as an unevaluated string literal + // and otherwise as an identifier. + bit IsString = is_string; list Values = values; list Enums = enums; bit IsExternalType = isExternalType; @@ -297,10 +300,14 @@ class EnumArgument values, // FIXME: There should be a VariadicArgument type that takes any other type // of argument and generates the appropriate type. -class VariadicEnumArgument values, - list enums, bit isExternalType = 0> +class VariadicEnumArgument values, list enums, + bit isExternalType = 0> : Argument { string Type = type; + // When true, the argument will be parsed as an unevaluated string literal + // and otherwise as an identifier. + bit IsString = is_string; list Values = values; list Enums = enums; bit IsExternalType = isExternalType; @@ -907,7 +914,7 @@ def ARMInterrupt : InheritableAttr, TargetSpecificAttr { // MSP430Interrupt's, MipsInterrupt's and AnyX86Interrupt's spellings // must match. 
let Spellings = [GCC<"interrupt">]; - let Args = [EnumArgument<"Interrupt", "InterruptType", + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", ""], ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", "Generic"], 1>]; @@ -1032,7 +1039,8 @@ def ExternalSourceSymbol : InheritableAttr { def Blocks : InheritableAttr { let Spellings = [Clang<"blocks">]; - let Args = [EnumArgument<"Type", "BlockType", ["byref"], ["ByRef"]>]; + let Args = [EnumArgument<"Type", "BlockType", /*is_string=*/true, + ["byref"], ["ByRef"]>]; let Documentation = [Undocumented]; } @@ -1614,7 +1622,7 @@ def FlagEnum : InheritableAttr { def EnumExtensibility : InheritableAttr { let Spellings = [Clang<"enum_extensibility">]; let Subjects = SubjectList<[Enum]>; - let Args = [EnumArgument<"Extensibility", "Kind", + let Args = [EnumArgument<"Extensibility", "Kind", /*is_string=*/false, ["closed", "open"], ["Closed", "Open"]>]; let Documentation = [EnumExtensibilityDocs]; } @@ -1780,7 +1788,7 @@ def MipsInterrupt : InheritableAttr, TargetSpecificAttr { // must match. 
let Spellings = [GCC<"interrupt">]; let Subjects = SubjectList<[Function]>; - let Args = [EnumArgument<"Interrupt", "InterruptType", + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, ["vector=sw0", "vector=sw1", "vector=hw0", "vector=hw1", "vector=hw2", "vector=hw3", "vector=hw4", "vector=hw5", "eic", ""], @@ -1968,7 +1976,7 @@ def NoMicroMips : InheritableAttr, TargetSpecificAttr { def RISCVInterrupt : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"interrupt">]; let Subjects = SubjectList<[Function]>; - let Args = [EnumArgument<"Interrupt", "InterruptType", + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, ["supervisor", "machine"], ["supervisor", "machine"], 1>]; @@ -2339,7 +2347,7 @@ def ObjCException : InheritableAttr { def ObjCMethodFamily : InheritableAttr { let Spellings = [Clang<"objc_method_family">]; let Subjects = SubjectList<[ObjCMethod], ErrorDiag>; - let Args = [EnumArgument<"Family", "FamilyKind", + let Args = [EnumArgument<"Family", "FamilyKind", /*is_string=*/false, ["none", "alloc", "copy", "init", "mutableCopy", "new"], ["OMF_None", "OMF_alloc", "OMF_copy", "OMF_init", "OMF_mutableCopy", "OMF_new"]>]; @@ -2513,7 +2521,7 @@ def IntelOclBicc : DeclOrTypeAttr { def Pcs : DeclOrTypeAttr { let Spellings = [GCC<"pcs">]; - let Args = [EnumArgument<"PCS", "PCSType", + let Args = [EnumArgument<"PCS", "PCSType", /*is_string=*/true, ["aapcs", "aapcs-vfp"], ["AAPCS", "AAPCS_VFP"]>]; // let Subjects = [Function, ObjCMethod]; @@ -2646,7 +2654,7 @@ def SwiftObjCMembers : Attr { def SwiftError : InheritableAttr { let Spellings = [GNU<"swift_error">]; let Args = [ - EnumArgument<"Convention", "ConventionKind", + EnumArgument<"Convention", "ConventionKind", /*is_string=*/false, ["none", "nonnull_error", "null_result", "zero_result", "nonzero_result"], ["None", "NonNullError", "NullResult", "ZeroResult", "NonZeroResult"]> ]; @@ -2678,7 +2686,7 @@ def SwiftName : InheritableAttr { def SwiftNewType : 
InheritableAttr { let Spellings = [GNU<"swift_newtype">, GNU<"swift_wrapper">]; - let Args = [EnumArgument<"NewtypeKind", "NewtypeKind", + let Args = [EnumArgument<"NewtypeKind", "NewtypeKind", /*is_string=*/false, ["struct", "enum"], ["NK_Struct", "NK_Enum"]>]; let Subjects = SubjectList<[TypedefName], ErrorDiag>; let Documentation = [SwiftNewTypeDocs]; @@ -2814,7 +2822,7 @@ def PragmaClangTextSection : InheritableAttr { def CodeModel : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"model">]; - let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", + let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", /*is_string=*/1, ["normal", "medium", "extreme"], ["Small", "Medium", "Large"], /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>]; let Subjects = SubjectList<[NonTLSGlobalVar], ErrorDiag>; @@ -2870,7 +2878,7 @@ def SwiftIndirectResult : ParameterABIAttr { def SwiftAsync : InheritableAttr { let Spellings = [Clang<"swift_async">]; let Subjects = SubjectList<[Function, ObjCMethod]>; - let Args = [EnumArgument<"Kind", "Kind", + let Args = [EnumArgument<"Kind", "Kind", /*is_string=*/false, ["none", "swift_private", "not_swift_private"], ["None", "SwiftPrivate", "NotSwiftPrivate"]>, ParamIdxArgument<"CompletionHandlerIndex", /*opt=*/1>]; @@ -2880,7 +2888,7 @@ def SwiftAsync : InheritableAttr { def SwiftAsyncError : InheritableAttr { let Spellings = [Clang<"swift_async_error">]; let Subjects = SubjectList<[Function, ObjCMethod]>; - let Args = [EnumArgument<"Convention", "ConventionKind", + let Args = [EnumArgument<"Convention", "ConventionKind", /*is_string=*/false, ["none", "nonnull_error", "zero_argument", "nonzero_argument"], ["None", "NonNullError", "ZeroArgument", "NonZeroArgument"]>, UnsignedArgument<"HandlerParamIdx", /*opt=*/1>]; @@ -2925,7 +2933,7 @@ def ZeroCallUsedRegs : InheritableAttr { let Spellings = [GCC<"zero_call_used_regs">]; let Subjects = SubjectList<[Function], ErrorDiag>; let Args = [ - EnumArgument<"ZeroCallUsedRegs", 
"ZeroCallUsedRegsKind", + EnumArgument<"ZeroCallUsedRegs", "ZeroCallUsedRegsKind", /*is_string=*/true, ["skip", "used-gpr-arg", "used-gpr", "used-arg", "used", "all-gpr-arg", "all-gpr", "all-arg", "all"], ["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used", @@ -3097,7 +3105,7 @@ def TransparentUnion : InheritableAttr { def Unavailable : InheritableAttr { let Spellings = [Clang<"unavailable">]; let Args = [StringArgument<"Message", 1>, - EnumArgument<"ImplicitReason", "ImplicitReason", + EnumArgument<"ImplicitReason", "ImplicitReason", /*is_string=*/0, // FIXME ["", "", "", ""], ["IR_None", "IR_ARCForbiddenType", @@ -3117,8 +3125,8 @@ def DiagnoseIf : InheritableAttr { let Spellings = [GNU<"diagnose_if">]; let Subjects = SubjectList<[Function, ObjCMethod, ObjCProperty]>; let Args = [ExprArgument<"Cond">, StringArgument<"Message">, - EnumArgument<"DiagnosticType", - "DiagnosticType", + EnumArgument<"DiagnosticType", "DiagnosticType", + /*is_string=*/true, ["error", "warning"], ["DT_Error", "DT_Warning"]>, BoolArgument<"ArgDependent", 0, /*fake*/ 1>, @@ -3220,7 +3228,7 @@ def MatrixType : TypeAttr { def Visibility : InheritableAttr { let Clone = 0; let Spellings = [GCC<"visibility">]; - let Args = [EnumArgument<"Visibility", "VisibilityType", + let Args = [EnumArgument<"Visibility", "VisibilityType", /*is_string=*/true, ["default", "hidden", "internal", "protected"], ["Default", "Hidden", "Hidden", "Protected"]>]; let MeaningfulToClassTemplateDefinition = 1; @@ -3230,7 +3238,7 @@ def Visibility : InheritableAttr { def TypeVisibility : InheritableAttr { let Clone = 0; let Spellings = [Clang<"type_visibility">]; - let Args = [EnumArgument<"Visibility", "VisibilityType", + let Args = [EnumArgument<"Visibility", "VisibilityType", /*is_string=*/true, ["default", "hidden", "internal", "protected"], ["Default", "Hidden", "Hidden", "Protected"]>]; // let Subjects = SubjectList<[Tag, ObjCInterface, Namespace], ErrorDiag>; @@ -3628,7 +3636,7 @@ def Consumable : InheritableAttr 
{ // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"consumable", 0>]; let Subjects = SubjectList<[CXXRecord]>; - let Args = [EnumArgument<"DefaultState", "ConsumedState", + let Args = [EnumArgument<"DefaultState", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [ConsumableDocs]; @@ -3661,6 +3669,7 @@ def CallableWhen : InheritableAttr { let Spellings = [Clang<"callable_when", 0>]; let Subjects = SubjectList<[CXXMethod]>; let Args = [VariadicEnumArgument<"CallableStates", "ConsumedState", + /*is_string=*/true, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [CallableWhenDocs]; @@ -3672,7 +3681,7 @@ def ParamTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"param_typestate", 0>]; let Subjects = SubjectList<[ParmVar]>; - let Args = [EnumArgument<"ParamState", "ConsumedState", + let Args = [EnumArgument<"ParamState", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [ParamTypestateDocs]; @@ -3684,7 +3693,7 @@ def ReturnTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"return_typestate", 0>]; let Subjects = SubjectList<[Function, ParmVar]>; - let Args = [EnumArgument<"State", "ConsumedState", + let Args = [EnumArgument<"State", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [ReturnTypestateDocs]; @@ -3696,7 +3705,7 @@ def SetTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? 
let Spellings = [Clang<"set_typestate", 0>]; let Subjects = SubjectList<[CXXMethod]>; - let Args = [EnumArgument<"NewState", "ConsumedState", + let Args = [EnumArgument<"NewState", "ConsumedState", /*is_string=*/false, ["unknown", "consumed", "unconsumed"], ["Unknown", "Consumed", "Unconsumed"]>]; let Documentation = [SetTypestateDocs]; @@ -3708,7 +3717,7 @@ def TestTypestate : InheritableAttr { // FIXME: should this attribute have a CPlusPlus language option? let Spellings = [Clang<"test_typestate", 0>]; let Subjects = SubjectList<[CXXMethod]>; - let Args = [EnumArgument<"TestState", "ConsumedState", + let Args = [EnumArgument<"TestState", "ConsumedState", /*is_string=*/false, ["consumed", "unconsumed"], ["Consumed", "Unconsumed"]>]; let Documentation = [TestTypestateDocs]; @@ -3785,7 +3794,8 @@ def CFGuard : InheritableAttr, TargetSpecificAttr { // we might also want to support __declspec(guard(suppress)). let Spellings = [Declspec<"guard">, Clang<"guard">]; let Subjects = SubjectList<[Function]>; - let Args = [EnumArgument<"Guard", "GuardArg", ["nocf"], ["nocf"]>]; + let Args = [EnumArgument<"Guard", "GuardArg", /*is_string=*/false, + ["nocf"], ["nocf"]>]; let Documentation = [CFGuardDocs]; } @@ -3941,7 +3951,7 @@ def LoopHint : Attr { Pragma<"", "nounroll_and_jam">]; /// State of the loop optimization specified by the spelling. 
- let Args = [EnumArgument<"Option", "OptionType", + let Args = [EnumArgument<"Option", "OptionType", /*is_string=*/false, ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", @@ -3950,7 +3960,7 @@ def LoopHint : Attr { "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", "VectorizePredicate"]>, - EnumArgument<"State", "LoopHintState", + EnumArgument<"State", "LoopHintState", /*is_string=*/false, ["enable", "disable", "numeric", "fixed_width", "scalable_width", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "FixedWidth", @@ -4039,7 +4049,7 @@ def OMPDeclareSimdDecl : Attr { let HasCustomParsing = 1; let Documentation = [OMPDeclareSimdDocs]; let Args = [ - EnumArgument<"BranchState", "BranchStateTy", + EnumArgument<"BranchState", "BranchStateTy", /*is_string=*/false, [ "", "inbranch", "notinbranch" ], [ "BS_Undefined", "BS_Inbranch", "BS_Notinbranch" ]>, ExprArgument<"Simdlen">, VariadicExprArgument<"Uniforms">, @@ -4059,10 +4069,10 @@ def OMPDeclareTargetDecl : InheritableAttr { let Subjects = SubjectList<[Function, SharedVar]>; let Documentation = [OMPDeclareTargetDocs]; let Args = [ - EnumArgument<"MapType", "MapTypeTy", + EnumArgument<"MapType", "MapTypeTy", /*is_string=*/false, [ "to", "enter", "link" ], [ "MT_To", "MT_Enter", "MT_Link" ]>, - EnumArgument<"DevType", "DevTypeTy", + EnumArgument<"DevType", "DevTypeTy", /*is_string=*/false, [ "host", "nohost", "any" ], [ "DT_Host", "DT_NoHost", "DT_Any" ]>, ExprArgument<"IndirectExpr">, @@ -4084,7 +4094,7 @@ def OMPAllocateDecl : InheritableAttr { let Spellings = []; let SemaHandler = 0; let Args = [ - EnumArgument<"AllocatorType", "AllocatorTypeTy", + EnumArgument<"AllocatorType", "AllocatorTypeTy", /*is_string=*/false, [ "omp_null_allocator", "omp_default_mem_alloc", 
"omp_large_cap_mem_alloc", "omp_const_mem_alloc", @@ -4333,7 +4343,7 @@ def HLSLShader : InheritableAttr { let Subjects = SubjectList<[HLSLEntry]>; let LangOpts = [HLSL]; let Args = [ - EnumArgument<"Type", "ShaderType", + EnumArgument<"Type", "ShaderType", /*is_string=*/true, ["pixel", "vertex", "geometry", "hull", "domain", "compute", "raygeneration", "intersection", "anyhit", "closesthit", "miss", "callable", "mesh", "amplification"], @@ -4349,10 +4359,12 @@ def HLSLResource : InheritableAttr { let Subjects = SubjectList<[Struct]>; let LangOpts = [HLSL]; let Args = [EnumArgument<"ResourceClass", "llvm::hlsl::ResourceClass", + /*is_string=*/0, ["SRV", "UAV", "CBuffer", "Sampler"], ["SRV", "UAV", "CBuffer", "Sampler"], /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>, EnumArgument<"ResourceKind", "llvm::hlsl::ResourceKind", + /*is_string=*/0, ["Texture1D", "Texture2D", "Texture2DMS", "Texture3D", "TextureCube", "Texture1DArray", "Texture2DArray", "Texture2DMSArray", @@ -4409,7 +4421,7 @@ def : MutualExclusions<[RandomizeLayout, NoRandomizeLayout]>; def FunctionReturnThunks : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"function_return">]; - let Args = [EnumArgument<"ThunkType", "Kind", + let Args = [EnumArgument<"ThunkType", "Kind", /*is_string=*/true, ["keep", "thunk-extern"], ["Keep", "Extern"] >]; diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 72789d9d348144..6ad75d4e034a9d 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1468,7 +1468,7 @@ bool ByteCodeExprGen::VisitCompoundAssignOperator( std::optional LHSComputationT = classify(E->getComputationLHSType()); std::optional LT = classify(LHS->getType()); - std::optional RT = classify(E->getComputationResultType()); + std::optional RT = classify(RHS->getType()); std::optional ResultT = classify(E->getType()); if (!LT || !RT || !ResultT || !LHSComputationT) diff --git 
a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index f75a9fc4b46650..ce7ed9cec3db3f 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -139,7 +139,7 @@ static void moveArrayDesc(Block *B, const std::byte *Src, std::byte *Dst, static void ctorRecord(Block *B, std::byte *Ptr, bool IsConst, bool IsMutable, bool IsActive, const Descriptor *D) { const bool IsUnion = D->ElemRecord->isUnion(); - auto CtorSub = [=](unsigned SubOff, Descriptor *F, bool IsBase) { + auto CtorSub = [=](unsigned SubOff, const Descriptor *F, bool IsBase) { auto *Desc = reinterpret_cast(Ptr + SubOff) - 1; Desc->Offset = SubOff; Desc->Desc = F; @@ -161,7 +161,7 @@ static void ctorRecord(Block *B, std::byte *Ptr, bool IsConst, bool IsMutable, } static void dtorRecord(Block *B, std::byte *Ptr, const Descriptor *D) { - auto DtorSub = [=](unsigned SubOff, Descriptor *F) { + auto DtorSub = [=](unsigned SubOff, const Descriptor *F) { if (auto Fn = F->DtorFn) Fn(B, Ptr + SubOff, F); }; diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index 964c0377c6dc1f..ec6cdebcd820fa 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -247,7 +247,8 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { unsigned VirtSize = 0; // Helper to get a base descriptor. - auto GetBaseDesc = [this](const RecordDecl *BD, Record *BR) -> Descriptor * { + auto GetBaseDesc = [this](const RecordDecl *BD, + const Record *BR) -> const Descriptor * { if (!BR) return nullptr; return allocateDescriptor(BD, BR, std::nullopt, /*isConst=*/false, @@ -258,31 +259,39 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { // Reserve space for base classes. 
Record::BaseList Bases; Record::VirtualBaseList VirtBases; - if (auto *CD = dyn_cast(RD)) { + if (const auto *CD = dyn_cast(RD)) { + for (const CXXBaseSpecifier &Spec : CD->bases()) { if (Spec.isVirtual()) continue; - const RecordDecl *BD = Spec.getType()->castAs()->getDecl(); - Record *BR = getOrCreateRecord(BD); - if (Descriptor *Desc = GetBaseDesc(BD, BR)) { - BaseSize += align(sizeof(InlineDescriptor)); - Bases.push_back({BD, BaseSize, Desc, BR}); - BaseSize += align(BR->getSize()); - continue; + // In error cases, the base might not be a RecordType. + if (const auto *RT = Spec.getType()->getAs()) { + const RecordDecl *BD = RT->getDecl(); + const Record *BR = getOrCreateRecord(BD); + + if (const Descriptor *Desc = GetBaseDesc(BD, BR)) { + BaseSize += align(sizeof(InlineDescriptor)); + Bases.push_back({BD, BaseSize, Desc, BR}); + BaseSize += align(BR->getSize()); + continue; + } } return nullptr; } for (const CXXBaseSpecifier &Spec : CD->vbases()) { - const RecordDecl *BD = Spec.getType()->castAs()->getDecl(); - Record *BR = getOrCreateRecord(BD); - if (Descriptor *Desc = GetBaseDesc(BD, BR)) { - VirtSize += align(sizeof(InlineDescriptor)); - VirtBases.push_back({BD, VirtSize, Desc, BR}); - VirtSize += align(BR->getSize()); - continue; + if (const auto *RT = Spec.getType()->getAs()) { + const RecordDecl *BD = RT->getDecl(); + const Record *BR = getOrCreateRecord(BD); + + if (const Descriptor *Desc = GetBaseDesc(BD, BR)) { + VirtSize += align(sizeof(InlineDescriptor)); + VirtBases.push_back({BD, VirtSize, Desc, BR}); + VirtSize += align(BR->getSize()); + continue; + } } return nullptr; } @@ -298,7 +307,7 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) { QualType FT = FD->getType(); const bool IsConst = FT.isConstQualified(); const bool IsMutable = FD->isMutable(); - Descriptor *Desc; + const Descriptor *Desc; if (std::optional T = Ctx.classify(FT)) { Desc = createDescriptor(FD, *T, std::nullopt, IsConst, /*isTemporary=*/false, IsMutable); diff --git 
a/clang/lib/AST/Interp/Record.h b/clang/lib/AST/Interp/Record.h index b0952af2d1ac6c..284bb468d6af47 100644 --- a/clang/lib/AST/Interp/Record.h +++ b/clang/lib/AST/Interp/Record.h @@ -28,7 +28,7 @@ class Record final { struct Field { const FieldDecl *Decl; unsigned Offset; - Descriptor *Desc; + const Descriptor *Desc; bool isBitField() const { return Decl->isBitField(); } }; @@ -36,8 +36,8 @@ class Record final { struct Base { const RecordDecl *Decl; unsigned Offset; - Descriptor *Desc; - Record *R; + const Descriptor *Desc; + const Record *R; }; /// Mapping from identifiers to field descriptors. diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 0728113ba7c936..edfab11c37cf0f 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -291,7 +291,7 @@ static bool attributeHasIdentifierArg(const IdentifierInfo &II) { /// Determine whether the given attribute has an identifier argument. static ParsedAttributeArgumentsProperties -attributeStringLiteralListArg(const IdentifierInfo &II) { +attributeStringLiteralListArg(const llvm::Triple &T, const IdentifierInfo &II) { #define CLANG_ATTR_STRING_LITERAL_ARG_LIST return llvm::StringSwitch(normalizeAttrName(II.getName())) #include "clang/Parse/AttrParserStringSwitches.inc" @@ -550,7 +550,7 @@ unsigned Parser::ParseAttributeArgsCommon( ExprVector ParsedExprs; ParsedAttributeArgumentsProperties ArgProperties = - attributeStringLiteralListArg(*AttrName); + attributeStringLiteralListArg(getTargetInfo().getTriple(), *AttrName); if (ParseAttributeArgumentList(*AttrName, ParsedExprs, ArgProperties)) { SkipUntil(tok::r_paren, StopAtSemi); return 0; diff --git a/clang/test/AST/Interp/crash-GH49103-2.cpp b/clang/test/AST/Interp/crash-GH49103-2.cpp new file mode 100644 index 00000000000000..82d78e2aeab0cc --- /dev/null +++ b/clang/test/AST/Interp/crash-GH49103-2.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -verify -std=c++98 %s -fexperimental-new-constant-interpreter +// RUN: 
%clang_cc1 -verify -std=c++11 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++14 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++17 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++20 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++23 %s -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -verify -std=c++2c %s -fexperimental-new-constant-interpreter + +// https://github.com/llvm/llvm-project/issues/49103 + +template struct A; // expected-note 0+ {{}} +struct S : __make_integer_seq { }; // expected-error 0+ {{}} +S s; diff --git a/clang/test/AST/Interp/shifts.cpp b/clang/test/AST/Interp/shifts.cpp index cf71e7145c2742..e5201b3f8bbef7 100644 --- a/clang/test/AST/Interp/shifts.cpp +++ b/clang/test/AST/Interp/shifts.cpp @@ -188,3 +188,12 @@ namespace shifts { // ref-cxx17-error {{not an integral constant expression}} \ // ref-cxx17-note {{in call to 'foo(2)'}} }; + +namespace LongInt { + constexpr int f() { + int a = 1; + a <<= (long)0; + return 1; + } + static_assert(f() == 1, ""); +}; diff --git a/clang/test/Sema/attr-function-return.c b/clang/test/Sema/attr-function-return.c index c6fe88b821e35f..d2c9156da7ab61 100644 --- a/clang/test/Sema/attr-function-return.c +++ b/clang/test/Sema/attr-function-return.c @@ -13,7 +13,7 @@ __attribute__((function_return("thunk-extern"))) void w(void) {} // expected-warning@+1 {{'function_return' attribute argument not supported: invalid}} __attribute__((function_return("invalid"))) void v(void) {} -// expected-error@+1 {{'function_return' attribute requires a string}} +// expected-error@+1 {{expected string literal as argument of 'function_return' attribute}} __attribute__((function_return(5))) void a(void) {} // expected-error@+1 {{'function_return' attribute takes one argument}} diff --git a/clang/test/Sema/callingconv-iamcu.c b/clang/test/Sema/callingconv-iamcu.c index 2874a8164545aa..d6b8f8f011d0f3 
100644 --- a/clang/test/Sema/callingconv-iamcu.c +++ b/clang/test/Sema/callingconv-iamcu.c @@ -36,7 +36,7 @@ int __attribute__((pcs("aapcs", "aapcs"))) pcs1(void); // expected-error {{'pcs' int __attribute__((pcs())) pcs2(void); // expected-error {{'pcs' attribute takes one argument}} int __attribute__((pcs(pcs1))) pcs3(void); // expected-error {{'pcs' attribute requires a string}} \ // expected-error {{invalid PCS type}} -int __attribute__((pcs(0))) pcs4(void); // expected-error {{'pcs' attribute requires a string}} +int __attribute__((pcs(0))) pcs4(void); // expected-error {{expected string literal as argument of 'pcs' attribute}} /* These are ignored because the target is i386 and not ARM */ int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention is not supported for this target}} int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention is not supported for this target}} diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c index fd009b8973bfc2..f0b8b80a329749 100644 --- a/clang/test/Sema/callingconv.c +++ b/clang/test/Sema/callingconv.c @@ -45,7 +45,7 @@ int __attribute__((pcs("aapcs", "aapcs"))) pcs1(void); // expected-error {{'pcs' int __attribute__((pcs())) pcs2(void); // expected-error {{'pcs' attribute takes one argument}} int __attribute__((pcs(pcs1))) pcs3(void); // expected-error {{'pcs' attribute requires a string}} \ // expected-error {{invalid PCS type}} -int __attribute__((pcs(0))) pcs4(void); // expected-error {{'pcs' attribute requires a string}} +int __attribute__((pcs(0))) pcs4(void); // expected-error {{expected string literal as argument of 'pcs' attribute}} /* These are ignored because the target is i386 and not ARM */ int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention is not supported for this target}} int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention is not 
supported for this target}} diff --git a/clang/test/Sema/mips-interrupt-attr.c b/clang/test/Sema/mips-interrupt-attr.c index 7f8958341b8f0f..733f899ecf239e 100644 --- a/clang/test/Sema/mips-interrupt-attr.c +++ b/clang/test/Sema/mips-interrupt-attr.c @@ -3,6 +3,7 @@ struct a { int b; }; struct a test __attribute__((interrupt)); // expected-warning {{'interrupt' attribute only applies to functions and methods}} +__attribute((interrupt(42))) void foo0(void) {} // expected-error {{expected string literal as argument of 'interrupt' attribute}} __attribute__((interrupt("EIC"))) void foo1(void) {} // expected-warning {{'interrupt' attribute argument not supported: 'EIC'}} __attribute__((interrupt("eic", 1))) void foo2(void) {} // expected-error {{'interrupt' attribute takes no more than 1 argument}} diff --git a/clang/test/Sema/riscv-interrupt-attr.c b/clang/test/Sema/riscv-interrupt-attr.c index e66a5799bb3945..756bfa0582de7b 100644 --- a/clang/test/Sema/riscv-interrupt-attr.c +++ b/clang/test/Sema/riscv-interrupt-attr.c @@ -25,6 +25,7 @@ struct a { int b; }; struct a test __attribute__((interrupt)); // expected-warning {{'interrupt' attribute only applies to functions}} +__attribute__((interrupt(42))) void foo0(void) {} // expected-error {{expected string literal as argument of 'interrupt' attribute}} __attribute__((interrupt("USER"))) void foo1(void) {} // expected-warning {{'interrupt' attribute argument not supported: USER}} __attribute__((interrupt("user"))) void foo1b(void) {} // expected-warning {{'interrupt' attribute argument not supported: user}} __attribute__((interrupt("MACHINE"))) void foo1c(void) {} // expected-warning {{'interrupt' attribute argument not supported: MACHINE}} diff --git a/clang/test/Sema/zero_call_used_regs.c b/clang/test/Sema/zero_call_used_regs.c index 3313707f97cf9c..6de18fa3d7d0dd 100644 --- a/clang/test/Sema/zero_call_used_regs.c +++ b/clang/test/Sema/zero_call_used_regs.c @@ -4,7 +4,7 @@ void failure1(void) _zero_call_used_regs(); // 
expected-error {{takes one argument}} void failure2(void) _zero_call_used_regs("used", "used-gpr"); // expected-error {{takes one argument}} -void failure3(void) _zero_call_used_regs(0); // expected-error {{requires a string}} +void failure3(void) _zero_call_used_regs(0); // expected-error {{expected string literal}} void failure4(void) _zero_call_used_regs("hello"); // expected-warning {{argument not supported: hello}} void success1(void) _zero_call_used_regs("skip"); diff --git a/clang/test/SemaCXX/warn-consumed-parsing.cpp b/clang/test/SemaCXX/warn-consumed-parsing.cpp index 722a60bf98632f..63f4135d0d2654 100644 --- a/clang/test/SemaCXX/warn-consumed-parsing.cpp +++ b/clang/test/SemaCXX/warn-consumed-parsing.cpp @@ -35,7 +35,7 @@ void function3() CONSUMABLE(consumed); // expected-warning {{'consumable' attrib class CONSUMABLE(unknown) AttrTester1 { void callableWhen0() CALLABLE_WHEN("unconsumed"); - void callableWhen1() CALLABLE_WHEN(42); // expected-error {{'callable_when' attribute requires a string}} + void callableWhen1() CALLABLE_WHEN(42); // expected-error {{expected string literal as argument of 'callable_when' attribute}} void callableWhen2() CALLABLE_WHEN("foo"); // expected-warning {{'callable_when' attribute argument not supported: foo}} void callableWhen3() CALLABLE_WHEN(unconsumed); void consumes() SET_TYPESTATE(consumed); diff --git a/clang/test/SemaHLSL/shader_type_attr.hlsl b/clang/test/SemaHLSL/shader_type_attr.hlsl index d497b0582d1a72..52d3b1c9d012f1 100644 --- a/clang/test/SemaHLSL/shader_type_attr.hlsl +++ b/clang/test/SemaHLSL/shader_type_attr.hlsl @@ -47,9 +47,9 @@ int forwardDecl() { // expected-error@+1 {{'shader' attribute takes one argument}} [shader()] -// expected-error@+1 {{'shader' attribute takes one argument}} +// expected-error@+1 {{expected string literal as argument of 'shader' attribute}} [shader(1, 2)] -// expected-error@+1 {{'shader' attribute requires a string}} +// expected-error@+1 {{expected string literal as argument 
of 'shader' attribute}} [shader(1)] // expected-warning@+1 {{'shader' attribute argument not supported: cs}} [shader("cs")] diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 89b88e386f2572..935b9846990ee5 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -171,12 +171,13 @@ static StringRef NormalizeGNUAttrSpelling(StringRef AttrSpelling) { typedef std::vector> ParsedAttrMap; static ParsedAttrMap getParsedAttrList(const RecordKeeper &Records, - ParsedAttrMap *Dupes = nullptr) { + ParsedAttrMap *Dupes = nullptr, + bool SemaOnly = true) { std::vector Attrs = Records.getAllDerivedDefinitions("Attr"); std::set Seen; ParsedAttrMap R; for (const auto *Attr : Attrs) { - if (Attr->getValueAsBit("SemaHandler")) { + if (!SemaOnly || Attr->getValueAsBit("SemaHandler")) { std::string AN; if (Attr->isSubClassOf("TargetSpecificAttr") && !Attr->isValueUnset("ParseKind")) { @@ -2358,19 +2359,21 @@ static bool isVariadicExprArgument(const Record *Arg) { } static bool isStringLiteralArgument(const Record *Arg) { - return !Arg->getSuperClasses().empty() && - llvm::StringSwitch( - Arg->getSuperClasses().back().first->getName()) - .Case("StringArgument", true) - .Default(false); + if (Arg->getSuperClasses().empty()) + return false; + StringRef ArgKind = Arg->getSuperClasses().back().first->getName(); + if (ArgKind == "EnumArgument") + return Arg->getValueAsBit("IsString"); + return ArgKind == "StringArgument"; } static bool isVariadicStringLiteralArgument(const Record *Arg) { - return !Arg->getSuperClasses().empty() && - llvm::StringSwitch( - Arg->getSuperClasses().back().first->getName()) - .Case("VariadicStringArgument", true) - .Default(false); + if (Arg->getSuperClasses().empty()) + return false; + StringRef ArgKind = Arg->getSuperClasses().back().first->getName(); + if (ArgKind == "VariadicEnumArgument") + return Arg->getValueAsBit("IsString"); + return ArgKind == 
"VariadicStringArgument"; } static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records, @@ -2393,14 +2396,18 @@ static void emitClangAttrVariadicIdentifierArgList(RecordKeeper &Records, OS << "#endif // CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST\n\n"; } +static bool GenerateTargetSpecificAttrChecks(const Record *R, + std::vector &Arches, + std::string &Test, + std::string *FnName); + // Emits the list of arguments that should be parsed as unevaluated string // literals for each attribute. static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_STRING_LITERAL_ARG_LIST)\n"; - std::vector Attrs = Records.getAllDerivedDefinitions("Attr"); - for (const auto *Attr : Attrs) { - std::vector Args = Attr->getValueAsListOfDefs("Args"); + + auto MakeMask = [](ArrayRef Args) { uint32_t Bits = 0; assert(Args.size() <= 32 && "unsupported number of arguments in attribute"); for (uint32_t N = 0; N < Args.size(); ++N) { @@ -2411,11 +2418,46 @@ static void emitClangAttrUnevaluatedStringLiteralList(RecordKeeper &Records, break; } } - if (!Bits) + return Bits; + }; + + auto AddMaskWithTargetCheck = [](const Record *Attr, uint32_t Mask, + std::string &MaskStr) { + const Record *T = Attr->getValueAsDef("Target"); + std::vector Arches = T->getValueAsListOfStrings("Arches"); + std::string Test; + GenerateTargetSpecificAttrChecks(T, Arches, Test, nullptr); + MaskStr.append(Test + " ? 
" + std::to_string(Mask) + " : "); + }; + + ParsedAttrMap Dupes; + ParsedAttrMap Attrs = getParsedAttrList(Records, &Dupes, /*SemaOnly=*/false); + for (const auto &[AttrName, Attr] : Attrs) { + std::string MaskStr; + if (Attr->isSubClassOf("TargetSpecificAttr") && + !Attr->isValueUnset("ParseKind")) { + if (uint32_t Mask = MakeMask(Attr->getValueAsListOfDefs("Args"))) + AddMaskWithTargetCheck(Attr, Mask, MaskStr); + StringRef ParseKind = Attr->getValueAsString("ParseKind"); + for (const auto &[DupeParseKind, DupAttr] : Dupes) { + if (DupeParseKind != ParseKind) + continue; + if (uint32_t Mask = MakeMask(DupAttr->getValueAsListOfDefs("Args"))) + AddMaskWithTargetCheck(DupAttr, Mask, MaskStr); + } + if (!MaskStr.empty()) + MaskStr.append("0"); + } else { + if (uint32_t Mask = MakeMask(Attr->getValueAsListOfDefs("Args"))) + MaskStr = std::to_string(Mask); + } + + if (MaskStr.empty()) continue; + // All these spellings have at least one string literal has argument. forEachUniqueSpelling(*Attr, [&](const FlattenedSpelling &S) { - OS << ".Case(\"" << S.name() << "\", " << Bits << ")\n"; + OS << ".Case(\"" << S.name() << "\", " << MaskStr << ")\n"; }); } OS << "#endif // CLANG_ATTR_STRING_LITERAL_ARG_LIST\n\n"; @@ -3404,6 +3446,8 @@ void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) { OS << " }\n"; } +} // namespace clang + // Helper function for GenerateTargetSpecificAttrChecks that alters the 'Test' // parameter with only a single check type, if applicable. static bool GenerateTargetSpecificAttrCheck(const Record *R, std::string &Test, @@ -3570,6 +3614,8 @@ static void GenerateHasAttrSpellingStringSwitch( OS << " .Default(0);\n"; } +namespace clang { + // Emits list of regular keyword attributes with info about their arguments. 
void EmitClangRegularKeywordAttributeInfo(RecordKeeper &Records, raw_ostream &OS) { diff --git a/libcxx/include/istream b/libcxx/include/istream index 0d05a26091a4b3..7975a9e599a5b6 100644 --- a/libcxx/include/istream +++ b/libcxx/include/istream @@ -1010,17 +1010,18 @@ basic_istream<_CharT, _Traits>& basic_istream<_CharT, _Traits>::unget() { template int basic_istream<_CharT, _Traits>::sync() { ios_base::iostate __state = ios_base::goodbit; - int __r = 0; sentry __sen(*this, true); + if (this->rdbuf() == nullptr) + return -1; + + int __r = 0; if (__sen) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { #endif // _LIBCPP_HAS_NO_EXCEPTIONS - if (this->rdbuf() == nullptr) - return -1; if (this->rdbuf()->pubsync() == -1) { __state |= ios_base::badbit; - return -1; + __r = -1; } #ifndef _LIBCPP_HAS_NO_EXCEPTIONS } catch (...) { diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp index ec1195d6b32bd9..4fa58c0abfbaa2 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp @@ -10,6 +10,12 @@ // int sync(); +// The fix for bug 51497 and bug 51499 requires an updated dylib due to +// explicit instantiations. That means Apple backdeployment targets remain +// broken. +// TODO(#82107) Enable XFAIL. 
+// UNSUPPORTED: using-built-library-before-llvm-19 + #include #include @@ -48,6 +54,18 @@ struct testbuf } }; +template +struct testbuf_pubsync_error + : public std::basic_streambuf +{ +public: + + testbuf_pubsync_error() {} +protected: + virtual int sync() { return -1; } +}; + + #ifndef TEST_HAS_NO_EXCEPTIONS struct testbuf_exception { }; @@ -85,21 +103,62 @@ struct throwing_testbuf int main(int, char**) { + { + std::istream is(nullptr); + assert(is.sync() == -1); + } { testbuf sb(" 123456789"); std::istream is(&sb); assert(is.sync() == 0); assert(sync_called == 1); } + { + testbuf_pubsync_error sb; + std::istream is(&sb); + is.exceptions(std::ios_base::failbit | std::ios_base::eofbit); + assert(is.sync() == -1); + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + } #ifndef TEST_HAS_NO_WIDE_CHARACTERS + { + std::wistream is(nullptr); + assert(is.sync() == -1); + } { testbuf sb(L" 123456789"); std::wistream is(&sb); assert(is.sync() == 0); assert(sync_called == 2); } + { + testbuf_pubsync_error sb; + std::wistream is(&sb); + is.exceptions(std::ios_base::failbit | std::ios_base::eofbit); + assert(is.sync() == -1); + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + } #endif #ifndef TEST_HAS_NO_EXCEPTIONS + { + testbuf_pubsync_error sb; + std::istream is(&sb); + is.exceptions(std::ios_base::badbit); + bool threw = false; + try { + is.sync(); + } catch (std::ios_base::failure const&) { + threw = true; + } + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + assert(threw); + } { throwing_testbuf sb(" 123456789"); std::basic_istream is(&sb); @@ -116,6 +175,21 @@ int main(int, char**) assert(threw); } #ifndef TEST_HAS_NO_WIDE_CHARACTERS + { + testbuf_pubsync_error sb; + std::wistream is(&sb); + is.exceptions(std::ios_base::badbit); + bool threw = false; + try { + is.sync(); + } catch (std::ios_base::failure const&) { + threw = true; + } + assert( is.bad()); + assert(!is.eof()); + assert( is.fail()); + assert(threw); + } { 
throwing_testbuf sb(L" 123456789"); std::basic_istream is(&sb); @@ -131,7 +205,7 @@ int main(int, char**) assert( is.fail()); assert(threw); } -#endif +#endif // TEST_HAS_NO_WIDE_CHARACTERS #endif // TEST_HAS_NO_EXCEPTIONS return 0; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f647c67ef99a22..073052d599dbd6 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6306,7 +6306,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op, return expandFMINNUM_FMAXNUM(Op.getNode(), DAG); if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 || - VT == MVT::v16f16) + VT == MVT::v32f16) return splitBinaryVectorOp(Op, DAG); return Op; } @@ -14570,7 +14570,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N->getValueType(0); // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x)) - if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2f16) { + if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2bf16) { SDLoc SL(N); SDValue Src = N->getOperand(0); EVT EltVT = Src.getValueType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 13e1859ad6b250..42e3b4a003511b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1177,9 +1177,6 @@ class VPInstruction : public VPRecipeWithIRFlags { bool isFPMathOp() const; #endif -protected: - void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } - public: VPInstruction(unsigned Opcode, ArrayRef Operands, DebugLoc DL, const Twine &Name = "") diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp index fbcadba33e6768..98ccf216946357 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -461,7 +461,6 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef Values) { 
assert(CombinedOperands.size() > 0 && "Need more some operands"); auto *Inst = cast(Values[0])->getUnderlyingInstr(); auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc()); - VPI->setUnderlyingInstr(Inst); LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " << *cast(Values[0]) << "\n"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index 6a971b37cad7c5..5ceb85e7d9903b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -470,6 +470,88 @@ struct BubbleUpPackOpThroughGenericOpPattern ControlPropagationFn controlFn; }; +/// Propagate a tensor.pack operation up through a tensor.pad. The idea is to +/// add as many zero padding dimensions in `high` and `low` based on the number +/// of point loops. +class BubbleUpPackThroughPadOp final : public OpRewritePattern { +public: + BubbleUpPackThroughPadOp(MLIRContext *context, ControlPropagationFn fun) + : OpRewritePattern(context), controlFn(std::move(fun)) {} + + LogicalResult matchAndRewrite(tensor::PackOp packOp, + PatternRewriter &rewriter) const override { + auto padOp = packOp.getSource().getDefiningOp(); + if (!padOp) + return failure(); + + // User controlled propagation function. + if (!controlFn(padOp)) + return failure(); + + if (!padOp.getResult().hasOneUse()) + return failure(); + + // TODO: Enable padding when the padding values are the same. + if (packOp.getPaddingValue()) + return failure(); + + // Fail for non-constant padding values. The body of the pad could + // depend on the padding indices and/or properties of the padded + // tensor so for now we fail. + // TODO: Support non-constant padding values. 
+ Value paddingVal = padOp.getConstantPaddingValue(); + if (!paddingVal) + return failure(); + + if (!packOp.getDest().getDefiningOp()) + return failure(); + + ArrayRef innerDimsPos = packOp.getInnerDimsPos(); + ArrayRef outerDimsPerm = packOp.getOuterDimsPerm(); + + // Bail out if one of the padded dimension is a tiled one. + llvm::SmallBitVector paddedDims = padOp.getPaddedDims(); + llvm::SmallBitVector innerDims(paddedDims.size()); + for (int64_t dim : innerDimsPos) + innerDims.flip(dim); + if (paddedDims.anyCommon(innerDims)) + return failure(); + + Location loc = padOp->getLoc(); + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(padOp); + + auto empty = tensor::PackOp::createDestinationTensor( + rewriter, loc, padOp.getSource(), packOp.getMixedTiles(), innerDimsPos, + outerDimsPerm); + Value packedSource = rewriter.create( + loc, padOp.getSource(), empty, innerDimsPos, packOp.getMixedTiles(), + /*padding=*/std::nullopt, outerDimsPerm); + + // If we have `outer_dims_perms` we need to adjust the padded dimensions. + SmallVector lowPad = padOp.getMixedLowPad(); + SmallVector highPad = padOp.getMixedHighPad(); + if (!outerDimsPerm.empty()) { + applyPermutationToVector(lowPad, outerDimsPerm); + applyPermutationToVector(highPad, outerDimsPerm); + } + // The tiled dimensions were verified to be unpadded above, so here we + // just append 0 for the inner tile dimensions. + size_t pointLoopsSize = innerDimsPos.size(); + lowPad.append(pointLoopsSize, rewriter.getIndexAttr(0)); + highPad.append(pointLoopsSize, rewriter.getIndexAttr(0)); + + auto newPadOp = rewriter.create( + loc, /*result=*/Type(), packedSource, lowPad, highPad, paddingVal, + padOp.getNofold()); + rewriter.replaceOp(packOp, newPadOp.getResult()); + return success(); + } + +private: + ControlPropagationFn controlFn; +}; + // TODO: Relax this restriction. We should unpack a generic op also // in the presence of multiple unpack ops as producers. 
/// Return the unpacked operand, if present, for the current generic op. @@ -690,7 +772,8 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern { void mlir::linalg::populateDataLayoutPropagationPatterns( RewritePatternSet &patterns, const ControlPropagationFn &controlPackUnPackPropagation) { - patterns.insert( - patterns.getContext(), controlPackUnPackPropagation); + patterns + .insert( + patterns.getContext(), controlPackUnPackPropagation); } diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index 4c59c97aecc251..e036695a2ac9fd 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -21,28 +21,28 @@ func.func @dynamic_elem_pack(%arg0: tensor, %dest: tensor) into %dest : tensor -> tensor return %4 : tensor } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @dynamic_elem_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] -// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]] -// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: 
ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @dynamic_elem_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] +// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] +// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP2]], #[[$MAP2]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor // ----- @@ -62,20 +62,20 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> return %pack : tensor<4x16x16x32xi32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_inner_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// 
CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor<4x16x16x32xi32> +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_inner_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor<4x16x16x32xi32> // ----- @@ -96,20 +96,20 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten into %dest : tensor<128x256xi32> -> tensor<16x4x32x16xi32> return %pack : tensor<16x4x32x16xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] -// CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32> -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : 
tensor<16x4x32x16xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32> +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor<16x4x32x16xi32> // ----- @@ -130,20 +130,20 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>, into %dest : tensor<128x256xi32> -> tensor<16x4x16x32xi32> return %pack : tensor<16x4x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_inner_and_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor<16x4x16x32xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_inner_and_outer_dims +// CHECK-SAME: 
%[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP0]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor<16x4x16x32xi32> // ----- @@ -169,34 +169,34 @@ func.func @dynamic_broadcast_pack(%arg0: tensor, %arg1: tensor, %d into %dest : tensor -> tensor return %4 : tensor } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @dynamic_broadcast_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[MAP0]]()[%[[D0]]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]] -// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[MAP1]]()[%[[D1]]] -// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] -// CHECK-SAME: into 
%[[ARG1_EMPTY]] -// CHECK: %[[ELEM:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]], #[[MAP4]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]], %[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: return %[[ELEM]] : tensor +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> +// CHECK-DAG: #[[$MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @dynamic_broadcast_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]] +// CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] +// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor +// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] +// CHECK-SAME: into %[[ARG1_EMPTY]] +// CHECK: %[[ELEM:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP4]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]], %[[PACK_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: return %[[ELEM]] : tensor // ----- @@ -215,19 +215,19 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: 
tensor<64xf32>, %des %2 = tensor.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32> return %2 : tensor<1x2x56x57x32xf32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @elem_pack_transpose_inner_and_outer_dims2 -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @elem_pack_transpose_inner_and_outer_dims2 +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -253,27 +253,27 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> return %4 : tensor<100x200x4x16x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0)> -// 
CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> -// CHECK: func.func @transpose_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> +// CHECK-LABEL: func.func @transpose_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// 
CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]], #[[$MAP3]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -299,27 +299,27 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> return %4 : tensor<100x200x4x16x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, 0, 0, 0)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (0, d1, 0, 0, d5)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> -// CHECK: func.func @affine_constant_expr_pack -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, 0, 0, 0)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (0, d1, 0, 0, d5)> +// 
CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d1, d3, d4, d5)> +// CHECK-LABEL: func.func @affine_constant_expr_pack +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]], #[[$MAP3]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -347,26 +347,26 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a return %4 : tensor<200x4x16x100x16x32xi32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> -// CHECK: func.func @transpose_pack_with_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: 
inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d5)> +// CHECK-LABEL: func.func @transpose_pack_with_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]], #[[$MAP]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[DEST]] // ----- @@ -388,22 +388,22 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten return %pack : tensor<16x4x32x16xi32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: func.func @elem_pack_transpose_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] 
inner_tiles = [32, 16] -// CHECK-SAME: into %[[ARG1_EMPTY]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[PACKED_ARG1]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @elem_pack_transpose_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> +// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[ARG1_EMPTY]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK-SAME: outs(%[[PACKED_ARG1]] // ----- @@ -420,23 +420,23 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 return %2 : tensor<12x56x56x64xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @unpack_on_output -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] -// CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : 
tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY_PACK]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]]] -// CHECK-SAME: outs(%[[PACKED_ARG0]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @unpack_on_output +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] +// CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY_PACK]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]]] +// CHECK-SAME: outs(%[[PACKED_ARG0]] +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] // ----- @@ -453,29 +453,29 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 return %2 : tensor<12x56x56x64xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @unpack_on_input -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = 
tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] -// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] -// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @unpack_on_input +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] 
+// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[ARG0_PACK]] +// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // ----- @@ -492,30 +492,30 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t return %2 : tensor<12x56x56x64xf16> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @unpack_element_type_change -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] -// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] -// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[ARG0_NEW_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf16> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_NEW_EMPTY_UNPACK]] +// CHECK: 
#[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @unpack_element_type_change +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> +// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[ARG0_PACK]] +// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK: %[[ARG0_NEW_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf16> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_NEW_EMPTY_UNPACK]] // ----- @@ -533,29 +533,29 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5 return %2 : tensor<12x56x56x64xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @forward_tensor_empty -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: 
into %[[ARG0_UNPACK_EMPTY]] -// CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @forward_tensor_empty +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] +// CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_PACK_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK-SAME: outs(%[[DEST]] +// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // ----- -func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> { +func.func @pad_valid_unpack_propagation(%arg0: 
tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> @@ -566,18 +566,18 @@ func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58 return %padded : tensor<1x58x58x64xf32> } -// CHECK: func.func @pad_valid_propagation( -// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) -// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32> +// CHECK-LABEL: func.func @pad_valid_unpack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32> // ----- -func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> { +func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> @@ -588,14 
+588,14 @@ func.func @pad_valid_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58 return %padded : tensor<2x58x58x64xf32> } -// CHECK: func.func @pad_valid_propagation( -// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) -// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32> +// CHECK-LABEL: func.func @pad_valid_unpack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32> // ----- @@ -610,14 +610,80 @@ func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x5 return %padded : tensor<1x58x58x66xf32> } -// CHECK: func.func @pad_along_unpacked_dim( -// CHECK: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) -// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> -// CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1] +// CHECK-LABEL: func.func @pad_along_unpacked_dim( +// CHECK: %[[ARG0:.+]]: 
tensor<1x2x56x56x32xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> +// CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1] + +// ----- + +func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1x2x58x58x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %arg0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> + %0 = tensor.empty() : tensor<1x2x58x58x32xf32> + %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + return %1 : tensor<1x2x58x58x32xf32> +} + +// CHECK-LABEL: func.func @pad_valid_pack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32> +// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32> +// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] +// CHECK: return %[[PADDED]] + +// ----- + +func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1x58x58x2x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %arg0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> + %0 = tensor.empty() : 
tensor<1x58x58x2x32xf32> + %1 = tensor.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32> + return %1 : tensor<1x58x58x2x32xf32> +} + +// CHECK-LABEL: func.func @pad_valid_outer_dims_pack_propagation( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x2x32xf32> +// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x56x56x2x32xf32> +// CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 1, 1, 0, 0] high[0, 1, 1, 0, 0] +// CHECK: return %[[PADDED]] + +// ----- + +func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x58x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %arg0 low[0, 2, 1, 1] high[0, 2, 1, 1] { + ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x60x56x56xf32> to tensor<1x64x58x58xf32> + %0 = tensor.empty() : tensor<1x2x58x58x32xf32> + %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + return %1 : tensor<1x2x58x58x32xf32> +} + +// CHECK-LABEL: func.func @pad_along_packed_dim( +// CHECK-SAME: %[[ARG0:.+]]: tensor<1x60x56x56xf32>) +// CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 2, 1, 1] high[0, 2, 1, 1] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x58x58x32xf32> +// CHECK: tensor.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK-SAME: into %[[EMPTY]] : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> // ----- @@ -639,16 +705,16 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3 return %pack 
: tensor<4x16x16x32xi32> } -// CHECK: func.func @would_break_dominance( -// CHECK-SAME: %[[ARG0:.+]]: tensor<128x256xi32>) -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x256xi32> -// CHECK-NEXT: %[[GEN:.+]] = linalg.generic -// CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] -// CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> -// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ALLOC]] +// CHECK-LABEL: func.func @would_break_dominance( +// CHECK-SAME: %[[ARG0:.+]]: tensor<128x256xi32>) +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x256xi32> +// CHECK-NEXT: %[[GEN:.+]] = linalg.generic +// CHECK-SAME: ins(%[[ARG0]] +// CHECK-SAME: outs(%[[EMPTY]] +// CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> +// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ALLOC]] // ----- @@ -666,16 +732,16 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { return %pack : tensor<1x32x7x7x32xf32> } -// CHECK: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> ()> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK: func.func @scalar_tensor -// CHECK-SAME: %[[ARG0:.+]]: tensor) -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x32x7x7x32xf32> -// CHECK: linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> ()> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-LABEL: func.func @scalar_tensor +// CHECK-SAME: %[[ARG0:.+]]: tensor) +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x32x7x7x32xf32> +// CHECK: 
linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] +// CHECK-SAME: ins(%[[ARG0]] +// CHECK-SAME: outs(%[[EMPTY]] // ----- @@ -692,15 +758,15 @@ func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x return %2 : tensor<12x56x56x64xf32> } -// CHECK: func.func @unpack_empty_inner_dims -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] +// CHECK-LABEL: func.func @unpack_empty_inner_dims +// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: ins(%[[PACKED_ARG0]] +// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] // ----- @@ -722,25 +788,25 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>, into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> return %pack : tensor<4x16x16x32xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)> -// CHECK: func.func @reduction_pack_transpose_inner_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: 
%[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] -// CHECK-SME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG1_EMPTY]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[RED:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[PACK_ARG1]] -// CHECK: return %[[RED]] : tensor<4x16x16x32xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)> +// CHECK-LABEL: func.func @reduction_pack_transpose_inner_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> +// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG1_EMPTY]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[RED:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]] +// CHECK-SAME: outs(%[[PACK_ARG1]] +// CHECK: return %[[RED]] : tensor<4x16x16x32xi32> // ----- @@ -770,31 +836,31 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a return %4 : tensor<4x16x100x16x32xi32> } -// 
CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d5)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4, d5)> -// CHECK: func.func @reduction_pack_with_outer_dims -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] -// CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]] -// CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG3_EMPTY]] -// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] -// CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] -// CHECK-SAME: into %[[ARG0_EMPTY]] -// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] -// CHECK-SAME: into %[[ARG2_EMPTY]] -// CHECK: %[[RES:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] -// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[PACKED_ARG3]] +// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d5)> +// CHECK-DAG: #[[$MAP3:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4, d5)> +// CHECK-LABEL: func.func @reduction_pack_with_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK-SAME: 
%[[ARG2:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] +// CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32> +// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]] +// CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG3_EMPTY]] +// CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32> +// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] +// CHECK-SAME: into %[[ARG0_EMPTY]] +// CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> +// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] +// CHECK-SAME: into %[[ARG2_EMPTY]] +// CHECK: %[[RES:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]], #[[$MAP3]]] +// CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] +// CHECK-SAME: outs(%[[PACKED_ARG3]] // ----- @@ -818,24 +884,24 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 } -> tensor<16x540x960xi32> return %pool : tensor<16x540x960xi32> } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2 * 2 + d4, d3 * 2 + d5, d6)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1, d2, d3, d6)> -// CHECK: func.func @unpack_different_destination_shape -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32> -// CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack -// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16] -// CHECK-SAME: into %[[PACK_EMPTY]] -// CHECK: %[[POOL:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], 
#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] -// CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]] -// CHECK-SAME: outs(%[[INIT]] -// CHECK: %[[UNPACK_NEW_DEST:.+]] = tensor.empty() : tensor<16x540x960xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] -// CHECK-SAME: into %[[UNPACK_NEW_DEST]] -// CHECK: return %[[UNPACK]] : tensor<16x540x960xi32> +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2 * 2 + d4, d3 * 2 + d5, d6)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5)> +// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1, d2, d3, d6)> +// CHECK-LABEL: func.func @unpack_different_destination_shape +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32> +// CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32> +// CHECK: %[[PACK_ARG0:.+]] = tensor.pack +// CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16] +// CHECK-SAME: into %[[PACK_EMPTY]] +// CHECK: %[[POOL:.+]] = linalg.generic +// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] +// CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]] +// CHECK-SAME: outs(%[[INIT]] +// CHECK: %[[UNPACK_NEW_DEST:.+]] = tensor.empty() : tensor<16x540x960xi32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]] +// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] +// CHECK-SAME: into %[[UNPACK_NEW_DEST]] +// CHECK: return %[[UNPACK]] : tensor<16x540x960xi32>