From b1bc1dbea6d0423813bb73d625c6eedc040007ed Mon Sep 17 00:00:00 2001 From: Edwin Vane Date: Tue, 7 May 2024 15:06:51 -0400 Subject: [PATCH 01/64] [clang-tidy] Refactor how NamedDecl are renamed (#88735) The handling of renaming failures and multiple usages related to those failures is currently spread over several functions. Identifying the failure NamedDecl for a given usage is also duplicated, once when creating failures and again when identify usages. There are currently two ways to a failed NamedDecl from a usage: use the canonical decl or use the overridden method. With new methods about to be added, a cleanup was in order. The data flow is simplified as follows: * The visitor always forwards NamedDecls to addUsage(NamedDecl). * addUsage(NamedDecl) determines the failed NamedDecl and determines potential new names based on that failure. Usages are registered using addUsage(NamingCheckId). * addUsage(NamingCheckId) is now protected and its single responsibility is maintaining the integrity of the failure/usage map. --- .../bugprone/ReservedIdentifierCheck.cpp | 5 +- .../readability/IdentifierNamingCheck.cpp | 4 + .../utils/RenamerClangTidyCheck.cpp | 196 ++++++++++-------- .../clang-tidy/utils/RenamerClangTidyCheck.h | 14 +- 4 files changed, 121 insertions(+), 98 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp index f6714d056518da..53956661d57d13 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp @@ -178,8 +178,11 @@ std::optional ReservedIdentifierCheck::getDeclFailureInfo(const NamedDecl *Decl, const SourceManager &) const { assert(Decl && Decl->getIdentifier() && !Decl->getName().empty() && - !Decl->isImplicit() && "Decl must be an explicit identifier with a name."); + // Implicit identifiers cannot fail. + if (Decl->isImplicit()) + return std::nullopt; + return getFailureInfoImpl( Decl->getName(), isa(Decl->getDeclContext()), /*IsMacro = */ false, getLangOpts(), Invert, AllowedIdentifiers); diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index dc30531ebda0e9..27a12bfc580682 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1374,6 +1374,10 @@ IdentifierNamingCheck::getFailureInfo( std::optional IdentifierNamingCheck::getDeclFailureInfo(const NamedDecl *Decl, const SourceManager &SM) const { + // Implicit identifiers cannot be renamed. 
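+  // Such declarations are compiler-generated and carry no user-written name to fix.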
+ if (Decl->isImplicit()) + return std::nullopt; + SourceLocation Loc = Decl->getLocation(); const FileStyle &FileStyle = getStyleForFile(SM.getFilename(Loc)); if (!FileStyle.isActive()) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 962a243ce94d48..f5ed617365403a 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -61,6 +61,7 @@ struct DenseMapInfo { namespace clang::tidy { namespace { + class NameLookup { llvm::PointerIntPair Data; @@ -78,6 +79,7 @@ class NameLookup { operator bool() const { return !hasMultipleResolutions(); } const NamedDecl *operator*() const { return getDecl(); } }; + } // namespace static const NamedDecl *findDecl(const RecordDecl &RecDecl, @@ -91,6 +93,44 @@ static const NamedDecl *findDecl(const RecordDecl &RecDecl, return nullptr; } +/// Returns the function that \p Method is overridding. If There are none or +/// multiple overrides it returns nullptr. If the overridden function itself is +/// overridding then it will recurse up to find the first decl of the function. +static const CXXMethodDecl *getOverrideMethod(const CXXMethodDecl *Method) { + if (Method->size_overridden_methods() != 1) + return nullptr; + + while (true) { + Method = *Method->begin_overridden_methods(); + assert(Method && "Overridden method shouldn't be null"); + unsigned NumOverrides = Method->size_overridden_methods(); + if (NumOverrides == 0) + return Method; + if (NumOverrides > 1) + return nullptr; + } +} + +static bool hasNoName(const NamedDecl *Decl) { + return !Decl->getIdentifier() || Decl->getName().empty(); +} + +static const NamedDecl *getFailureForNamedDecl(const NamedDecl *ND) { + const auto *Canonical = cast(ND->getCanonicalDecl()); + if (Canonical != ND) + return Canonical; + + if (const auto *Method = dyn_cast(ND)) { + if (const CXXMethodDecl *Overridden = getOverrideMethod(Method)) + Canonical = cast(Overridden->getCanonicalDecl()); + + if (Canonical != ND) + return Canonical; + } + + return ND; +} + /// Returns a decl matching the \p DeclName in \p Parent or one of its base /// classes. If \p AggressiveTemplateLookup is `true` then it will check /// template dependent base classes as well. @@ -132,24 +172,6 @@ static NameLookup findDeclInBases(const CXXRecordDecl &Parent, return NameLookup(Found); // If nullptr, decl wasn't found. } -/// Returns the function that \p Method is overridding. If There are none or -/// multiple overrides it returns nullptr. If the overridden function itself is -/// overridding then it will recurse up to find the first decl of the function. 
-static const CXXMethodDecl *getOverrideMethod(const CXXMethodDecl *Method) { - if (Method->size_overridden_methods() != 1) - return nullptr; - - while (true) { - Method = *Method->begin_overridden_methods(); - assert(Method && "Overridden method shouldn't be null"); - unsigned NumOverrides = Method->size_overridden_methods(); - if (NumOverrides == 0) - return Method; - if (NumOverrides > 1) - return nullptr; - } -} - namespace { /// Callback supplies macros to RenamerClangTidyCheck::checkMacro @@ -192,10 +214,6 @@ class RenamerClangTidyVisitor : Check(Check), SM(SM), AggressiveDependentMemberLookup(AggressiveDependentMemberLookup) {} - static bool hasNoName(const NamedDecl *Decl) { - return !Decl->getIdentifier() || Decl->getName().empty(); - } - bool shouldVisitTemplateInstantiations() const { return true; } bool shouldVisitImplicitCode() const { return false; } @@ -246,29 +264,10 @@ class RenamerClangTidyVisitor } bool VisitNamedDecl(NamedDecl *Decl) { - if (hasNoName(Decl)) - return true; - - const auto *Canonical = cast(Decl->getCanonicalDecl()); - if (Canonical != Decl) { - Check->addUsage(Canonical, Decl->getLocation(), SM); - return true; - } - - // Fix overridden methods - if (const auto *Method = dyn_cast(Decl)) { - if (const CXXMethodDecl *Overridden = getOverrideMethod(Method)) { - Check->addUsage(Overridden, Method->getLocation(), SM); - return true; // Don't try to add the actual decl as a Failure. - } - } - - // Ignore ClassTemplateSpecializationDecl which are creating duplicate - // replacements with CXXRecordDecl. - if (isa(Decl)) - return true; - - Check->checkNamedDecl(Decl, SM); + SourceRange UsageRange = + DeclarationNameInfo(Decl->getDeclName(), Decl->getLocation()) + .getSourceRange(); + Check->addUsage(Decl, UsageRange, SM); return true; } @@ -413,82 +412,97 @@ void RenamerClangTidyCheck::registerPPCallbacks( std::make_unique(SM, this)); } -void RenamerClangTidyCheck::addUsage( - const RenamerClangTidyCheck::NamingCheckId &Decl, SourceRange Range, - const SourceManager &SourceMgr) { +std::pair +RenamerClangTidyCheck::addUsage( + const RenamerClangTidyCheck::NamingCheckId &FailureId, + SourceRange UsageRange, const SourceManager &SourceMgr) { // Do nothing if the provided range is invalid. - if (Range.isInvalid()) - return; + if (UsageRange.isInvalid()) + return {NamingCheckFailures.end(), false}; - // If we have a source manager, use it to convert to the spelling location for - // performing the fix. This is necessary because macros can map the same - // spelling location to different source locations, and we only want to fix - // the token once, before it is expanded by the macro. - SourceLocation FixLocation = Range.getBegin(); + // Get the spelling location for performing the fix. This is necessary because + // macros can map the same spelling location to different source locations, + // and we only want to fix the token once, before it is expanded by the macro. 
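+  // The spelling location is where the token's characters were physically written in the source.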
+ SourceLocation FixLocation = UsageRange.getBegin(); FixLocation = SourceMgr.getSpellingLoc(FixLocation); if (FixLocation.isInvalid()) - return; + return {NamingCheckFailures.end(), false}; + + auto EmplaceResult = NamingCheckFailures.try_emplace(FailureId); + NamingCheckFailure &Failure = EmplaceResult.first->second; // Try to insert the identifier location in the Usages map, and bail out if it // is already in there - RenamerClangTidyCheck::NamingCheckFailure &Failure = - NamingCheckFailures[Decl]; if (!Failure.RawUsageLocs.insert(FixLocation).second) - return; + return EmplaceResult; - if (!Failure.shouldFix()) - return; + if (Failure.FixStatus != RenamerClangTidyCheck::ShouldFixStatus::ShouldFix) + return EmplaceResult; if (SourceMgr.isWrittenInScratchSpace(FixLocation)) Failure.FixStatus = RenamerClangTidyCheck::ShouldFixStatus::InsideMacro; - if (!utils::rangeCanBeFixed(Range, &SourceMgr)) + if (!utils::rangeCanBeFixed(UsageRange, &SourceMgr)) Failure.FixStatus = RenamerClangTidyCheck::ShouldFixStatus::InsideMacro; + + return EmplaceResult; } -void RenamerClangTidyCheck::addUsage(const NamedDecl *Decl, SourceRange Range, +void RenamerClangTidyCheck::addUsage(const NamedDecl *Decl, + SourceRange UsageRange, const SourceManager &SourceMgr) { - // Don't keep track for non-identifier names. - auto *II = Decl->getIdentifier(); - if (!II) + if (hasNoName(Decl)) + return; + + // Ignore ClassTemplateSpecializationDecl which are creating duplicate + // replacements with CXXRecordDecl. + if (isa(Decl)) return; - if (const auto *Method = dyn_cast(Decl)) { - if (const CXXMethodDecl *Overridden = getOverrideMethod(Method)) - Decl = Overridden; - } - Decl = cast(Decl->getCanonicalDecl()); - return addUsage( - RenamerClangTidyCheck::NamingCheckId(Decl->getLocation(), II->getName()), - Range, SourceMgr); -} -void RenamerClangTidyCheck::checkNamedDecl(const NamedDecl *Decl, - const SourceManager &SourceMgr) { - std::optional MaybeFailure = getDeclFailureInfo(Decl, SourceMgr); + // We don't want to create a failure for every NamedDecl we find. Ideally + // there is just one NamedDecl in every group of "related" NamedDecls that + // becomes the failure. This NamedDecl and all of its related NamedDecls + // become usages. E.g. Since NamedDecls are Redeclarable, only the canonical + // NamedDecl becomes the failure and all redeclarations become usages. + const NamedDecl *FailureDecl = getFailureForNamedDecl(Decl); + + std::optional MaybeFailure = + getDeclFailureInfo(FailureDecl, SourceMgr); if (!MaybeFailure) return; - FailureInfo &Info = *MaybeFailure; - NamingCheckFailure &Failure = - NamingCheckFailures[NamingCheckId(Decl->getLocation(), Decl->getName())]; - SourceRange Range = - DeclarationNameInfo(Decl->getDeclName(), Decl->getLocation()) - .getSourceRange(); - - const IdentifierTable &Idents = Decl->getASTContext().Idents; - auto CheckNewIdentifier = Idents.find(Info.Fixup); + NamingCheckId FailureId(FailureDecl->getLocation(), FailureDecl->getName()); + + auto [FailureIter, NewFailure] = addUsage(FailureId, UsageRange, SourceMgr); + + if (FailureIter == NamingCheckFailures.end()) { + // Nothing to do if the usage wasn't accepted. + return; + } + if (!NewFailure) { + // FailureInfo has already been provided. + return; + } + + // Update the stored failure with info regarding the FailureDecl. + NamingCheckFailure &Failure = FailureIter->second; + Failure.Info = std::move(*MaybeFailure); + + // Don't overwritte the failure status if it was already set. 
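+  // A status recorded by addUsage() above (e.g. InsideMacro) takes precedence over any conflict detected below.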
+ if (!Failure.shouldFix()) { + return; + } + const IdentifierTable &Idents = FailureDecl->getASTContext().Idents; + auto CheckNewIdentifier = Idents.find(Failure.Info.Fixup); if (CheckNewIdentifier != Idents.end()) { const IdentifierInfo *Ident = CheckNewIdentifier->second; if (Ident->isKeyword(getLangOpts())) Failure.FixStatus = ShouldFixStatus::ConflictsWithKeyword; else if (Ident->hasMacroDefinition()) Failure.FixStatus = ShouldFixStatus::ConflictsWithMacroDefinition; - } else if (!isValidAsciiIdentifier(Info.Fixup)) { + } else if (!isValidAsciiIdentifier(Failure.Info.Fixup)) { Failure.FixStatus = ShouldFixStatus::FixInvalidIdentifier; } - - Failure.Info = std::move(Info); - addUsage(Decl, Range, SourceMgr); } void RenamerClangTidyCheck::check(const MatchFinder::MatchResult &Result) { diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h index be5b6f0c7f7678..3d5721b789ac2e 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h @@ -115,15 +115,9 @@ class RenamerClangTidyCheck : public ClangTidyCheck { void expandMacro(const Token &MacroNameTok, const MacroInfo *MI, const SourceManager &SourceMgr); - void addUsage(const RenamerClangTidyCheck::NamingCheckId &Decl, - SourceRange Range, const SourceManager &SourceMgr); - - /// Convenience method when the usage to be added is a NamedDecl. void addUsage(const NamedDecl *Decl, SourceRange Range, const SourceManager &SourceMgr); - void checkNamedDecl(const NamedDecl *Decl, const SourceManager &SourceMgr); - protected: /// Overridden by derived classes, returns information about if and how a Decl /// failed the check. A 'std::nullopt' result means the Decl did not fail the @@ -158,6 +152,14 @@ class RenamerClangTidyCheck : public ClangTidyCheck { const NamingCheckFailure &Failure) const = 0; private: + // Manage additions to the Failure/usage map + // + // return the result of NamingCheckFailures::try_emplace() if the usage was + // accepted. + std::pair + addUsage(const RenamerClangTidyCheck::NamingCheckId &FailureId, + SourceRange UsageRange, const SourceManager &SourceMgr); + NamingCheckFailureMap NamingCheckFailures; const bool AggressiveDependentMemberLookup; }; From 62bed56efdde1bed5dcebec5ceb375ffce223691 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 7 May 2024 15:07:06 -0400 Subject: [PATCH 02/64] [mlir][tensor] Remove assertion in ExpandShapeOp::build (#91361) Unblocking downstream integrate where an expected-to-fail test was expecting this to be a runtime verifier error, not a compiler crash: https://github.com/llvm/torch-mlir/pull/3279. --- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 4c65045084dc5f..7a13f7a7d13551 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -1676,10 +1676,12 @@ void ExpandShapeOp::build(OpBuilder &builder, OperationState &result, auto tensorResultTy = cast(resultType); FailureOr> outputShape = inferOutputShape( builder, result.location, tensorResultTy, reassociation, inputShape); - // Failure of this assertion usually indicates presence of multiple - // dynamic dimensions in the same reassociation group. 
- assert(succeeded(outputShape) && "unable to infer output shape"); - build(builder, result, tensorResultTy, src, reassociation, *outputShape); + SmallVector outputShapeOrEmpty; + if (succeeded(outputShape)) { + outputShapeOrEmpty = *outputShape; + } + build(builder, result, tensorResultTy, src, reassociation, + outputShapeOrEmpty); } SmallVector CollapseShapeOp::getReassociationMaps() { From 6cba93f25dc2014b5d8c71c739f17be1d8c3763a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 7 May 2024 11:48:53 -0700 Subject: [PATCH 03/64] [RISCV] Add partial validation of S and X extension names to RISCVISAInfo::parseNormalizedArchString. Extensions starting with 's' or 'x' should always be followed by an alphabetical character. I don't know of any crashes from this currently, but it seemed better to be defensive. --- llvm/lib/TargetParser/RISCVISAInfo.cpp | 6 ++++-- .../ELF/RISCV/unknown-arch-attr.test | 6 +++--- llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 6ab5ee3508a672..9c2ac8c3893f16 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -486,9 +486,11 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) { return createStringError(errc::invalid_argument, "failed to parse major version number"); - if (ExtName[0] == 'z' && (ExtName.size() == 1 || isDigit(ExtName[1]))) + if ((ExtName[0] == 'z' || ExtName[0] == 's' || ExtName[0] == 'x') && + (ExtName.size() == 1 || isDigit(ExtName[1]))) return createStringError(errc::invalid_argument, - "'z' must be followed by a letter"); + "'" + Twine(ExtName[0]) + + "' must be followed by a letter"); ISAInfo->addExtension(ExtName, {MajorVersion, MinorVersion}); } diff --git a/llvm/test/tools/llvm-objdump/ELF/RISCV/unknown-arch-attr.test b/llvm/test/tools/llvm-objdump/ELF/RISCV/unknown-arch-attr.test index 35c8c6240d84bc..704c9d4add0d1d 100644 --- a/llvm/test/tools/llvm-objdump/ELF/RISCV/unknown-arch-attr.test +++ b/llvm/test/tools/llvm-objdump/ELF/RISCV/unknown-arch-attr.test @@ -3,7 +3,7 @@ ## The expected behavior is to ignore the unrecognized arch feature and ## continue to process the following arch features. ## -## The object file has the "rv32i2p0_m2p0_x1p0" arch feature. "x1p0" is an +## The object file has the "rv32i2p0_m2p0_y1p0" arch feature. "y1p0" is an ## unrecognized architecture extension. llvm-objdump will ignore it and decode ## "mul" instruction correctly according to "m2p0" in the arch feature. 
## @@ -34,5 +34,5 @@ Sections: Content: 3385C502 - Name: .riscv.attributes Type: SHT_RISCV_ATTRIBUTES -## The content is the encoding of the arch feature "rv32i2p0_m2p0_x1p0" - Content: 412300000072697363760001190000000572763332693270305F6D3270305F7831703000 +## The content is the encoding of the arch feature "rv32i2p0_m2p0_y1p0" + Content: 412300000072697363760001190000000572763332693270305F6D3270305F7931703000 diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index d813a8d7185f61..a6c21c18c0ecc4 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -62,6 +62,22 @@ TEST(ParseNormalizedArchString, RejectsBadZ) { } } +TEST(ParseNormalizedArchString, RejectsBadS) { + for (StringRef Input : {"rv64i2p0_s1p0", "rv32i2p0_s2a1p0"}) { + EXPECT_EQ( + toString(RISCVISAInfo::parseNormalizedArchString(Input).takeError()), + "'s' must be followed by a letter"); + } +} + +TEST(ParseNormalizedArchString, RejectsBadX) { + for (StringRef Input : {"rv64i2p0_x1p0", "rv32i2p0_x2a1p0"}) { + EXPECT_EQ( + toString(RISCVISAInfo::parseNormalizedArchString(Input).takeError()), + "'x' must be followed by a letter"); + } +} + TEST(ParseNormalizedArchString, AcceptsValidBaseISAsAndSetsXLen) { auto MaybeRV32I = RISCVISAInfo::parseNormalizedArchString("rv32i2p0"); ASSERT_THAT_EXPECTED(MaybeRV32I, Succeeded()); From 1e36c96dc0998e886644d6fc76aa475d88d9645c Mon Sep 17 00:00:00 2001 From: AtariDreams Date: Tue, 7 May 2024 15:17:56 -0400 Subject: [PATCH 04/64] [InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) (#88193) Proofs: https://alive2.llvm.org/ce/z/N9dRzP https://alive2.llvm.org/ce/z/Xrpc-Y https://alive2.llvm.org/ce/z/BagBM6 --- .../InstCombine/InstCombineShifts.cpp | 50 +++- llvm/test/Transforms/InstCombine/lshr.ll | 240 ++++++++++++++++++ 2 files changed, 288 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 1cb21a1d81af4b..8847de36671301 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1259,6 +1259,54 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { match(Op1, m_SpecificIntAllowPoison(BitWidth - 1))) return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty); + // ((X << nuw Z) sub nuw Y) >>u exact Z --> X sub nuw (Y >>u exact Z), + Value *Y; + if (I.isExact() && + match(Op0, m_OneUse(m_NUWSub(m_NUWShl(m_Value(X), m_Specific(Op1)), + m_Value(Y))))) { + Value *NewLshr = Builder.CreateLShr(Y, Op1, "", /*isExact=*/true); + auto *NewSub = BinaryOperator::CreateNUWSub(X, NewLshr); + NewSub->setHasNoSignedWrap( + cast(Op0)->hasNoSignedWrap()); + return NewSub; + } + + auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) { + switch (BinOpcode) { + default: + return false; + case Instruction::Add: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // And does not work here, and sub is handled separately. 
+ return true; + } + }; + + // If both the binop and the shift are nuw, then: + // ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) + if (match(Op0, m_OneUse(m_c_BinOp(m_NUWShl(m_Value(X), m_Specific(Op1)), + m_Value(Y))))) { + BinaryOperator *Op0OB = cast(Op0); + if (isSuitableBinOpcode(Op0OB->getOpcode())) { + if (auto *OBO = dyn_cast(Op0); + !OBO || OBO->hasNoUnsignedWrap()) { + Value *NewLshr = Builder.CreateLShr( + Y, Op1, "", I.isExact() && Op0OB->getOpcode() != Instruction::And); + auto *NewBinOp = BinaryOperator::Create(Op0OB->getOpcode(), NewLshr, X); + if (OBO) { + NewBinOp->setHasNoUnsignedWrap(true); + NewBinOp->setHasNoSignedWrap(OBO->hasNoSignedWrap()); + } else if (auto *Disjoint = dyn_cast(Op0)) { + cast(NewBinOp)->setIsDisjoint( + Disjoint->isDisjoint()); + } + return NewBinOp; + } + } + } + if (match(Op1, m_APInt(C))) { unsigned ShAmtC = C->getZExtValue(); auto *II = dyn_cast(Op0); @@ -1275,7 +1323,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { return new ZExtInst(Cmp, Ty); } - Value *X; const APInt *C1; if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) { if (C1->ult(ShAmtC)) { @@ -1320,7 +1367,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C) // TODO: Consolidate with the more general transform that starts from shl // (the shifts are in the opposite order). - Value *Y; if (match(Op0, m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))), m_Value(Y))))) { diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index 7d611ba188d6b4..563e669f903537 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -163,6 +163,17 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ret <2 x i8> %lshr } +define <2 x i8> @lshr_exact_splat_vec_nuw(<2 x i8> %x) { +; CHECK-LABEL: @lshr_exact_splat_vec_nuw( +; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], +; CHECK-NEXT: ret <2 x i8> [[LSHR]] +; + %shl = shl nuw <2 x i8> %x, + %add = add nuw <2 x i8> %shl, + %lshr = lshr <2 x i8> %add, + ret <2 x i8> %lshr +} + define i8 @shl_add(i8 %x, i8 %y) { ; CHECK-LABEL: @shl_add( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2 @@ -360,8 +371,222 @@ define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) { ret <3 x i14> %t } +define i32 @shl_add_lshr_flag_preservation(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_add_lshr_flag_preservation( +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %add = add nuw nsw i32 %shl, %y + %lshr = lshr exact i32 %add, %c + ret i32 %lshr +} + +define i32 @shl_add_lshr(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_add_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %add = add nuw i32 %shl, %y + %lshr = lshr i32 %add, %c + ret i32 %lshr +} + +define i32 @shl_add_lshr_comm(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_add_lshr_comm( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %add = add nuw i32 %y, %shl + %lshr = lshr i32 %add, %c + ret i32 %lshr +} + ; Negative test +define i32 @shl_add_lshr_no_nuw(i32 %x, i32 %c, i32 
%y) { +; CHECK-LABEL: @shl_add_lshr_no_nuw( +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], [[Y:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ADD]], [[C]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %add = add i32 %shl, %y + %lshr = lshr i32 %add, %c + ret i32 %lshr +} + +; Negative test + +define i32 @shl_sub_lshr_not_exact(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_sub_lshr_not_exact( +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 [[SHL]], [[Y:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[SUB]], [[C]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %sub = sub nuw i32 %shl, %y + %lshr = lshr i32 %sub, %c + ret i32 %lshr +} + +; Negative test + +define i32 @shl_sub_lshr_no_nuw(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_sub_lshr_no_nuw( +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[X:%.*]], [[C:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[SHL]], [[Y:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[SUB]], [[C]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nsw i32 %x, %c + %sub = sub nsw i32 %shl, %y + %lshr = lshr exact i32 %sub, %c + ret i32 %lshr +} + +define i32 @shl_sub_lshr(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_sub_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = sub nuw nsw i32 [[X:%.*]], [[TMP1]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %sub = sub nuw nsw i32 %shl, %y + %lshr = lshr exact i32 %sub, %c + ret i32 %lshr +} + +define i32 @shl_or_lshr(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_or_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = or i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %or = or i32 %shl, %y + %lshr = lshr i32 %or, %c + ret i32 %lshr +} + +define i32 @shl_or_disjoint_lshr(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_or_disjoint_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = or disjoint i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %or = or disjoint i32 %shl, %y + %lshr = lshr i32 %or, %c + ret i32 %lshr +} + +define i32 @shl_or_lshr_comm(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_or_lshr_comm( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = or i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %or = or i32 %y, %shl + %lshr = lshr i32 %or, %c + ret i32 %lshr +} + +define i32 @shl_or_disjoint_lshr_comm(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_or_disjoint_lshr_comm( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = or disjoint i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %or = or disjoint i32 %y, %shl + %lshr = lshr i32 %or, %c + ret i32 %lshr +} + +define i32 @shl_xor_lshr(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_xor_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = xor i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %xor = xor i32 %shl, %y + %lshr = lshr i32 %xor, %c + ret i32 %lshr +} + +define i32 @shl_xor_lshr_comm(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_xor_lshr_comm( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = 
xor i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %xor = xor i32 %y, %shl + %lshr = lshr i32 %xor, %c + ret i32 %lshr +} + +define i32 @shl_and_lshr(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_and_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = and i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %and = and i32 %shl, %y + %lshr = lshr i32 %and, %c + ret i32 %lshr +} + +define i32 @shl_and_lshr_comm(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_and_lshr_comm( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = and i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[LSHR]] +; + %shl = shl nuw i32 %x, %c + %and = and i32 %y, %shl + %lshr = lshr i32 %and, %c + ret i32 %lshr +} + +define i32 @shl_lshr_and_exact(i32 %x, i32 %c, i32 %y) { +; CHECK-LABEL: @shl_lshr_and_exact( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = shl nuw i32 %x, %c + %3 = and i32 %2, %y + %4 = lshr exact i32 %3, %c + ret i32 %4 +} + +; Negative test + +define i32 @shl_add_lshr_neg(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @shl_add_lshr_neg( +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[SHL]], [[Z:%.*]] +; CHECK-NEXT: [[RES:%.*]] = lshr exact i32 [[ADD]], [[Z]] +; CHECK-NEXT: ret i32 [[RES]] +; + %shl = shl nuw i32 %x, %y + %add = add nuw nsw i32 %shl, %z + %res = lshr exact i32 %add, %z + ret i32 %res +} + define i32 @mul_splat_fold_wrong_mul_const(i32 %x) { ; CHECK-LABEL: @mul_splat_fold_wrong_mul_const( ; CHECK-NEXT: [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538 @@ -375,6 +600,21 @@ define i32 @mul_splat_fold_wrong_mul_const(i32 %x) { ; Negative test +define i32 @shl_add_lshr_multiuse(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @shl_add_lshr_multiuse( +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[SHL]], [[Z:%.*]] +; CHECK-NEXT: call void @use(i32 [[ADD]]) +; CHECK-NEXT: [[RES:%.*]] = lshr exact i32 [[ADD]], [[Z]] +; CHECK-NEXT: ret i32 [[RES]] +; + %shl = shl nuw i32 %x, %y + %add = add nuw nsw i32 %shl, %z + call void @use (i32 %add) + %res = lshr exact i32 %add, %z + ret i32 %res +} + define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) { ; CHECK-LABEL: @mul_splat_fold_wrong_lshr_const( ; CHECK-NEXT: [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537 From 2a4f1f4a8ff60d55da69b4654360cf947b5b20f7 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Tue, 7 May 2024 12:23:00 -0700 Subject: [PATCH 05/64] Document FP relative offsets (#91031) --- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index fa661b17c13a90..fca1824165e7e3 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1271,6 +1271,9 @@ Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) { // FP is 0xfffffffffffFFFF0 (4 lower bits are zero) // We only really need ~20 lower non-zero bits (FFFF), so we mix like this: // 0xFFFFPPPPPPPPPPPP + // + // FP works because in AArch64FrameLowering::getFrameIndexReference, we + // prefer FP-relative offsets for functions compiled with HWASan. 
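+  // That lets the symbolizer recover local variable addresses from the FP recorded here.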
FP = IRB.CreateShl(FP, 44); return IRB.CreateOr(PC, FP); } From c9ab1d890586bd8a6a194e6a37968538b80f81bd Mon Sep 17 00:00:00 2001 From: Sean Perry <39927768+perry-ca@users.noreply.github.com> Date: Tue, 7 May 2024 15:23:50 -0400 Subject: [PATCH 06/64] Mark test cases as unsupported on z/OS (#90990) These test cases are testing features not available when either targeting the s390x-ibm-zos target or use tools/features not available on the z/OS operating system. In a couple cases the lit test had a number of subtests with one or two that aren't supported on z/OS. Rather than mark the entire test as unsupported I split out the unsupported tests into a separate test case. --- clang/test/AST/Interp/cxx23.cpp | 1 + clang/test/CodeGen/ffp-contract-option.c | 1 + clang/test/CodeGen/ffp-model.c | 1 + clang/test/CodeGen/fp-matrix-pragma.c | 1 + .../Driver/clang-offload-bundler-asserts-on.c | 2 +- .../Driver/clang-offload-bundler-standardize.c | 2 +- clang/test/Driver/clang-offload-bundler-zlib.c | 2 +- clang/test/Driver/clang-offload-bundler-zstd.c | 2 +- clang/test/Driver/clang-offload-bundler.c | 2 +- clang/test/Driver/std-trigraph-override.c | 7 +++++++ clang/test/Driver/std.c | 4 ---- clang/test/FixIt/fixit-c++2a-tls.cpp | 16 ++++++++++++++++ clang/test/FixIt/fixit-c++2a.cpp | 4 ---- clang/test/Interpreter/const.cpp | 2 +- clang/test/Lexer/unicode.c | 1 + clang/test/Modules/cstd.m | 1 + .../Modules/merge-objc-protocol-visibility.m | 2 +- clang/test/PCH/chain-openmp-threadprivate.cpp | 1 + clang/test/Sema/thread_local.c | 1 + llvm/test/MC/AsmParser/layout-interdependency.s | 1 + llvm/test/Object/archive-big-extract.test | 1 + llvm/test/Object/archive-extract.test | 1 + 22 files changed, 41 insertions(+), 15 deletions(-) create mode 100644 clang/test/Driver/std-trigraph-override.c create mode 100644 clang/test/FixIt/fixit-c++2a-tls.cpp diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp index 55807f0e0f115a..c91d52c552b127 100644 --- a/clang/test/AST/Interp/cxx23.cpp +++ b/clang/test/AST/Interp/cxx23.cpp @@ -1,3 +1,4 @@ +// UNSUPPORTED: target={{.*}}-zos{{.*}} // RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all,all20 %s // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref23,all %s // RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter diff --git a/clang/test/CodeGen/ffp-contract-option.c b/clang/test/CodeGen/ffp-contract-option.c index cd777ac9b43c68..2a6443032a4e64 100644 --- a/clang/test/CodeGen/ffp-contract-option.c +++ b/clang/test/CodeGen/ffp-contract-option.c @@ -1,4 +1,5 @@ // REQUIRES: x86-registered-target +// UNSUPPORTED: target={{.*}}-zos{{.*}} // RUN: %clang_cc1 -triple=x86_64 %s -emit-llvm -o - \ // RUN:| FileCheck --check-prefixes CHECK,CHECK-DEFAULT %s diff --git a/clang/test/CodeGen/ffp-model.c b/clang/test/CodeGen/ffp-model.c index 780603284a99f7..4ed9b9dc0a780c 100644 --- a/clang/test/CodeGen/ffp-model.c +++ b/clang/test/CodeGen/ffp-model.c @@ -1,4 +1,5 @@ // REQUIRES: x86-registered-target +// UNSUPPORTED: target={{.*}}-zos{{.*}} // RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=fast %s -o - \ // RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST diff --git a/clang/test/CodeGen/fp-matrix-pragma.c b/clang/test/CodeGen/fp-matrix-pragma.c index 45ad6e657daf17..5c9909bf60e0ff 100644 --- a/clang/test/CodeGen/fp-matrix-pragma.c +++ b/clang/test/CodeGen/fp-matrix-pragma.c @@ -1,4 +1,5 @@ // RUN: %clang -emit-llvm -S 
-fenable-matrix -mllvm -disable-llvm-optzns %s -o - | FileCheck %s +// UNSUPPORTED: target={{.*}}-zos{{.*}} typedef float fx2x2_t __attribute__((matrix_type(2, 2))); typedef int ix2x2_t __attribute__((matrix_type(2, 2))); diff --git a/clang/test/Driver/clang-offload-bundler-asserts-on.c b/clang/test/Driver/clang-offload-bundler-asserts-on.c index eb11d5fbbee4a7..55060c2c42e734 100644 --- a/clang/test/Driver/clang-offload-bundler-asserts-on.c +++ b/clang/test/Driver/clang-offload-bundler-asserts-on.c @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target // REQUIRES: asserts -// UNSUPPORTED: target={{.*}}-macosx{{.*}}, target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*}}-macosx{{.*}}, target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} // Generate the file we can bundle. // RUN: %clang -O0 -target %itanium_abi_triple %s -c -o %t.o diff --git a/clang/test/Driver/clang-offload-bundler-standardize.c b/clang/test/Driver/clang-offload-bundler-standardize.c index 91dc8947aabb9a..52f5ea038e47b8 100644 --- a/clang/test/Driver/clang-offload-bundler-standardize.c +++ b/clang/test/Driver/clang-offload-bundler-standardize.c @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target // REQUIRES: asserts -// UNSUPPORTED: target={{.*}}-macosx{{.*}}, target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*}}-macosx{{.*}}, target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} // REQUIRES: asserts // Generate the file we can bundle. diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c index 15b60341a8dbde..fff7a0f5456860 100644 --- a/clang/test/Driver/clang-offload-bundler-zlib.c +++ b/clang/test/Driver/clang-offload-bundler-zlib.c @@ -1,6 +1,6 @@ // REQUIRES: zlib && !zstd // REQUIRES: x86-registered-target -// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} // // Generate the host binary to be bundled. diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c index a424981c69716f..d01d9659a68dd3 100644 --- a/clang/test/Driver/clang-offload-bundler-zstd.c +++ b/clang/test/Driver/clang-offload-bundler-zstd.c @@ -1,6 +1,6 @@ // REQUIRES: zstd // REQUIRES: x86-registered-target -// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} // // Generate the host binary to be bundled. diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index a56a5424abf88d..e492da31abb746 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -1,5 +1,5 @@ // REQUIRES: x86-registered-target -// UNSUPPORTED: target={{.*}}-macosx{{.*}}, target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*}}-macosx{{.*}}, target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} // // Generate all the types of files we can bundle. 
diff --git a/clang/test/Driver/std-trigraph-override.c b/clang/test/Driver/std-trigraph-override.c new file mode 100644 index 00000000000000..e4b83ffcf823d9 --- /dev/null +++ b/clang/test/Driver/std-trigraph-override.c @@ -0,0 +1,7 @@ +// UNSUPPORTED: target={{.*-zos.*}} +// RUN: %clang -w -std=c99 -trigraphs -std=gnu99 %s -E -o - | FileCheck -check-prefix=OVERRIDE %s +// OVERRIDE: ??(??) +// RUN: %clang -w -std=c99 -ftrigraphs -std=gnu99 %s -E -o - | FileCheck -check-prefix=FOVERRIDE %s +// FOVERRIDE: ??(??) + +??(??) diff --git a/clang/test/Driver/std.c b/clang/test/Driver/std.c index 54f746cc63d09a..fe0c4671d9d6fd 100644 --- a/clang/test/Driver/std.c +++ b/clang/test/Driver/std.c @@ -1,7 +1,3 @@ -// RUN: %clang -w -std=c99 -trigraphs -std=gnu99 %s -E -o - | FileCheck -check-prefix=OVERRIDE %s -// OVERRIDE: ??(??) -// RUN: %clang -w -std=c99 -ftrigraphs -std=gnu99 %s -E -o - | FileCheck -check-prefix=FOVERRIDE %s -// FOVERRIDE: ??(??) // RUN: %clang -w -ansi %s -E -o - | FileCheck -check-prefix=ANSI %s // ANSI: [] // RUN: %clang -w -ansi %s -fno-trigraphs -E -o - | FileCheck -check-prefix=ANSI-OVERRIDE %s diff --git a/clang/test/FixIt/fixit-c++2a-tls.cpp b/clang/test/FixIt/fixit-c++2a-tls.cpp new file mode 100644 index 00000000000000..97f2899c908393 --- /dev/null +++ b/clang/test/FixIt/fixit-c++2a-tls.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -verify -std=c++2a -pedantic-errors %s +// RUN: cp %s %t +// RUN: %clang_cc1 -x c++ -std=c++2a -fixit %t +// RUN: %clang_cc1 -Wall -pedantic-errors -x c++ -std=c++2a %t +// RUN: cat %t | FileCheck %s +// UNSUPPORTED: target={{.*-zos.*}} + +/* This is a test of the various code modification hints that only + apply in C++2a. */ + +namespace constinit_mismatch { + extern thread_local constinit int a; // expected-note {{declared constinit here}} + thread_local int a = 123; // expected-error {{'constinit' specifier missing on initializing declaration of 'a'}} + // CHECK: {{^}} constinit thread_local int a = 123; +} + diff --git a/clang/test/FixIt/fixit-c++2a.cpp b/clang/test/FixIt/fixit-c++2a.cpp index 6fe05dabf07905..a21dd701ec7431 100644 --- a/clang/test/FixIt/fixit-c++2a.cpp +++ b/clang/test/FixIt/fixit-c++2a.cpp @@ -16,10 +16,6 @@ template void init_capture_pack(T ...a) { } namespace constinit_mismatch { - extern thread_local constinit int a; // expected-note {{declared constinit here}} - thread_local int a = 123; // expected-error {{'constinit' specifier missing on initializing declaration of 'a'}} - // CHECK: {{^}} constinit thread_local int a = 123; - int b = 123; // expected-note {{add the 'constinit' specifier}} extern constinit int b; // expected-error {{'constinit' specifier added after initialization of variable}} // CHECK: {{^}} extern int b; diff --git a/clang/test/Interpreter/const.cpp b/clang/test/Interpreter/const.cpp index 86358c1a54fbdd..57fd880400e6a1 100644 --- a/clang/test/Interpreter/const.cpp +++ b/clang/test/Interpreter/const.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: system-aix +// UNSUPPORTED: system-aix, system-zos // see https://github.com/llvm/llvm-project/issues/68092 // XFAIL: host={{.*}}-windows-msvc diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c index 909b5b4244438b..e7c7d4b5dad549 100644 --- a/clang/test/Lexer/unicode.c +++ b/clang/test/Lexer/unicode.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ -std=c++11 %s // RUN: %clang_cc1 -std=c99 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace // RUN: %clang_cc1 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace +// 
UNSUPPORTED: system-zos // This file contains Unicode characters; please do not "fix" them! diff --git a/clang/test/Modules/cstd.m b/clang/test/Modules/cstd.m index 6b81b9013e9da5..2155037400bd9d 100644 --- a/clang/test/Modules/cstd.m +++ b/clang/test/Modules/cstd.m @@ -1,5 +1,6 @@ // RUN: rm -rf %t // RUN: %clang_cc1 -fsyntax-only -internal-isystem %S/Inputs/System/usr/include -fmodules -fimplicit-module-maps -fbuiltin-headers-in-system-modules -fmodules-cache-path=%t -D__need_wint_t -Werror=implicit-function-declaration %s +// UNSUPPORTED: target={{.*}}-zos{{.*}} @import uses_other_constants; const double other_value = DBL_MAX; diff --git a/clang/test/Modules/merge-objc-protocol-visibility.m b/clang/test/Modules/merge-objc-protocol-visibility.m index f5f048b369022f..074c3b1ca66814 100644 --- a/clang/test/Modules/merge-objc-protocol-visibility.m +++ b/clang/test/Modules/merge-objc-protocol-visibility.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} // RUN: rm -rf %t // RUN: split-file %s %t // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%t/Frameworks %t/test.m -Werror=objc-method-access -DHIDDEN_FIRST=1 \ diff --git a/clang/test/PCH/chain-openmp-threadprivate.cpp b/clang/test/PCH/chain-openmp-threadprivate.cpp index 05cd65063789b6..21b9f6868cc3c7 100644 --- a/clang/test/PCH/chain-openmp-threadprivate.cpp +++ b/clang/test/PCH/chain-openmp-threadprivate.cpp @@ -8,6 +8,7 @@ // with PCH // RUN: %clang_cc1 -fopenmp -emit-llvm -chain-include %s -chain-include %s %s -o - | FileCheck %s -check-prefix=CHECK-TLS-1 // RUN: %clang_cc1 -fopenmp -emit-llvm -chain-include %s -chain-include %s %s -o - | FileCheck %s -check-prefix=CHECK-TLS-2 +// // UNSUPPORTED: target={{.*}}-zos{{.*}} #if !defined(PASS1) #define PASS1 diff --git a/clang/test/Sema/thread_local.c b/clang/test/Sema/thread_local.c index a0de0aa4e39a6e..b65f1119c73831 100644 --- a/clang/test/Sema/thread_local.c +++ b/clang/test/Sema/thread_local.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -std=c23 %s -verify +// UNSUPPORTED: target={{.*}}-zos{{.*}} // Ensure that thread_local and _Thread_local are synonyms in C23 and both // restrict local variables to be explicitly static or extern. diff --git a/llvm/test/MC/AsmParser/layout-interdependency.s b/llvm/test/MC/AsmParser/layout-interdependency.s index f26149ced766f2..d275614e87e74d 100644 --- a/llvm/test/MC/AsmParser/layout-interdependency.s +++ b/llvm/test/MC/AsmParser/layout-interdependency.s @@ -1,5 +1,6 @@ # RUN: not llvm-mc --filetype=obj %s -o /dev/null 2>&1 | FileCheck %s # REQUIRES: object-emission +# UNSUPPORTED: target={{.*}}-zos{{.*}} fct_end: diff --git a/llvm/test/Object/archive-big-extract.test b/llvm/test/Object/archive-big-extract.test index a1d7f0c731c084..3de09d8fb10696 100644 --- a/llvm/test/Object/archive-big-extract.test +++ b/llvm/test/Object/archive-big-extract.test @@ -1,4 +1,5 @@ ## Test extract xcoff object file from AIX big archive. +# UNSUPPORTED: target={{.*}}-zos{{.*}} # RUN: rm -rf %t && mkdir -p %t/extracted/ && cd %t/extracted/ # RUN: llvm-ar x %p/Inputs/aix-big-archive.a # RUN: echo "content_of_evenlen" > evenlen_1 diff --git a/llvm/test/Object/archive-extract.test b/llvm/test/Object/archive-extract.test index 57b3c8f6795a36..d4edece8fc45ed 100644 --- a/llvm/test/Object/archive-extract.test +++ b/llvm/test/Object/archive-extract.test @@ -1,6 +1,7 @@ ; This test just makes sure that llvm-ar can extract bytecode members ; from various style archives. 
+; UNSUPPORTED: target={{.*}}-zos{{.*}} ; RUN: rm -rf %t && mkdir -p %t && cd %t ; RUN: rm -f very_long_bytecode_file_name.bc From 057de4d26425c8b9840912e40ce025626f45d8d6 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 7 May 2024 12:35:06 -0700 Subject: [PATCH 07/64] [lldb] Request crash report when prompting for a bug report on Darwin (#91371) On Darwin platforms, the system will generate a crash report in ~/Library/Logs/DiagnosticReports/ when a process crashes. These reports are much more useful than the "pretty backtraces" printed by LLVM and are preferred when filing bug reports on Darwin. --- lldb/tools/driver/Driver.cpp | 6 ++++++ lldb/tools/lldb-dap/lldb-dap.cpp | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index a821699c5e2ec2..14371da64f2f2f 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -733,8 +733,14 @@ int main(int argc, char const *argv[]) { // Setup LLVM signal handlers and make sure we call llvm_shutdown() on // destruction. llvm::InitLLVM IL(argc, argv, /*InstallPipeSignalExitHandler=*/false); +#if !defined(__APPLE__) llvm::setBugReportMsg("PLEASE submit a bug report to " LLDB_BUG_REPORT_URL " and include the crash backtrace.\n"); +#else + llvm::setBugReportMsg("PLEASE submit a bug report to " LLDB_BUG_REPORT_URL + " and include the crash report from " + "~/Library/Logs/DiagnosticReports/.\n"); +#endif // Parse arguments. LLDBOptTable T; diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index cf52a22b18cc14..f35abd665e8449 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -4192,8 +4192,14 @@ int SetupStdoutStderrRedirection() { int main(int argc, char *argv[]) { llvm::InitLLVM IL(argc, argv, /*InstallPipeSignalExitHandler=*/false); +#if !defined(__APPLE__) llvm::setBugReportMsg("PLEASE submit a bug report to " LLDB_BUG_REPORT_URL " and include the crash backtrace.\n"); +#else + llvm::setBugReportMsg("PLEASE submit a bug report to " LLDB_BUG_REPORT_URL + " and include the crash report from " + "~/Library/Logs/DiagnosticReports/.\n"); +#endif llvm::SmallString<256> program_path(argv[0]); llvm::sys::fs::make_absolute(program_path); From 65e2fab401a2da55c51d3caceae8478c33f3c60f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 7 May 2024 12:40:19 -0700 Subject: [PATCH 08/64] [Sema] Fix warnings This patch fixes: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp:3937:12: error: unused variable 'CanonType' [-Werror,-Wunused-variable] clang/lib/Sema/SemaTemplate.cpp:9279:18: error: unused variable 'TemplateKWLoc' [-Werror,-Wunused-variable] --- clang/lib/Sema/SemaTemplate.cpp | 2 -- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 6 ------ 2 files changed, 8 deletions(-) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index b268d7c405dfdf..6231b65bd842a9 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -9276,8 +9276,6 @@ DeclResult Sema::ActOnClassTemplateSpecialization( // NOTE: KWLoc is the location of the tag keyword. This will instead // store the location of the outermost template keyword in the declaration. - SourceLocation TemplateKWLoc = TemplateParameterLists.size() > 0 - ? 
TemplateParameterLists[0]->getTemplateLoc() : KWLoc; SourceLocation TemplateNameLoc = TemplateId.TemplateNameLoc; SourceLocation LAngleLoc = TemplateId.LAngleLoc; SourceLocation RAngleLoc = TemplateId.RAngleLoc; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 5315b143215e19..884e98a300f5a6 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3932,12 +3932,6 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl( if (SubstQualifier(D, InstD)) return nullptr; - // Build the canonical type that describes the converted template - // arguments of the class template explicit specialization. - QualType CanonType = SemaRef.Context.getTemplateSpecializationType( - TemplateName(InstClassTemplate), CanonicalConverted, - SemaRef.Context.getRecordType(InstD)); - InstD->setAccess(D->getAccess()); InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation); InstD->setSpecializationKind(D->getSpecializationKind()); From c76ccf0f1e05d649449c8ff6908b0b6329eb2612 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 7 May 2024 20:41:55 +0100 Subject: [PATCH 09/64] [LV] Add test case for #91369. Add tests for https://github.com/llvm/llvm-project/issues/91369. --- .../version-stride-with-integer-casts.ll | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index d09066fa2d7042..45745f85de95c7 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -412,6 +412,139 @@ loop: exit: ret void } + +; Test case to make sure that uses of versioned strides of type i1 are properly +; extended. From https://github.com/llvm/llvm-project/issues/91369. +; FIXME: Currently miscompiled. 
+define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress { +; CHECK-LABEL: define void @zext_of_i1_stride( +; CHECK-SAME: i1 [[G:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_16:%.*]] = zext i1 [[G]] to i16 +; CHECK-NEXT: [[G_64:%.*]] = zext i1 [[G]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 15, [[G_64]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 0, [[G_64]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 +; CHECK-NEXT: store <4 x i16> , ptr [[TMP5]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i16 [[G_16]], ptr [[GEP]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], [[G_64]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 16 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %g.16 = zext i1 %g to i16 + %g.64 = zext i1 %g to i64 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i16, ptr %dst, i64 %iv + store i16 %g.16, ptr %gep, align 2 + %iv.next = add nuw nsw i64 %iv, %g.64 + %cmp = icmp ult i64 %iv.next, 16 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Test case to make sure that uses of versioned strides of type i1 are properly +; extended. 
+define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress { +; CHECK-LABEL: define void @sext_of_i1_stride( +; CHECK-SAME: i1 [[G:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_16:%.*]] = sext i1 [[G]] to i16 +; CHECK-NEXT: [[G_64:%.*]] = sext i1 [[G]] to i64 +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[G_64]], i64 16) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], [[G_64]] +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 0, [[G_64]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 -3 +; CHECK-NEXT: store <4 x i16> , ptr [[TMP7]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i16 [[G_16]], ptr [[GEP]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], [[G_64]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 16 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %g.16 = sext i1 %g to i16 + %g.64 = sext i1 %g to i64 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i16, ptr %dst, i64 %iv + store i16 %g.16, ptr %gep, align 2 + %iv.next = add nuw nsw i64 %iv, %g.64 + %cmp = icmp ult i64 %iv.next, 16 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + + ;. 
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -425,4 +558,8 @@ exit: ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} +; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]} ;. From 82bb2534d4de16abb7a51fc646d5c31d6cec5eeb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 7 May 2024 21:43:22 +0200 Subject: [PATCH 10/64] AMDGPU: Don't bitcast float typed atomic store in IR (#90116) Implement the promotion in the DAG. Depends #90113 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 29 +++++++---- .../SelectionDAG/LegalizeFloatTypes.cpp | 34 +++++++++++++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 12 +++++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 4 ++ .../AMDGPU/no-expand-atomic-store.ll | 51 +++++++------------ 6 files changed, 91 insertions(+), 41 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bfc3e08c1632de..b3ae419b20fec2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5006,7 +5006,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } - if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP || + if (Node->getOpcode() == ISD::ATOMIC_STORE || + Node->getOpcode() == ISD::STRICT_UINT_TO_FP || Node->getOpcode() == ISD::STRICT_SINT_TO_FP || Node->getOpcode() == ISD::STRICT_FSETCC || Node->getOpcode() == ISD::STRICT_FSETCCS || @@ -5622,7 +5623,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(CvtVec); break; } - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_STORE: { AtomicSDNode *AM = cast(Node); SDLoc SL(Node); SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal()); @@ -5631,13 +5633,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() && "unexpected atomic_swap with illegal type"); - SDValue NewAtomic - = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT, - DAG.getVTList(NVT, MVT::Other), - { AM->getChain(), AM->getBasePtr(), CastVal }, - AM->getMemOperand()); - Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic)); - Results.push_back(NewAtomic.getValue(1)); + SDValue Op0 = AM->getBasePtr(); + SDValue Op1 = CastVal; + + // ATOMIC_STORE uses a swapped operand order from every other AtomicSDNode, + // but really it should merge with ISD::STORE. 
+ if (AM->getOpcode() == ISD::ATOMIC_STORE) + std::swap(Op0, Op1); + + SDValue NewAtomic = DAG.getAtomic(AM->getOpcode(), SL, NVT, AM->getChain(), + Op0, Op1, AM->getMemOperand()); + + if (AM->getOpcode() != ISD::ATOMIC_STORE) { + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic)); + Results.push_back(NewAtomic.getValue(1)); + } else + Results.push_back(NewAtomic); break; } case ISD::ATOMIC_LOAD: { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index bf87437b8dfd57..fc96ecdc662808 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2287,6 +2287,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; + case ISD::ATOMIC_STORE: R = PromoteFloatOp_ATOMIC_STORE(N, OpNo); break; } // clang-format on @@ -2409,6 +2410,23 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteFloatOp_ATOMIC_STORE(SDNode *N, + unsigned OpNo) { + AtomicSDNode *ST = cast(N); + SDValue Val = ST->getVal(); + SDLoc DL(N); + + SDValue Promoted = GetPromotedFloat(Val); + EVT VT = ST->getOperand(1).getValueType(); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + SDValue NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), + DL, IVT, Promoted); + + return DAG.getAtomic(ISD::ATOMIC_STORE, DL, IVT, ST->getChain(), NewVal, + ST->getBasePtr(), ST->getMemOperand()); +} + //===----------------------------------------------------------------------===// // Float Result Promotion //===----------------------------------------------------------------------===// @@ -3238,6 +3256,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break; case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break; + case ISD::ATOMIC_STORE: + Res = SoftPromoteHalfOp_ATOMIC_STORE(N, OpNo); + break; case ISD::STACKMAP: Res = SoftPromoteHalfOp_STACKMAP(N, OpNo); break; @@ -3391,6 +3412,19 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_ATOMIC_STORE(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Can only soften the stored value!"); + AtomicSDNode *ST = cast(N); + SDValue Val = ST->getVal(); + SDLoc dl(N); + + SDValue Promoted = GetSoftPromotedHalf(Val); + return DAG.getAtomic(ISD::ATOMIC_STORE, dl, Promoted.getValueType(), + ST->getChain(), Promoted, ST->getBasePtr(), + ST->getMemOperand()); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) { assert(OpNo > 1); // Because the first two arguments are guaranteed legal. 
SmallVector NewOps(N->ops().begin(), N->ops().end()); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index f44916b741cc5c..d925089d5689f1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -712,6 +712,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); @@ -757,6 +758,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfOp_SETCC(SDNode *N); SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_ATOMIC_STORE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 5ca7f8ef5345f5..1e9132bcfaf939 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -161,6 +161,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD, MVT::bf16, Promote); AddPromotedToType(ISD::ATOMIC_LOAD, MVT::bf16, MVT::i16); + setOperationAction(ISD::ATOMIC_STORE, MVT::f32, Promote); + AddPromotedToType(ISD::ATOMIC_STORE, MVT::f32, MVT::i32); + + setOperationAction(ISD::ATOMIC_STORE, MVT::f64, Promote); + AddPromotedToType(ISD::ATOMIC_STORE, MVT::f64, MVT::i64); + + setOperationAction(ISD::ATOMIC_STORE, MVT::f16, Promote); + AddPromotedToType(ISD::ATOMIC_STORE, MVT::f16, MVT::i16); + + setOperationAction(ISD::ATOMIC_STORE, MVT::bf16, Promote); + AddPromotedToType(ISD::ATOMIC_STORE, MVT::bf16, MVT::i16); + // There are no 64-bit extloads. These should be done as a 32-bit extload and // an extension to 64-bit. 
for (MVT VT : MVT::integer_valuetypes()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 16c4f53d634447..3814b56a4d56ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -236,6 +236,10 @@ class AMDGPUTargetLowering : public TargetLowering { return AtomicExpansionKind::None; } + AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { + return AtomicExpansionKind::None; + } + AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override { return AtomicExpansionKind::None; } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll index db0c3a20e62f48..9159393ab887e2 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll @@ -4,8 +4,7 @@ define void @store_atomic_f32_global_system(float %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_f32_global_system( ; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 -; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic float [[VAL]], ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0:![0-9]+]] ; CHECK-NEXT: ret void ; store atomic float %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 @@ -15,8 +14,7 @@ define void @store_atomic_f32_global_system(float %val, ptr addrspace(1) %ptr) { define void @store_atomic_f32_global_agent(float %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_f32_global_agent( ; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 -; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: store atomic float [[VAL]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic float %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 @@ -26,8 +24,7 @@ define void @store_atomic_f32_global_agent(float %val, ptr addrspace(1) %ptr) { define void @store_atomic_f32_local(float %val, ptr addrspace(3) %ptr) { ; CHECK-LABEL: define void @store_atomic_f32_local( ; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 -; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic float [[VAL]], ptr addrspace(3) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic float %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 @@ -37,8 +34,7 @@ define void @store_atomic_f32_local(float %val, ptr addrspace(3) %ptr) { define void @store_atomic_f32_flat(float %val, ptr %ptr) { ; CHECK-LABEL: define void @store_atomic_f32_flat( ; CHECK-SAME: float [[VAL:%.*]], ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 -; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic float [[VAL]], ptr [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic float %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 @@ -48,8 +44,7 @@ define 
void @store_atomic_f32_flat(float %val, ptr %ptr) { define void @store_atomic_f16_global_system(half %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_f16_global_system( ; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic half [[VAL]], ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic half %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 @@ -59,8 +54,7 @@ define void @store_atomic_f16_global_system(half %val, ptr addrspace(1) %ptr) { define void @store_atomic_f16_global_agent(half %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_f16_global_agent( ; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: store atomic half [[VAL]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic half %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 @@ -70,8 +64,7 @@ define void @store_atomic_f16_global_agent(half %val, ptr addrspace(1) %ptr) { define void @store_atomic_f16_local(half %val, ptr addrspace(3) %ptr) { ; CHECK-LABEL: define void @store_atomic_f16_local( ; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic half [[VAL]], ptr addrspace(3) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic half %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 @@ -81,8 +74,7 @@ define void @store_atomic_f16_local(half %val, ptr addrspace(3) %ptr) { define void @store_atomic_f16_flat(half %val, ptr %ptr) { ; CHECK-LABEL: define void @store_atomic_f16_flat( ; CHECK-SAME: half [[VAL:%.*]], ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic half [[VAL]], ptr [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic half %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 @@ -92,8 +84,7 @@ define void @store_atomic_f16_flat(half %val, ptr %ptr) { define void @store_atomic_bf16_global_system(bfloat %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_bf16_global_system( ; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic bfloat [[VAL]], ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic bfloat %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 @@ -103,8 +94,7 @@ define void @store_atomic_bf16_global_system(bfloat %val, ptr addrspace(1) %ptr) define void @store_atomic_bf16_global_agent(bfloat %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_bf16_global_agent( ; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = 
bitcast bfloat [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: store atomic bfloat [[VAL]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic bfloat %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 @@ -114,8 +104,7 @@ define void @store_atomic_bf16_global_agent(bfloat %val, ptr addrspace(1) %ptr) define void @store_atomic_bf16_local(bfloat %val, ptr addrspace(3) %ptr) { ; CHECK-LABEL: define void @store_atomic_bf16_local( ; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic bfloat [[VAL]], ptr addrspace(3) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic bfloat %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 @@ -125,8 +114,7 @@ define void @store_atomic_bf16_local(bfloat %val, ptr addrspace(3) %ptr) { define void @store_atomic_bf16_flat(bfloat %val, ptr %ptr) { ; CHECK-LABEL: define void @store_atomic_bf16_flat( ; CHECK-SAME: bfloat [[VAL:%.*]], ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 -; CHECK-NEXT: store atomic i16 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: store atomic bfloat [[VAL]], ptr [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic bfloat %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 @@ -135,8 +123,7 @@ define void @store_atomic_bf16_flat(bfloat %val, ptr %ptr) { define void @store_atomic_f64_global_system(double %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_f64_global_system( ; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 -; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: store atomic double [[VAL]], ptr addrspace(1) [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic double %val, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0 @@ -146,8 +133,7 @@ define void @store_atomic_f64_global_system(double %val, ptr addrspace(1) %ptr) define void @store_atomic_f64_global_agent(double %val, ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @store_atomic_f64_global_agent( ; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 -; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: store atomic double [[VAL]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic double %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 @@ -157,8 +143,7 @@ define void @store_atomic_f64_global_agent(double %val, ptr addrspace(1) %ptr) { define void @store_atomic_f64_local(double %val, ptr addrspace(3) %ptr) { ; CHECK-LABEL: define void @store_atomic_f64_local( ; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 -; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: store atomic double [[VAL]], ptr addrspace(3) [[PTR]] seq_cst, 
align 8, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic double %val, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0 @@ -168,8 +153,7 @@ define void @store_atomic_f64_local(double %val, ptr addrspace(3) %ptr) { define void @store_atomic_f64_flat(double %val, ptr %ptr) { ; CHECK-LABEL: define void @store_atomic_f64_flat( ; CHECK-SAME: double [[VAL:%.*]], ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 -; CHECK-NEXT: store atomic i64 [[TMP1]], ptr [[PTR]] seq_cst, align 8 +; CHECK-NEXT: store atomic double [[VAL]], ptr [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]] ; CHECK-NEXT: ret void ; store atomic double %val, ptr %ptr seq_cst, align 8, !some.unknown.md !0 @@ -177,3 +161,6 @@ define void @store_atomic_f64_flat(double %val, ptr %ptr) { } !0 = !{} +;. +; CHECK: [[META0]] = !{} +;. From 31dd0ef73c99b1bc9825ddfc58ddff0b134608fb Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 21 Mar 2024 01:28:09 -0500 Subject: [PATCH 11/64] [CVP] Add tests for adding `nneg` flag to `uitofp` and converting `sitofp` -> `uitofp nneg`; NFC --- .../CorrelatedValuePropagation/sitofp.ll | 99 +++++++++++++++++++ .../CorrelatedValuePropagation/uitofp.ll | 98 ++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll new file mode 100644 index 00000000000000..4bc649245d52f5 --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s + +declare void @use.f32(float) + +define void @test1(i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = sitofp i32 [[N]] to float +; CHECK-NEXT: call void @use.f32(float [[EXT_WIDE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %n, -1 + br i1 %cmp, label %bb, label %exit + +bb: + %ext.wide = sitofp i32 %n to float + call void @use.f32(float %ext.wide) + br label %exit + +exit: + ret void +} + + +define void @test2_fail(i32 %n) { +; CHECK-LABEL: @test2_fail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = sitofp i32 [[N]] to float +; CHECK-NEXT: call void @use.f32(float [[EXT_WIDE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %n, -2 + br i1 %cmp, label %bb, label %exit + +bb: + %ext.wide = sitofp i32 %n to float + call void @use.f32(float %ext.wide) + br label %exit + +exit: + ret void +} + +define float @may_including_undef(i1 %c.1, i1 %c.2) { +; CHECK-LABEL: @may_including_undef( +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[TRUE_1:%.*]], label [[FALSE:%.*]] +; CHECK: true.1: +; CHECK-NEXT: br i1 [[C_2:%.*]], label [[TRUE_2:%.*]], label [[EXIT:%.*]] +; CHECK: true.2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: false: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, 
[[TRUE_2]] ], [ undef, [[FALSE]] ] +; CHECK-NEXT: [[EXT:%.*]] = sitofp i32 [[P]] to float +; CHECK-NEXT: ret float [[EXT]] +; + br i1 %c.1, label %true.1, label %false + +true.1: + br i1 %c.2, label %true.2, label %exit + +true.2: + br label %exit + +false: + br label %exit + +exit: + %p = phi i32 [ 0, %true.1 ], [ 1, %true.2], [ undef, %false ] + %ext = sitofp i32 %p to float + ret float %ext +} + +define double @test_infer_at_use(i32 noundef %n) { +; CHECK-LABEL: @test_infer_at_use( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 +; CHECK-NEXT: [[EXT:%.*]] = sitofp i32 [[N]] to double +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], double [[EXT]], double 0.000000e+00 +; CHECK-NEXT: ret double [[SELECT]] +; + %cmp = icmp sgt i32 %n, -1 + %ext = sitofp i32 %n to double + %select = select i1 %cmp, double %ext, double 0.0 + ret double %select +} diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll new file mode 100644 index 00000000000000..0558ec61e636f1 --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s + +declare void @use.f32(float) + +define void @test1(i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = uitofp i32 [[N]] to float +; CHECK-NEXT: call void @use.f32(float [[EXT_WIDE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %n, -1 + br i1 %cmp, label %bb, label %exit + +bb: + %ext.wide = uitofp i32 %n to float + call void @use.f32(float %ext.wide) + br label %exit + +exit: + ret void +} + +define void @test2_fail(i32 %n) { +; CHECK-LABEL: @test2_fail( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[EXT_WIDE:%.*]] = uitofp i32 [[N]] to float +; CHECK-NEXT: call void @use.f32(float [[EXT_WIDE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %n, -2 + br i1 %cmp, label %bb, label %exit + +bb: + %ext.wide = uitofp i32 %n to float + call void @use.f32(float %ext.wide) + br label %exit + +exit: + ret void +} + +define float @may_including_undef(i1 %c.1, i1 %c.2) { +; CHECK-LABEL: @may_including_undef( +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[TRUE_1:%.*]], label [[FALSE:%.*]] +; CHECK: true.1: +; CHECK-NEXT: br i1 [[C_2:%.*]], label [[TRUE_2:%.*]], label [[EXIT:%.*]] +; CHECK: true.2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: false: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, [[TRUE_2]] ], [ undef, [[FALSE]] ] +; CHECK-NEXT: [[EXT:%.*]] = uitofp i32 [[P]] to float +; CHECK-NEXT: ret float [[EXT]] +; + br i1 %c.1, label %true.1, label %false + +true.1: + br i1 %c.2, label %true.2, label %exit + +true.2: + br label %exit + +false: + br label %exit + +exit: + %p = phi i32 [ 0, %true.1 ], [ 1, %true.2], [ undef, %false ] + %ext = uitofp i32 %p to float + ret float %ext +} + +define double @test_infer_at_use(i32 noundef %n) { +; CHECK-LABEL: @test_infer_at_use( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 +; CHECK-NEXT: [[EXT:%.*]] = 
uitofp i32 [[N]] to double +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], double [[EXT]], double 0.000000e+00 +; CHECK-NEXT: ret double [[SELECT]] +; + %cmp = icmp sgt i32 %n, -1 + %ext = uitofp i32 %n to double + %select = select i1 %cmp, double %ext, double 0.0 + ret double %select +} From 925a11128c903c8554921c2b5700caf191ae61d6 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 20 Mar 2024 17:00:47 -0500 Subject: [PATCH 12/64] [CVP] Convert `sitofp` -> `uitofp nneg` and add `nneg` flag to `uitofp` Similiar to the `InstCombine` changes, just furthering the scope of the canonicalization/`uitofp nneg` support --- .../Scalar/CorrelatedValuePropagation.cpp | 50 ++++++++++++++++--- .../CorrelatedValuePropagation/sitofp.ll | 4 +- .../CorrelatedValuePropagation/uitofp.ll | 4 +- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 715cdaff972796..50b5fdb5672074 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -62,6 +62,7 @@ STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr"); STATISTIC(NumAShrsRemoved, "Number of ashr removed"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); +STATISTIC(NumSIToFP, "Number of sitofp converted to uitofp"); STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned"); STATISTIC(NumAnd, "Number of ands removed"); STATISTIC(NumNW, "Number of no-wrap deductions"); @@ -89,7 +90,7 @@ STATISTIC(NumSMinMax, "Number of llvm.s{min,max} intrinsics simplified to unsigned"); STATISTIC(NumUDivURemsNarrowedExpanded, "Number of bound udiv's/urem's expanded"); -STATISTIC(NumZExt, "Number of non-negative deductions"); +STATISTIC(NumNNeg, "Number of zext/uitofp non-negative deductions"); static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { if (Constant *C = LVI->getConstant(V, At)) @@ -1075,20 +1076,49 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { return true; } -static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) { - if (ZExt->getType()->isVectorTy()) +static bool processPossibleNonNeg(PossiblyNonNegInst *I, LazyValueInfo *LVI) { + if (I->getType()->isVectorTy()) return false; - if (ZExt->hasNonNeg()) + if (I->hasNonNeg()) return false; - const Use &Base = ZExt->getOperandUse(0); + const Use &Base = I->getOperandUse(0); if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) .isAllNonNegative()) return false; - ++NumZExt; - ZExt->setNonNeg(); + ++NumNNeg; + I->setNonNeg(); + + return true; +} + +static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) { + return processPossibleNonNeg(cast(ZExt), LVI); +} + +static bool processUIToFP(UIToFPInst *UIToFP, LazyValueInfo *LVI) { + return processPossibleNonNeg(cast(UIToFP), LVI); +} + +static bool processSIToFP(SIToFPInst *SIToFP, LazyValueInfo *LVI) { + if (SIToFP->getType()->isVectorTy()) + return false; + + const Use &Base = SIToFP->getOperandUse(0); + if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) + .isAllNonNegative()) + return false; + + ++NumSIToFP; + auto *UIToFP = CastInst::Create(Instruction::UIToFP, Base, SIToFP->getType(), + "", SIToFP->getIterator()); + UIToFP->takeName(SIToFP); + UIToFP->setDebugLoc(SIToFP->getDebugLoc()); + UIToFP->setNonNeg(); + SIToFP->replaceAllUsesWith(UIToFP); + SIToFP->eraseFromParent(); return true; } 
@@ -1197,6 +1227,12 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, case Instruction::ZExt: BBChanged |= processZExt(cast(&II), LVI); break; + case Instruction::UIToFP: + BBChanged |= processUIToFP(cast(&II), LVI); + break; + case Instruction::SIToFP: + BBChanged |= processSIToFP(cast(&II), LVI); + break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll index 4bc649245d52f5..83533290e2f614 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/sitofp.ll @@ -9,7 +9,7 @@ define void @test1(i32 %n) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[EXT_WIDE:%.*]] = sitofp i32 [[N]] to float +; CHECK-NEXT: [[EXT_WIDE:%.*]] = uitofp nneg i32 [[N]] to float ; CHECK-NEXT: call void @use.f32(float [[EXT_WIDE]]) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -88,7 +88,7 @@ exit: define double @test_infer_at_use(i32 noundef %n) { ; CHECK-LABEL: @test_infer_at_use( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 -; CHECK-NEXT: [[EXT:%.*]] = sitofp i32 [[N]] to double +; CHECK-NEXT: [[EXT:%.*]] = uitofp nneg i32 [[N]] to double ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], double [[EXT]], double 0.000000e+00 ; CHECK-NEXT: ret double [[SELECT]] ; diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll index 0558ec61e636f1..32d0f5b4d33829 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/uitofp.ll @@ -9,7 +9,7 @@ define void @test1(i32 %n) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[EXT_WIDE:%.*]] = uitofp i32 [[N]] to float +; CHECK-NEXT: [[EXT_WIDE:%.*]] = uitofp nneg i32 [[N]] to float ; CHECK-NEXT: call void @use.f32(float [[EXT_WIDE]]) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -87,7 +87,7 @@ exit: define double @test_infer_at_use(i32 noundef %n) { ; CHECK-LABEL: @test_infer_at_use( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1 -; CHECK-NEXT: [[EXT:%.*]] = uitofp i32 [[N]] to double +; CHECK-NEXT: [[EXT:%.*]] = uitofp nneg i32 [[N]] to double ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], double [[EXT]], double 0.000000e+00 ; CHECK-NEXT: ret double [[SELECT]] ; From 6243395d7f1da6a2ea813f5d86ba71f91e1070bf Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 20 Mar 2024 17:05:03 -0500 Subject: [PATCH 13/64] [SCCP] Add `nneg` flag to `uitofp` if its operand is non-negative Similiar to the `InstCombine` changes, just furthering the support of the `uitofp nneg` support. 
Closes #86154 --- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 12 +++++++----- llvm/test/Transforms/SCCP/ip-ranges-casts.ll | 6 +++--- llvm/test/Transforms/SCCP/sitofp.ll | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index c6029b428ed398..ce40e8b31b767a 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -143,7 +143,7 @@ static bool refineInstruction(SCCPSolver &Solver, Changed = true; } } - } else if (isa(Inst) && !Inst.hasNonNeg()) { + } else if (isa(Inst) && !Inst.hasNonNeg()) { auto Range = GetRange(Inst.getOperand(0)); if (Range.isAllNonNegative()) { Inst.setNonNeg(); @@ -191,14 +191,16 @@ static bool replaceSignedInst(SCCPSolver &Solver, Instruction *NewInst = nullptr; switch (Inst.getOpcode()) { - // Note: We do not fold sitofp -> uitofp here because that could be more - // expensive in codegen and may not be reversible in the backend. + case Instruction::SIToFP: case Instruction::SExt: { - // If the source value is not negative, this is a zext. + // If the source value is not negative, this is a zext/uitofp. Value *Op0 = Inst.getOperand(0); if (InsertedValues.count(Op0) || !isNonNegative(Op0)) return false; - NewInst = new ZExtInst(Op0, Inst.getType(), "", Inst.getIterator()); + NewInst = CastInst::Create(Inst.getOpcode() == Instruction::SExt + ? Instruction::ZExt + : Instruction::UIToFP, + Op0, Inst.getType(), "", Inst.getIterator()); NewInst->setNonNeg(); break; } diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll index 05fa04a9fbe06f..e8d417546def85 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -167,7 +167,7 @@ define i1 @caller.sext() { define internal i1 @f.fptosi(i32 %x) { ; CHECK-LABEL: define internal i1 @f.fptosi( ; CHECK-SAME: i32 [[X:%.*]]) { -; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X]] to double +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = uitofp nneg i32 [[X]] to double ; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[TO_DOUBLE]] ; CHECK-NEXT: [[TO_I32:%.*]] = fptosi double [[ADD]] to i32 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i32 [[TO_I32]], 300 @@ -209,7 +209,7 @@ define i1 @caller.fptosi() { define internal i1 @f.fpext(i16 %x) { ; CHECK-LABEL: define internal i1 @f.fpext( ; CHECK-SAME: i16 [[X:%.*]]) { -; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X]] to float +; CHECK-NEXT: [[TO_FLOAT:%.*]] = uitofp nneg i16 [[X]] to float ; CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[TO_FLOAT]] to double ; CHECK-NEXT: [[TO_I64:%.*]] = fptoui float [[TO_FLOAT]] to i64 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 @@ -293,7 +293,7 @@ define i1 @int_range_to_double_cast(i32 %a) { ; CHECK-LABEL: define i1 @int_range_to_double_cast( ; CHECK-SAME: i32 [[A:%.*]]) { ; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], 255 -; CHECK-NEXT: [[T4:%.*]] = sitofp i32 [[R]] to double +; CHECK-NEXT: [[T4:%.*]] = uitofp nneg i32 [[R]] to double ; CHECK-NEXT: [[T10:%.*]] = fadd double 0.000000e+00, [[T4]] ; CHECK-NEXT: [[T11:%.*]] = fcmp olt double [[T4]], [[T10]] ; CHECK-NEXT: ret i1 [[T11]] diff --git a/llvm/test/Transforms/SCCP/sitofp.ll b/llvm/test/Transforms/SCCP/sitofp.ll index b635263a5726c2..24f04ae1fccb91 100644 --- a/llvm/test/Transforms/SCCP/sitofp.ll +++ b/llvm/test/Transforms/SCCP/sitofp.ll @@ -4,7 +4,7 @@ define float @sitofp_and(i8 %x) { ; CHECK-LABEL: @sitofp_and( ; CHECK-NEXT: [[PX:%.*]] = and i8 
[[X:%.*]], 127 -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[PX]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[PX]] to float ; CHECK-NEXT: ret float [[R]] ; %px = and i8 %x, 127 @@ -23,7 +23,7 @@ define half @sitofp_const(i8 %x) { define double @sitofp_zext(i7 %x) { ; CHECK-LABEL: @sitofp_zext( ; CHECK-NEXT: [[PX:%.*]] = zext i7 [[X:%.*]] to i8 -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[PX]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[PX]] to double ; CHECK-NEXT: ret double [[R]] ; %px = zext i7 %x to i8 @@ -52,7 +52,7 @@ define float @dominating_condition(i32 %x) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: [[A:%.*]] = sitofp i32 [[X]] to float +; CHECK-NEXT: [[A:%.*]] = uitofp nneg i32 [[X]] to float ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: f: ; CHECK-NEXT: br label [[EXIT]] @@ -86,7 +86,7 @@ define float @dominating_condition_alt(i32 %x) { ; CHECK: t: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: f: -; CHECK-NEXT: [[A:%.*]] = sitofp i32 [[X]] to float +; CHECK-NEXT: [[A:%.*]] = uitofp nneg i32 [[X]] to float ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[COND:%.*]] = phi float [ -4.200000e+01, [[T]] ], [ [[A]], [[F]] ] From 117bda523ea15510d2289020decabef57d89acc0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 7 May 2024 12:57:01 -0700 Subject: [PATCH 14/64] [RISCV] Add unittests for MinVLen/MaxELen/MaxElenFp for ParseArchString. NFC We had tests for ParseNormalizedArchString, but not ParseArchString. The ParseNormalizedArchString test was not checking MaxElenFp. --- .../TargetParser/RISCVISAInfoTest.cpp | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index a6c21c18c0ecc4..83b52d0527c3ad 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -139,6 +139,7 @@ TEST(ParseNormalizedArchString, UpdatesFLenMinVLenMaxELen) { EXPECT_EQ(Info.getFLen(), 64U); EXPECT_EQ(Info.getMinVLen(), 64U); EXPECT_EQ(Info.getMaxELen(), 64U); + EXPECT_EQ(Info.getMaxELenFp(), 64U); } TEST(ParseArchString, RejectsInvalidChars) { @@ -181,6 +182,9 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { EXPECT_TRUE(ExtsRV32I.at("i") == (RISCVISAUtils::ExtensionVersion{2, 1})); EXPECT_EQ(InfoRV32I.getXLen(), 32U); EXPECT_EQ(InfoRV32I.getFLen(), 0U); + EXPECT_EQ(InfoRV32I.getMinVLen(), 0U); + EXPECT_EQ(InfoRV32I.getMaxELen(), 0U); + EXPECT_EQ(InfoRV32I.getMaxELenFp(), 0U); auto MaybeRV32E = RISCVISAInfo::parseArchString("rv32e", true); ASSERT_THAT_EXPECTED(MaybeRV32E, Succeeded()); @@ -190,6 +194,9 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { EXPECT_TRUE(ExtsRV32E.at("e") == (RISCVISAUtils::ExtensionVersion{2, 0})); EXPECT_EQ(InfoRV32E.getXLen(), 32U); EXPECT_EQ(InfoRV32E.getFLen(), 0U); + EXPECT_EQ(InfoRV32E.getMinVLen(), 0U); + EXPECT_EQ(InfoRV32E.getMaxELen(), 0U); + EXPECT_EQ(InfoRV32E.getMaxELenFp(), 0U); auto MaybeRV32G = RISCVISAInfo::parseArchString("rv32g", true); ASSERT_THAT_EXPECTED(MaybeRV32G, Succeeded()); @@ -206,6 +213,9 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { (RISCVISAUtils::ExtensionVersion{2, 0})); EXPECT_EQ(InfoRV32G.getXLen(), 32U); EXPECT_EQ(InfoRV32G.getFLen(), 64U); + EXPECT_EQ(InfoRV32G.getMinVLen(), 0U); + EXPECT_EQ(InfoRV32G.getMaxELen(), 0U); + EXPECT_EQ(InfoRV32G.getMaxELenFp(), 0U); auto MaybeRV64I = 
RISCVISAInfo::parseArchString("rv64i", true); ASSERT_THAT_EXPECTED(MaybeRV64I, Succeeded()); @@ -215,6 +225,9 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { EXPECT_TRUE(ExtsRV64I.at("i") == (RISCVISAUtils::ExtensionVersion{2, 1})); EXPECT_EQ(InfoRV64I.getXLen(), 64U); EXPECT_EQ(InfoRV64I.getFLen(), 0U); + EXPECT_EQ(InfoRV64I.getMinVLen(), 0U); + EXPECT_EQ(InfoRV64I.getMaxELen(), 0U); + EXPECT_EQ(InfoRV64I.getMaxELenFp(), 0U); auto MaybeRV64E = RISCVISAInfo::parseArchString("rv64e", true); ASSERT_THAT_EXPECTED(MaybeRV64E, Succeeded()); @@ -224,6 +237,9 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { EXPECT_TRUE(ExtsRV64E.at("e") == (RISCVISAUtils::ExtensionVersion{2, 0})); EXPECT_EQ(InfoRV64E.getXLen(), 64U); EXPECT_EQ(InfoRV64E.getFLen(), 0U); + EXPECT_EQ(InfoRV64E.getMinVLen(), 0U); + EXPECT_EQ(InfoRV64E.getMaxELen(), 0U); + EXPECT_EQ(InfoRV64E.getMaxELenFp(), 0U); auto MaybeRV64G = RISCVISAInfo::parseArchString("rv64g", true); ASSERT_THAT_EXPECTED(MaybeRV64G, Succeeded()); @@ -240,6 +256,38 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { (RISCVISAUtils::ExtensionVersion{2, 0})); EXPECT_EQ(InfoRV64G.getXLen(), 64U); EXPECT_EQ(InfoRV64G.getFLen(), 64U); + EXPECT_EQ(InfoRV64G.getMinVLen(), 0U); + EXPECT_EQ(InfoRV64G.getMaxELen(), 0U); + EXPECT_EQ(InfoRV64G.getMaxELenFp(), 0U); + + auto MaybeRV64GCV = RISCVISAInfo::parseArchString("rv64gcv", true); + ASSERT_THAT_EXPECTED(MaybeRV64GCV, Succeeded()); + RISCVISAInfo &InfoRV64GCV = **MaybeRV64GCV; + const auto &ExtsRV64GCV = InfoRV64GCV.getExtensions(); + EXPECT_EQ(ExtsRV64GCV.size(), 17UL); + EXPECT_TRUE(ExtsRV64GCV.at("i") == (RISCVISAUtils::ExtensionVersion{2, 1})); + EXPECT_TRUE(ExtsRV64GCV.at("m") == (RISCVISAUtils::ExtensionVersion{2, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("a") == (RISCVISAUtils::ExtensionVersion{2, 1})); + EXPECT_TRUE(ExtsRV64GCV.at("f") == (RISCVISAUtils::ExtensionVersion{2, 2})); + EXPECT_TRUE(ExtsRV64GCV.at("d") == (RISCVISAUtils::ExtensionVersion{2, 2})); + EXPECT_TRUE(ExtsRV64GCV.at("c") == (RISCVISAUtils::ExtensionVersion{2, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zicsr") == (RISCVISAUtils::ExtensionVersion{2, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zifencei") == + (RISCVISAUtils::ExtensionVersion{2, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("v") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zve32x") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zve32f") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zve64x") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zve64f") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zve64d") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zvl32b") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zvl64b") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_TRUE(ExtsRV64GCV.at("zvl128b") == (RISCVISAUtils::ExtensionVersion{1, 0})); + EXPECT_EQ(InfoRV64GCV.getXLen(), 64U); + EXPECT_EQ(InfoRV64GCV.getFLen(), 64U); + EXPECT_EQ(InfoRV64GCV.getMinVLen(), 128U); + EXPECT_EQ(InfoRV64GCV.getMaxELen(), 64U); + EXPECT_EQ(InfoRV64GCV.getMaxELenFp(), 64U); } TEST(ParseArchString, RejectsUnrecognizedExtensionNamesByDefault) { From dca3a6e562e012940c2b62a4d8dae3afec09caa4 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Tue, 7 May 2024 13:02:31 -0700 Subject: [PATCH 15/64] [WebAssembly] Make EH depend on multivalue and reference-types (#91299) This PR turns on 
multivalue and reference-types features when exception-handling feature is turned on, and errors out when disabling of those dependent features is explicitly requested. I think doing this would be safe anyway regardless of whether or when we end up turning on reference-types by default. We currently don't yet have a experimental flag for the Clang and LLVM for the new experimental EH yet. But I think it should be fine to turn those features on even if the LLVM does not yet generate the new EH instructions, for the same reason we tried to turn them on by default and the browsers that support EH also support multivalue and reference-types anyway. --- clang/lib/Driver/ToolChains/WebAssembly.cpp | 34 ++++++++++++++++++ clang/test/Driver/wasm-toolchain.c | 39 +++++++++++++++++---- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index b7c6efab83e806..5b763df9b33293 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -347,6 +347,23 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, // Backend needs -wasm-enable-eh to enable Wasm EH CC1Args.push_back("-mllvm"); CC1Args.push_back("-wasm-enable-eh"); + + // New Wasm EH spec (adopted in Oct 2023) requires multivalue and + // reference-types. + if (DriverArgs.hasFlag(options::OPT_mno_multivalue, + options::OPT_mmultivalue, false)) { + getDriver().Diag(diag::err_drv_argument_not_allowed_with) + << "-fwasm-exceptions" << "-mno-multivalue"; + } + if (DriverArgs.hasFlag(options::OPT_mno_reference_types, + options::OPT_mreference_types, false)) { + getDriver().Diag(diag::err_drv_argument_not_allowed_with) + << "-fwasm-exceptions" << "-mno-reference-types"; + } + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+multivalue"); + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+reference-types"); } for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) { @@ -408,6 +425,23 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, CC1Args.push_back("+exception-handling"); // Backend needs '-exception-model=wasm' to use Wasm EH instructions CC1Args.push_back("-exception-model=wasm"); + + // New Wasm EH spec (adopted in Oct 2023) requires multivalue and + // reference-types. 
+ if (DriverArgs.hasFlag(options::OPT_mno_multivalue, + options::OPT_mmultivalue, false)) { + getDriver().Diag(diag::err_drv_argument_not_allowed_with) + << "-mllvm -wasm-enable-sjlj" << "-mno-multivalue"; + } + if (DriverArgs.hasFlag(options::OPT_mno_reference_types, + options::OPT_mreference_types, false)) { + getDriver().Diag(diag::err_drv_argument_not_allowed_with) + << "-mllvm -wasm-enable-sjlj" << "-mno-reference-types"; + } + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+multivalue"); + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+reference-types"); } } } diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index dabf0ac2433bbb..7c26c2c13c0baf 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -120,11 +120,12 @@ // RUN: | FileCheck -check-prefix=EMSCRIPTEN_EH_ALLOWED_WO_ENABLE %s // EMSCRIPTEN_EH_ALLOWED_WO_ENABLE: invalid argument '-mllvm -emscripten-cxx-exceptions-allowed' only allowed with '-mllvm -enable-emscripten-cxx-exceptions' -// '-fwasm-exceptions' sets +exception-handling and '-mllvm -wasm-enable-eh' +// '-fwasm-exceptions' sets +exception-handling, -multivalue, -reference-types +// and '-mllvm -wasm-enable-eh' // RUN: %clang -### --target=wasm32-unknown-unknown \ // RUN: --sysroot=/foo %s -fwasm-exceptions 2>&1 \ // RUN: | FileCheck -check-prefix=WASM_EXCEPTIONS %s -// WASM_EXCEPTIONS: "-cc1" {{.*}} "-target-feature" "+exception-handling" "-mllvm" "-wasm-enable-eh" +// WASM_EXCEPTIONS: "-cc1" {{.*}} "-target-feature" "+exception-handling" "-mllvm" "-wasm-enable-eh" "-target-feature" "+multivalue" "-target-feature" "+reference-types" // '-fwasm-exceptions' not allowed with '-mno-exception-handling' // RUN: not %clang -### --target=wasm32-unknown-unknown \ @@ -132,19 +133,32 @@ // RUN: | FileCheck -check-prefix=WASM_EXCEPTIONS_NO_EH %s // WASM_EXCEPTIONS_NO_EH: invalid argument '-fwasm-exceptions' not allowed with '-mno-exception-handling' -// '-fwasm-exceptions' not allowed with '-mllvm -enable-emscripten-cxx-exceptions' +// '-fwasm-exceptions' not allowed with +// '-mllvm -enable-emscripten-cxx-exceptions' // RUN: not %clang -### --target=wasm32-unknown-unknown \ // RUN: --sysroot=/foo %s -fwasm-exceptions \ // RUN: -mllvm -enable-emscripten-cxx-exceptions 2>&1 \ // RUN: | FileCheck -check-prefix=WASM_EXCEPTIONS_EMSCRIPTEN_EH %s // WASM_EXCEPTIONS_EMSCRIPTEN_EH: invalid argument '-fwasm-exceptions' not allowed with '-mllvm -enable-emscripten-cxx-exceptions' -// '-mllvm -wasm-enable-sjlj' sets +exception-handling and -// '-exception-model=wasm' +// '-fwasm-exceptions' not allowed with '-mno-multivalue' +// RUN: not %clang -### --target=wasm32-unknown-unknown \ +// RUN: --sysroot=/foo %s -fwasm-exceptions -mno-multivalue 2>&1 \ +// RUN: | FileCheck -check-prefix=WASM_EXCEPTIONS_NO_MULTIVALUE %s +// WASM_EXCEPTIONS_NO_MULTIVALUE: invalid argument '-fwasm-exceptions' not allowed with '-mno-multivalue' + +// '-fwasm-exceptions' not allowed with '-mno-reference-types' +// RUN: not %clang -### --target=wasm32-unknown-unknown \ +// RUN: --sysroot=/foo %s -fwasm-exceptions -mno-reference-types 2>&1 \ +// RUN: | FileCheck -check-prefix=WASM_EXCEPTIONS_NO_REFERENCE_TYPES %s +// WASM_EXCEPTIONS_NO_REFERENCE_TYPES: invalid argument '-fwasm-exceptions' not allowed with '-mno-reference-types' + +// '-mllvm -wasm-enable-sjlj' sets +exception-handling, +multivalue, +// +reference-types and '-exception-model=wasm' // RUN: %clang -### --target=wasm32-unknown-unknown \ // RUN: 
--sysroot=/foo %s -mllvm -wasm-enable-sjlj 2>&1 \ // RUN: | FileCheck -check-prefix=WASM_SJLJ %s -// WASM_SJLJ: "-cc1" {{.*}} "-target-feature" "+exception-handling" "-exception-model=wasm" +// WASM_SJLJ: "-cc1" {{.*}} "-target-feature" "+exception-handling" "-exception-model=wasm" "-target-feature" "+multivalue" "-target-feature" "+reference-types" // '-mllvm -wasm-enable-sjlj' not allowed with '-mno-exception-handling' // RUN: not %clang -### --target=wasm32-unknown-unknown \ @@ -168,6 +182,19 @@ // RUN: | FileCheck -check-prefix=WASM_SJLJ_EMSCRIPTEN_SJLJ %s // WASM_SJLJ_EMSCRIPTEN_SJLJ: invalid argument '-mllvm -wasm-enable-sjlj' not allowed with '-mllvm -enable-emscripten-sjlj' +// '-mllvm -wasm-enable-sjlj' not allowed with '-mno-multivalue' +// RUN: not %clang -### --target=wasm32-unknown-unknown \ +// RUN: --sysroot=/foo %s -mllvm -wasm-enable-sjlj -mno-multivalue 2>&1 \ +// RUN: | FileCheck -check-prefix=WASM_SJLJ_NO_MULTIVALUE %s +// WASM_SJLJ_NO_MULTIVALUE: invalid argument '-mllvm -wasm-enable-sjlj' not allowed with '-mno-multivalue' + +// '-mllvm -wasm-enable-sjlj' not allowed with '-mno-reference-types' +// RUN: not %clang -### --target=wasm32-unknown-unknown \ +// RUN: --sysroot=/foo %s -mllvm -wasm-enable-sjlj \ +// RUN: -mno-reference-types 2>&1 \ +// RUN: | FileCheck -check-prefix=WASM_SJLJ_NO_REFERENCE_TYPES %s +// WASM_SJLJ_NO_REFERENCE_TYPES: invalid argument '-mllvm -wasm-enable-sjlj' not allowed with '-mno-reference-types' + // RUN: %clang -### %s -fsanitize=address --target=wasm32-unknown-emscripten 2>&1 | FileCheck -check-prefix=CHECK-ASAN-EMSCRIPTEN %s // CHECK-ASAN-EMSCRIPTEN: "-fsanitize=address" // CHECK-ASAN-EMSCRIPTEN: "-fsanitize-address-globals-dead-stripping" From 2e4abfae57f81e2bb23fc654d6edbaeae51ae10a Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 7 May 2024 13:02:04 -0700 Subject: [PATCH 16/64] Revert "[Sema] Fix warnings" This reverts commit 65e2fab401a2da55c51d3caceae8478c33f3c60f because I'm also reverting 7115ed0fff027b65fa76fdfae215ed1382ed1473. --- clang/lib/Sema/SemaTemplate.cpp | 2 ++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 6231b65bd842a9..b268d7c405dfdf 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -9276,6 +9276,8 @@ DeclResult Sema::ActOnClassTemplateSpecialization( // NOTE: KWLoc is the location of the tag keyword. This will instead // store the location of the outermost template keyword in the declaration. + SourceLocation TemplateKWLoc = TemplateParameterLists.size() > 0 + ? TemplateParameterLists[0]->getTemplateLoc() : KWLoc; SourceLocation TemplateNameLoc = TemplateId.TemplateNameLoc; SourceLocation LAngleLoc = TemplateId.LAngleLoc; SourceLocation RAngleLoc = TemplateId.RAngleLoc; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 884e98a300f5a6..5315b143215e19 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3932,6 +3932,12 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl( if (SubstQualifier(D, InstD)) return nullptr; + // Build the canonical type that describes the converted template + // arguments of the class template explicit specialization. 
+  QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
+      TemplateName(InstClassTemplate), CanonicalConverted,
+      SemaRef.Context.getRecordType(InstD));
+
   InstD->setAccess(D->getAccess());
   InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
   InstD->setSpecializationKind(D->getSpecializationKind());

From c6855ab24e63feb432aac4f86eb70ac16d76c921 Mon Sep 17 00:00:00 2001
From: Adrian Prantl
Date: Tue, 7 May 2024 13:02:17 -0700
Subject: [PATCH 17/64] Revert "[Clang] Unify interface for accessing template
 arguments as written for class/variable template specializations (#81642)"

This reverts commit 7115ed0fff027b65fa76fdfae215ed1382ed1473.

This commit broke several LLDB tests.

https://green.lab.llvm.org/job/llvm.org/view/LLDB/job/as-lldb-cmake/3480/
---
 clang-tools-extra/clangd/AST.cpp              |  37 +-
 .../clangd/SemanticHighlighting.cpp           |  13 +-
 .../include-cleaner/lib/WalkAST.cpp           |  13 +-
 clang/docs/LibASTMatchersReference.html       | 364 +++++-------------
 clang/docs/ReleaseNotes.rst                   |   3 -
 clang/include/clang/AST/DeclTemplate.h        | 220 ++++++-----
 clang/include/clang/AST/RecursiveASTVisitor.h |  27 +-
 clang/include/clang/ASTMatchers/ASTMatchers.h |  74 ++--
 .../clang/ASTMatchers/ASTMatchersInternal.h   |  50 +--
 clang/lib/AST/ASTImporter.cpp                 |  68 ++--
 clang/lib/AST/DeclPrinter.cpp                 |  18 +-
 clang/lib/AST/DeclTemplate.cpp                | 198 +++++-----
 clang/lib/AST/TypePrinter.cpp                 |  25 +-
 clang/lib/Index/IndexDecl.cpp                 |   9 +-
 clang/lib/Sema/Sema.cpp                       |   2 +-
 clang/lib/Sema/SemaTemplate.cpp               |  54 +--
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  | 157 +++++---
 clang/lib/Serialization/ASTReaderDecl.cpp     |  28 +-
 clang/lib/Serialization/ASTWriterDecl.cpp     |  36 +-
 clang/lib/Tooling/Syntax/BuildTree.cpp        |   3 +-
 clang/test/AST/ast-dump-template-decls.cpp    |  18 +-
 clang/test/Index/Core/index-source.cpp        |  24 +-
 clang/test/Index/index-refs.cpp               |   1 +
 clang/tools/libclang/CIndex.cpp               |  29 +-
 .../ASTMatchers/ASTMatchersNodeTest.cpp       |  12 +
 .../ASTMatchers/ASTMatchersTraversalTest.cpp  |  92 +++--
 26 files changed, 729 insertions(+), 846 deletions(-)

diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp
index fda1e5fdf8d82c..1b86ea19cf28da 100644
--- a/clang-tools-extra/clangd/AST.cpp
+++ b/clang-tools-extra/clangd/AST.cpp
@@ -50,12 +50,17 @@ getTemplateSpecializationArgLocs(const NamedDecl &ND) {
     if (const ASTTemplateArgumentListInfo *Args =
             Func->getTemplateSpecializationArgsAsWritten())
       return Args->arguments();
-  } else if (auto *Cls = llvm::dyn_cast(&ND)) {
+  } else if (auto *Cls =
+                 llvm::dyn_cast(&ND)) {
     if (auto *Args = Cls->getTemplateArgsAsWritten())
       return Args->arguments();
-  } else if (auto *Var = llvm::dyn_cast(&ND)) {
+  } else if (auto *Var =
+                 llvm::dyn_cast(&ND)) {
     if (auto *Args = Var->getTemplateArgsAsWritten())
       return Args->arguments();
+  } else if (auto *Var = llvm::dyn_cast(&ND)) {
+    if (auto *Args = Var->getTemplateArgsInfo())
+      return Args->arguments();
   }
   // We return std::nullopt for ClassTemplateSpecializationDecls because it does
   // not contain TemplateArgumentLoc information.
@@ -265,10 +270,22 @@ std::string printTemplateSpecializationArgs(const NamedDecl &ND) {
                  getTemplateSpecializationArgLocs(ND)) {
     printTemplateArgumentList(OS, *Args, Policy);
   } else if (auto *Cls = llvm::dyn_cast(&ND)) {
-    // FIXME: Fix cases when getTypeAsWritten returns null inside clang AST,
-    // e.g. friend decls. Currently we fallback to Template Arguments without
-    // location information.
-    printTemplateArgumentList(OS, Cls->getTemplateArgs().asArray(), Policy);
+    if (const TypeSourceInfo *TSI = Cls->getTypeAsWritten()) {
+      // ClassTemplateSpecializationDecls do not contain
+      // TemplateArgumentTypeLocs, they only have TemplateArgumentTypes. So we
+      // create a new argument location list from TypeSourceInfo.
+      auto STL = TSI->getTypeLoc().getAs();
+      llvm::SmallVector ArgLocs;
+      ArgLocs.reserve(STL.getNumArgs());
+      for (unsigned I = 0; I < STL.getNumArgs(); ++I)
+        ArgLocs.push_back(STL.getArgLoc(I));
+      printTemplateArgumentList(OS, ArgLocs, Policy);
+    } else {
+      // FIXME: Fix cases when getTypeAsWritten returns null inside clang AST,
+      // e.g. friend decls. Currently we fallback to Template Arguments without
+      // location information.
+      printTemplateArgumentList(OS, Cls->getTemplateArgs().asArray(), Policy);
+    }
   }
   OS.flush();
   return TemplateArgs;
@@ -436,12 +453,10 @@ bool hasReservedScope(const DeclContext &DC) {
 }

 QualType declaredType(const TypeDecl *D) {
-  ASTContext &Context = D->getASTContext();
   if (const auto *CTSD = llvm::dyn_cast(D))
-    if (const auto *Args = CTSD->getTemplateArgsAsWritten())
-      return Context.getTemplateSpecializationType(
-          TemplateName(CTSD->getSpecializedTemplate()), Args->arguments());
-  return Context.getTypeDeclType(D);
+    if (const auto *TSI = CTSD->getTypeAsWritten())
+      return TSI->getType();
+  return D->getASTContext().getTypeDeclType(D);
 }

 namespace {
diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index eb025f21f36161..08f99e11ac9be1 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -693,22 +693,17 @@ class CollectExtraHighlightings
     return true;
   }

-  bool
-  VisitClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *D) {
-    if (auto *Args = D->getTemplateArgsAsWritten())
-      H.addAngleBracketTokens(Args->getLAngleLoc(), Args->getRAngleLoc());
-    return true;
-  }
-
   bool VisitClassTemplatePartialSpecializationDecl(
       ClassTemplatePartialSpecializationDecl *D) {
     if (auto *TPL = D->getTemplateParameters())
       H.addAngleBracketTokens(TPL->getLAngleLoc(), TPL->getRAngleLoc());
+    if (auto *Args = D->getTemplateArgsAsWritten())
+      H.addAngleBracketTokens(Args->getLAngleLoc(), Args->getRAngleLoc());
     return true;
   }

   bool VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) {
-    if (auto *Args = D->getTemplateArgsAsWritten())
+    if (auto *Args = D->getTemplateArgsInfo())
       H.addAngleBracketTokens(Args->getLAngleLoc(), Args->getRAngleLoc());
     return true;
   }
@@ -717,6 +712,8 @@ class CollectExtraHighlightings
       VarTemplatePartialSpecializationDecl *D) {
     if (auto *TPL = D->getTemplateParameters())
       H.addAngleBracketTokens(TPL->getLAngleLoc(), TPL->getRAngleLoc());
+    if (auto *Args = D->getTemplateArgsAsWritten())
+      H.addAngleBracketTokens(Args->getLAngleLoc(), Args->getRAngleLoc());
     return true;
   }

diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index f7cc9d19123635..878067aca0173f 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -267,21 +267,18 @@ class ASTWalker : public RecursiveASTVisitor {
     return true;
   }

-  // Report a reference from explicit specializations/instantiations to the
-  // specialized template. Implicit ones are filtered out by RAV.
+  // Report a reference from explicit specializations to the specialized
+  // template. Implicit ones are filtered out by RAV and explicit instantiations
+  // are already traversed through typelocs.
   bool
   VisitClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *CTSD) {
-    // if (CTSD->isExplicitSpecialization())
-    if (clang::isTemplateExplicitInstantiationOrSpecialization(
-            CTSD->getTemplateSpecializationKind()))
+    if (CTSD->isExplicitSpecialization())
       report(CTSD->getLocation(),
              CTSD->getSpecializedTemplate()->getTemplatedDecl());
     return true;
   }
   bool VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *VTSD) {
-    // if (VTSD->isExplicitSpecialization())
-    if (clang::isTemplateExplicitInstantiationOrSpecialization(
-            VTSD->getTemplateSpecializationKind()))
+    if (VTSD->isExplicitSpecialization())
       report(VTSD->getLocation(),
              VTSD->getSpecializedTemplate()->getTemplatedDecl());
     return true;
   }
diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html
index a16b9c44ef0eab..bb1b68f6671b1a 100644
--- a/clang/docs/LibASTMatchersReference.html
+++ b/clang/docs/LibASTMatchersReference.html
@@ -3546,35 +3546,33 @@

Narrowing Matchers

-Matcher<CXXMethodDecl>isCopyAssignmentOperator -
Matches if the given method declaration declares a copy assignment
-operator.
+Matcher<CXXMethodDecl>isExplicitObjectMemberFunction
+
Matches if the given method declaration declares a member function with an explicit object parameter.
 
 Given
 struct A {
-  A &operator=(const A &);
-  A &operator=(A &&);
+  int operator-(this A, int);
+  void fun(this A &&self);
+  static int operator()(int);
+  int operator+(int);
 };
 
-cxxMethodDecl(isCopyAssignmentOperator()) matches the first method but not
-the second one.
+cxxMethodDecl(isExplicitObjectMemberFunction()) matches the first two methods but not the last two.
 
-Matcher<CXXMethodDecl>isExplicitObjectMemberFunction -
Matches if the given method declaration declares a member function with an
-explicit object parameter.
+Matcher<CXXMethodDecl>isCopyAssignmentOperator
+
Matches if the given method declaration declares a copy assignment
+operator.
 
 Given
 struct A {
- int operator-(this A, int);
- void fun(this A &&self);
- static int operator()(int);
- int operator+(int);
+  A &operator=(const A &);
+  A &operator=(A &&);
 };
 
-cxxMethodDecl(isExplicitObjectMemberFunction()) matches the first two
-methods but not the last two.
+cxxMethodDecl(isCopyAssignmentOperator()) matches the first method but not
+the second one.
 
@@ -6715,7 +6713,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -6759,7 +6757,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -6987,7 +6985,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -7221,7 +7219,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -7418,7 +7416,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -7622,7 +7620,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -7679,7 +7677,7 @@

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -7877,10 +7875,9 @@

AST Traversal Matchers

Matcher<ClassTemplateSpecializationDecl>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specialization,
-variable template specialization, and function template specialization
-nodes where the template argument matches the inner matcher. This matcher
-may produce multiple matches.
+
Matches classTemplateSpecialization, templateSpecializationType and
+functionDecl nodes where the template argument matches the inner matcher.
+This matcher may produce multiple matches.
 
 Given
   template <typename T, unsigned N, unsigned M>
@@ -7902,25 +7899,10 @@ 

AST Traversal Matchers

-Matcher<ClassTemplateSpecializationDecl>hasAnyTemplateArgumentLocMatcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-that have at least one `TemplateArgumentLoc` matching the given
-`InnerMatcher`.
-
-Given
-  template<typename T> class A {};
-  A<int> a;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
-  hasTypeLoc(loc(asString("int")))))))
-  matches `A<int> a`.
-
- - Matcher<ClassTemplateSpecializationDecl>hasAnyTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationTypes, class template specializations,
-variable template specializations, and function template specializations
-that have at least one TemplateArgument matching the given InnerMatcher.
+
Matches classTemplateSpecializations, templateSpecializationType and
+functionDecl that have at least one TemplateArgument matching the given
+InnerMatcher.
 
 Given
   template<typename T> class A {};
@@ -7951,25 +7933,9 @@ 

AST Traversal Matchers

-Matcher<ClassTemplateSpecializationDecl>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-where the n'th `TemplateArgumentLoc` matches the given `InnerMatcher`.
-
-Given
-  template<typename T, typename U> class A {};
-  A<double, int> b;
-  A<int, double> c;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0,
-  hasTypeLoc(loc(asString("double")))))))
-  matches `A<double, int> b`, but not `A<int, double> c`.
-
- - Matcher<ClassTemplateSpecializationDecl>hasTemplateArgumentunsigned N, Matcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specializations,
-variable template specializations, and function template specializations
-where the n'th TemplateArgument matches the given InnerMatcher.
+
Matches classTemplateSpecializations, templateSpecializationType and
+functionDecl where the n'th TemplateArgument matches the given InnerMatcher.
 
 Given
   template<typename T, typename U> class A {};
@@ -7987,6 +7953,34 @@ 

AST Traversal Matchers

+Matcher<ClassTemplateSpecializationDecl>hasTypeLocMatcher<TypeLoc> Inner +
Matches if the type location of a node matches the inner matcher.
+
+Examples:
+  int x;
+declaratorDecl(hasTypeLoc(loc(asString("int"))))
+  matches int x
+
+auto x = int(3);
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
+  matches int(3)
+
+struct Foo { Foo(int, int); };
+auto x = Foo(1, 2);
+cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
+  matches Foo(1, 2)
+
+Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
+  Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
+  Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
+  Matcher<CXXUnresolvedConstructExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
+  Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
+  Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
+  Matcher<TypedefNameDecl>
+
+ + Matcher<ComplexType>hasElementTypeMatcher<Type>
Matches arrays and C99 complex types that have a specific element
 type.
@@ -8002,8 +7996,8 @@ 

AST Traversal Matchers

-Matcher<CompoundLiteralExpr>hasTypeLocMatcher<TypeLoc> Inner -
Matches if the type location of a node matches the inner matcher.
+Matcher<CompoundLiteralExpr>hasTypeLocMatcher<TypeLoc> Inner
+
Matches if the type location of a node matches the inner matcher.
 
 Examples:
   int x;
@@ -8023,7 +8017,7 @@ 

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -8072,21 +8066,6 @@

AST Traversal Matchers

-Matcher<DeclRefExpr>hasAnyTemplateArgumentLocMatcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-that have at least one `TemplateArgumentLoc` matching the given
-`InnerMatcher`.
-
-Given
-  template<typename T> class A {};
-  A<int> a;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
-  hasTypeLoc(loc(asString("int")))))))
-  matches `A<int> a`.
-
- - Matcher<DeclRefExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
 matches the given matcher.
@@ -8121,10 +8100,9 @@ 

AST Traversal Matchers

-Matcher<DeclRefExpr>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-where the n'th `TemplateArgumentLoc` matches the given `InnerMatcher`.
+Matcher<DeclRefExpr>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher
+
Matches template specialization `TypeLoc`s where the n'th
+`TemplateArgumentLoc` matches the given `InnerMatcher`.
 
 Given
   template<typename T, typename U> class A {};
@@ -8198,8 +8176,8 @@ 

AST Traversal Matchers

-Matcher<DeclaratorDecl>hasTypeLocMatcher<TypeLoc> Inner -
Matches if the type location of a node matches the inner matcher.
+Matcher<DeclaratorDecl>hasTypeLocMatcher<TypeLoc> Inner
+
Matches if the type location of a node matches the inner matcher.
 
 Examples:
   int x;
@@ -8219,7 +8197,7 @@ 

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -8415,8 +8393,8 @@

AST Traversal Matchers

-Matcher<ExplicitCastExpr>hasTypeLocMatcher<TypeLoc> Inner -
Matches if the type location of a node matches the inner matcher.
+Matcher<ExplicitCastExpr>hasTypeLocMatcher<TypeLoc> Inner
+
Matches if the type location of a node matches the inner matcher.
 
 Examples:
   int x;
@@ -8436,7 +8414,7 @@ 

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -8729,10 +8707,9 @@

AST Traversal Matchers

Matcher<FunctionDecl>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specialization,
-variable template specialization, and function template specialization
-nodes where the template argument matches the inner matcher. This matcher
-may produce multiple matches.
+
Matches classTemplateSpecialization, templateSpecializationType and
+functionDecl nodes where the template argument matches the inner matcher.
+This matcher may produce multiple matches.
 
 Given
   template <typename T, unsigned N, unsigned M>
@@ -8801,25 +8778,10 @@ 

AST Traversal Matchers

-Matcher<FunctionDecl>hasAnyTemplateArgumentLocMatcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-that have at least one `TemplateArgumentLoc` matching the given
-`InnerMatcher`.
-
-Given
-  template<typename T> class A {};
-  A<int> a;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
-  hasTypeLoc(loc(asString("int")))))))
-  matches `A<int> a`.
-
- - Matcher<FunctionDecl>hasAnyTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationTypes, class template specializations,
-variable template specializations, and function template specializations
-that have at least one TemplateArgument matching the given InnerMatcher.
+
Matches classTemplateSpecializations, templateSpecializationType and
+functionDecl that have at least one TemplateArgument matching the given
+InnerMatcher.
 
 Given
   template<typename T> class A {};
@@ -8916,25 +8878,9 @@ 

AST Traversal Matchers

-Matcher<FunctionDecl>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-where the n'th `TemplateArgumentLoc` matches the given `InnerMatcher`.
-
-Given
-  template<typename T, typename U> class A {};
-  A<double, int> b;
-  A<int, double> c;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0,
-  hasTypeLoc(loc(asString("double")))))))
-  matches `A<double, int> b`, but not `A<int, double> c`.
-
- - Matcher<FunctionDecl>hasTemplateArgumentunsigned N, Matcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specializations,
-variable template specializations, and function template specializations
-where the n'th TemplateArgument matches the given InnerMatcher.
+
Matches classTemplateSpecializations, templateSpecializationType and
+functionDecl where the n'th TemplateArgument matches the given InnerMatcher.
 
 Given
   template<typename T, typename U> class A {};
@@ -9527,8 +9473,8 @@ 

AST Traversal Matchers

-Matcher<ObjCPropertyDecl>hasTypeLocMatcher<TypeLoc> Inner -
Matches if the type location of a node matches the inner matcher.
+Matcher<ObjCPropertyDecl>hasTypeLocMatcher<TypeLoc> Inner
+
Matches if the type location of a node matches the inner matcher.
 
 Examples:
   int x;
@@ -9548,7 +9494,7 @@ 

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -9973,8 +9919,8 @@

AST Traversal Matchers

-Matcher<TemplateArgumentLoc>hasTypeLocMatcher<TypeLoc> Inner -
Matches if the type location of a node matches the inner matcher.
+Matcher<TemplateArgumentLoc>hasTypeLocMatcher<TypeLoc> Inner
+
Matches if the type location of a node matches the inner matcher.
 
 Examples:
   int x;
@@ -9994,7 +9940,7 @@ 

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -10068,11 +10014,9 @@

AST Traversal Matchers

-Matcher<TemplateSpecializationTypeLoc>hasAnyTemplateArgumentLocMatcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-that have at least one `TemplateArgumentLoc` matching the given
-`InnerMatcher`.
+Matcher<TemplateSpecializationTypeLoc>hasAnyTemplateArgumentLocMatcher<TemplateArgumentLoc> InnerMatcher
+
Matches template specialization `TypeLoc`s that have at least one
+`TemplateArgumentLoc` matching the given `InnerMatcher`.
 
 Given
   template<typename T> class A {};
@@ -10083,10 +10027,9 @@ 

AST Traversal Matchers

-Matcher<TemplateSpecializationTypeLoc>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-where the n'th `TemplateArgumentLoc` matches the given `InnerMatcher`.
+Matcher<TemplateSpecializationTypeLoc>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher
+
Matches template specialization `TypeLoc`s where the n'th
+`TemplateArgumentLoc` matches the given `InnerMatcher`.
 
 Given
   template<typename T, typename U> class A {};
@@ -10098,11 +10041,10 @@ 

AST Traversal Matchers

-Matcher<TemplateSpecializationType>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specialization,
-variable template specialization, and function template specialization
-nodes where the template argument matches the inner matcher. This matcher
-may produce multiple matches.
+Matcher<TemplateSpecializationType>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher
+
Matches classTemplateSpecialization, templateSpecializationType and
+functionDecl nodes where the template argument matches the inner matcher.
+This matcher may produce multiple matches.
 
 Given
   template <typename T, unsigned N, unsigned M>
@@ -10124,10 +10066,10 @@ 

AST Traversal Matchers

-Matcher<TemplateSpecializationType>hasAnyTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationTypes, class template specializations,
-variable template specializations, and function template specializations
-that have at least one TemplateArgument matching the given InnerMatcher.
+Matcher<TemplateSpecializationType>hasAnyTemplateArgumentMatcher<TemplateArgument> InnerMatcher
+
Matches classTemplateSpecializations, templateSpecializationType and
+functionDecl that have at least one TemplateArgument matching the given
+InnerMatcher.
 
 Given
   template<typename T> class A {};
@@ -10180,10 +10122,9 @@ 

AST Traversal Matchers

-Matcher<TemplateSpecializationType>hasTemplateArgumentunsigned N, Matcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specializations,
-variable template specializations, and function template specializations
-where the n'th TemplateArgument matches the given InnerMatcher.
+Matcher<TemplateSpecializationType>hasTemplateArgumentunsigned N, Matcher<TemplateArgument> InnerMatcher
+
Matches classTemplateSpecializations, templateSpecializationType and
+functionDecl where the n'th TemplateArgument matches the given InnerMatcher.
 
 Given
   template<typename T, typename U> class A {};
@@ -10241,8 +10182,8 @@ 

AST Traversal Matchers

-Matcher<TypedefNameDecl>hasTypeLocMatcher<TypeLoc> Inner -
Matches if the type location of a node matches the inner matcher.
+Matcher<TypedefNameDecl>hasTypeLocMatcher<TypeLoc> Inner
+
Matches if the type location of a node matches the inner matcher.
 
 Examples:
   int x;
@@ -10262,7 +10203,7 @@ 

AST Traversal Matchers

   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
   Matcher<CXXNewExpr>, Matcher<CXXTemporaryObjectExpr>,
   Matcher<CXXUnresolvedConstructExpr>,
-  Matcher<CompoundLiteralExpr>,
+  Matcher<ClassTemplateSpecializationDecl>, Matcher<CompoundLiteralExpr>,
   Matcher<DeclaratorDecl>, Matcher<ExplicitCastExpr>,
   Matcher<ObjCPropertyDecl>, Matcher<TemplateArgumentLoc>,
   Matcher<TypedefNameDecl>
@@ -10508,105 +10449,6 @@

AST Traversal Matchers

-Matcher<VarTemplateSpecializationDecl>forEachTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specialization,
-variable template specialization, and function template specialization
-nodes where the template argument matches the inner matcher. This matcher
-may produce multiple matches.
-
-Given
-  template <typename T, unsigned N, unsigned M>
-  struct Matrix {};
-
-  constexpr unsigned R = 2;
-  Matrix<int, R * 2, R * 4> M;
-
-  template <typename T, typename U>
-  void f(T&& t, U&& u) {}
-
-  bool B = false;
-  f(R, B);
-templateSpecializationType(forEachTemplateArgument(isExpr(expr())))
-  matches twice, with expr() matching 'R * 2' and 'R * 4'
-functionDecl(forEachTemplateArgument(refersToType(builtinType())))
-  matches the specialization f<unsigned, bool> twice, for 'unsigned'
-  and 'bool'
-
- - -Matcher<VarTemplateSpecializationDecl>hasAnyTemplateArgumentLocMatcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-that have at least one `TemplateArgumentLoc` matching the given
-`InnerMatcher`.
-
-Given
-  template<typename T> class A {};
-  A<int> a;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
-  hasTypeLoc(loc(asString("int")))))))
-  matches `A<int> a`.
-
- - -Matcher<VarTemplateSpecializationDecl>hasAnyTemplateArgumentMatcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationTypes, class template specializations,
-variable template specializations, and function template specializations
-that have at least one TemplateArgument matching the given InnerMatcher.
-
-Given
-  template<typename T> class A {};
-  template<> class A<double> {};
-  A<int> a;
-
-  template<typename T> f() {};
-  void func() { f<int>(); };
-
-classTemplateSpecializationDecl(hasAnyTemplateArgument(
-    refersToType(asString("int"))))
-  matches the specialization A<int>
-
-functionDecl(hasAnyTemplateArgument(refersToType(asString("int"))))
-  matches the specialization f<int>
-
- - -Matcher<VarTemplateSpecializationDecl>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher -
Matches template specialization `TypeLoc`s, class template specializations,
-variable template specializations, and function template specializations
-where the n'th `TemplateArgumentLoc` matches the given `InnerMatcher`.
-
-Given
-  template<typename T, typename U> class A {};
-  A<double, int> b;
-  A<int, double> c;
-varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0,
-  hasTypeLoc(loc(asString("double")))))))
-  matches `A<double, int> b`, but not `A<int, double> c`.
-
- - -Matcher<VarTemplateSpecializationDecl>hasTemplateArgumentunsigned N, Matcher<TemplateArgument> InnerMatcher -
Matches templateSpecializationType, class template specializations,
-variable template specializations, and function template specializations
-where the n'th TemplateArgument matches the given InnerMatcher.
-
-Given
-  template<typename T, typename U> class A {};
-  A<bool, int> b;
-  A<int, bool> c;
-
-  template<typename T> void f() {}
-  void func() { f<int>(); };
-classTemplateSpecializationDecl(hasTemplateArgument(
-    1, refersToType(asString("int"))))
-  matches the specialization A<bool, int>
-
-functionDecl(hasTemplateArgument(0, refersToType(asString("int"))))
-  matches the specialization f<int>
-
- - Matcher<VariableArrayType>hasSizeExprMatcher<Expr> InnerMatcher
Matches VariableArrayType nodes that have a specific size
 expression.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2fae5731566dde..cc3108bf41d680 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -111,9 +111,6 @@ Clang Frontend Potentially Breaking Changes
     $ clang --target= -print-target-triple
     
 
-- The ``hasTypeLoc`` AST matcher will no longer match a ``classTemplateSpecializationDecl``;
-  existing uses should switch to ``templateArgumentLoc`` or ``hasAnyTemplateArgumentLoc`` instead.
-
 What's New in Clang |release|?
 ==============================
 Some of the major new features and improvements to Clang are listed
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index 36fb7ec80c17d7..3ee03eebdb8ca4 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1776,25 +1776,6 @@ class BuiltinTemplateDecl : public TemplateDecl {
   BuiltinTemplateKind getBuiltinTemplateKind() const { return BTK; }
 };
 
-/// Provides information about an explicit instantiation of a variable or class
-/// template.
-struct ExplicitInstantiationInfo {
-  /// The template arguments as written..
-  const ASTTemplateArgumentListInfo *TemplateArgsAsWritten = nullptr;
-
-  /// The location of the extern keyword.
-  SourceLocation ExternKeywordLoc;
-
-  /// The location of the template keyword.
-  SourceLocation TemplateKeywordLoc;
-
-  ExplicitInstantiationInfo() = default;
-};
-
-using SpecializationOrInstantiationInfo =
-    llvm::PointerUnion;
-
 /// Represents a class template specialization, which refers to
 /// a class template with a given set of template arguments.
 ///
@@ -1808,8 +1789,8 @@ using SpecializationOrInstantiationInfo =
 /// template<>
 /// class array { }; // class template specialization array
 /// \endcode
-class ClassTemplateSpecializationDecl : public CXXRecordDecl,
-                                        public llvm::FoldingSetNode {
+class ClassTemplateSpecializationDecl
+  : public CXXRecordDecl, public llvm::FoldingSetNode {
   /// Structure that stores information about a class template
   /// specialization that was instantiated from a class template partial
   /// specialization.
@@ -1827,9 +1808,23 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl,
   llvm::PointerUnion
     SpecializedTemplate;
 
+  /// Further info for explicit template specialization/instantiation.
+  struct ExplicitSpecializationInfo {
+    /// The type-as-written.
+    TypeSourceInfo *TypeAsWritten = nullptr;
+
+    /// The location of the extern keyword.
+    SourceLocation ExternLoc;
+
+    /// The location of the template keyword.
+    SourceLocation TemplateKeywordLoc;
+
+    ExplicitSpecializationInfo() = default;
+  };
+
   /// Further info for explicit template specialization/instantiation.
   /// Does not apply to implicit specializations.
-  SpecializationOrInstantiationInfo ExplicitInfo = nullptr;
+  ExplicitSpecializationInfo *ExplicitInfo = nullptr;
 
   /// The template arguments used to describe this specialization.
   const TemplateArgumentList *TemplateArgs;
@@ -2006,49 +2001,44 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl,
     SpecializedTemplate = TemplDecl;
   }
 
-  /// Retrieve the template argument list as written in the sources,
-  /// if any.
-  const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      return Info->TemplateArgsAsWritten;
-    return ExplicitInfo.get();
+  /// Sets the type of this specialization as it was written by
+  /// the user. This will be a class template specialization type.
+  void setTypeAsWritten(TypeSourceInfo *T) {
+    if (!ExplicitInfo)
+      ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
+    ExplicitInfo->TypeAsWritten = T;
   }
 
-  /// Set the template argument list as written in the sources.
-  void
-  setTemplateArgsAsWritten(const ASTTemplateArgumentListInfo *ArgsWritten) {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      Info->TemplateArgsAsWritten = ArgsWritten;
-    else
-      ExplicitInfo = ArgsWritten;
-  }
-
-  /// Set the template argument list as written in the sources.
-  void setTemplateArgsAsWritten(const TemplateArgumentListInfo &ArgsInfo) {
-    setTemplateArgsAsWritten(
-        ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo));
+  /// Gets the type of this specialization as it was written by
+  /// the user, if it was so written.
+  TypeSourceInfo *getTypeAsWritten() const {
+    return ExplicitInfo ? ExplicitInfo->TypeAsWritten : nullptr;
   }
 
   /// Gets the location of the extern keyword, if present.
-  SourceLocation getExternKeywordLoc() const {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      return Info->ExternKeywordLoc;
-    return SourceLocation();
+  SourceLocation getExternLoc() const {
+    return ExplicitInfo ? ExplicitInfo->ExternLoc : SourceLocation();
   }
 
   /// Sets the location of the extern keyword.
-  void setExternKeywordLoc(SourceLocation Loc);
+  void setExternLoc(SourceLocation Loc) {
+    if (!ExplicitInfo)
+      ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
+    ExplicitInfo->ExternLoc = Loc;
+  }
+
+  /// Sets the location of the template keyword.
+  void setTemplateKeywordLoc(SourceLocation Loc) {
+    if (!ExplicitInfo)
+      ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
+    ExplicitInfo->TemplateKeywordLoc = Loc;
+  }
 
   /// Gets the location of the template keyword, if present.
   SourceLocation getTemplateKeywordLoc() const {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      return Info->TemplateKeywordLoc;
-    return SourceLocation();
+    return ExplicitInfo ? ExplicitInfo->TemplateKeywordLoc : SourceLocation();
   }
 
-  /// Sets the location of the template keyword.
-  void setTemplateKeywordLoc(SourceLocation Loc);
-
   SourceRange getSourceRange() const override LLVM_READONLY;
 
   void Profile(llvm::FoldingSetNodeID &ID) const {
@@ -2076,6 +2066,10 @@ class ClassTemplatePartialSpecializationDecl
   /// The list of template parameters
   TemplateParameterList* TemplateParams = nullptr;
 
+  /// The source info for the template arguments as written.
+  /// FIXME: redundant with TypeAsWritten?
+  const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
+
   /// The class template partial specialization from which this
   /// class template partial specialization was instantiated.
   ///
@@ -2084,11 +2078,15 @@ class ClassTemplatePartialSpecializationDecl
   llvm::PointerIntPair
       InstantiatedFromMember;
 
-  ClassTemplatePartialSpecializationDecl(
-      ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc,
-      SourceLocation IdLoc, TemplateParameterList *Params,
-      ClassTemplateDecl *SpecializedTemplate, ArrayRef Args,
-      ClassTemplatePartialSpecializationDecl *PrevDecl);
+  ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
+                                         DeclContext *DC,
+                                         SourceLocation StartLoc,
+                                         SourceLocation IdLoc,
+                                         TemplateParameterList *Params,
+                                         ClassTemplateDecl *SpecializedTemplate,
+                                         ArrayRef Args,
+                               const ASTTemplateArgumentListInfo *ArgsAsWritten,
+                               ClassTemplatePartialSpecializationDecl *PrevDecl);
 
   ClassTemplatePartialSpecializationDecl(ASTContext &C)
     : ClassTemplateSpecializationDecl(C, ClassTemplatePartialSpecialization),
@@ -2103,8 +2101,11 @@ class ClassTemplatePartialSpecializationDecl
   static ClassTemplatePartialSpecializationDecl *
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
          SourceLocation StartLoc, SourceLocation IdLoc,
-         TemplateParameterList *Params, ClassTemplateDecl *SpecializedTemplate,
-         ArrayRef Args, QualType CanonInjectedType,
+         TemplateParameterList *Params,
+         ClassTemplateDecl *SpecializedTemplate,
+         ArrayRef Args,
+         const TemplateArgumentListInfo &ArgInfos,
+         QualType CanonInjectedType,
          ClassTemplatePartialSpecializationDecl *PrevDecl);
 
   static ClassTemplatePartialSpecializationDecl *
@@ -2135,6 +2136,11 @@ class ClassTemplatePartialSpecializationDecl
     return TemplateParams->hasAssociatedConstraints();
   }
 
+  /// Get the template arguments as written.
+  const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
+    return ArgsAsWritten;
+  }
+
   /// Retrieve the member class template partial specialization from
   /// which this particular class template partial specialization was
   /// instantiated.
@@ -2607,12 +2613,27 @@ class VarTemplateSpecializationDecl : public VarDecl,
   llvm::PointerUnion
   SpecializedTemplate;
 
+  /// Further info for explicit template specialization/instantiation.
+  struct ExplicitSpecializationInfo {
+    /// The type-as-written.
+    TypeSourceInfo *TypeAsWritten = nullptr;
+
+    /// The location of the extern keyword.
+    SourceLocation ExternLoc;
+
+    /// The location of the template keyword.
+    SourceLocation TemplateKeywordLoc;
+
+    ExplicitSpecializationInfo() = default;
+  };
+
   /// Further info for explicit template specialization/instantiation.
   /// Does not apply to implicit specializations.
-  SpecializationOrInstantiationInfo ExplicitInfo = nullptr;
+  ExplicitSpecializationInfo *ExplicitInfo = nullptr;
 
   /// The template arguments used to describe this specialization.
   const TemplateArgumentList *TemplateArgs;
+  const ASTTemplateArgumentListInfo *TemplateArgsInfo = nullptr;
 
   /// The point where this template was instantiated (if any).
   SourceLocation PointOfInstantiation;
@@ -2666,6 +2687,14 @@ class VarTemplateSpecializationDecl : public VarDecl,
   /// specialization.
   const TemplateArgumentList &getTemplateArgs() const { return *TemplateArgs; }
 
+  // TODO: Always set this when creating the new specialization?
+  void setTemplateArgsInfo(const TemplateArgumentListInfo &ArgsInfo);
+  void setTemplateArgsInfo(const ASTTemplateArgumentListInfo *ArgsInfo);
+
+  const ASTTemplateArgumentListInfo *getTemplateArgsInfo() const {
+    return TemplateArgsInfo;
+  }
+
   /// Determine the kind of specialization that this
   /// declaration represents.
   TemplateSpecializationKind getSpecializationKind() const {
@@ -2769,49 +2798,44 @@ class VarTemplateSpecializationDecl : public VarDecl,
     SpecializedTemplate = TemplDecl;
   }
 
-  /// Retrieve the template argument list as written in the sources,
-  /// if any.
-  const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      return Info->TemplateArgsAsWritten;
-    return ExplicitInfo.get();
-  }
-
-  /// Set the template argument list as written in the sources.
-  void
-  setTemplateArgsAsWritten(const ASTTemplateArgumentListInfo *ArgsWritten) {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      Info->TemplateArgsAsWritten = ArgsWritten;
-    else
-      ExplicitInfo = ArgsWritten;
+  /// Sets the type of this specialization as it was written by
+  /// the user.
+  void setTypeAsWritten(TypeSourceInfo *T) {
+    if (!ExplicitInfo)
+      ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
+    ExplicitInfo->TypeAsWritten = T;
   }
 
-  /// Set the template argument list as written in the sources.
-  void setTemplateArgsAsWritten(const TemplateArgumentListInfo &ArgsInfo) {
-    setTemplateArgsAsWritten(
-        ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo));
+  /// Gets the type of this specialization as it was written by
+  /// the user, if it was so written.
+  TypeSourceInfo *getTypeAsWritten() const {
+    return ExplicitInfo ? ExplicitInfo->TypeAsWritten : nullptr;
   }
 
   /// Gets the location of the extern keyword, if present.
-  SourceLocation getExternKeywordLoc() const {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      return Info->ExternKeywordLoc;
-    return SourceLocation();
+  SourceLocation getExternLoc() const {
+    return ExplicitInfo ? ExplicitInfo->ExternLoc : SourceLocation();
   }
 
   /// Sets the location of the extern keyword.
-  void setExternKeywordLoc(SourceLocation Loc);
+  void setExternLoc(SourceLocation Loc) {
+    if (!ExplicitInfo)
+      ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
+    ExplicitInfo->ExternLoc = Loc;
+  }
+
+  /// Sets the location of the template keyword.
+  void setTemplateKeywordLoc(SourceLocation Loc) {
+    if (!ExplicitInfo)
+      ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
+    ExplicitInfo->TemplateKeywordLoc = Loc;
+  }
 
   /// Gets the location of the template keyword, if present.
   SourceLocation getTemplateKeywordLoc() const {
-    if (auto *Info = ExplicitInfo.dyn_cast())
-      return Info->TemplateKeywordLoc;
-    return SourceLocation();
+    return ExplicitInfo ? ExplicitInfo->TemplateKeywordLoc : SourceLocation();
   }
 
-  /// Sets the location of the template keyword.
-  void setTemplateKeywordLoc(SourceLocation Loc);
-
   SourceRange getSourceRange() const override LLVM_READONLY;
 
   void Profile(llvm::FoldingSetNodeID &ID) const {
@@ -2839,6 +2863,10 @@ class VarTemplatePartialSpecializationDecl
   /// The list of template parameters
   TemplateParameterList *TemplateParams = nullptr;
 
+  /// The source info for the template arguments as written.
+  /// FIXME: redundant with TypeAsWritten?
+  const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
+
   /// The variable template partial specialization from which this
   /// variable template partial specialization was instantiated.
   ///
@@ -2851,7 +2879,8 @@ class VarTemplatePartialSpecializationDecl
       ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
       SourceLocation IdLoc, TemplateParameterList *Params,
       VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
-      StorageClass S, ArrayRef Args);
+      StorageClass S, ArrayRef Args,
+      const ASTTemplateArgumentListInfo *ArgInfos);
 
   VarTemplatePartialSpecializationDecl(ASTContext &Context)
       : VarTemplateSpecializationDecl(VarTemplatePartialSpecialization,
@@ -2868,8 +2897,8 @@ class VarTemplatePartialSpecializationDecl
   Create(ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
          SourceLocation IdLoc, TemplateParameterList *Params,
          VarTemplateDecl *SpecializedTemplate, QualType T,
-         TypeSourceInfo *TInfo, StorageClass S,
-         ArrayRef Args);
+         TypeSourceInfo *TInfo, StorageClass S, ArrayRef Args,
+         const TemplateArgumentListInfo &ArgInfos);
 
   static VarTemplatePartialSpecializationDecl *
   CreateDeserialized(ASTContext &C, GlobalDeclID ID);
@@ -2885,6 +2914,11 @@ class VarTemplatePartialSpecializationDecl
     return TemplateParams;
   }
 
+  /// Get the template arguments as written.
+  const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
+    return ArgsAsWritten;
+  }
+
   /// \brief All associated constraints of this partial specialization,
   /// including the requires clause and any constraints derived from
   /// constrained-parameters.
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 782f60844506f4..f9b145b4e86a55 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2030,15 +2030,6 @@ DEF_TRAVERSE_DECL(RecordDecl, { TRY_TO(TraverseRecordHelper(D)); })
 
 DEF_TRAVERSE_DECL(CXXRecordDecl, { TRY_TO(TraverseCXXRecordHelper(D)); })
 
-template 
-bool RecursiveASTVisitor::TraverseTemplateArgumentLocsHelper(
-    const TemplateArgumentLoc *TAL, unsigned Count) {
-  for (unsigned I = 0; I < Count; ++I) {
-    TRY_TO(TraverseTemplateArgumentLoc(TAL[I]));
-  }
-  return true;
-}
-
 #define DEF_TRAVERSE_TMPL_SPEC_DECL(TMPLDECLKIND, DECLKIND)                    \
   DEF_TRAVERSE_DECL(TMPLDECLKIND##TemplateSpecializationDecl, {                \
     /* For implicit instantiations ("set x;"), we don't want to           \
@@ -2048,12 +2039,9 @@ bool RecursiveASTVisitor::TraverseTemplateArgumentLocsHelper(
        TemplateSpecializationType).  For explicit instantiations               \
        ("template set;"), we do need a callback, since this               \
        is the only callback that's made for this instantiation.                \
-       We use getTemplateArgsAsWritten() to distinguish. */                    \
-    if (const auto *ArgsWritten = D->getTemplateArgsAsWritten()) {             \
-      /* The args that remains unspecialized. */                               \
-      TRY_TO(TraverseTemplateArgumentLocsHelper(                               \
-          ArgsWritten->getTemplateArgs(), ArgsWritten->NumTemplateArgs));      \
-    }                                                                          \
+       We use getTypeAsWritten() to distinguish. */                            \
+    if (TypeSourceInfo *TSI = D->getTypeAsWritten())                           \
+      TRY_TO(TraverseTypeLoc(TSI->getTypeLoc()));                              \
                                                                                \
     if (getDerived().shouldVisitTemplateInstantiations() ||                    \
         D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) {    \
@@ -2073,6 +2061,15 @@ bool RecursiveASTVisitor::TraverseTemplateArgumentLocsHelper(
 DEF_TRAVERSE_TMPL_SPEC_DECL(Class, CXXRecord)
 DEF_TRAVERSE_TMPL_SPEC_DECL(Var, Var)
 
+template 
+bool RecursiveASTVisitor::TraverseTemplateArgumentLocsHelper(
+    const TemplateArgumentLoc *TAL, unsigned Count) {
+  for (unsigned I = 0; I < Count; ++I) {
+    TRY_TO(TraverseTemplateArgumentLoc(TAL[I]));
+  }
+  return true;
+}
+
 #define DEF_TRAVERSE_TMPL_PART_SPEC_DECL(TMPLDECLKIND, DECLKIND)               \
   DEF_TRAVERSE_DECL(TMPLDECLKIND##TemplatePartialSpecializationDecl, {         \
     /* The partial specialization. */                                          \
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 0f3257db6f415f..8a2bbfff9e9e6b 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -764,9 +764,9 @@ AST_POLYMORPHIC_MATCHER(isImplicit,
   return Node.isImplicit();
 }
 
-/// Matches templateSpecializationTypes, class template specializations,
-/// variable template specializations, and function template specializations
-/// that have at least one TemplateArgument matching the given InnerMatcher.
+/// Matches classTemplateSpecializations, templateSpecializationType and
+/// functionDecl that have at least one TemplateArgument matching the given
+/// InnerMatcher.
 ///
 /// Given
 /// \code
@@ -788,8 +788,8 @@ AST_POLYMORPHIC_MATCHER(isImplicit,
 AST_POLYMORPHIC_MATCHER_P(
     hasAnyTemplateArgument,
     AST_POLYMORPHIC_SUPPORTED_TYPES(ClassTemplateSpecializationDecl,
-                                    VarTemplateSpecializationDecl, FunctionDecl,
-                                    TemplateSpecializationType),
+                                    TemplateSpecializationType,
+                                    FunctionDecl),
     internal::Matcher, InnerMatcher) {
   ArrayRef List =
       internal::getTemplateSpecializationArgs(Node);
@@ -1047,9 +1047,8 @@ AST_MATCHER(Expr, isTypeDependent) { return Node.isTypeDependent(); }
 /// expr(isValueDependent()) matches return Size
 AST_MATCHER(Expr, isValueDependent) { return Node.isValueDependent(); }
 
-/// Matches templateSpecializationType, class template specializations,
-/// variable template specializations, and function template specializations
-/// where the n'th TemplateArgument matches the given InnerMatcher.
+/// Matches classTemplateSpecializations, templateSpecializationType and
+/// functionDecl where the n'th TemplateArgument matches the given InnerMatcher.
 ///
 /// Given
 /// \code
@@ -1069,8 +1068,8 @@ AST_MATCHER(Expr, isValueDependent) { return Node.isValueDependent(); }
 AST_POLYMORPHIC_MATCHER_P2(
     hasTemplateArgument,
     AST_POLYMORPHIC_SUPPORTED_TYPES(ClassTemplateSpecializationDecl,
-                                    VarTemplateSpecializationDecl, FunctionDecl,
-                                    TemplateSpecializationType),
+                                    TemplateSpecializationType,
+                                    FunctionDecl),
     unsigned, N, internal::Matcher, InnerMatcher) {
   ArrayRef List =
       internal::getTemplateSpecializationArgs(Node);
@@ -4067,7 +4066,7 @@ AST_POLYMORPHIC_MATCHER_P_OVERLOAD(
 ///   Matcher, Matcher,
 ///   Matcher, Matcher,
 ///   Matcher,
-///   Matcher,
+///   Matcher, Matcher,
 ///   Matcher, Matcher,
 ///   Matcher, Matcher,
 ///   Matcher
@@ -4076,8 +4075,9 @@ AST_POLYMORPHIC_MATCHER_P(
     AST_POLYMORPHIC_SUPPORTED_TYPES(
         BlockDecl, CXXBaseSpecifier, CXXCtorInitializer, CXXFunctionalCastExpr,
         CXXNewExpr, CXXTemporaryObjectExpr, CXXUnresolvedConstructExpr,
-        CompoundLiteralExpr, DeclaratorDecl, ExplicitCastExpr, ObjCPropertyDecl,
-        TemplateArgumentLoc, TypedefNameDecl),
+        ClassTemplateSpecializationDecl, CompoundLiteralExpr, DeclaratorDecl,
+        ExplicitCastExpr, ObjCPropertyDecl, TemplateArgumentLoc,
+        TypedefNameDecl),
     internal::Matcher, Inner) {
   TypeSourceInfo *source = internal::GetTypeSourceInfo(Node);
   if (source == nullptr) {
@@ -5304,10 +5304,9 @@ AST_POLYMORPHIC_MATCHER_P(parameterCountIs,
   return Node.getNumParams() == N;
 }
 
-/// Matches templateSpecializationType, class template specialization,
-/// variable template specialization, and function template specialization
-/// nodes where the template argument matches the inner matcher. This matcher
-/// may produce multiple matches.
+/// Matches classTemplateSpecialization, templateSpecializationType and
+/// functionDecl nodes where the template argument matches the inner matcher.
+/// This matcher may produce multiple matches.
 ///
 /// Given
 /// \code
@@ -5331,8 +5330,7 @@ AST_POLYMORPHIC_MATCHER_P(parameterCountIs,
 AST_POLYMORPHIC_MATCHER_P(
     forEachTemplateArgument,
     AST_POLYMORPHIC_SUPPORTED_TYPES(ClassTemplateSpecializationDecl,
-                                    VarTemplateSpecializationDecl, FunctionDecl,
-                                    TemplateSpecializationType),
+                                    TemplateSpecializationType, FunctionDecl),
     internal::Matcher, InnerMatcher) {
   ArrayRef TemplateArgs =
       clang::ast_matchers::internal::getTemplateSpecializationArgs(Node);
@@ -6907,10 +6905,8 @@ extern const internal::VariadicDynCastAllOfMatcher<
     TypeLoc, TemplateSpecializationTypeLoc>
     templateSpecializationTypeLoc;
 
-/// Matches template specialization `TypeLoc`s, class template specializations,
-/// variable template specializations, and function template specializations
-/// that have at least one `TemplateArgumentLoc` matching the given
-/// `InnerMatcher`.
+/// Matches template specialization `TypeLoc`s that have at least one
+/// `TemplateArgumentLoc` matching the given `InnerMatcher`.
 ///
 /// Given
 /// \code
@@ -6920,21 +6916,20 @@ extern const internal::VariadicDynCastAllOfMatcher<
 /// varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
 ///   hasTypeLoc(loc(asString("int")))))))
 ///   matches `A a`.
-AST_POLYMORPHIC_MATCHER_P(
-    hasAnyTemplateArgumentLoc,
-    AST_POLYMORPHIC_SUPPORTED_TYPES(ClassTemplateSpecializationDecl,
-                                    VarTemplateSpecializationDecl, FunctionDecl,
-                                    DeclRefExpr, TemplateSpecializationTypeLoc),
-    internal::Matcher, InnerMatcher) {
-  auto Args = internal::getTemplateArgsWritten(Node);
-  return matchesFirstInRange(InnerMatcher, Args.begin(), Args.end(), Finder,
-                             Builder) != Args.end();
+AST_MATCHER_P(TemplateSpecializationTypeLoc, hasAnyTemplateArgumentLoc,
+              internal::Matcher, InnerMatcher) {
+  for (unsigned Index = 0, N = Node.getNumArgs(); Index < N; ++Index) {
+    clang::ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder);
+    if (InnerMatcher.matches(Node.getArgLoc(Index), Finder, &Result)) {
+      *Builder = std::move(Result);
+      return true;
+    }
+  }
   return false;
 }
 
-/// Matches template specialization `TypeLoc`s, class template specializations,
-/// variable template specializations, and function template specializations
-/// where the n'th `TemplateArgumentLoc` matches the given `InnerMatcher`.
+/// Matches template specialization `TypeLoc`s where the n'th
+/// `TemplateArgumentLoc` matches the given `InnerMatcher`.
 ///
 /// Given
 /// \code
@@ -6947,13 +6942,10 @@ AST_POLYMORPHIC_MATCHER_P(
 ///   matches `A b`, but not `A c`.
 AST_POLYMORPHIC_MATCHER_P2(
     hasTemplateArgumentLoc,
-    AST_POLYMORPHIC_SUPPORTED_TYPES(ClassTemplateSpecializationDecl,
-                                    VarTemplateSpecializationDecl, FunctionDecl,
-                                    DeclRefExpr, TemplateSpecializationTypeLoc),
+    AST_POLYMORPHIC_SUPPORTED_TYPES(DeclRefExpr, TemplateSpecializationTypeLoc),
     unsigned, Index, internal::Matcher, InnerMatcher) {
-  auto Args = internal::getTemplateArgsWritten(Node);
-  return Index < Args.size() &&
-         InnerMatcher.matches(Args[Index], Finder, Builder);
+  return internal::MatchTemplateArgLocAt(Node, Index, InnerMatcher, Finder,
+                                         Builder);
 }
 
 /// Matches C or C++ elaborated `TypeLoc`s.
diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
index c1cc63fdb7433f..47d912c73dd7eb 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
@@ -186,6 +186,10 @@ inline TypeSourceInfo *GetTypeSourceInfo(const BlockDecl &Node) {
 inline TypeSourceInfo *GetTypeSourceInfo(const CXXNewExpr &Node) {
   return Node.getAllocatedTypeSourceInfo();
 }
+inline TypeSourceInfo *
+GetTypeSourceInfo(const ClassTemplateSpecializationDecl &Node) {
+  return Node.getTypeAsWritten();
+}
 
 /// Unifies obtaining the FunctionProtoType pointer from both
 /// FunctionProtoType and FunctionDecl nodes..
@@ -1935,11 +1939,6 @@ getTemplateSpecializationArgs(const ClassTemplateSpecializationDecl &D) {
   return D.getTemplateArgs().asArray();
 }
 
-inline ArrayRef
-getTemplateSpecializationArgs(const VarTemplateSpecializationDecl &D) {
-  return D.getTemplateArgs().asArray();
-}
-
 inline ArrayRef
 getTemplateSpecializationArgs(const TemplateSpecializationType &T) {
   return T.template_arguments();
@@ -1949,46 +1948,7 @@ inline ArrayRef
 getTemplateSpecializationArgs(const FunctionDecl &FD) {
   if (const auto* TemplateArgs = FD.getTemplateSpecializationArgs())
     return TemplateArgs->asArray();
-  return std::nullopt;
-}
-
-inline ArrayRef
-getTemplateArgsWritten(const ClassTemplateSpecializationDecl &D) {
-  if (const ASTTemplateArgumentListInfo *Args = D.getTemplateArgsAsWritten())
-    return Args->arguments();
-  return std::nullopt;
-}
-
-inline ArrayRef
-getTemplateArgsWritten(const VarTemplateSpecializationDecl &D) {
-  if (const ASTTemplateArgumentListInfo *Args = D.getTemplateArgsAsWritten())
-    return Args->arguments();
-  return std::nullopt;
-}
-
-inline ArrayRef
-getTemplateArgsWritten(const FunctionDecl &FD) {
-  if (const auto *Args = FD.getTemplateSpecializationArgsAsWritten())
-    return Args->arguments();
-  return std::nullopt;
-}
-
-inline ArrayRef
-getTemplateArgsWritten(const DeclRefExpr &DRE) {
-  if (const auto *Args = DRE.getTemplateArgs())
-    return {Args, DRE.getNumTemplateArgs()};
-  return std::nullopt;
-}
-
-inline SmallVector
-getTemplateArgsWritten(const TemplateSpecializationTypeLoc &T) {
-  SmallVector Args;
-  if (!T.isNull()) {
-    Args.reserve(T.getNumArgs());
-    for (unsigned I = 0; I < T.getNumArgs(); ++I)
-      Args.emplace_back(T.getArgLoc(I));
-  }
-  return Args;
+  return ArrayRef();
 }
 
 struct NotEqualsBoundNodePredicate {
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 9ff8e1ea78d852..60f213322b346b 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -443,9 +443,8 @@ namespace clang {
     Expected
     ImportFunctionTemplateWithTemplateArgsFromSpecialization(
         FunctionDecl *FromFD);
-
-    template 
-    Error ImportTemplateParameterLists(const DeclTy *FromD, DeclTy *ToD);
+    Error ImportTemplateParameterLists(const DeclaratorDecl *FromD,
+                                       DeclaratorDecl *ToD);
 
     Error ImportTemplateInformation(FunctionDecl *FromFD, FunctionDecl *ToFD);
 
@@ -3323,9 +3322,8 @@ ExpectedDecl ASTNodeImporter::VisitEnumConstantDecl(EnumConstantDecl *D) {
   return ToEnumerator;
 }
 
-template 
-Error ASTNodeImporter::ImportTemplateParameterLists(const DeclTy *FromD,
-                                                    DeclTy *ToD) {
+Error ASTNodeImporter::ImportTemplateParameterLists(const DeclaratorDecl *FromD,
+                                                    DeclaratorDecl *ToD) {
   unsigned int Num = FromD->getNumTemplateParameterLists();
   if (Num == 0)
     return Error::success();
@@ -6212,16 +6210,15 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl(
   if (!IdLocOrErr)
     return IdLocOrErr.takeError();
 
-  // Import TemplateArgumentListInfo.
-  TemplateArgumentListInfo ToTAInfo;
-  if (const auto *ASTTemplateArgs = D->getTemplateArgsAsWritten()) {
-    if (Error Err = ImportTemplateArgumentListInfo(*ASTTemplateArgs, ToTAInfo))
-      return std::move(Err);
-  }
-
   // Create the specialization.
   ClassTemplateSpecializationDecl *D2 = nullptr;
   if (PartialSpec) {
+    // Import TemplateArgumentListInfo.
+    TemplateArgumentListInfo ToTAInfo;
+    const auto &ASTTemplateArgs = *PartialSpec->getTemplateArgsAsWritten();
+    if (Error Err = ImportTemplateArgumentListInfo(ASTTemplateArgs, ToTAInfo))
+      return std::move(Err);
+
     QualType CanonInjType;
     if (Error Err = importInto(
         CanonInjType, PartialSpec->getInjectedSpecializationType()))
@@ -6231,7 +6228,7 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl(
     if (GetImportedOrCreateDecl(
             D2, D, Importer.getToContext(), D->getTagKind(), DC, *BeginLocOrErr,
             *IdLocOrErr, ToTPList, ClassTemplate,
-            llvm::ArrayRef(TemplateArgs.data(), TemplateArgs.size()),
+            llvm::ArrayRef(TemplateArgs.data(), TemplateArgs.size()), ToTAInfo,
             CanonInjType,
             cast_or_null(PrevDecl)))
       return D2;
@@ -6279,27 +6276,28 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl(
   else
     return BraceRangeOrErr.takeError();
 
-  if (Error Err = ImportTemplateParameterLists(D, D2))
-    return std::move(Err);
-
   // Import the qualifier, if any.
   if (auto LocOrErr = import(D->getQualifierLoc()))
     D2->setQualifierInfo(*LocOrErr);
   else
     return LocOrErr.takeError();
 
-  if (D->getTemplateArgsAsWritten())
-    D2->setTemplateArgsAsWritten(ToTAInfo);
+  if (auto *TSI = D->getTypeAsWritten()) {
+    if (auto TInfoOrErr = import(TSI))
+      D2->setTypeAsWritten(*TInfoOrErr);
+    else
+      return TInfoOrErr.takeError();
 
-  if (auto LocOrErr = import(D->getTemplateKeywordLoc()))
-    D2->setTemplateKeywordLoc(*LocOrErr);
-  else
-    return LocOrErr.takeError();
+    if (auto LocOrErr = import(D->getTemplateKeywordLoc()))
+      D2->setTemplateKeywordLoc(*LocOrErr);
+    else
+      return LocOrErr.takeError();
 
-  if (auto LocOrErr = import(D->getExternKeywordLoc()))
-    D2->setExternKeywordLoc(*LocOrErr);
-  else
-    return LocOrErr.takeError();
+    if (auto LocOrErr = import(D->getExternLoc()))
+      D2->setExternLoc(*LocOrErr);
+    else
+      return LocOrErr.takeError();
+  }
 
   if (D->getPointOfInstantiation().isValid()) {
     if (auto POIOrErr = import(D->getPointOfInstantiation()))
@@ -6519,7 +6517,7 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateSpecializationDecl(
   VarTemplateSpecializationDecl *D2 = nullptr;
 
   TemplateArgumentListInfo ToTAInfo;
-  if (const auto *Args = D->getTemplateArgsAsWritten()) {
+  if (const ASTTemplateArgumentListInfo *Args = D->getTemplateArgsInfo()) {
     if (Error Err = ImportTemplateArgumentListInfo(*Args, ToTAInfo))
       return std::move(Err);
   }
@@ -6527,6 +6525,14 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateSpecializationDecl(
   using PartVarSpecDecl = VarTemplatePartialSpecializationDecl;
   // Create a new specialization.
   if (auto *FromPartial = dyn_cast<PartVarSpecDecl>(D)) {
+    // Import TemplateArgumentListInfo
+    TemplateArgumentListInfo ArgInfos;
+    const auto *FromTAArgsAsWritten = FromPartial->getTemplateArgsAsWritten();
+    // NOTE: FromTAArgsAsWritten and template parameter list are non-null.
+    if (Error Err =
+            ImportTemplateArgumentListInfo(*FromTAArgsAsWritten, ArgInfos))
+      return std::move(Err);
+
     auto ToTPListOrErr = import(FromPartial->getTemplateParameters());
     if (!ToTPListOrErr)
       return ToTPListOrErr.takeError();
@@ -6535,7 +6541,7 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateSpecializationDecl(
     if (GetImportedOrCreateDecl(ToPartial, D, Importer.getToContext(), DC,
                                 *BeginLocOrErr, *IdLocOrErr, *ToTPListOrErr,
                                 VarTemplate, QualType(), nullptr,
-                                D->getStorageClass(), TemplateArgs))
+                                D->getStorageClass(), TemplateArgs, ArgInfos))
       return ToPartial;
 
     if (Expected<PartVarSpecDecl *> ToInstOrErr =
@@ -6578,9 +6584,7 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateSpecializationDecl(
   }
 
   D2->setSpecializationKind(D->getSpecializationKind());
-
-  if (D->getTemplateArgsAsWritten())
-    D2->setTemplateArgsAsWritten(ToTAInfo);
+  D2->setTemplateArgsInfo(ToTAInfo);
 
   if (auto LocOrErr = import(D->getQualifierLoc()))
     D2->setQualifierInfo(*LocOrErr);
diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index c5868256b440d9..599d379340abad 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -1083,15 +1083,15 @@ void DeclPrinter::VisitCXXRecordDecl(CXXRecordDecl *D) {
       NNS->print(Out, Policy);
     Out << *D;
 
-    if (auto *S = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
-      const TemplateParameterList *TParams =
-          S->getSpecializedTemplate()->getTemplateParameters();
-      const ASTTemplateArgumentListInfo *TArgAsWritten =
-          S->getTemplateArgsAsWritten();
-      if (TArgAsWritten && !Policy.PrintCanonicalTypes)
-        printTemplateArguments(TArgAsWritten->arguments(), TParams);
-      else
-        printTemplateArguments(S->getTemplateArgs().asArray(), TParams);
+    if (auto S = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
+      ArrayRef<TemplateArgument> Args = S->getTemplateArgs().asArray();
+      if (!Policy.PrintCanonicalTypes)
+        if (const auto* TSI = S->getTypeAsWritten())
+          if (const auto *TST =
+                  dyn_cast<TemplateSpecializationType>(TSI->getType()))
+            Args = TST->template_arguments();
+      printTemplateArguments(
+          Args, S->getSpecializedTemplate()->getTemplateParameters());
     }
   }
 
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index af2d8d728e3efc..d27a30e0c5fce1 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -985,63 +985,41 @@ ClassTemplateSpecializationDecl::getSpecializedTemplate() const {
 
 SourceRange
 ClassTemplateSpecializationDecl::getSourceRange() const {
-  if (getSpecializationKind() == TSK_ExplicitInstantiationDeclaration) {
-    return SourceRange(getExternKeywordLoc(),
-                       getTemplateArgsAsWritten()->getRAngleLoc());
-  } else if (getSpecializationKind() == TSK_ExplicitInstantiationDefinition) {
-    return SourceRange(getTemplateKeywordLoc(),
-                       getTemplateArgsAsWritten()->getRAngleLoc());
-  } else if (!isExplicitSpecialization()) {
+  if (ExplicitInfo) {
+    SourceLocation Begin = getTemplateKeywordLoc();
+    if (Begin.isValid()) {
+      // Here we have an explicit (partial) specialization or instantiation.
+      assert(getSpecializationKind() == TSK_ExplicitSpecialization ||
+             getSpecializationKind() == TSK_ExplicitInstantiationDeclaration ||
+             getSpecializationKind() == TSK_ExplicitInstantiationDefinition);
+      if (getExternLoc().isValid())
+        Begin = getExternLoc();
+      SourceLocation End = getBraceRange().getEnd();
+      if (End.isInvalid())
+        End = getTypeAsWritten()->getTypeLoc().getEndLoc();
+      return SourceRange(Begin, End);
+    }
+    // An implicit instantiation of a class template partial specialization
+    // uses ExplicitInfo to record the TypeAsWritten, but the source
+    // locations should be retrieved from the instantiation pattern.
+    using CTPSDecl = ClassTemplatePartialSpecializationDecl;
+    auto *ctpsd = const_cast<CTPSDecl *>(cast<CTPSDecl>(this));
+    CTPSDecl *inst_from = ctpsd->getInstantiatedFromMember();
+    assert(inst_from != nullptr);
+    return inst_from->getSourceRange();
+  }
+  else {
     // No explicit info available.
     llvm::PointerUnion<ClassTemplateDecl *, ClassTemplatePartialSpecializationDecl *>
-        InstFrom = getInstantiatedFrom();
-    if (InstFrom.isNull())
+      inst_from = getInstantiatedFrom();
+    if (inst_from.isNull())
       return getSpecializedTemplate()->getSourceRange();
-    if (const auto *CTD = InstFrom.dyn_cast<ClassTemplateDecl *>())
-      return CTD->getSourceRange();
-    return InstFrom.get<ClassTemplatePartialSpecializationDecl *>()
-        ->getSourceRange();
-  }
-  SourceLocation Begin = TagDecl::getOuterLocStart();
-  if (const auto *CTPSD =
-          dyn_cast<ClassTemplatePartialSpecializationDecl>(this)) {
-    if (const auto *InstFrom = CTPSD->getInstantiatedFromMember())
-      return InstFrom->getSourceRange();
-    else if (!getNumTemplateParameterLists())
-      Begin = CTPSD->getTemplateParameters()->getTemplateLoc();
-  }
-  SourceLocation End = getBraceRange().getEnd();
-  if (End.isInvalid())
-    End = getTemplateArgsAsWritten()->getRAngleLoc();
-  return SourceRange(Begin, End);
-}
-
-void ClassTemplateSpecializationDecl::setExternKeywordLoc(SourceLocation Loc) {
-  auto *Info = ExplicitInfo.dyn_cast<ExplicitInstantiationInfo *>();
-  if (!Info) {
-    // Don't allocate if the location is invalid.
-    if (Loc.isInvalid())
-      return;
-    Info = new (getASTContext()) ExplicitInstantiationInfo;
-    Info->TemplateArgsAsWritten = getTemplateArgsAsWritten();
-    ExplicitInfo = Info;
+    if (const auto *ctd = inst_from.dyn_cast<ClassTemplateDecl *>())
+      return ctd->getSourceRange();
+    return inst_from.get<ClassTemplatePartialSpecializationDecl *>()
+      ->getSourceRange();
   }
-  Info->ExternKeywordLoc = Loc;
-}
-
-void ClassTemplateSpecializationDecl::setTemplateKeywordLoc(
-    SourceLocation Loc) {
-  auto *Info = ExplicitInfo.dyn_cast<ExplicitInstantiationInfo *>();
-  if (!Info) {
-    // Don't allocate if the location is invalid.
-    if (Loc.isInvalid())
-      return;
-    Info = new (getASTContext()) ExplicitInstantiationInfo;
-    Info->TemplateArgsAsWritten = getTemplateArgsAsWritten();
-    ExplicitInfo = Info;
-  }
-  Info->TemplateKeywordLoc = Loc;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1109,29 +1087,43 @@ void ImplicitConceptSpecializationDecl::setTemplateArguments(
 //===----------------------------------------------------------------------===//
 void ClassTemplatePartialSpecializationDecl::anchor() {}
 
-ClassTemplatePartialSpecializationDecl::ClassTemplatePartialSpecializationDecl(
-    ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc,
-    SourceLocation IdLoc, TemplateParameterList *Params,
-    ClassTemplateDecl *SpecializedTemplate, ArrayRef<TemplateArgument> Args,
-    ClassTemplatePartialSpecializationDecl *PrevDecl)
-    : ClassTemplateSpecializationDecl(
-          Context, ClassTemplatePartialSpecialization, TK, DC, StartLoc, IdLoc,
-          SpecializedTemplate, Args, PrevDecl),
-      TemplateParams(Params), InstantiatedFromMember(nullptr, false) {
+ClassTemplatePartialSpecializationDecl::
+ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
+                                       DeclContext *DC,
+                                       SourceLocation StartLoc,
+                                       SourceLocation IdLoc,
+                                       TemplateParameterList *Params,
+                                       ClassTemplateDecl *SpecializedTemplate,
+                                       ArrayRef<TemplateArgument> Args,
+                               const ASTTemplateArgumentListInfo *ArgInfos,
+                               ClassTemplatePartialSpecializationDecl *PrevDecl)
+    : ClassTemplateSpecializationDecl(Context,
+                                      ClassTemplatePartialSpecialization,
+                                      TK, DC, StartLoc, IdLoc,
+                                      SpecializedTemplate, Args, PrevDecl),
+      TemplateParams(Params), ArgsAsWritten(ArgInfos),
+      InstantiatedFromMember(nullptr, false) {
   if (AdoptTemplateParameterList(Params, this))
     setInvalidDecl();
 }
 
 ClassTemplatePartialSpecializationDecl *
-ClassTemplatePartialSpecializationDecl::Create(
-    ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc,
-    SourceLocation IdLoc, TemplateParameterList *Params,
-    ClassTemplateDecl *SpecializedTemplate, ArrayRef<TemplateArgument> Args,
-    QualType CanonInjectedType,
-    ClassTemplatePartialSpecializationDecl *PrevDecl) {
-  auto *Result = new (Context, DC) ClassTemplatePartialSpecializationDecl(
-      Context, TK, DC, StartLoc, IdLoc, Params, SpecializedTemplate, Args,
-      PrevDecl);
+ClassTemplatePartialSpecializationDecl::
+Create(ASTContext &Context, TagKind TK,DeclContext *DC,
+       SourceLocation StartLoc, SourceLocation IdLoc,
+       TemplateParameterList *Params,
+       ClassTemplateDecl *SpecializedTemplate,
+       ArrayRef<TemplateArgument> Args,
+       const TemplateArgumentListInfo &ArgInfos,
+       QualType CanonInjectedType,
+       ClassTemplatePartialSpecializationDecl *PrevDecl) {
+  const ASTTemplateArgumentListInfo *ASTArgInfos =
+    ASTTemplateArgumentListInfo::Create(Context, ArgInfos);
+
+  auto *Result = new (Context, DC)
+      ClassTemplatePartialSpecializationDecl(Context, TK, DC, StartLoc, IdLoc,
+                                             Params, SpecializedTemplate, Args,
+                                             ASTArgInfos, PrevDecl);
   Result->setSpecializationKind(TSK_ExplicitSpecialization);
   Result->setMayHaveOutOfDateDef(false);
 
@@ -1379,47 +1371,26 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const {
   return SpecializedTemplate.get();
 }
 
+void VarTemplateSpecializationDecl::setTemplateArgsInfo(
+    const TemplateArgumentListInfo &ArgsInfo) {
+  TemplateArgsInfo =
+      ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
+}
+
+void VarTemplateSpecializationDecl::setTemplateArgsInfo(
+    const ASTTemplateArgumentListInfo *ArgsInfo) {
+  TemplateArgsInfo =
+      ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
+}
+
 SourceRange VarTemplateSpecializationDecl::getSourceRange() const {
   if (isExplicitSpecialization() && !hasInit()) {
-    if (const ASTTemplateArgumentListInfo *Info = getTemplateArgsAsWritten())
+    if (const ASTTemplateArgumentListInfo *Info = getTemplateArgsInfo())
       return SourceRange(getOuterLocStart(), Info->getRAngleLoc());
-  } else if (getTemplateSpecializationKind() ==
-             TSK_ExplicitInstantiationDeclaration) {
-    if (const ASTTemplateArgumentListInfo *Info = getTemplateArgsAsWritten())
-      return SourceRange(getExternKeywordLoc(), Info->getRAngleLoc());
-  } else if (getTemplateSpecializationKind() ==
-             TSK_ExplicitInstantiationDefinition) {
-    if (const ASTTemplateArgumentListInfo *Info = getTemplateArgsAsWritten())
-      return SourceRange(getTemplateKeywordLoc(), Info->getRAngleLoc());
   }
   return VarDecl::getSourceRange();
 }
 
-void VarTemplateSpecializationDecl::setExternKeywordLoc(SourceLocation Loc) {
-  auto *Info = ExplicitInfo.dyn_cast<ExplicitInstantiationInfo *>();
-  if (!Info) {
-    // Don't allocate if the location is invalid.
-    if (Loc.isInvalid())
-      return;
-    Info = new (getASTContext()) ExplicitInstantiationInfo;
-    Info->TemplateArgsAsWritten = getTemplateArgsAsWritten();
-    ExplicitInfo = Info;
-  }
-  Info->ExternKeywordLoc = Loc;
-}
-
-void VarTemplateSpecializationDecl::setTemplateKeywordLoc(SourceLocation Loc) {
-  auto *Info = ExplicitInfo.dyn_cast<ExplicitInstantiationInfo *>();
-  if (!Info) {
-    // Don't allocate if the location is invalid.
-    if (Loc.isInvalid())
-      return;
-    Info = new (getASTContext()) ExplicitInstantiationInfo;
-    Info->TemplateArgsAsWritten = getTemplateArgsAsWritten();
-    ExplicitInfo = Info;
-  }
-  Info->TemplateKeywordLoc = Loc;
-}
 
 //===----------------------------------------------------------------------===//
 // VarTemplatePartialSpecializationDecl Implementation
@@ -1431,11 +1402,13 @@ VarTemplatePartialSpecializationDecl::VarTemplatePartialSpecializationDecl(
     ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
     SourceLocation IdLoc, TemplateParameterList *Params,
     VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
-    StorageClass S, ArrayRef<TemplateArgument> Args)
+    StorageClass S, ArrayRef<TemplateArgument> Args,
+    const ASTTemplateArgumentListInfo *ArgInfos)
     : VarTemplateSpecializationDecl(VarTemplatePartialSpecialization, Context,
                                     DC, StartLoc, IdLoc, SpecializedTemplate, T,
                                     TInfo, S, Args),
-      TemplateParams(Params), InstantiatedFromMember(nullptr, false) {
+      TemplateParams(Params), ArgsAsWritten(ArgInfos),
+      InstantiatedFromMember(nullptr, false) {
   if (AdoptTemplateParameterList(Params, DC))
     setInvalidDecl();
 }
@@ -1445,10 +1418,15 @@ VarTemplatePartialSpecializationDecl::Create(
     ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
     SourceLocation IdLoc, TemplateParameterList *Params,
     VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
-    StorageClass S, ArrayRef<TemplateArgument> Args) {
-  auto *Result = new (Context, DC) VarTemplatePartialSpecializationDecl(
-      Context, DC, StartLoc, IdLoc, Params, SpecializedTemplate, T, TInfo, S,
-      Args);
+    StorageClass S, ArrayRef<TemplateArgument> Args,
+    const TemplateArgumentListInfo &ArgInfos) {
+  const ASTTemplateArgumentListInfo *ASTArgInfos
+    = ASTTemplateArgumentListInfo::Create(Context, ArgInfos);
+
+  auto *Result =
+      new (Context, DC) VarTemplatePartialSpecializationDecl(
+          Context, DC, StartLoc, IdLoc, Params, SpecializedTemplate, T, TInfo,
+          S, Args, ASTArgInfos);
   Result->setSpecializationKind(TSK_ExplicitSpecialization);
   return Result;
 }
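Illustration only, not part of the patch: the ExplicitSpecializationInfo and getSourceRange() logic restored above distinguishes the following declaration forms; names are invented.

    template <typename T> struct S {};  // primary template
    template <> struct S<char> {};      // explicit specialization: range ends at the closing brace
    template struct S<int>;             // explicit instantiation definition: range begins at 'template'
    extern template struct S<long>;     // explicit instantiation declaration: range begins at 'extern'
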
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 87f0a8728d8506..9602f448e94279 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1472,18 +1472,21 @@ void TypePrinter::printTag(TagDecl *D, raw_ostream &OS) {
 
   // If this is a class template specialization, print the template
   // arguments.
-  if (auto *S = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
-    const TemplateParameterList *TParams =
-        S->getSpecializedTemplate()->getTemplateParameters();
-    const ASTTemplateArgumentListInfo *TArgAsWritten =
-        S->getTemplateArgsAsWritten();
+  if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
+    ArrayRef<TemplateArgument> Args;
+    TypeSourceInfo *TAW = Spec->getTypeAsWritten();
+    if (!Policy.PrintCanonicalTypes && TAW) {
+      const TemplateSpecializationType *TST =
+        cast<TemplateSpecializationType>(TAW->getType());
+      Args = TST->template_arguments();
+    } else {
+      const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
+      Args = TemplateArgs.asArray();
+    }
     IncludeStrongLifetimeRAII Strong(Policy);
-    if (TArgAsWritten && !Policy.PrintCanonicalTypes)
-      printTemplateArgumentList(OS, TArgAsWritten->arguments(), Policy,
-                                TParams);
-    else
-      printTemplateArgumentList(OS, S->getTemplateArgs().asArray(), Policy,
-                                TParams);
+    printTemplateArgumentList(
+        OS, Args, Policy,
+        Spec->getSpecializedTemplate()->getTemplateParameters());
   }
 
   spaceBeforePlaceHolder(OS);
diff --git a/clang/lib/Index/IndexDecl.cpp b/clang/lib/Index/IndexDecl.cpp
index 8eb88f5a1e94ee..1c04aa17d53fb2 100644
--- a/clang/lib/Index/IndexDecl.cpp
+++ b/clang/lib/Index/IndexDecl.cpp
@@ -673,12 +673,9 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     IndexCtx.indexTagDecl(
         D, SymbolRelation(SymbolRoleSet(SymbolRole::RelationSpecializationOf),
                           SpecializationOf));
-    // Template specialization arguments.
-    if (const ASTTemplateArgumentListInfo *TemplateArgInfo =
-            D->getTemplateArgsAsWritten()) {
-      for (const auto &Arg : TemplateArgInfo->arguments())
-        handleTemplateArgumentLoc(Arg, D, D->getLexicalDeclContext());
-    }
+    if (TypeSourceInfo *TSI = D->getTypeAsWritten())
+      IndexCtx.indexTypeSourceInfo(TSI, /*Parent=*/nullptr,
+                                   D->getLexicalDeclContext());
     return true;
   }
 
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 0febf4e1d4546c..a1e32d391ed0cc 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1408,7 +1408,7 @@ void Sema::ActOnEndOfTranslationUnit() {
         SourceRange DiagRange = DiagD->getLocation();
         if (const auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(DiagD)) {
           if (const ASTTemplateArgumentListInfo *ASTTAL =
-                  VTSD->getTemplateArgsAsWritten())
+                  VTSD->getTemplateArgsInfo())
             DiagRange.setEnd(ASTTAL->RAngleLoc);
         }
         if (DiagD->isReferenced()) {
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index b268d7c405dfdf..5c72270ff15047 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -5166,8 +5166,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
         VarTemplatePartialSpecializationDecl::Create(
             Context, VarTemplate->getDeclContext(), TemplateKWLoc,
             TemplateNameLoc, TemplateParams, VarTemplate, DI->getType(), DI, SC,
-            CanonicalConverted);
-    Partial->setTemplateArgsAsWritten(TemplateArgs);
+            CanonicalConverted, TemplateArgs);
 
     if (!PrevPartial)
       VarTemplate->AddPartialSpecialization(Partial, InsertPos);
@@ -5185,7 +5184,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
     Specialization = VarTemplateSpecializationDecl::Create(
         Context, VarTemplate->getDeclContext(), TemplateKWLoc, TemplateNameLoc,
         VarTemplate, DI->getType(), DI, SC, CanonicalConverted);
-    Specialization->setTemplateArgsAsWritten(TemplateArgs);
+    Specialization->setTemplateArgsInfo(TemplateArgs);
 
     if (!PrevDecl)
       VarTemplate->AddSpecialization(Specialization, InsertPos);
@@ -5220,6 +5219,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
     }
   }
 
+  Specialization->setTemplateKeywordLoc(TemplateKWLoc);
   Specialization->setLexicalDeclContext(CurContext);
 
   // Add the specialization into its lexical context, so that it can
@@ -9489,8 +9489,7 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
         ClassTemplatePartialSpecializationDecl::Create(
             Context, Kind, ClassTemplate->getDeclContext(), KWLoc,
             TemplateNameLoc, TemplateParams, ClassTemplate, CanonicalConverted,
-            CanonType, PrevPartial);
-    Partial->setTemplateArgsAsWritten(TemplateArgs);
+            TemplateArgs, CanonType, PrevPartial);
     SetNestedNameSpecifier(*this, Partial, SS);
     if (TemplateParameterLists.size() > 1 && SS.isSet()) {
       Partial->setTemplateParameterListsInfo(
@@ -9513,7 +9512,6 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
     Specialization = ClassTemplateSpecializationDecl::Create(
         Context, Kind, ClassTemplate->getDeclContext(), KWLoc, TemplateNameLoc,
         ClassTemplate, CanonicalConverted, PrevDecl);
-    Specialization->setTemplateArgsAsWritten(TemplateArgs);
     SetNestedNameSpecifier(*this, Specialization, SS);
     if (TemplateParameterLists.size() > 0) {
       Specialization->setTemplateParameterListsInfo(Context,
@@ -9597,6 +9595,21 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
       << (isPartialSpecialization? 1 : 0)
       << FixItHint::CreateRemoval(ModulePrivateLoc);
 
+  // Build the fully-sugared type for this class template
+  // specialization as the user wrote in the specialization
+  // itself. This means that we'll pretty-print the type retrieved
+  // from the specialization's declaration the way that the user
+  // actually wrote the specialization, rather than formatting the
+  // name based on the "canonical" representation used to store the
+  // template arguments in the specialization.
+  TypeSourceInfo *WrittenTy
+    = Context.getTemplateSpecializationTypeInfo(Name, TemplateNameLoc,
+                                                TemplateArgs, CanonType);
+  if (TUK != TUK_Friend) {
+    Specialization->setTypeAsWritten(WrittenTy);
+    Specialization->setTemplateKeywordLoc(TemplateKWLoc);
+  }
+
   // C++ [temp.expl.spec]p9:
   //   A template explicit specialization is in the scope of the
   //   namespace in which the template was defined.
@@ -9612,15 +9625,6 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
     Specialization->startDefinition();
 
   if (TUK == TUK_Friend) {
-    // Build the fully-sugared type for this class template
-    // specialization as the user wrote in the specialization
-    // itself. This means that we'll pretty-print the type retrieved
-    // from the specialization's declaration the way that the user
-    // actually wrote the specialization, rather than formatting the
-    // name based on the "canonical" representation used to store the
-    // template arguments in the specialization.
-    TypeSourceInfo *WrittenTy = Context.getTemplateSpecializationTypeInfo(
-        Name, TemplateNameLoc, TemplateArgs, CanonType);
     FriendDecl *Friend = FriendDecl::Create(Context, CurContext,
                                             TemplateNameLoc,
                                             WrittenTy,
@@ -10826,10 +10830,21 @@ DeclResult Sema::ActOnExplicitInstantiation(
     }
   }
 
-  Specialization->setTemplateArgsAsWritten(TemplateArgs);
+  // Build the fully-sugared type for this explicit instantiation as
+  // the user wrote in the explicit instantiation itself. This means
+  // that we'll pretty-print the type retrieved from the
+  // specialization's declaration the way that the user actually wrote
+  // the explicit instantiation, rather than formatting the name based
+  // on the "canonical" representation used to store the template
+  // arguments in the specialization.
+  TypeSourceInfo *WrittenTy
+    = Context.getTemplateSpecializationTypeInfo(Name, TemplateNameLoc,
+                                                TemplateArgs,
+                                  Context.getTypeDeclType(Specialization));
+  Specialization->setTypeAsWritten(WrittenTy);
 
   // Set source locations for keywords.
-  Specialization->setExternKeywordLoc(ExternLoc);
+  Specialization->setExternLoc(ExternLoc);
   Specialization->setTemplateKeywordLoc(TemplateLoc);
   Specialization->setBraceRange(SourceRange());
 
@@ -11242,11 +11257,6 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
     if (!HasNoEffect) {
       // Instantiate static data member or variable template.
       Prev->setTemplateSpecializationKind(TSK, D.getIdentifierLoc());
-      if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(Prev)) {
-        VTSD->setExternKeywordLoc(ExternLoc);
-        VTSD->setTemplateKeywordLoc(TemplateLoc);
-      }
-
       // Merge attributes.
       ProcessDeclAttributeList(S, Prev, D.getDeclSpec().getAttributes());
       if (PrevTemplate)
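Illustration only, not part of the patch: the "fully-sugared type as written" built above keeps the user's spelling for printing and diagnostics instead of the canonical form; a small sketch with invented names.

    template <typename T> struct Matrix {};
    using Cell = unsigned;
    template <> struct Matrix<Cell> {};  // written as 'Matrix<Cell>'; canonical form is 'Matrix<unsigned int>'
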
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 5315b143215e19..d544cfac55ba36 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3858,16 +3858,15 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
 
   // Substitute into the template arguments of the class template explicit
   // specialization.
-  TemplateArgumentListInfo InstTemplateArgs;
-  if (const ASTTemplateArgumentListInfo *TemplateArgsInfo =
-          D->getTemplateArgsAsWritten()) {
-    InstTemplateArgs.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
-    InstTemplateArgs.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
-
-    if (SemaRef.SubstTemplateArguments(TemplateArgsInfo->arguments(),
-                                       TemplateArgs, InstTemplateArgs))
-      return nullptr;
-  }
+  TemplateSpecializationTypeLoc Loc = D->getTypeAsWritten()->getTypeLoc().
+                                        castAs<TemplateSpecializationTypeLoc>();
+  TemplateArgumentListInfo InstTemplateArgs(Loc.getLAngleLoc(),
+                                            Loc.getRAngleLoc());
+  SmallVector<TemplateArgumentLoc, 4> ArgLocs;
+  for (unsigned I = 0; I != Loc.getNumArgs(); ++I)
+    ArgLocs.push_back(Loc.getArgLoc(I));
+  if (SemaRef.SubstTemplateArguments(ArgLocs, TemplateArgs, InstTemplateArgs))
+    return nullptr;
 
   // Check that the template argument list is well-formed for this
   // class template.
@@ -3921,7 +3920,6 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
       ClassTemplateSpecializationDecl::Create(
           SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(),
           D->getLocation(), InstClassTemplate, CanonicalConverted, PrevDecl);
-  InstD->setTemplateArgsAsWritten(InstTemplateArgs);
 
   // Add this partial specialization to the set of class template partial
   // specializations.
@@ -3938,10 +3936,22 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
       TemplateName(InstClassTemplate), CanonicalConverted,
       SemaRef.Context.getRecordType(InstD));
 
+  // Build the fully-sugared type for this class template
+  // specialization as the user wrote in the specialization
+  // itself. This means that we'll pretty-print the type retrieved
+  // from the specialization's declaration the way that the user
+  // actually wrote the specialization, rather than formatting the
+  // name based on the "canonical" representation used to store the
+  // template arguments in the specialization.
+  TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
+      TemplateName(InstClassTemplate), D->getLocation(), InstTemplateArgs,
+      CanonType);
+
   InstD->setAccess(D->getAccess());
   InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
   InstD->setSpecializationKind(D->getSpecializationKind());
-  InstD->setExternKeywordLoc(D->getExternKeywordLoc());
+  InstD->setTypeAsWritten(WrittenTy);
+  InstD->setExternLoc(D->getExternLoc());
   InstD->setTemplateKeywordLoc(D->getTemplateKeywordLoc());
 
   Owner->addDecl(InstD);
@@ -3975,7 +3985,7 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
 
   // Substitute the current template arguments.
   if (const ASTTemplateArgumentListInfo *TemplateArgsInfo =
-          D->getTemplateArgsAsWritten()) {
+          D->getTemplateArgsInfo()) {
     VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
     VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
 
@@ -4033,7 +4043,7 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
   VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create(
       SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
       VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted);
-  Var->setTemplateArgsAsWritten(TemplateArgsInfo);
+  Var->setTemplateArgsInfo(TemplateArgsInfo);
   if (!PrevDecl) {
     void *InsertPos = nullptr;
     VarTemplate->findSpecialization(Converted, InsertPos);
@@ -4275,21 +4285,19 @@ TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
   QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
       TemplateName(ClassTemplate), CanonicalConverted);
 
-  // Create the class template partial specialization declaration.
-  ClassTemplatePartialSpecializationDecl *InstPartialSpec =
-      ClassTemplatePartialSpecializationDecl::Create(
-          SemaRef.Context, PartialSpec->getTagKind(), Owner,
-          PartialSpec->getBeginLoc(), PartialSpec->getLocation(), InstParams,
-          ClassTemplate, CanonicalConverted, CanonType,
-          /*PrevDecl=*/nullptr);
-
-  InstPartialSpec->setTemplateArgsAsWritten(InstTemplateArgs);
-
-  // Substitute the nested name specifier, if any.
-  if (SubstQualifier(PartialSpec, InstPartialSpec))
-    return nullptr;
-
-  InstPartialSpec->setInstantiatedFromMember(PartialSpec);
+  // Build the fully-sugared type for this class template
+  // specialization as the user wrote in the specialization
+  // itself. This means that we'll pretty-print the type retrieved
+  // from the specialization's declaration the way that the user
+  // actually wrote the specialization, rather than formatting the
+  // name based on the "canonical" representation used to store the
+  // template arguments in the specialization.
+  TypeSourceInfo *WrittenTy
+    = SemaRef.Context.getTemplateSpecializationTypeInfo(
+                                                    TemplateName(ClassTemplate),
+                                                    PartialSpec->getLocation(),
+                                                    InstTemplateArgs,
+                                                    CanonType);
 
   if (PrevDecl) {
     // We've already seen a partial specialization with the same template
@@ -4307,14 +4315,28 @@ TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
     //
     //   Outer<int, int> outer; // error: the partial specializations of Inner
     //                          // have the same signature.
-    SemaRef.Diag(InstPartialSpec->getLocation(),
-                 diag::err_partial_spec_redeclared)
-        << InstPartialSpec;
+    SemaRef.Diag(PartialSpec->getLocation(), diag::err_partial_spec_redeclared)
+      << WrittenTy->getType();
     SemaRef.Diag(PrevDecl->getLocation(), diag::note_prev_partial_spec_here)
       << SemaRef.Context.getTypeDeclType(PrevDecl);
     return nullptr;
   }
 
+
+  // Create the class template partial specialization declaration.
+  ClassTemplatePartialSpecializationDecl *InstPartialSpec =
+      ClassTemplatePartialSpecializationDecl::Create(
+          SemaRef.Context, PartialSpec->getTagKind(), Owner,
+          PartialSpec->getBeginLoc(), PartialSpec->getLocation(), InstParams,
+          ClassTemplate, CanonicalConverted, InstTemplateArgs, CanonType,
+          nullptr);
+  // Substitute the nested name specifier, if any.
+  if (SubstQualifier(PartialSpec, InstPartialSpec))
+    return nullptr;
+
+  InstPartialSpec->setInstantiatedFromMember(PartialSpec);
+  InstPartialSpec->setTypeAsWritten(WrittenTy);
+
   // Check the completed partial specialization.
   SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec);
 
@@ -4383,6 +4405,46 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
       VarTemplate->findPartialSpecialization(CanonicalConverted, InstParams,
                                              InsertPos);
 
+  // Build the canonical type that describes the converted template
+  // arguments of the variable template partial specialization.
+  QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
+      TemplateName(VarTemplate), CanonicalConverted);
+
+  // Build the fully-sugared type for this variable template
+  // specialization as the user wrote in the specialization
+  // itself. This means that we'll pretty-print the type retrieved
+  // from the specialization's declaration the way that the user
+  // actually wrote the specialization, rather than formatting the
+  // name based on the "canonical" representation used to store the
+  // template arguments in the specialization.
+  TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
+      TemplateName(VarTemplate), PartialSpec->getLocation(), InstTemplateArgs,
+      CanonType);
+
+  if (PrevDecl) {
+    // We've already seen a partial specialization with the same template
+    // parameters and template arguments. This can happen, for example, when
+    // substituting the outer template arguments ends up causing two
+    // variable template partial specializations of a member variable template
+    // to have identical forms, e.g.,
+    //
+    //   template<typename T, typename U>
+    //   struct Outer {
+    //     template<typename X, typename Y> pair<X,Y> p;
+    //     template<typename Y> pair<T, Y> p;
+    //     template<typename Y> pair<U, Y> p;
+    //   };
+    //
+    //   Outer<int, int> outer; // error: the partial specializations of Inner
+    //                          // have the same signature.
+    SemaRef.Diag(PartialSpec->getLocation(),
+                 diag::err_var_partial_spec_redeclared)
+        << WrittenTy->getType();
+    SemaRef.Diag(PrevDecl->getLocation(),
+                 diag::note_var_prev_partial_spec_here);
+    return nullptr;
+  }
+
   // Do substitution on the type of the declaration
   TypeSourceInfo *DI = SemaRef.SubstType(
       PartialSpec->getTypeSourceInfo(), TemplateArgs,
@@ -4402,39 +4464,16 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
       VarTemplatePartialSpecializationDecl::Create(
           SemaRef.Context, Owner, PartialSpec->getInnerLocStart(),
           PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(),
-          DI, PartialSpec->getStorageClass(), CanonicalConverted);
-
-  InstPartialSpec->setTemplateArgsAsWritten(InstTemplateArgs);
+          DI, PartialSpec->getStorageClass(), CanonicalConverted,
+          InstTemplateArgs);
 
   // Substitute the nested name specifier, if any.
   if (SubstQualifier(PartialSpec, InstPartialSpec))
     return nullptr;
 
   InstPartialSpec->setInstantiatedFromMember(PartialSpec);
+  InstPartialSpec->setTypeAsWritten(WrittenTy);
 
-  if (PrevDecl) {
-    // We've already seen a partial specialization with the same template
-    // parameters and template arguments. This can happen, for example, when
-    // substituting the outer template arguments ends up causing two
-    // variable template partial specializations of a member variable template
-    // to have identical forms, e.g.,
-    //
-    //   template<typename T, typename U>
-    //   struct Outer {
-    //     template<typename X, typename Y> pair<X,Y> p;
-    //     template<typename Y> pair<T, Y> p;
-    //     template<typename Y> pair<U, Y> p;
-    //   };
-    //
-    //   Outer<int, int> outer; // error: the partial specializations of Inner
-    //                          // have the same signature.
-    SemaRef.Diag(PartialSpec->getLocation(),
-                 diag::err_var_partial_spec_redeclared)
-        << InstPartialSpec;
-    SemaRef.Diag(PrevDecl->getLocation(),
-                 diag::note_var_prev_partial_spec_here);
-    return nullptr;
-  }
   // Check the completed partial specialization.
   SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec);
 
@@ -5696,7 +5735,7 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
 
     TemplateArgumentListInfo TemplateArgInfo;
     if (const ASTTemplateArgumentListInfo *ArgInfo =
-            VarSpec->getTemplateArgsAsWritten()) {
+            VarSpec->getTemplateArgsInfo()) {
       TemplateArgInfo.setLAngleLoc(ArgInfo->getLAngleLoc());
       TemplateArgInfo.setRAngleLoc(ArgInfo->getRAngleLoc());
       for (const TemplateArgumentLoc &Arg : ArgInfo->arguments())
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 0c647086e304a3..089ede4f49260b 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -2548,17 +2548,16 @@ ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
     }
   }
 
-  // extern/template keyword locations for explicit instantiations
-  if (Record.readBool()) {
-    auto *ExplicitInfo = new (C) ExplicitInstantiationInfo;
-    ExplicitInfo->ExternKeywordLoc = readSourceLocation();
+  // Explicit info.
+  if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) {
+    auto *ExplicitInfo =
+        new (C) ClassTemplateSpecializationDecl::ExplicitSpecializationInfo;
+    ExplicitInfo->TypeAsWritten = TyInfo;
+    ExplicitInfo->ExternLoc = readSourceLocation();
     ExplicitInfo->TemplateKeywordLoc = readSourceLocation();
     D->ExplicitInfo = ExplicitInfo;
   }
 
-  if (Record.readBool())
-    D->setTemplateArgsAsWritten(Record.readASTTemplateArgumentListInfo());
-
   return Redecl;
 }
 
@@ -2568,6 +2567,7 @@ void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl(
   // need them for profiling
   TemplateParameterList *Params = Record.readTemplateParameterList();
   D->TemplateParams = Params;
+  D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
 
   RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D);
 
@@ -2617,17 +2617,16 @@ ASTDeclReader::VisitVarTemplateSpecializationDeclImpl(
     }
   }
 
-  // extern/template keyword locations for explicit instantiations
-  if (Record.readBool()) {
-    auto *ExplicitInfo = new (C) ExplicitInstantiationInfo;
-    ExplicitInfo->ExternKeywordLoc = readSourceLocation();
+  // Explicit info.
+  if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) {
+    auto *ExplicitInfo =
+        new (C) VarTemplateSpecializationDecl::ExplicitSpecializationInfo;
+    ExplicitInfo->TypeAsWritten = TyInfo;
+    ExplicitInfo->ExternLoc = readSourceLocation();
     ExplicitInfo->TemplateKeywordLoc = readSourceLocation();
     D->ExplicitInfo = ExplicitInfo;
   }
 
-  if (Record.readBool())
-    D->setTemplateArgsAsWritten(Record.readASTTemplateArgumentListInfo());
-
   SmallVector<TemplateArgument, 8> TemplArgs;
   Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
   D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
@@ -2667,6 +2666,7 @@ void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl(
     VarTemplatePartialSpecializationDecl *D) {
   TemplateParameterList *Params = Record.readTemplateParameterList();
   D->TemplateParams = Params;
+  D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
 
   RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D);
 
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index c2f1d1b44241cc..6201d284f0e025 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1765,28 +1765,20 @@ void ASTDeclWriter::VisitClassTemplateSpecializationDecl(
     Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl());
   }
 
-  bool ExplicitInstantiation =
-      D->getTemplateSpecializationKind() ==
-          TSK_ExplicitInstantiationDeclaration ||
-      D->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition;
-  Record.push_back(ExplicitInstantiation);
-  if (ExplicitInstantiation) {
-    Record.AddSourceLocation(D->getExternKeywordLoc());
+  // Explicit info.
+  Record.AddTypeSourceInfo(D->getTypeAsWritten());
+  if (D->getTypeAsWritten()) {
+    Record.AddSourceLocation(D->getExternLoc());
     Record.AddSourceLocation(D->getTemplateKeywordLoc());
   }
 
-  const ASTTemplateArgumentListInfo *ArgsWritten =
-      D->getTemplateArgsAsWritten();
-  Record.push_back(!!ArgsWritten);
-  if (ArgsWritten)
-    Record.AddASTTemplateArgumentListInfo(ArgsWritten);
-
   Code = serialization::DECL_CLASS_TEMPLATE_SPECIALIZATION;
 }
 
 void ASTDeclWriter::VisitClassTemplatePartialSpecializationDecl(
                                     ClassTemplatePartialSpecializationDecl *D) {
   Record.AddTemplateParameterList(D->getTemplateParameters());
+  Record.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten());
 
   VisitClassTemplateSpecializationDecl(D);
 
@@ -1820,22 +1812,13 @@ void ASTDeclWriter::VisitVarTemplateSpecializationDecl(
     Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs());
   }
 
-  bool ExplicitInstantiation =
-      D->getTemplateSpecializationKind() ==
-          TSK_ExplicitInstantiationDeclaration ||
-      D->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition;
-  Record.push_back(ExplicitInstantiation);
-  if (ExplicitInstantiation) {
-    Record.AddSourceLocation(D->getExternKeywordLoc());
+  // Explicit info.
+  Record.AddTypeSourceInfo(D->getTypeAsWritten());
+  if (D->getTypeAsWritten()) {
+    Record.AddSourceLocation(D->getExternLoc());
     Record.AddSourceLocation(D->getTemplateKeywordLoc());
   }
 
-  const ASTTemplateArgumentListInfo *ArgsWritten =
-      D->getTemplateArgsAsWritten();
-  Record.push_back(!!ArgsWritten);
-  if (ArgsWritten)
-    Record.AddASTTemplateArgumentListInfo(ArgsWritten);
-
   Record.AddTemplateArgumentList(&D->getTemplateArgs());
   Record.AddSourceLocation(D->getPointOfInstantiation());
   Record.push_back(D->getSpecializationKind());
@@ -1856,6 +1839,7 @@ void ASTDeclWriter::VisitVarTemplateSpecializationDecl(
 void ASTDeclWriter::VisitVarTemplatePartialSpecializationDecl(
     VarTemplatePartialSpecializationDecl *D) {
   Record.AddTemplateParameterList(D->getTemplateParameters());
+  Record.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten());
 
   VisitVarTemplateSpecializationDecl(D);
 
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index 3e50d67f4d6ef0..cd026198949500 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -735,8 +735,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
     auto *Declaration =
         cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C));
     foldExplicitTemplateInstantiation(
-        Builder.getTemplateRange(C),
-        Builder.findToken(C->getExternKeywordLoc()),
+        Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()),
         Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C);
     return true;
   }
diff --git a/clang/test/AST/ast-dump-template-decls.cpp b/clang/test/AST/ast-dump-template-decls.cpp
index 37f6d8a0472d30..142bc9e6ad9a0e 100644
--- a/clang/test/AST/ast-dump-template-decls.cpp
+++ b/clang/test/AST/ast-dump-template-decls.cpp
@@ -1,12 +1,12 @@
 // Test without serialization:
 // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown -ast-dump %s \
-// RUN: | FileCheck -strict-whitespace %s
+// RUN: | FileCheck -strict-whitespace %s --check-prefix=DIRECT
 //
 // Test with serialization:
 // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -x c++ -std=c++17 -triple x86_64-unknown-unknown -include-pch %t -ast-dump-all /dev/null \
 // RUN: | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
-// RUN: | FileCheck --strict-whitespace %s
+// RUN: | FileCheck --strict-whitespace %s --check-prefix=SERIALIZED
 
 template 
 // CHECK: FunctionTemplateDecl 0x{{[^ ]*}} <{{.*}}:1, line:[[@LINE+2]]:10> col:6 a
@@ -189,13 +189,15 @@ T unTempl = 1;
 
 template<>
 int unTempl;
-// CHECK:      VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:5 unTempl 'int'
+// FIXME (#61680) - serializing and loading AST should not affect reported source range
+// DIRECT:     VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:5 unTempl 'int'
+// SERIALIZED: VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:5 unTempl 'int'
 // CHECK-NEXT: `-TemplateArgument type 'int'
 // CHECK-NEXT: `-BuiltinType 0x{{[^ ]*}} 'int'
 
 template<>
 float unTempl = 1;
-// CHECK:      VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:7 unTempl 'float'
+// CHECK:      VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:7 unTempl 'float' cinit
 // CHECK-NEXT: |-TemplateArgument type 'float'
 // CHECK-NEXT: | `-BuiltinType 0x{{[^ ]*}} 'float'
 // CHECK-NEXT: `-ImplicitCastExpr 0x{{[^ ]*}}  'float' 
@@ -220,7 +222,7 @@ int binTempl;
 
 template
 float binTempl = 1;
-// CHECK:      VarTemplatePartialSpecializationDecl 0x{{[^ ]*}}  col:7 binTempl 'float'
+// CHECK:      VarTemplatePartialSpecializationDecl 0x{{[^ ]*}}  col:7 binTempl 'float' cinit
 // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{[^ ]*}}  col:16 referenced class depth 0 index 0 U
 // CHECK-NEXT: |-TemplateArgument type 'float'
 // CHECK-NEXT: | `-BuiltinType 0x{{[^ ]*}} 'float'
@@ -231,7 +233,9 @@ float binTempl = 1;
 
 template<>
 int binTempl;
-// CHECK:      VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:5 binTempl 'int'
+// FIXME (#61680) - serializing and loading AST should not affect reported source range
+// DIRECT:     VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:5 binTempl 'int'
+// SERIALIZED: VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:5 binTempl 'int'
 // CHECK-NEXT: |-TemplateArgument type 'int'
 // CHECK-NEXT: | `-BuiltinType 0x{{[^ ]*}} 'int'
 // CHECK-NEXT: `-TemplateArgument type 'int'
@@ -239,7 +243,7 @@ int binTempl;
 
 template<>
 float binTempl = 1;
-// CHECK:      VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:7 binTempl 'float'
+// CHECK:     VarTemplateSpecializationDecl 0x{{[^ ]*}}  col:7 binTempl 'float' cinit
 // CHECK-NEXT: |-TemplateArgument type 'float'
 // CHECK-NEXT: | `-BuiltinType 0x{{[^ ]*}} 'float'
 // CHECK-NEXT: |-TemplateArgument type 'float'
diff --git a/clang/test/Index/Core/index-source.cpp b/clang/test/Index/Core/index-source.cpp
index 043e616a1d3664..8f9fbc4c8d29c4 100644
--- a/clang/test/Index/Core/index-source.cpp
+++ b/clang/test/Index/Core/index-source.cpp
@@ -285,17 +285,20 @@ template<>
 class SpecializationDecl<int>;
 // CHECK: [[@LINE-1]]:7 | class(Gen,TS)/C++ | SpecializationDecl | c:@S@SpecializationDecl>#I |  | Decl,RelSpecialization | rel: 1
 // CHECK-NEXT: RelSpecialization | SpecializationDecl | c:@ST>1#T@SpecializationDecl
+// CHECK: [[@LINE-3]]:7 | class(Gen,TS)/C++ | SpecializationDecl | c:@S@SpecializationDecl>#I |  | Ref | rel: 0
 
 template<>
 class SpecializationDecl<int> { };
 // CHECK: [[@LINE-1]]:7 | class(Gen,TS)/C++ | SpecializationDecl | c:@S@SpecializationDecl>#I |  | Def,RelSpecialization | rel: 1
 // CHECK-NEXT: RelSpecialization | SpecializationDecl | c:@ST>1#T@SpecializationDecl
+// CHECK-NEXT: [[@LINE-3]]:7 | class(Gen,TS)/C++ | SpecializationDecl | c:@S@SpecializationDecl>#I |  | Ref | rel: 0
 
 template<typename T>
 class PartialSpecilizationClass<Cls, T>;
 // CHECK: [[@LINE-1]]:7 | class(Gen,TPS)/C++ | PartialSpecilizationClass | c:@SP>1#T@PartialSpecilizationClass>#$@S@Cls#t0.0 |  | Decl,RelSpecialization | rel: 1
 // CHECK-NEXT: RelSpecialization | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass
-// CHECK-NEXT: [[@LINE-3]]:33 | class/C++ | Cls | c:@S@Cls |  | Ref,RelCont | rel: 1
+// CHECK: [[@LINE-3]]:7 | class(Gen)/C++ | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass |  | Ref | rel: 0
+// CHECK-NEXT: [[@LINE-4]]:33 | class/C++ | Cls | c:@S@Cls |  | Ref | rel: 0
 
 template<>
 class PartialSpecilizationClass<Cls, Cls> : Cls { };
@@ -303,10 +306,9 @@ class PartialSpecilizationClass<Cls, Cls> : Cls { };
 // CHECK-NEXT: RelSpecialization | PartialSpecilizationClass | c:@ST>2#T#T@PartialSpecilizationClass
 // CHECK-NEXT: [[@LINE-3]]:45 | class/C++ | Cls | c:@S@Cls |  | Ref,RelBase,RelCont | rel: 1
 // CHECK-NEXT: RelBase,RelCont | PartialSpecilizationClass | c:@S@PartialSpecilizationClass>#$@S@Cls#S0_
-// CHECK-NEXT: [[@LINE-5]]:33 | class/C++ | Cls | c:@S@Cls |  | Ref,RelCont | rel: 1
-// CHECK-NEXT: RelCont | PartialSpecilizationClass | c:@S@PartialSpecilizationClass>#$@S@Cls#S0_
-// CHECK-NEXT: [[@LINE-7]]:38 | class/C++ | Cls | c:@S@Cls |  | Ref,RelCont | rel: 1
-// CHECK-NEXT: RelCont | PartialSpecilizationClass | c:@S@PartialSpecilizationClass>#$@S@Cls#S0_
+// CHECK-NEXT: [[@LINE-5]]:7 | class(Gen,TS)/C++ | PartialSpecilizationClass | c:@S@PartialSpecilizationClass>#$@S@Cls#S0_ |  | Ref | rel: 0
+// CHECK-NEXT: [[@LINE-6]]:33 | class/C++ | Cls | c:@S@Cls |  | Ref | rel: 0
+// CHECK-NEXT: [[@LINE-7]]:38 | class/C++ | Cls | c:@S@Cls |  | Ref | rel: 0
 
 template
 void functionSp() { }
@@ -330,14 +332,10 @@ class ClassWithCorrectSpecialization { };
 
 template<>
 class ClassWithCorrectSpecialization<SpecializationDecl<Cls>, Record::C> { };
-// CHECK: [[@LINE-1]]:38 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl |  | Ref,RelCont | rel: 1
-// CHECK-NEXT: RelCont | ClassWithCorrectSpecialization | c:@S@ClassWithCorrectSpecialization>#$@S@SpecializationDecl>#$@S@Cls#VI2
-// CHECK-NEXT: [[@LINE-3]]:57 | class/C++ | Cls | c:@S@Cls |  | Ref,RelCont | rel: 1
-// CHECK-NEXT: RelCont | ClassWithCorrectSpecialization | c:@S@ClassWithCorrectSpecialization>#$@S@SpecializationDecl>#$@S@Cls#VI2
-// CHECK-NEXT: [[@LINE-5]]:71 | static-property/C++ | C | c:@S@Record@C | __ZN6Record1CE | Ref,Read,RelCont | rel: 1
-// CHECK-NEXT: RelCont | ClassWithCorrectSpecialization | c:@S@ClassWithCorrectSpecialization>#$@S@SpecializationDecl>#$@S@Cls#VI2
-// CHECK-NEXT: [[@LINE-7]]:63 | struct/C++ | Record | c:@S@Record |  | Ref,RelCont | rel: 1
-// CHECK-NEXT: RelCont | ClassWithCorrectSpecialization | c:@S@ClassWithCorrectSpecialization>#$@S@SpecializationDecl>#$@S@Cls#VI2
+// CHECK: [[@LINE-1]]:38 | class(Gen)/C++ | SpecializationDecl | c:@ST>1#T@SpecializationDecl |  | Ref | rel: 0
+// CHECK: [[@LINE-2]]:57 | class/C++ | Cls | c:@S@Cls |  | Ref | rel: 0
+// CHECK: [[@LINE-3]]:71 | static-property/C++ | C | c:@S@Record@C | __ZN6Record1CE | Ref,Read | rel: 0
+// CHECK: [[@LINE-4]]:63 | struct/C++ | Record | c:@S@Record |  | Ref | rel: 0
 
 namespace ns {
 // CHECK: [[@LINE-1]]:11 | namespace/C++ | ns | c:@N@ns |  | Decl | rel: 0
diff --git a/clang/test/Index/index-refs.cpp b/clang/test/Index/index-refs.cpp
index 14946849777d44..0e613e48522b12 100644
--- a/clang/test/Index/index-refs.cpp
+++ b/clang/test/Index/index-refs.cpp
@@ -108,6 +108,7 @@ int ginitlist[] = {EnumVal};
 // CHECK:      [indexDeclaration]: kind: c++-class-template | name: TS | {{.*}} | loc: 47:8
 // CHECK-NEXT: [indexDeclaration]: kind: struct-template-partial-spec | name: TS | USR: c:@SP>1#T@TS>#t0.0#I | {{.*}} | loc: 50:8
 // CHECK-NEXT: [indexDeclaration]: kind: typedef | name: MyInt | USR: c:index-refs.cpp@SP>1#T@TS>#t0.0#I@T@MyInt | {{.*}} | loc: 51:15 | semantic-container: [TS:50:8] | lexical-container: [TS:50:8]
+// CHECK-NEXT: [indexEntityReference]: kind: c++-class-template | name: TS | USR: c:@ST>2#T#T@TS | lang: C++ | cursor: TemplateRef=TS:47:8 | loc: 50:8 | :: <> | container: [TU] | refkind: direct | role: ref
 /* when indexing implicit instantiations
   [indexDeclaration]: kind: struct-template-spec | name: TS | USR: c:@S@TS>#I | {{.*}} | loc: 50:8
   [indexDeclaration]: kind: typedef | name: MyInt | USR: c:index-refs.cpp@593@S@TS>#I@T@MyInt | {{.*}} | loc: 51:15 | semantic-container: [TS:50:8] | lexical-container: [TS:50:8]
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 60241afd8776dc..b845a381d63bed 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -743,10 +743,14 @@ bool CursorVisitor::VisitClassTemplateSpecializationDecl(
   }
 
   // Visit the template arguments used in the specialization.
-  if (const auto *ArgsWritten = D->getTemplateArgsAsWritten()) {
-    for (const TemplateArgumentLoc &Arg : ArgsWritten->arguments())
-      if (VisitTemplateArgumentLoc(Arg))
-        return true;
+  if (TypeSourceInfo *SpecType = D->getTypeAsWritten()) {
+    TypeLoc TL = SpecType->getTypeLoc();
+    if (TemplateSpecializationTypeLoc TSTLoc =
+            TL.getAs<TemplateSpecializationTypeLoc>()) {
+      for (unsigned I = 0, N = TSTLoc.getNumArgs(); I != N; ++I)
+        if (VisitTemplateArgumentLoc(TSTLoc.getArgLoc(I)))
+          return true;
+    }
   }
 
   return ShouldVisitBody && VisitCXXRecordDecl(D);
@@ -5655,19 +5659,16 @@ CXString clang_getCursorDisplayName(CXCursor C) {
 
   if (const ClassTemplateSpecializationDecl *ClassSpec =
           dyn_cast<ClassTemplateSpecializationDecl>(D)) {
+    // If the type was explicitly written, use that.
+    if (TypeSourceInfo *TSInfo = ClassSpec->getTypeAsWritten())
+      return cxstring::createDup(TSInfo->getType().getAsString(Policy));
+
     SmallString<128> Str;
     llvm::raw_svector_ostream OS(Str);
     OS << *ClassSpec;
-    // If the template arguments were written explicitly, use them..
-    if (const auto *ArgsWritten = ClassSpec->getTemplateArgsAsWritten()) {
-      printTemplateArgumentList(
-          OS, ArgsWritten->arguments(), Policy,
-          ClassSpec->getSpecializedTemplate()->getTemplateParameters());
-    } else {
-      printTemplateArgumentList(
-          OS, ClassSpec->getTemplateArgs().asArray(), Policy,
-          ClassSpec->getSpecializedTemplate()->getTemplateParameters());
-    }
+    printTemplateArgumentList(
+        OS, ClassSpec->getTemplateArgs().asArray(), Policy,
+        ClassSpec->getSpecializedTemplate()->getTemplateParameters());
     return cxstring::createDup(OS.str());
   }
 
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index 65df513d271372..b76627cb9be637 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -2213,6 +2213,18 @@ TEST_P(ASTMatchersTest, ReferenceTypeLocTest_BindsToAnyRvalueReferenceTypeLoc) {
   EXPECT_TRUE(matches("float&& r = 3.0;", matcher));
 }
 
+TEST_P(
+    ASTMatchersTest,
+    TemplateSpecializationTypeLocTest_BindsToTemplateSpecializationExplicitInstantiation) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+  EXPECT_TRUE(
+      matches("template  class C {}; template class C;",
+              classTemplateSpecializationDecl(
+                  hasName("C"), hasTypeLoc(templateSpecializationTypeLoc()))));
+}
+
 TEST_P(ASTMatchersTest,
        TemplateSpecializationTypeLocTest_BindsToVarDeclTemplateSpecialization) {
   if (!GetParam().isCXX()) {
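Illustration only, not part of the patch: with this change, template argument locations on a ClassTemplateSpecializationDecl are reached through its written type loc, as the updated tests in the next file show. A condensed restatement of that expectation, assuming the gtest/ASTMatchers harness these files already use:

    EXPECT_TRUE(matches(
        "template<typename T> class A {}; template<> class A<int> {};",
        classTemplateSpecializationDecl(
            hasName("A"),
            hasTypeLoc(templateSpecializationTypeLoc(
                hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("int")))))))));
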
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index af99c73f1945f6..f198dc71eb8337 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -430,6 +430,12 @@ TEST(HasTypeLoc, MatchesCXXUnresolvedConstructExpr) {
               cxxUnresolvedConstructExpr(hasTypeLoc(loc(asString("T"))))));
 }
 
+TEST(HasTypeLoc, MatchesClassTemplateSpecializationDecl) {
+  EXPECT_TRUE(matches(
+      "template  class Foo; template <> class Foo {};",
+      classTemplateSpecializationDecl(hasTypeLoc(loc(asString("Foo"))))));
+}
+
 TEST(HasTypeLoc, MatchesCompoundLiteralExpr) {
   EXPECT_TRUE(
       matches("int* x = (int[2]) { 0, 1 };",
@@ -6378,7 +6384,8 @@ TEST(HasAnyTemplateArgumentLoc, BindsToExplicitSpecializationWithIntArgument) {
       "template class A {}; template<> class A {};",
       classTemplateSpecializationDecl(
           hasName("A"),
-          hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("int")))))));
+          hasTypeLoc(templateSpecializationTypeLoc(
+              hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("int")))))))));
 }
 
 TEST(HasAnyTemplateArgumentLoc,
@@ -6387,7 +6394,8 @@ TEST(HasAnyTemplateArgumentLoc,
       "template class A {}; template<> class A {};",
       classTemplateSpecializationDecl(
           hasName("A"),
-          hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("double")))))));
+          hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
+              hasTypeLoc(loc(asString("double")))))))));
 }
 
 TEST(HasAnyTemplateArgumentLoc, BindsToSpecializationWithMultipleArguments) {
@@ -6397,20 +6405,24 @@ TEST(HasAnyTemplateArgumentLoc, BindsToSpecializationWithMultipleArguments) {
   )";
   EXPECT_TRUE(
       matches(code, classTemplateSpecializationDecl(
-                        hasName("A"), hasAnyTemplateArgumentLoc(hasTypeLoc(
-                                          loc(asString("double")))))));
-
+                        hasName("A"), hasTypeLoc(templateSpecializationTypeLoc(
+                                          hasAnyTemplateArgumentLoc(hasTypeLoc(
+                                              loc(asString("double")))))))));
   EXPECT_TRUE(matches(
-      code, classTemplateSpecializationDecl(
-                hasName("A"),
-                hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("int")))))));
+      code,
+      classTemplateSpecializationDecl(
+          hasName("A"),
+          hasTypeLoc(templateSpecializationTypeLoc(
+              hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("int")))))))));
 }
 
 TEST(HasAnyTemplateArgumentLoc, DoesNotBindToSpecializationWithIntArgument) {
-  EXPECT_TRUE(notMatches("template class A {}; A a;",
-                         classTemplateSpecializationDecl(
-                             hasName("A"), hasAnyTemplateArgumentLoc(hasTypeLoc(
-                                               loc(asString("double")))))));
+  EXPECT_TRUE(notMatches(
+      "template class A {}; A a;",
+      classTemplateSpecializationDecl(
+          hasName("A"),
+          hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
+              hasTypeLoc(loc(asString("double")))))))));
 }
 
 TEST(HasAnyTemplateArgumentLoc,
@@ -6419,7 +6431,8 @@ TEST(HasAnyTemplateArgumentLoc,
       "template class A {}; template<> class A {};",
       classTemplateSpecializationDecl(
           hasName("A"),
-          hasAnyTemplateArgumentLoc(hasTypeLoc(loc(asString("double")))))));
+          hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
+              hasTypeLoc(loc(asString("double")))))))));
 }
 
 TEST(HasTemplateArgumentLoc, BindsToSpecializationWithIntArgument) {
@@ -6440,21 +6453,13 @@ TEST(HasTemplateArgumentLoc, BindsToSpecializationWithDoubleArgument) {
                               0, hasTypeLoc(loc(asString("double")))))))))));
 }
 
-TEST(HasTemplateArgumentLoc, DoesNotBindToSpecializationWithIntArgument) {
-  EXPECT_TRUE(notMatches(
-      "template class A {}; A a;",
-      varDecl(hasName("a"),
-              hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc(
-                  templateSpecializationTypeLoc(hasTemplateArgumentLoc(
-                      0, hasTypeLoc(loc(asString("double")))))))))));
-}
-
 TEST(HasTemplateArgumentLoc, BindsToExplicitSpecializationWithIntArgument) {
   EXPECT_TRUE(matches(
       "template class A {}; template<> class A {};",
       classTemplateSpecializationDecl(
           hasName("A"),
-          hasTemplateArgumentLoc(0, hasTypeLoc(loc(asString("int")))))));
+          hasTypeLoc(templateSpecializationTypeLoc(
+              hasTemplateArgumentLoc(0, hasTypeLoc(loc(asString("int")))))))));
 }
 
 TEST(HasTemplateArgumentLoc, BindsToExplicitSpecializationWithDoubleArgument) {
@@ -6462,7 +6467,8 @@ TEST(HasTemplateArgumentLoc, BindsToExplicitSpecializationWithDoubleArgument) {
       "template class A {}; template<> class A {};",
       classTemplateSpecializationDecl(
           hasName("A"),
-          hasTemplateArgumentLoc(0, hasTypeLoc(loc(asString("double")))))));
+          hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+              0, hasTypeLoc(loc(asString("double")))))))));
 }
 
 TEST(HasTemplateArgumentLoc, BindsToSpecializationWithMultipleArguments) {
@@ -6472,12 +6478,23 @@ TEST(HasTemplateArgumentLoc, BindsToSpecializationWithMultipleArguments) {
   )";
   EXPECT_TRUE(matches(
       code, classTemplateSpecializationDecl(
-                hasName("A"), hasTemplateArgumentLoc(
-                                  0, hasTypeLoc(loc(asString("double")))))));
+                hasName("A"),
+                hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+                    0, hasTypeLoc(loc(asString("double")))))))));
   EXPECT_TRUE(matches(
       code, classTemplateSpecializationDecl(
                 hasName("A"),
-                hasTemplateArgumentLoc(1, hasTypeLoc(loc(asString("int")))))));
+                hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+                    1, hasTypeLoc(loc(asString("int")))))))));
+}
+
+TEST(HasTemplateArgumentLoc, DoesNotBindToSpecializationWithIntArgument) {
+  EXPECT_TRUE(notMatches(
+      "template class A {}; A a;",
+      classTemplateSpecializationDecl(
+          hasName("A"),
+          hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+              0, hasTypeLoc(loc(asString("double")))))))));
 }
 
 TEST(HasTemplateArgumentLoc,
@@ -6486,7 +6503,8 @@ TEST(HasTemplateArgumentLoc,
       "template class A {}; template<> class A {};",
       classTemplateSpecializationDecl(
           hasName("A"),
-          hasTemplateArgumentLoc(0, hasTypeLoc(loc(asString("double")))))));
+          hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+              0, hasTypeLoc(loc(asString("double")))))))));
 }
 
 TEST(HasTemplateArgumentLoc,
@@ -6497,12 +6515,14 @@ TEST(HasTemplateArgumentLoc,
   )";
   EXPECT_TRUE(notMatches(
       code, classTemplateSpecializationDecl(
-                hasName("A"), hasTemplateArgumentLoc(
-                                  1, hasTypeLoc(loc(asString("double")))))));
+                hasName("A"),
+                hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+                    1, hasTypeLoc(loc(asString("double")))))))));
   EXPECT_TRUE(notMatches(
       code, classTemplateSpecializationDecl(
                 hasName("A"),
-                hasTemplateArgumentLoc(0, hasTypeLoc(loc(asString("int")))))));
+                hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+                    0, hasTypeLoc(loc(asString("int")))))))));
 }
 
 TEST(HasTemplateArgumentLoc, DoesNotBindWithBadIndex) {
@@ -6512,12 +6532,14 @@ TEST(HasTemplateArgumentLoc, DoesNotBindWithBadIndex) {
   )";
   EXPECT_TRUE(notMatches(
       code, classTemplateSpecializationDecl(
-                hasName("A"), hasTemplateArgumentLoc(
-                                  -1, hasTypeLoc(loc(asString("double")))))));
+                hasName("A"),
+                hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+                    -1, hasTypeLoc(loc(asString("double")))))))));
   EXPECT_TRUE(notMatches(
       code, classTemplateSpecializationDecl(
-                hasName("A"), hasTemplateArgumentLoc(
-                                  100, hasTypeLoc(loc(asString("int")))))));
+                hasName("A"),
+                hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(
+                    100, hasTypeLoc(loc(asString("int")))))))));
 }
 
 TEST(HasTemplateArgumentLoc, BindsToDeclRefExprWithIntArgument) {

From dad11097096c05564758e539f9f03ef883365fdd Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Tue, 7 May 2024 13:12:22 -0700
Subject: [PATCH 18/64] [lldb] Reinstate lldb-sbapi-dwarf-enums target (NFC)
 (#91390)

Alex pointed out in #91254 that we only need the custom target if we
have more than one target depending on it. That isn't the case upstream,
but our downstream fork has a second dependency. Reintroduce the target
so that everything can depend on it, avoiding the single-dependency
foot-gun.
---
 lldb/source/API/CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index 798a92874f13d1..aa31caddfde3a4 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -20,7 +20,7 @@ if(LLDB_ENABLE_LUA)
   set(lldb_lua_wrapper ${lua_bindings_dir}/LLDBWrapLua.cpp)
 endif()
 
-# Target to generate SBLanguages.h from Dwarf.def.
+# Generate SBLanguages.h from Dwarf.def.
 set(sb_languages_file
   ${CMAKE_CURRENT_BINARY_DIR}/../../include/lldb/API/SBLanguages.h)
 add_custom_command(
@@ -33,6 +33,8 @@ add_custom_command(
   DEPENDS ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat/Dwarf.def
   WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
 )
+add_custom_target(lldb-sbapi-dwarf-enums
+  DEPENDS ${sb_languages_file})
 
 add_lldb_library(liblldb SHARED ${option_framework}
   SBAddress.cpp
@@ -113,7 +115,9 @@ add_lldb_library(liblldb SHARED ${option_framework}
   SystemInitializerFull.cpp
   ${lldb_python_wrapper}
   ${lldb_lua_wrapper}
-  ${sb_languages_file}
+
+  DEPENDS
+    lldb-sbapi-dwarf-enums
 
   LINK_LIBS
     lldbBreakpoint

From 5e9dd8827b3ccd03f8499b610deb6accd2d71d21 Mon Sep 17 00:00:00 2001
From: Xiang Li 
Date: Tue, 7 May 2024 13:19:52 -0700
Subject: [PATCH 19/64] [DirectX] remove string function attribute DXIL not
 allowed (#90778)

Remove string function attributes other than
"waveops-include-helper-lanes" and "fp32-denorm-mode".

Move DXILPrepareModulePass after DXILTranslateMetadataPass, since
DXILTranslateMetadataPass needs to use attributes like hlsl.numthreads.
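
(Editorial sketch, not part of this change: the filtering idea is simply
"drop every string function attribute whose key is not on an allow-list".
A minimal standalone illustration using the public LLVM Attribute APIs
follows; the helper name keepOnlyAllowedStringAttrs and the demo module
are invented for illustration, the real logic lives in DXILPrepare.cpp.)

  // Minimal sketch: strip string fn attributes not on an allow-list.
  #include "llvm/ADT/StringSet.h"
  #include "llvm/IR/AttributeMask.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void keepOnlyAllowedStringAttrs(Function &F,
                                         const StringSet<> &Allowed) {
    AttributeMask Dead;
    for (const Attribute &A : F.getAttributes().getFnAttrs()) {
      if (!A.isStringAttribute())
        continue;
      if (!Allowed.contains(A.getKindAsString()))
        Dead.addAttribute(A.getKindAsString()); // mark for removal
    }
    F.removeFnAttrs(Dead);
  }

  int main() {
    LLVMContext Ctx;
    Module M("demo", Ctx);
    Function *F = Function::Create(
        FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
        Function::ExternalLinkage, "entry", M);
    F->addFnAttr("fp32-denorm-mode", "any"); // allowed, kept
    F->addFnAttr("hlsl.shader", "compute");  // not allowed, dropped
    const StringSet<> Allowed = {"waveops-include-helper-lanes",
                                 "fp32-denorm-mode"};
    keepOnlyAllowedStringAttrs(*F, Allowed);
    M.print(outs(), nullptr); // "entry" keeps only fp32-denorm-mode
  }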

Fixes #90773
---
 llvm/lib/Target/DirectX/DXILMetadata.cpp      |  9 ++++
 llvm/lib/Target/DirectX/DXILMetadata.h        |  1 +
 llvm/lib/Target/DirectX/DXILPrepare.cpp       | 50 ++++++++++++++++++-
 .../Target/DirectX/DirectXTargetMachine.cpp   |  2 +-
 .../Metadata/shaderModel-cs-val-ver-0.0.ll    | 16 ++++++
 .../DirectX/Metadata/shaderModel-cs.ll        |  7 ++-
 llvm/test/tools/dxil-dis/attribute-filter.ll  |  8 +--
 7 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs-val-ver-0.0.ll

diff --git a/llvm/lib/Target/DirectX/DXILMetadata.cpp b/llvm/lib/Target/DirectX/DXILMetadata.cpp
index 2d94490a7f24c3..03758dc76e7eb0 100644
--- a/llvm/lib/Target/DirectX/DXILMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILMetadata.cpp
@@ -40,6 +40,15 @@ void ValidatorVersionMD::update(VersionTuple ValidatorVer) {
 
 bool ValidatorVersionMD::isEmpty() { return Entry->getNumOperands() == 0; }
 
+VersionTuple ValidatorVersionMD::getAsVersionTuple() {
+  if (isEmpty())
+    return VersionTuple(1, 0);
+  auto *ValVerMD = cast<MDNode>(Entry->getOperand(0));
+  auto *MajorMD = mdconst::extract<ConstantInt>(ValVerMD->getOperand(0));
+  auto *MinorMD = mdconst::extract<ConstantInt>(ValVerMD->getOperand(1));
+  return VersionTuple(MajorMD->getZExtValue(), MinorMD->getZExtValue());
+}
+
 static StringRef getShortShaderStage(Triple::EnvironmentType Env) {
   switch (Env) {
   case Triple::Pixel:
diff --git a/llvm/lib/Target/DirectX/DXILMetadata.h b/llvm/lib/Target/DirectX/DXILMetadata.h
index 2f5d7d9fe7683d..cd9f4c83fbd0f8 100644
--- a/llvm/lib/Target/DirectX/DXILMetadata.h
+++ b/llvm/lib/Target/DirectX/DXILMetadata.h
@@ -30,6 +30,7 @@ class ValidatorVersionMD {
   void update(VersionTuple ValidatorVer);
 
   bool isEmpty();
+  VersionTuple getAsVersionTuple();
 };
 
 void createShaderModelMD(Module &M);
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 026911946b47f0..24be644d9fc0e4 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -11,10 +11,14 @@
 /// Language (DXIL).
 //===----------------------------------------------------------------------===//
 
+#include "DXILMetadata.h"
+#include "DXILResourceAnalysis.h"
+#include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "DirectXIRPasses/PointerTypeAnalysis.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/IRBuilder.h"
@@ -23,6 +27,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/VersionTuple.h"
 
 #define DEBUG_TYPE "dxil-prepare"
 
@@ -80,6 +85,37 @@ constexpr bool isValidForDXIL(Attribute::AttrKind Attr) {
                       Attr);
 }
 
+static void collectDeadStringAttrs(AttributeMask &DeadAttrs, AttributeSet &&AS,
+                                   const StringSet<> &LiveKeys,
+                                   bool AllowExperimental) {
+  for (auto &Attr : AS) {
+    if (!Attr.isStringAttribute())
+      continue;
+    StringRef Key = Attr.getKindAsString();
+    if (LiveKeys.contains(Key))
+      continue;
+    if (AllowExperimental && Key.starts_with("exp-"))
+      continue;
+    DeadAttrs.addAttribute(Key);
+  }
+}
+
+static void removeStringFunctionAttributes(Function &F,
+                                           bool AllowExperimental) {
+  AttributeList Attrs = F.getAttributes();
+  const StringSet<> LiveKeys = {"waveops-include-helper-lanes",
+                                "fp32-denorm-mode"};
+  // Collect DeadKeys in FnAttrs.
+  AttributeMask DeadAttrs;
+  collectDeadStringAttrs(DeadAttrs, Attrs.getFnAttrs(), LiveKeys,
+                         AllowExperimental);
+  collectDeadStringAttrs(DeadAttrs, Attrs.getRetAttrs(), LiveKeys,
+                         AllowExperimental);
+
+  F.removeFnAttrs(DeadAttrs);
+  F.removeRetAttrs(DeadAttrs);
+}
+
 class DXILPrepareModule : public ModulePass {
 
   static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
@@ -110,9 +146,18 @@ class DXILPrepareModule : public ModulePass {
       if (!isValidForDXIL(I))
         AttrMask.addAttribute(I);
     }
+
+    dxil::ValidatorVersionMD ValVerMD(M);
+    VersionTuple ValVer = ValVerMD.getAsVersionTuple();
+    bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
+
     for (auto &F : M.functions()) {
       F.removeFnAttrs(AttrMask);
       F.removeRetAttrs(AttrMask);
+      // Remove string attributes that are not allowed in DXIL. Experimental
+      // ("exp-") attributes are preserved when the validator version is 0.0,
+      // i.e. for experiment mode.
+      removeStringFunctionAttributes(F, SkipValidation);
       for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
         F.removeParamAttrs(Idx, AttrMask);
 
@@ -172,7 +217,10 @@ class DXILPrepareModule : public ModulePass {
   }
 
   DXILPrepareModule() : ModulePass(ID) {}
-
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addPreserved<ShaderFlagsAnalysisWrapper>();
+    AU.addPreserved<DXILResourceWrapper>();
+  }
   static char ID; // Pass identification.
 };
 char DXILPrepareModule::ID = 0;
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index bebca0675522f3..c853393e4282a4 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -79,8 +79,8 @@ class DirectXPassConfig : public TargetPassConfig {
   void addCodeGenPrepare() override {
     addPass(createDXILIntrinsicExpansionLegacyPass());
     addPass(createDXILOpLoweringLegacyPass());
-    addPass(createDXILPrepareModulePass());
     addPass(createDXILTranslateMetadataPass());
+    addPass(createDXILPrepareModulePass());
   }
 };
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs-val-ver-0.0.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs-val-ver-0.0.ll
new file mode 100644
index 00000000000000..a85dc43ac2f6c2
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs-val-ver-0.0.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -dxil-prepare  %s | FileCheck %s 
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @entry() #0 {
+entry:
+  ret void
+}
+
+; Make sure the experimental attribute is kept when the validator version is 0.0.
+; CHECK:attributes #0 = { noinline nounwind "exp-shader"="cs" } 
+attributes #0 = { noinline nounwind "exp-shader"="cs" "hlsl.numthreads"="1,2,1" "hlsl.shader"="compute" }
+
+!dx.valver = !{!0}
+
+!0 = !{i32 0, i32 0}
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll
index be4b46f22ef25f..343f190d994f0d 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll
@@ -1,4 +1,6 @@
 ; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-prepare  %s | FileCheck %s  --check-prefix=REMOVE_EXTRA_ATTRIBUTE
+
 target triple = "dxil-pc-shadermodel6.6-compute"
 
 ; CHECK: !dx.shaderModel = !{![[SM:[0-9]+]]}
@@ -9,4 +11,7 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline nounwind "hlsl.numthreads"="1,2,1" "hlsl.shader"="compute" }
+; Make sure extra attributes like hlsl.numthreads are removed, and that the
+; experimental attribute is removed when the validator version is not 0.0.
+; REMOVE_EXTRA_ATTRIBUTE:attributes #0 = { noinline nounwind } 
+attributes #0 = { noinline nounwind "exp-shader"="cs" "hlsl.numthreads"="1,2,1" "hlsl.shader"="compute" }
diff --git a/llvm/test/tools/dxil-dis/attribute-filter.ll b/llvm/test/tools/dxil-dis/attribute-filter.ll
index 432a5a1b71018c..27590e10d79b54 100644
--- a/llvm/test/tools/dxil-dis/attribute-filter.ll
+++ b/llvm/test/tools/dxil-dis/attribute-filter.ll
@@ -19,8 +19,8 @@ define float @fma2(float %0, float %1, float %2) #1 {
   ret float %5
 }
 
-; CHECK: attributes #0 = { nounwind readnone "disable-tail-calls"="false" }
-attributes #0 = { norecurse nounwind readnone willreturn "disable-tail-calls"="false" }
+; CHECK: attributes #0 = { nounwind readnone "fp32-denorm-mode"="any" "waveops-include-helper-lanes" }
+attributes #0 = { norecurse nounwind readnone willreturn "disable-tail-calls"="false" "waveops-include-helper-lanes" "fp32-denorm-mode"="any" }
 
-; CHECK: attributes #1 = { readnone "disable-tail-calls"="false" }
-attributes #1 = { norecurse memory(none) willreturn "disable-tail-calls"="false" }
+; CHECK: attributes #1 = { readnone "fp32-denorm-mode"="ftz" "waveops-include-helper-lanes" }
+attributes #1 = { norecurse memory(none) willreturn "disable-tail-calls"="false" "waveops-include-helper-lanes" "fp32-denorm-mode"="ftz" }

From 9a28814f59e8f52cc63ae3d17023cee8348d9b53 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas 
Date: Tue, 7 May 2024 16:23:37 -0400
Subject: [PATCH 20/64] [PowerPC] Spill non-volatile registers required for
 traceback table (#71115)

On AIX we need to spill all [rfv]N-[rfv]31 when a function clobbers
[rfv]N so that the traceback table contains accurate information.
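
(Editorial sketch, not part of this change: the traceback-table rule just
means "find the lowest clobbered callee-saved register in each class and
mark everything from it up to 31 as saved". A tiny standalone illustration
with made-up register numbers, independent of the PPC backend:)

  // Sketch: given clobbered CSR numbers 0..31 within one register class,
  // compute the full set that must be spilled for the traceback table.
  #include <algorithm>
  #include <bitset>
  #include <iostream>
  #include <vector>

  static std::bitset<32> computeSpillSet(const std::vector<int> &Clobbered) {
    std::bitset<32> Saved;
    if (Clobbered.empty())
      return Saved;
    int Lowest = *std::min_element(Clobbered.begin(), Clobbered.end());
    for (int R = Lowest; R <= 31; ++R)
      Saved.set(R); // everything from the lowest clobbered CSR through 31
    return Saved;
  }

  int main() {
    // A function clobbering r14, r25 and r31 must save r14..r31.
    std::bitset<32> Saved = computeSpillSet({14, 25, 31});
    for (int R = 0; R < 32; ++R)
      if (Saved.test(R))
        std::cout << "save r" << R << '\n';
  }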
---
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  |   59 +
 llvm/lib/Target/PowerPC/PPCFrameLowering.h    |    1 +
 .../CodeGen/PowerPC/aix-csr-vector-extabi.ll  | 1199 +++++++++++++----
 llvm/test/CodeGen/PowerPC/aix-csr-vector.ll   |  198 ++-
 llvm/test/CodeGen/PowerPC/aix-csr.ll          |  809 +++++++++--
 .../test/CodeGen/PowerPC/aix-spills-for-eh.ll |  301 +++++
 llvm/test/CodeGen/PowerPC/aix32-crsave.mir    |   34 +-
 .../CodeGen/PowerPC/ppc-shrink-wrapping.ll    |   24 +-
 llvm/test/CodeGen/PowerPC/ppc64-crsave.mir    |  105 +-
 9 files changed, 2237 insertions(+), 493 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-spills-for-eh.ll

diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 04e9f9e2366edd..8444266459c433 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1966,6 +1966,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             BitVector &SavedRegs,
                                             RegScavenger *RS) const {
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+  if (Subtarget.isAIXABI())
+    updateCalleeSaves(MF, SavedRegs);
 
   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
 
@@ -2725,6 +2727,63 @@ bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
 }
 
+void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
+                                         BitVector &SavedRegs) const {
+  // The AIX ABI uses traceback tables for EH which require that if callee-saved
+  // register N is used, all registers N-31 must be saved/restored.
+  // NOTE: The check for AIX is not actually what is relevant. Traceback tables
+  // on Linux have the same requirements. It is just that AIX is the only ABI
+  // for which we actually use traceback tables. If another ABI needs to be
+  // supported that also uses them, we can add a check such as
+  // Subtarget.usesTraceBackTables().
+  assert(Subtarget.isAIXABI() &&
+         "Function updateCalleeSaves should only be called for AIX.");
+
+  // If there are no callee saves then there is nothing to do.
+  if (SavedRegs.none())
+    return;
+
+  const MCPhysReg *CSRegs =
+      Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF);
+  MCPhysReg LowestGPR = PPC::R31;
+  MCPhysReg LowestG8R = PPC::X31;
+  MCPhysReg LowestFPR = PPC::F31;
+  MCPhysReg LowestVR = PPC::V31;
+
+  // Traverse the CSRs twice so as not to rely on ascending ordering of
+  // registers in the array. The first pass finds the lowest numbered
+  // register and the second pass marks all higher numbered registers
+  // for spilling.
+  for (int i = 0; CSRegs[i]; i++) {
+    // Get the lowest numbered register for each class that actually needs
+    // to be saved.
+    MCPhysReg Cand = CSRegs[i];
+    if (!SavedRegs.test(Cand))
+      continue;
+    if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR)
+      LowestGPR = Cand;
+    else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R)
+      LowestG8R = Cand;
+    else if ((PPC::F4RCRegClass.contains(Cand) ||
+              PPC::F8RCRegClass.contains(Cand)) &&
+             Cand < LowestFPR)
+      LowestFPR = Cand;
+    else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR)
+      LowestVR = Cand;
+  }
+
+  for (int i = 0; CSRegs[i]; i++) {
+    MCPhysReg Cand = CSRegs[i];
+    if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) ||
+        (PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) ||
+        ((PPC::F4RCRegClass.contains(Cand) ||
+          PPC::F8RCRegClass.contains(Cand)) &&
+         Cand > LowestFPR) ||
+        (PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR))
+      SavedRegs.set(Cand);
+  }
+}
+
 uint64_t PPCFrameLowering::getStackThreshold() const {
   // On PPC64, we use `stux r1, r1, ` to extend the stack;
   // use `add r1, r1, ` to release the stack frame.
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index e19087ce0e1869..d74c87428326ca 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -173,6 +173,7 @@ class PPCFrameLowering: public TargetFrameLowering {
   /// function prologue/epilogue.
   bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
   bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+  void updateCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const;
 
   uint64_t getStackThreshold() const override;
 };
diff --git a/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll b/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll
index 67397e4adf4e79..b99ef4904d5496 100644
--- a/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll
@@ -23,92 +23,259 @@ entry:
 
 ; MIR32:         name:            vec_regs
 
-; MIR32-LABEL:   fixedStack:
-; MIR32-NEXT:    - { id: 0, type: spill-slot, offset: -16, size: 16, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 1, type: spill-slot, offset: -96, size: 16, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 2, type: spill-slot, offset: -192, size: 16, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$v20', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
+; MIR32-LABEL:  fixedStack:
+; MIR32-NEXT:     - { id: 0, type: spill-slot, offset: -16, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 1, type: spill-slot, offset: -32, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 2, type: spill-slot, offset: -48, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 3, type: spill-slot, offset: -64, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 4, type: spill-slot, offset: -80, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 5, type: spill-slot, offset: -96, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 6, type: spill-slot, offset: -112, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 7, type: spill-slot, offset: -128, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 8, type: spill-slot, offset: -144, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 9, type: spill-slot, offset: -160, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:         callee-saved-register: '$v22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:         debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 10, type: spill-slot, offset: -176, size: 16, alignment: 16,
+; MIR32-NEXT:         stack-id: default, callee-saved-register: '$v21', callee-saved-restored: true,
+; MIR32-NEXT:         debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:     - { id: 11, type: spill-slot, offset: -192, size: 16, alignment: 16,
+; MIR32-NEXT:         stack-id: default, callee-saved-register: '$v20', callee-saved-restored: true,
+; MIR32-NEXT:         debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
 ; MIR32-NEXT:    stack:
 
-; MIR32:         liveins: $v20, $v26, $v31
+; MIR32: liveins: $v20, $v21, $v22, $v23, $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31
 
-; MIR32-DAG:     STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
-; MIR32-DAG:     STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
+; MIR32-DAG:     STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.11)
+; MIR32-DAG:     STXVD2X killed $v21, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.10)
+; MIR32-DAG:     STXVD2X killed $v22, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.9)
+; MIR32-DAG:     STXVD2X killed $v23, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.8)
+; MIR32-DAG:     STXVD2X killed $v24, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.7)
+; MIR32-DAG:     STXVD2X killed $v25, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.6)
+; MIR32-DAG:     STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.5)
+; MIR32-DAG:     STXVD2X killed $v27, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.4)
+; MIR32-DAG:     STXVD2X killed $v28, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.3)
+; MIR32-DAG:     STXVD2X killed $v29, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
+; MIR32-DAG:     STXVD2X killed $v30, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
 ; MIR32-DAG:     STXVD2X killed $v31, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.0)
 
 ; MIR32:         INLINEASM
 
-; MIR32-DAG:     $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
-; MIR32-DAG:     $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
 ; MIR32-DAG:     $v31 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.0)
+; MIR32-DAG:     $v30 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
+; MIR32-DAG:     $v29 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
+; MIR32-DAG:     $v28 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.3)
+; MIR32-DAG:     $v27 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.4)
+; MIR32-DAG:     $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.5)
+; MIR32-DAG:     $v25 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.6)
+; MIR32-DAG:     $v24 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.7)
+; MIR32-DAG:     $v23 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.8)
+; MIR32-DAG:     $v22 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.9)
+; MIR32-DAG:     $v21 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.10)
+; MIR32-DAG:     $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.11)
 ; MIR32:         BLR implicit $lr, implicit $rm
 
 ; MIR64:         name:            vec_regs
 
 ; MIR64-LABEL:   fixedStack:
-; MIR64-NEXT:    - { id: 0, type: spill-slot, offset: -16, size: 16, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 1, type: spill-slot, offset: -96, size: 16, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 2, type: spill-slot, offset: -192, size: 16, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$v20', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 0, type: spill-slot, offset: -16, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 1, type: spill-slot, offset: -32, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 2, type: spill-slot, offset: -48, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 3, type: spill-slot, offset: -64, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 4, type: spill-slot, offset: -80, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 5, type: spill-slot, offset: -96, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 6, type: spill-slot, offset: -112, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 7, type: spill-slot, offset: -128, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 8, type: spill-slot, offset: -144, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 9, type: spill-slot, offset: -160, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 10, type: spill-slot, offset: -176, size: 16, alignment: 16,
+; MIR64-DAG:           stack-id: default, callee-saved-register: '$v21', callee-saved-restored: true,
+; MIR64-DAG:           debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 11, type: spill-slot, offset: -192, size: 16, alignment: 16,
+; MIR64-DAG:           stack-id: default, callee-saved-register: '$v20', callee-saved-restored: true,
+; MIR64-DAG:           debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
 ; MIR64-NEXT:    stack:
 
-; MIR64:         liveins: $v20, $v26, $v31
+; MIR64: liveins: $v20, $v21, $v22, $v23, $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31
 
-; MIR64-DAG:     STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
-; MIR64-DAG:     STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
-; MIR64-DAG:     STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.0)
+; MIR64-DAG:   STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.11)
+; MIR64-DAG:   STXVD2X killed $v21, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.10)
+; MIR64-DAG:   STXVD2X killed $v22, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.9)
+; MIR64-DAG:   STXVD2X killed $v23, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.8)
+; MIR64-DAG:   STXVD2X killed $v24, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.7)
+; MIR64-DAG:   STXVD2X killed $v25, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.6)
+; MIR64-DAG:   STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.5)
+; MIR64-DAG:   STXVD2X killed $v27, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.4)
+; MIR64-DAG:   STXVD2X killed $v28, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.3)
+; MIR64-DAG:   STXVD2X killed $v29, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
+; MIR64-DAG:   STXVD2X killed $v30, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
+; MIR64-DAG:   STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.0)
 
-; MIR64:         INLINEASM
+; MIR64:       INLINEASM
 
-; MIR64-DAG:     $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
-; MIR64-DAG:     $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
-; MIR64-DAG:     $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.0)
-; MIR64:         BLR8 implicit $lr8, implicit $rm
+; MIR64-DAG:   $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.0)
+; MIR64-DAG:   $v30 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
+; MIR64-DAG:   $v29 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
+; MIR64-DAG:   $v28 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.3)
+; MIR64-DAG:   $v27 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.4)
+; MIR64-DAG:   $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.5)
+; MIR64-DAG:   $v25 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.6)
+; MIR64-DAG:   $v24 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.7)
+; MIR64-DAG:   $v23 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.8)
+; MIR64-DAG:   $v22 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.9)
+; MIR64-DAG:   $v21 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.10)
+; MIR64-DAG:   $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.11)
+; MIR64:       BLR8 implicit $lr8, implicit $rm
 
 
 ; ASM32-LABEL:   .vec_regs:
 
-; ASM32:         li {{[0-9]+}}, -192
-; ASM32-DAG:     stxvd2x 52, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM32-DAG:     li {{[0-9]+}}, -96
-; ASM32-DAG:     stxvd2x 58, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM32-DAG:     li {{[0-9]+}}, -16
-; ASM32-DAG:     stxvd2x 63, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM32:         #APP
-; ASM32-DAG:     #NO_APP
-; ASM32-DAG:     lxvd2x 63, 1, {{[0-9]+}}       # 16-byte Folded Reload
-; ASM32-DAG:     li {{[0-9]+}}, -96
-; ASM32-DAG:     lxvd2x 58, 1, {{[0-9]+}}       # 16-byte Folded Reload
-; ASM32-DAG:     li {{[0-9]+}}, -192
-; ASM32-DAG:     lxvd2x 52, 1, {{[0-9]+}}       # 16-byte Folded Reload
-; ASM32:         blr
+; ASM32-DAG:       li [[FIXEDSTACK11:[0-9]+]], -192
+; ASM32-DAG:       stxvd2x 52, 1, [[FIXEDSTACK11]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK10:[0-9]+]], -176
+; ASM32-DAG:       stxvd2x 53, 1, [[FIXEDSTACK10]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK9:[0-9]+]], -160
+; ASM32-DAG:       stxvd2x 54, 1, [[FIXEDSTACK9]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK8:[0-9]+]], -144
+; ASM32-DAG:       stxvd2x 55, 1, [[FIXEDSTACK8]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK7:[0-9]+]], -128
+; ASM32-DAG:       stxvd2x 56, 1, [[FIXEDSTACK7]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK6:[0-9]+]], -112
+; ASM32-DAG:       stxvd2x 57, 1, [[FIXEDSTACK6]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK5:[0-9]+]], -96
+; ASM32-DAG:       stxvd2x 58, 1, [[FIXEDSTACK5]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK4:[0-9]+]], -80
+; ASM32-DAG:       stxvd2x 59, 1, [[FIXEDSTACK4]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK3:[0-9]+]], -64
+; ASM32-DAG:       stxvd2x 60, 1, [[FIXEDSTACK3]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK2:[0-9]+]], -48
+; ASM32-DAG:       stxvd2x 61, 1, [[FIXEDSTACK2]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK1:[0-9]+]], -32
+; ASM32-DAG:       stxvd2x 62, 1, [[FIXEDSTACK1]]                       # 16-byte Folded Spill
+; ASM32-DAG:       li [[FIXEDSTACK0:[0-9]+]], -16
+; ASM32-DAG:       stxvd2x 63, 1, [[FIXEDSTACK0]]                       # 16-byte Folded Spill
+
+; ASM32:           #APP
+; ASM32-NEXT:      #NO_APP
+
+; ASM32-DAG:       lxvd2x 63, 1, [[FIXEDSTACK0]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK1:[0-9]+]], -32
+; ASM32-DAG:       lxvd2x 62, 1, [[FIXEDSTACK1]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK2:[0-9]+]], -48
+; ASM32-DAG:       lxvd2x 61, 1, [[FIXEDSTACK2]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK3:[0-9]+]], -64
+; ASM32-DAG:       lxvd2x 60, 1, [[FIXEDSTACK3]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK4:[0-9]+]], -80
+; ASM32-DAG:       lxvd2x 59, 1, [[FIXEDSTACK4]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK5:[0-9]+]], -96
+; ASM32-DAG:       lxvd2x 58, 1, [[FIXEDSTACK5]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK6:[0-9]+]], -112
+; ASM32-DAG:       lxvd2x 57, 1, [[FIXEDSTACK6]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK7:[0-9]+]], -128
+; ASM32-DAG:       lxvd2x 56, 1, [[FIXEDSTACK7]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK8:[0-9]+]], -144
+; ASM32-DAG:       lxvd2x 55, 1, [[FIXEDSTACK8]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK9:[0-9]+]], -160
+; ASM32-DAG:       lxvd2x 54, 1, [[FIXEDSTACK9]]                        # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK10:[0-9]+]], -176
+; ASM32-DAG:       lxvd2x 53, 1, [[FIXEDSTACK10]]                       # 16-byte Folded Reload
+; ASM32-DAG:       li [[FIXEDSTACK11:[0-9]+]], -192
+; ASM32-DAG:       lxvd2x 52, 1, [[FIXEDSTACK11]]                       # 16-byte Folded Reload
+; ASM32:           blr
 
 ; ASM64-LABEL:   .vec_regs:
 
-; ASM64-DAG:     li {{[0-9]+}}, -192
-; ASM64-DAG:     stxvd2x 52, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM64-DAG:     li {{[0-9]+}}, -96
-; ASM64-DAG:     stxvd2x 58, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM64-DAG:     li {{[0-9]+}}, -16
-; ASM64-DAG:     stxvd2x {{[0-9]+}}, 1, {{[0-9]+}}      # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK11:[0-9]+]], -192
+; ASM64-DAG:       stxvd2x 52, 1, [[FIXEDSTACK11]]                   # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK10:[0-9]+]], -176
+; ASM64-DAG:       stxvd2x 53, 1, [[FIXEDSTACK10]]                   # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK9:[0-9]+]], -160
+; ASM64-DAG:       stxvd2x 54, 1, [[FIXEDSTACK9]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK8:[0-9]+]], -144
+; ASM64-DAG:       stxvd2x 55, 1, [[FIXEDSTACK8]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK7:[0-9]+]], -128
+; ASM64-DAG:       stxvd2x 56, 1, [[FIXEDSTACK7]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK6:[0-9]+]], -112
+; ASM64-DAG:       stxvd2x 57, 1, [[FIXEDSTACK6]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK5:[0-9]+]], -96
+; ASM64-DAG:       stxvd2x 58, 1, [[FIXEDSTACK5]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK4:[0-9]+]], -80
+; ASM64-DAG:       stxvd2x 59, 1, [[FIXEDSTACK4]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK3:[0-9]+]], -64
+; ASM64-DAG:       stxvd2x 60, 1, [[FIXEDSTACK3]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK2:[0-9]+]], -48
+; ASM64-DAG:       stxvd2x 61, 1, [[FIXEDSTACK2]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK1:[0-9]+]], -32
+; ASM64-DAG:       stxvd2x 62, 1, [[FIXEDSTACK1]]                    # 16-byte Folded Spill
+; ASM64-DAG:       li [[FIXEDSTACK0:[0-9]+]], -16
+; ASM64-DAG:       stxvd2x 63, 1, [[FIXEDSTACK0]]                    # 16-byte Folded Spill
+
 ; ASM64-DAG:     #APP
 ; ASM64-DAG:     #NO_APP
-; ASM64-DAG:     lxvd2x {{[0-9]+}}, 1, {{[0-9]+}}       # 16-byte Folded Reload
-; ASM64-DAG:     li {{[0-9]+}}, -96
-; ASM64-DAG:     lxvd2x 58, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM64-DAG:     li {{[0-9]+}}, -192
-; ASM64-DAG:     lxvd2x 52, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM64-DAG:     blr
+
+; ASM64-DAG:     lxvd2x 63, 1, [[FIXEDSTACK0]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK1:[0-9]+]], -32
+; ASM64-DAG:     lxvd2x 62, 1, [[FIXEDSTACK1]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK2:[0-9]+]], -48
+; ASM64-DAG:     lxvd2x 61, 1, [[FIXEDSTACK2]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK3:[0-9]+]], -64
+; ASM64-DAG:     lxvd2x 60, 1, [[FIXEDSTACK3]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK4:[0-9]+]], -80
+; ASM64-DAG:     lxvd2x 59, 1, [[FIXEDSTACK4]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK5:[0-9]+]], -96
+; ASM64-DAG:     lxvd2x 58, 1, [[FIXEDSTACK5]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK6:[0-9]+]], -112
+; ASM64-DAG:     lxvd2x 57, 1, [[FIXEDSTACK6]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK7:[0-9]+]], -128
+; ASM64-DAG:     lxvd2x 56, 1, [[FIXEDSTACK7]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK8:[0-9]+]], -144
+; ASM64-DAG:     lxvd2x 55, 1, [[FIXEDSTACK8]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK9:[0-9]+]], -160
+; ASM64-DAG:     lxvd2x 54, 1, [[FIXEDSTACK9]]                         # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK10:[0-9]+]], -176
+; ASM64-DAG:     lxvd2x 53, 1, [[FIXEDSTACK10]]                        # 16-byte Folded Reload
+; ASM64-DAG:     li [[FIXEDSTACK11:[0-9]+]], -192
+; ASM64-DAG:     lxvd2x 52, 1, [[FIXEDSTACK11]]                        # 16-byte Folded Reload
+
+; ASM64:         blr
 
 define dso_local void @fprs_gprs_vecregs() {
   call void asm sideeffect "", "~{r14},~{r25},~{r31},~{f14},~{f21},~{f31},~{v20},~{v26},~{v31}"()
@@ -118,191 +285,767 @@ define dso_local void @fprs_gprs_vecregs() {
 ; MIR32:         name:            fprs_gprs_vecregs
 
 ; MIR32-LABEL:   fixedStack:
-; MIR32-NEXT:    - { id: 0, type: spill-slot, offset: -240, size: 16, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 1, type: spill-slot, offset: -320, size: 16, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 2, type: spill-slot, offset: -416, size: 16, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$v20', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 3, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 4, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 5, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 6, type: spill-slot, offset: -148, size: 4, alignment: 4, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 7, type: spill-slot, offset: -172, size: 4, alignment: 4, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r25', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 8, type: spill-slot, offset: -216, size: 4, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 0, type: spill-slot, offset: -240, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 1, type: spill-slot, offset: -256, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 2, type: spill-slot, offset: -272, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 3, type: spill-slot, offset: -288, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 4, type: spill-slot, offset: -304, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 5, type: spill-slot, offset: -320, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 6, type: spill-slot, offset: -336, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 7, type: spill-slot, offset: -352, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 8, type: spill-slot, offset: -368, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 9, type: spill-slot, offset: -384, size: 16, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$v22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 10, type: spill-slot, offset: -400, size: 16, alignment: 16,
+; MIR32-NEXT:          stack-id: default, callee-saved-register: '$v21', callee-saved-restored: true,
+; MIR32-NEXT:          debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 11, type: spill-slot, offset: -416, size: 16, alignment: 16,
+; MIR32-NEXT:          stack-id: default, callee-saved-register: '$v20', callee-saved-restored: true,
+; MIR32-NEXT:          debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 12, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 13, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 14, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 15, type: spill-slot, offset: -32, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 16, type: spill-slot, offset: -40, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 17, type: spill-slot, offset: -48, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 18, type: spill-slot, offset: -56, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 19, type: spill-slot, offset: -64, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 20, type: spill-slot, offset: -72, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 21, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 22, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 23, type: spill-slot, offset: -96, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f20', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 24, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 25, type: spill-slot, offset: -112, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f18', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 26, type: spill-slot, offset: -120, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f17', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 27, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f16', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 28, type: spill-slot, offset: -136, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f15', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 29, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 30, type: spill-slot, offset: -148, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 31, type: spill-slot, offset: -152, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 32, type: spill-slot, offset: -156, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 33, type: spill-slot, offset: -160, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 34, type: spill-slot, offset: -164, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 35, type: spill-slot, offset: -168, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 36, type: spill-slot, offset: -172, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 37, type: spill-slot, offset: -176, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 38, type: spill-slot, offset: -180, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 39, type: spill-slot, offset: -184, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 40, type: spill-slot, offset: -188, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r21', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 41, type: spill-slot, offset: -192, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r20', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 42, type: spill-slot, offset: -196, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r19', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 43, type: spill-slot, offset: -200, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r18', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 44, type: spill-slot, offset: -204, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r17', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 45, type: spill-slot, offset: -208, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r16', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 46, type: spill-slot, offset: -212, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r15', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:      - { id: 47, type: spill-slot, offset: -216, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:          callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:          debug-info-expression: '', debug-info-location: '' }
 ; MIR32-NEXT:    stack:
 
-; MIR32:         liveins: $r14, $r25, $r31, $f14, $f21, $f31, $v20, $v26, $v31
-
-; MIR32-DAG:     STW killed $r14, 232, $r1 :: (store (s32) into %fixed-stack.8, align 8)
-; MIR32-DAG:     STW killed $r25, 276, $r1 :: (store (s32) into %fixed-stack.7)
-; MIR32-DAG:     STW killed $r31, 300, $r1 :: (store (s32) into %fixed-stack.6)
-; MIR32-DAG:     STFD killed $f14, 304, $r1 :: (store (s64) into %fixed-stack.5, align 16)
-; MIR32-DAG:     STFD killed $f21, 360, $r1 :: (store (s64) into %fixed-stack.4)
-; MIR32-DAG:     STFD killed $f31, 440, $r1 :: (store (s64) into %fixed-stack.3)
-; MIR32-DAG:     $r{{[0-9]+}} = LI 32
-; MIR32-DAG:     STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
-; MIR32-DAG:     $r{{[0-9]+}} = LI 128
-; MIR32-DAG:     STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
-; MIR32-DAG:     $r{{[0-9]+}} = LI 208
+; MIR32:         liveins: $r14, $r15, $r16, $r17, $r18, $r19, $r20, $r21, $r22, $r23, $r24, $r25, $r26, $r27, $r28, $r29, $r30, $r31, $f14, $f15, $f16, $f17, $f18, $f19, $f20, $f21, $f22, $f23, $f24, $f25, $f26, $f27, $f28, $f29, $f30, $f31, $v20, $v21, $v22, $v23, $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31
+
+; MIR32-DAG:     STW killed $r14, 232, $r1 :: (store (s32) into %fixed-stack.47, align 8)
+; MIR32-DAG:     STW killed $r15, 236, $r1 :: (store (s32) into %fixed-stack.46)
+; MIR32-DAG:     STW killed $r16, 240, $r1 :: (store (s32) into %fixed-stack.45, align 16)
+; MIR32-DAG:     STW killed $r17, 244, $r1 :: (store (s32) into %fixed-stack.44)
+; MIR32-DAG:     STW killed $r18, 248, $r1 :: (store (s32) into %fixed-stack.43, align 8)
+; MIR32-DAG:     STW killed $r19, 252, $r1 :: (store (s32) into %fixed-stack.42)
+; MIR32-DAG:     STW killed $r20, 256, $r1 :: (store (s32) into %fixed-stack.41, align 16)
+; MIR32-DAG:     STW killed $r21, 260, $r1 :: (store (s32) into %fixed-stack.40)
+; MIR32-DAG:     STW killed $r22, 264, $r1 :: (store (s32) into %fixed-stack.39, align 8)
+; MIR32-DAG:     STW killed $r23, 268, $r1 :: (store (s32) into %fixed-stack.38)
+; MIR32-DAG:     STW killed $r24, 272, $r1 :: (store (s32) into %fixed-stack.37, align 16)
+; MIR32-DAG:     STW killed $r25, 276, $r1 :: (store (s32) into %fixed-stack.36)
+; MIR32-DAG:     STW killed $r26, 280, $r1 :: (store (s32) into %fixed-stack.35, align 8)
+; MIR32-DAG:     STW killed $r27, 284, $r1 :: (store (s32) into %fixed-stack.34)
+; MIR32-DAG:     STW killed $r28, 288, $r1 :: (store (s32) into %fixed-stack.33, align 16)
+; MIR32-DAG:     STW killed $r29, 292, $r1 :: (store (s32) into %fixed-stack.32)
+; MIR32-DAG:     STW killed $r30, 296, $r1 :: (store (s32) into %fixed-stack.31, align 8)
+; MIR32-DAG:     STW killed $r31, 300, $r1 :: (store (s32) into %fixed-stack.30)
+; MIR32-DAG:     STFD killed $f14, 304, $r1 :: (store (s64) into %fixed-stack.29, align 16)
+; MIR32-DAG:     STFD killed $f15, 312, $r1 :: (store (s64) into %fixed-stack.28)
+; MIR32-DAG:     STFD killed $f16, 320, $r1 :: (store (s64) into %fixed-stack.27, align 16)
+; MIR32-DAG:     STFD killed $f17, 328, $r1 :: (store (s64) into %fixed-stack.26)
+; MIR32-DAG:     STFD killed $f18, 336, $r1 :: (store (s64) into %fixed-stack.25, align 16)
+; MIR32-DAG:     STFD killed $f19, 344, $r1 :: (store (s64) into %fixed-stack.24)
+; MIR32-DAG:     STFD killed $f20, 352, $r1 :: (store (s64) into %fixed-stack.23, align 16)
+; MIR32-DAG:     STFD killed $f21, 360, $r1 :: (store (s64) into %fixed-stack.22)
+; MIR32-DAG:     STFD killed $f22, 368, $r1 :: (store (s64) into %fixed-stack.21, align 16)
+; MIR32-DAG:     STFD killed $f23, 376, $r1 :: (store (s64) into %fixed-stack.20)
+; MIR32-DAG:     STFD killed $f24, 384, $r1 :: (store (s64) into %fixed-stack.19, align 16)
+; MIR32-DAG:     STFD killed $f25, 392, $r1 :: (store (s64) into %fixed-stack.18)
+; MIR32-DAG:     STFD killed $f26, 400, $r1 :: (store (s64) into %fixed-stack.17, align 16)
+; MIR32-DAG:     STFD killed $f27, 408, $r1 :: (store (s64) into %fixed-stack.16)
+; MIR32-DAG:     STFD killed $f28, 416, $r1 :: (store (s64) into %fixed-stack.15, align 16)
+; MIR32-DAG:     STFD killed $f29, 424, $r1 :: (store (s64) into %fixed-stack.14)
+; MIR32-DAG:     STFD killed $f30, 432, $r1 :: (store (s64) into %fixed-stack.13, align 16)
+; MIR32-DAG:     STFD killed $f31, 440, $r1 :: (store (s64) into %fixed-stack.12)
+; MIR32-DAG:     STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.11)
+; MIR32-DAG:     STXVD2X killed $v21, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.10)
+; MIR32-DAG:     STXVD2X killed $v22, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.9)
+; MIR32-DAG:     STXVD2X killed $v23, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.8)
+; MIR32-DAG:     STXVD2X killed $v24, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.7)
+; MIR32-DAG:     STXVD2X killed $v25, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.6)
+; MIR32-DAG:     STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.5)
+; MIR32-DAG:     STXVD2X killed $v27, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.4)
+; MIR32-DAG:     STXVD2X killed $v28, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.3)
+; MIR32-DAG:     STXVD2X killed $v29, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
+; MIR32-DAG:     STXVD2X killed $v30, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
 ; MIR32-DAG:     STXVD2X killed $v31, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.0)
-; MIR32-DAG:     $r1 = STWU $r1, -448, $r1
 
 ; MIR32:         INLINEASM
 
-; MIR32-DAG:     $r14 = LWZ 232, $r1 :: (load (s32) from %fixed-stack.8, align 8)
-; MIR32-DAG:     $r25 = LWZ 276, $r1 :: (load (s32) from %fixed-stack.7)
-; MIR32-DAG:     $r31 = LWZ 300, $r1 :: (load (s32) from %fixed-stack.6)
-; MIR32-DAG:     $f14 = LFD 304, $r1 :: (load (s64) from %fixed-stack.5, align 16)
-; MIR32-DAG:     $f21 = LFD 360, $r1 :: (load (s64) from %fixed-stack.4)
-; MIR32-DAG:     $f31 = LFD 440, $r1 :: (load (s64) from %fixed-stack.3)
-; MIR32-DAG:     $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
-; MIR32-DAG:     $r{{[0-9]+}} = LI 32
-; MIR32-DAG:     $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
-; MIR32-DAG:     $r{{[0-9]+}} = LI 128
-; MIR32-DAG:     $v31 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.0)
-; MIR32-DAG:     $r{{[0-9]+}} = LI 208
-; MIR32-DAG:     $r1 = ADDI $r1, 448
-; MIR32-DAG:     BLR implicit $lr, implicit $rm
+; MIR32-DAG:     $v31 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.0)
+; MIR32-DAG:     $v30 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.1)
+; MIR32-DAG:     $v29 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.2)
+; MIR32-DAG:     $v28 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.3)
+; MIR32-DAG:     $v27 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.4)
+; MIR32-DAG:     $v26 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.5)
+; MIR32-DAG:     $v25 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.6)
+; MIR32-DAG:     $v24 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.7)
+; MIR32-DAG:     $v23 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.8)
+; MIR32-DAG:     $v22 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.9)
+; MIR32-DAG:     $v21 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.10)
+; MIR32-DAG:     $v20 = LXVD2X $r1, killed $r3 :: (load (s128) from %fixed-stack.11)
+; MIR32-DAG:     $f31 = LFD 440, $r1 :: (load (s64) from %fixed-stack.12)
+; MIR32-DAG:     $f30 = LFD 432, $r1 :: (load (s64) from %fixed-stack.13, align 16)
+; MIR32-DAG:     $f29 = LFD 424, $r1 :: (load (s64) from %fixed-stack.14)
+; MIR32-DAG:     $f28 = LFD 416, $r1 :: (load (s64) from %fixed-stack.15, align 16)
+; MIR32-DAG:     $f27 = LFD 408, $r1 :: (load (s64) from %fixed-stack.16)
+; MIR32-DAG:     $f26 = LFD 400, $r1 :: (load (s64) from %fixed-stack.17, align 16)
+; MIR32-DAG:     $f25 = LFD 392, $r1 :: (load (s64) from %fixed-stack.18)
+; MIR32-DAG:     $f24 = LFD 384, $r1 :: (load (s64) from %fixed-stack.19, align 16)
+; MIR32-DAG:     $f23 = LFD 376, $r1 :: (load (s64) from %fixed-stack.20)
+; MIR32-DAG:     $f22 = LFD 368, $r1 :: (load (s64) from %fixed-stack.21, align 16)
+; MIR32-DAG:     $f21 = LFD 360, $r1 :: (load (s64) from %fixed-stack.22)
+; MIR32-DAG:     $f20 = LFD 352, $r1 :: (load (s64) from %fixed-stack.23, align 16)
+; MIR32-DAG:     $f19 = LFD 344, $r1 :: (load (s64) from %fixed-stack.24)
+; MIR32-DAG:     $f18 = LFD 336, $r1 :: (load (s64) from %fixed-stack.25, align 16)
+; MIR32-DAG:     $f17 = LFD 328, $r1 :: (load (s64) from %fixed-stack.26)
+; MIR32-DAG:     $f16 = LFD 320, $r1 :: (load (s64) from %fixed-stack.27, align 16)
+; MIR32-DAG:     $f15 = LFD 312, $r1 :: (load (s64) from %fixed-stack.28)
+; MIR32-DAG:     $f14 = LFD 304, $r1 :: (load (s64) from %fixed-stack.29, align 16)
+; MIR32-DAG:     $r31 = LWZ 300, $r1 :: (load (s32) from %fixed-stack.30)
+; MIR32-DAG:     $r30 = LWZ 296, $r1 :: (load (s32) from %fixed-stack.31, align 8)
+; MIR32-DAG:     $r29 = LWZ 292, $r1 :: (load (s32) from %fixed-stack.32)
+; MIR32-DAG:     $r28 = LWZ 288, $r1 :: (load (s32) from %fixed-stack.33, align 16)
+; MIR32-DAG:     $r27 = LWZ 284, $r1 :: (load (s32) from %fixed-stack.34)
+; MIR32-DAG:     $r26 = LWZ 280, $r1 :: (load (s32) from %fixed-stack.35, align 8)
+; MIR32-DAG:     $r25 = LWZ 276, $r1 :: (load (s32) from %fixed-stack.36)
+; MIR32-DAG:     $r24 = LWZ 272, $r1 :: (load (s32) from %fixed-stack.37, align 16)
+; MIR32-DAG:     $r23 = LWZ 268, $r1 :: (load (s32) from %fixed-stack.38)
+; MIR32-DAG:     $r22 = LWZ 264, $r1 :: (load (s32) from %fixed-stack.39, align 8)
+; MIR32-DAG:     $r21 = LWZ 260, $r1 :: (load (s32) from %fixed-stack.40)
+; MIR32-DAG:     $r20 = LWZ 256, $r1 :: (load (s32) from %fixed-stack.41, align 16)
+; MIR32-DAG:     $r19 = LWZ 252, $r1 :: (load (s32) from %fixed-stack.42)
+; MIR32-DAG:     $r18 = LWZ 248, $r1 :: (load (s32) from %fixed-stack.43, align 8)
+; MIR32-DAG:     $r17 = LWZ 244, $r1 :: (load (s32) from %fixed-stack.44)
+; MIR32-DAG:     $r16 = LWZ 240, $r1 :: (load (s32) from %fixed-stack.45, align 16)
+; MIR32-DAG:     $r15 = LWZ 236, $r1 :: (load (s32) from %fixed-stack.46)
+; MIR32-DAG:     $r14 = LWZ 232, $r1 :: (load (s32) from %fixed-stack.47, align 8)
+; MIR32:         $r1 = ADDI $r1, 448
+; MIR32-NEXT:    BLR implicit $lr, implicit $rm
+
 
 ; MIR64:         name:            fprs_gprs_vecregs
 
 ; MIR64-LABEL:   fixedStack:
-; MIR64-NEXT:    - { id: 0, type: spill-slot, offset: -304, size: 16, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 1, type: spill-slot, offset: -384, size: 16, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 2, type: spill-slot, offset: -480, size: 16, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$v20', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 3, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 4, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 5, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 6, type: spill-slot, offset: -152, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$x31', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 7, type: spill-slot, offset: -200, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$x25', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 8, type: spill-slot, offset: -288, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 0, type: spill-slot, offset: -304, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 1, type: spill-slot, offset: -320, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 2, type: spill-slot, offset: -336, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 3, type: spill-slot, offset: -352, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 4, type: spill-slot, offset: -368, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 5, type: spill-slot, offset: -384, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 6, type: spill-slot, offset: -400, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 7, type: spill-slot, offset: -416, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 8, type: spill-slot, offset: -432, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 9, type: spill-slot, offset: -448, size: 16, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$v22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 10, type: spill-slot, offset: -464, size: 16, alignment: 16,
+; MIR64-DAG:           stack-id: default, callee-saved-register: '$v21', callee-saved-restored: true,
+; MIR64-DAG:           debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 11, type: spill-slot, offset: -480, size: 16, alignment: 16,
+; MIR64-DAG:           stack-id: default, callee-saved-register: '$v20', callee-saved-restored: true,
+; MIR64-DAG:           debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 12, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 13, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 14, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 15, type: spill-slot, offset: -32, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 16, type: spill-slot, offset: -40, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 17, type: spill-slot, offset: -48, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 18, type: spill-slot, offset: -56, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 19, type: spill-slot, offset: -64, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 20, type: spill-slot, offset: -72, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 21, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 22, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 23, type: spill-slot, offset: -96, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f20', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 24, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 25, type: spill-slot, offset: -112, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f18', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 26, type: spill-slot, offset: -120, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f17', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 27, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f16', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 28, type: spill-slot, offset: -136, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f15', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 29, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 30, type: spill-slot, offset: -152, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 31, type: spill-slot, offset: -160, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 32, type: spill-slot, offset: -168, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 33, type: spill-slot, offset: -176, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 34, type: spill-slot, offset: -184, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 35, type: spill-slot, offset: -192, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 36, type: spill-slot, offset: -200, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 37, type: spill-slot, offset: -208, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 38, type: spill-slot, offset: -216, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 39, type: spill-slot, offset: -224, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 40, type: spill-slot, offset: -232, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x21', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 41, type: spill-slot, offset: -240, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x20', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 42, type: spill-slot, offset: -248, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x19', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 43, type: spill-slot, offset: -256, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x18', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 44, type: spill-slot, offset: -264, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x17', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 45, type: spill-slot, offset: -272, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x16', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 46, type: spill-slot, offset: -280, size: 8, alignment: 8, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x15', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
+; MIR64-DAG:       - { id: 47, type: spill-slot, offset: -288, size: 8, alignment: 16, stack-id: default,
+; MIR64-DAG:           callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-DAG:           debug-info-expression: '', debug-info-location: '' }
 ; MIR64-NEXT:    stack:
 
-; MIR64:         liveins: $x14, $x25, $x31, $f14, $f21, $f31, $v20, $v26, $v31
-
-; MIR64-DAG:     $x1 = STDU $x1, -544, $x1
-; MIR64-DAG:     STD killed $x14, 256, $x1 :: (store (s64) into %fixed-stack.8, align 16)
-; MIR64-DAG:     STD killed $x25, 344, $x1 :: (store (s64) into %fixed-stack.7)
-; MIR64-DAG:     STD killed $x31, 392, $x1 :: (store (s64) into %fixed-stack.6)
-; MIR64-DAG:     STFD killed $f14, 400, $x1 :: (store (s64) into %fixed-stack.5, align 16)
-; MIR64-DAG:     STFD killed $f21, 456, $x1 :: (store (s64) into %fixed-stack.4)
-; MIR64-DAG:     STFD killed $f31, 536, $x1 :: (store (s64) into %fixed-stack.3)
-; MIR64-DAG:     $x{{[0-9]+}} = LI8 64
-; MIR64-DAG:     STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
-; MIR64-DAG:     $x{{[0-9]+}} = LI8 160
-; MIR64-DAG:     STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
-; MIR64-DAG:     $x{{[0-9]+}} = LI8 240
-; MIR64-DAG:     STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.0)
+; MIR64:         liveins: $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31, $f14, $f15, $f16, $f17, $f18, $f19, $f20, $f21, $f22, $f23, $f24, $f25, $f26, $f27, $f28, $f29, $f30, $f31, $v20, $v21, $v22, $v23, $v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31
+
+; MIR64:         $x1 = STDU $x1, -544, $x1
+; MIR64-DAG:     STD killed $x14, 256, $x1 :: (store (s64) into %fixed-stack.47, align 16)
+; MIR64-DAG:     STD killed $x15, 264, $x1 :: (store (s64) into %fixed-stack.46)
+; MIR64-DAG:     STD killed $x16, 272, $x1 :: (store (s64) into %fixed-stack.45, align 16)
+; MIR64-DAG:     STD killed $x17, 280, $x1 :: (store (s64) into %fixed-stack.44)
+; MIR64-DAG:     STD killed $x18, 288, $x1 :: (store (s64) into %fixed-stack.43, align 16)
+; MIR64-DAG:     STD killed $x19, 296, $x1 :: (store (s64) into %fixed-stack.42)
+; MIR64-DAG:     STD killed $x20, 304, $x1 :: (store (s64) into %fixed-stack.41, align 16)
+; MIR64-DAG:     STD killed $x21, 312, $x1 :: (store (s64) into %fixed-stack.40)
+; MIR64-DAG:     STD killed $x22, 320, $x1 :: (store (s64) into %fixed-stack.39, align 16)
+; MIR64-DAG:     STD killed $x23, 328, $x1 :: (store (s64) into %fixed-stack.38)
+; MIR64-DAG:     STD killed $x24, 336, $x1 :: (store (s64) into %fixed-stack.37, align 16)
+; MIR64-DAG:     STD killed $x25, 344, $x1 :: (store (s64) into %fixed-stack.36)
+; MIR64-DAG:     STD killed $x26, 352, $x1 :: (store (s64) into %fixed-stack.35, align 16)
+; MIR64-DAG:     STD killed $x27, 360, $x1 :: (store (s64) into %fixed-stack.34)
+; MIR64-DAG:     STD killed $x28, 368, $x1 :: (store (s64) into %fixed-stack.33, align 16)
+; MIR64-DAG:     STD killed $x29, 376, $x1 :: (store (s64) into %fixed-stack.32)
+; MIR64-DAG:     STD killed $x30, 384, $x1 :: (store (s64) into %fixed-stack.31, align 16)
+; MIR64-DAG:     STD killed $x31, 392, $x1 :: (store (s64) into %fixed-stack.30)
+; MIR64-DAG:     STFD killed $f14, 400, $x1 :: (store (s64) into %fixed-stack.29, align 16)
+; MIR64-DAG:     STFD killed $f15, 408, $x1 :: (store (s64) into %fixed-stack.28)
+; MIR64-DAG:     STFD killed $f16, 416, $x1 :: (store (s64) into %fixed-stack.27, align 16)
+; MIR64-DAG:     STFD killed $f17, 424, $x1 :: (store (s64) into %fixed-stack.26)
+; MIR64-DAG:     STFD killed $f18, 432, $x1 :: (store (s64) into %fixed-stack.25, align 16)
+; MIR64-DAG:     STFD killed $f19, 440, $x1 :: (store (s64) into %fixed-stack.24)
+; MIR64-DAG:     STFD killed $f20, 448, $x1 :: (store (s64) into %fixed-stack.23, align 16)
+; MIR64-DAG:     STFD killed $f21, 456, $x1 :: (store (s64) into %fixed-stack.22)
+; MIR64-DAG:     STFD killed $f22, 464, $x1 :: (store (s64) into %fixed-stack.21, align 16)
+; MIR64-DAG:     STFD killed $f23, 472, $x1 :: (store (s64) into %fixed-stack.20)
+; MIR64-DAG:     STFD killed $f24, 480, $x1 :: (store (s64) into %fixed-stack.19, align 16)
+; MIR64-DAG:     STFD killed $f25, 488, $x1 :: (store (s64) into %fixed-stack.18)
+; MIR64-DAG:     STFD killed $f26, 496, $x1 :: (store (s64) into %fixed-stack.17, align 16)
+; MIR64-DAG:     STFD killed $f27, 504, $x1 :: (store (s64) into %fixed-stack.16)
+; MIR64-DAG:     STFD killed $f28, 512, $x1 :: (store (s64) into %fixed-stack.15, align 16)
+; MIR64-DAG:     STFD killed $f29, 520, $x1 :: (store (s64) into %fixed-stack.14)
+; MIR64-DAG:     STFD killed $f30, 528, $x1 :: (store (s64) into %fixed-stack.13, align 16)
+; MIR64-DAG:     STFD killed $f31, 536, $x1 :: (store (s64) into %fixed-stack.12)
+; MIR64-DAG:     STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.11)
+; MIR64-DAG:     STXVD2X killed $v21, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.10)
+; MIR64-DAG:     STXVD2X killed $v22, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.9)
+; MIR64-DAG:     STXVD2X killed $v23, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.8)
+; MIR64-DAG:     STXVD2X killed $v24, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.7)
+; MIR64-DAG:     STXVD2X killed $v25, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.6)
+; MIR64-DAG:     STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.5)
+; MIR64-DAG:     STXVD2X killed $v27, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.4)
+; MIR64-DAG:     STXVD2X killed $v28, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.3)
+; MIR64-DAG:     STXVD2X killed $v29, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.2)
+; MIR64-DAG:     STXVD2X killed $v30, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.1)
+; MIR64-DAG:     STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.0)
 
 ; MIR64:         INLINEASM
 
-; MIR64-DAG:     $x14 = LD 256, $x1 :: (load (s64) from %fixed-stack.8, align 16)
-; MIR64-DAG:     $x25 = LD 344, $x1 :: (load (s64) from %fixed-stack.7)
-; MIR64-DAG:     $x31 = LD 392, $x1 :: (load (s64) from %fixed-stack.6)
-; MIR64-DAG:     $f14 = LFD 400, $x1 :: (load (s64) from %fixed-stack.5, align 16)
-; MIR64-DAG:     $f21 = LFD 456, $x1 :: (load (s64) from %fixed-stack.4)
-; MIR64-DAG:     $f31 = LFD 536, $x1 :: (load (s64) from %fixed-stack.3)
-; MIR64-DAG:     $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
-; MIR64-DAG:     $x{{[0-9]+}} = LI8 64
-; MIR64-DAG:     $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
-; MIR64-DAG:     $x{{[0-9]+}} = LI8 160
 ; MIR64-DAG:     $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.0)
-; MIR64-DAG:     $x{{[0-9]+}} = LI8 240
-; MIR64-DAG:     $x1 = ADDI8 $x1, 544
-; MIR64-DAG:     BLR8 implicit $lr8, implicit $rm
-
-; ASM32-LABEL:   .fprs_gprs_vecregs:
-
-; ASM32:         stwu 1, -448(1)
-; ASM32-DAG:     li {{[0-9]+}}, 32
-; ASM32-DAG:     stw 14, 232(1)                          # 4-byte Folded Spill
-; ASM32-DAG:     stfd 14, 304(1)                         # 8-byte Folded Spill
-; ASM32-DAG:     stxvd2x 52, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM32-DAG:     li {{[0-9]+}}, 128
-; ASM32-DAG:     stw 25, 276(1)                          # 4-byte Folded Spill
-; ASM32-DAG:     stxvd2x 58, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM32-DAG:     li {{[0-9]+}}, 208
-; ASM32-DAG:     stw 31, 300(1)                          # 4-byte Folded Spill
-; ASM32-DAG:     stfd 21, 360(1)                         # 8-byte Folded Spill
-; ASM32-DAG:     stfd 31, 440(1)                         # 8-byte Folded Spill
-; ASM32-DAG:     stxvd2x 63, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM32-DAG:     #APP
-; ASM32-DAG:     #NO_APP
-; ASM32-DAG:     lxvd2x 63, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM32-DAG:     li {{[0-9]+}}, 128
-; ASM32-DAG:     lfd 31, 440(1)                          # 8-byte Folded Reload
-; ASM32-DAG:     lxvd2x 58, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM32-DAG:     li {{[0-9]+}}, 32
-; ASM32-DAG:     lfd 21, 360(1)                          # 8-byte Folded Reload
-; ASM32-DAG:     lxvd2x 52, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM32-DAG:     lfd 14, 304(1)                          # 8-byte Folded Reload
-; ASM32-DAG:     lwz 31, 300(1)                          # 4-byte Folded Reload
-; ASM32-DAG:     lwz 25, 276(1)                          # 4-byte Folded Reload
-; ASM32-DAG:     lwz 14, 232(1)                          # 4-byte Folded Reload
-; ASM32-DAG:     addi 1, 1, 448
-; ASM32:         blr
+; MIR64-DAG:     $v30 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.1)
+; MIR64-DAG:     $v29 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.2)
+; MIR64-DAG:     $v28 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.3)
+; MIR64-DAG:     $v27 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.4)
+; MIR64-DAG:     $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.5)
+; MIR64-DAG:     $v25 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.6)
+; MIR64-DAG:     $v24 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.7)
+; MIR64-DAG:     $v23 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.8)
+; MIR64-DAG:     $v22 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.9)
+; MIR64-DAG:     $v21 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.10)
+; MIR64-DAG:     $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.11)
+; MIR64-DAG:     $f31 = LFD 536, $x1 :: (load (s64) from %fixed-stack.12)
+; MIR64-DAG:     $f30 = LFD 528, $x1 :: (load (s64) from %fixed-stack.13, align 16)
+; MIR64-DAG:     $f29 = LFD 520, $x1 :: (load (s64) from %fixed-stack.14)
+; MIR64-DAG:     $f28 = LFD 512, $x1 :: (load (s64) from %fixed-stack.15, align 16)
+; MIR64-DAG:     $f27 = LFD 504, $x1 :: (load (s64) from %fixed-stack.16)
+; MIR64-DAG:     $f26 = LFD 496, $x1 :: (load (s64) from %fixed-stack.17, align 16)
+; MIR64-DAG:     $f25 = LFD 488, $x1 :: (load (s64) from %fixed-stack.18)
+; MIR64-DAG:     $f24 = LFD 480, $x1 :: (load (s64) from %fixed-stack.19, align 16)
+; MIR64-DAG:     $f23 = LFD 472, $x1 :: (load (s64) from %fixed-stack.20)
+; MIR64-DAG:     $f22 = LFD 464, $x1 :: (load (s64) from %fixed-stack.21, align 16)
+; MIR64-DAG:     $f21 = LFD 456, $x1 :: (load (s64) from %fixed-stack.22)
+; MIR64-DAG:     $f20 = LFD 448, $x1 :: (load (s64) from %fixed-stack.23, align 16)
+; MIR64-DAG:     $f19 = LFD 440, $x1 :: (load (s64) from %fixed-stack.24)
+; MIR64-DAG:     $f18 = LFD 432, $x1 :: (load (s64) from %fixed-stack.25, align 16)
+; MIR64-DAG:     $f17 = LFD 424, $x1 :: (load (s64) from %fixed-stack.26)
+; MIR64-DAG:     $f16 = LFD 416, $x1 :: (load (s64) from %fixed-stack.27, align 16)
+; MIR64-DAG:     $f15 = LFD 408, $x1 :: (load (s64) from %fixed-stack.28)
+; MIR64-DAG:     $f14 = LFD 400, $x1 :: (load (s64) from %fixed-stack.29, align 16)
+; MIR64-DAG:     $x31 = LD 392, $x1 :: (load (s64) from %fixed-stack.30)
+; MIR64-DAG:     $x30 = LD 384, $x1 :: (load (s64) from %fixed-stack.31, align 16)
+; MIR64-DAG:     $x29 = LD 376, $x1 :: (load (s64) from %fixed-stack.32)
+; MIR64-DAG:     $x28 = LD 368, $x1 :: (load (s64) from %fixed-stack.33, align 16)
+; MIR64-DAG:     $x27 = LD 360, $x1 :: (load (s64) from %fixed-stack.34)
+; MIR64-DAG:     $x26 = LD 352, $x1 :: (load (s64) from %fixed-stack.35, align 16)
+; MIR64-DAG:     $x25 = LD 344, $x1 :: (load (s64) from %fixed-stack.36)
+; MIR64-DAG:     $x24 = LD 336, $x1 :: (load (s64) from %fixed-stack.37, align 16)
+; MIR64-DAG:     $x23 = LD 328, $x1 :: (load (s64) from %fixed-stack.38)
+; MIR64-DAG:     $x22 = LD 320, $x1 :: (load (s64) from %fixed-stack.39, align 16)
+; MIR64-DAG:     $x21 = LD 312, $x1 :: (load (s64) from %fixed-stack.40)
+; MIR64-DAG:     $x20 = LD 304, $x1 :: (load (s64) from %fixed-stack.41, align 16)
+; MIR64-DAG:     $x19 = LD 296, $x1 :: (load (s64) from %fixed-stack.42)
+; MIR64-DAG:     $x18 = LD 288, $x1 :: (load (s64) from %fixed-stack.43, align 16)
+; MIR64-DAG:     $x17 = LD 280, $x1 :: (load (s64) from %fixed-stack.44)
+; MIR64-DAG:     $x16 = LD 272, $x1 :: (load (s64) from %fixed-stack.45, align 16)
+; MIR64-DAG:     $x15 = LD 264, $x1 :: (load (s64) from %fixed-stack.46)
+; MIR64-DAG:     $x14 = LD 256, $x1 :: (load (s64) from %fixed-stack.47, align 16)
+; MIR64:         $x1 = ADDI8 $x1, 544
+; MIR64-NEXT:    BLR8 implicit $lr8, implicit $rm
+
+; ASM32-LABEL:  .fprs_gprs_vecregs:
+
+; ASM32:          stwu 1, -448(1)
+; ASM32-DAG:      li [[FIXEDSTACK11:[0-9]+]], 32
+; ASM32-DAG:      stxvd2x 52, 1, [[FIXEDSTACK11]]                      # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK10:[0-9]+]], 48
+; ASM32-DAG:      stxvd2x 53, 1, [[FIXEDSTACK10]]                      # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK9:[0-9]+]], 64
+; ASM32-DAG:      stxvd2x 54, 1, [[FIXEDSTACK9]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK8:[0-9]+]], 80
+; ASM32-DAG:      stxvd2x 55, 1, [[FIXEDSTACK8]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK7:[0-9]+]], 96
+; ASM32-DAG:      stxvd2x 56, 1, [[FIXEDSTACK7]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK6:[0-9]+]], 112
+; ASM32-DAG:      stxvd2x 57, 1, [[FIXEDSTACK6]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK5:[0-9]+]], 128
+; ASM32-DAG:      stxvd2x 58, 1, [[FIXEDSTACK5]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK4:[0-9]+]], 144
+; ASM32-DAG:      stxvd2x 59, 1, [[FIXEDSTACK4]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK3:[0-9]+]], 160
+; ASM32-DAG:      stxvd2x 60, 1, [[FIXEDSTACK3]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK2:[0-9]+]], 176
+; ASM32-DAG:      stxvd2x 61, 1, [[FIXEDSTACK2]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK1:[0-9]+]], 192
+; ASM32-DAG:      stxvd2x 62, 1, [[FIXEDSTACK1]]                       # 16-byte Folded Spill
+; ASM32-DAG:      li [[FIXEDSTACK0:[0-9]+]], 208
+; ASM32-DAG:      stxvd2x 63, 1, [[FIXEDSTACK0]]                       # 16-byte Folded Spill
+; ASM32-DAG:      stw 14, 232(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 15, 236(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 16, 240(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 17, 244(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 18, 248(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 19, 252(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 20, 256(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 21, 260(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 22, 264(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 23, 268(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 24, 272(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 25, 276(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 26, 280(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 27, 284(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 28, 288(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 29, 292(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 30, 296(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stw 31, 300(1)                          # 4-byte Folded Spill
+; ASM32-DAG:      stfd 14, 304(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 15, 312(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 16, 320(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 17, 328(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 18, 336(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 19, 344(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 20, 352(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 21, 360(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 22, 368(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 23, 376(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 24, 384(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 25, 392(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 26, 400(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 27, 408(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 28, 416(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 29, 424(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 30, 432(1)                         # 8-byte Folded Spill
+; ASM32-DAG:      stfd 31, 440(1)                         # 8-byte Folded Spill
+
+; ASM32:          #APP
+; ASM32-NEXT:     #NO_APP
+
+; ASM32-DAG:      lxvd2x 63, 1, [[FIXEDSTACK0]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK1:[0-9]+]], 192
+; ASM32-DAG:      lxvd2x 62, 1, [[FIXEDSTACK1]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK2:[0-9]+]], 176
+; ASM32-DAG:      lxvd2x 61, 1, [[FIXEDSTACK2]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK3:[0-9]+]], 160
+; ASM32-DAG:      lxvd2x 60, 1, [[FIXEDSTACK3]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK4:[0-9]+]], 144
+; ASM32-DAG:      lxvd2x 59, 1, [[FIXEDSTACK4]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK5:[0-9]+]], 128
+; ASM32-DAG:      lxvd2x 58, 1, [[FIXEDSTACK5]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK6:[0-9]+]], 112
+; ASM32-DAG:      lxvd2x 57, 1, [[FIXEDSTACK6]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK7:[0-9]+]], 96
+; ASM32-DAG:      lxvd2x 56, 1, [[FIXEDSTACK7]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK8:[0-9]+]], 80
+; ASM32-DAG:      lxvd2x 55, 1, [[FIXEDSTACK8]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK9:[0-9]+]], 64
+; ASM32-DAG:      lxvd2x 54, 1, [[FIXEDSTACK9]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK10:[0-9]+]], 48
+; ASM32-DAG:      lxvd2x 53, 1, [[FIXEDSTACK10]]                        # 16-byte Folded Reload
+; ASM32-DAG:      li [[FIXEDSTACK11:[0-9]+]], 32
+; ASM32-DAG:      lxvd2x 52, 1, [[FIXEDSTACK11]]                        # 16-byte Folded Reload
+; ASM32-DAG:      lfd 31, 440(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 30, 432(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 29, 424(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 28, 416(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 27, 408(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 26, 400(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 25, 392(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 24, 384(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 23, 376(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 22, 368(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 21, 360(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 20, 352(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 19, 344(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 18, 336(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 17, 328(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 16, 320(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 15, 312(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lfd 14, 304(1)                          # 8-byte Folded Reload
+; ASM32-DAG:      lwz 31, 300(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 30, 296(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 29, 292(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 28, 288(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 27, 284(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 26, 280(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 25, 276(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 24, 272(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 23, 268(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 22, 264(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 21, 260(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 20, 256(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 19, 252(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 18, 248(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 17, 244(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 16, 240(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 15, 236(1)                          # 4-byte Folded Reload
+; ASM32-DAG:      lwz 14, 232(1)                          # 4-byte Folded Reload
+
+; ASM32:          addi 1, 1, 448
+; ASM32-NEXT:     blr
 
 ; ASM64-LABEL:    .fprs_gprs_vecregs:
 
-; ASM64:         stdu 1, -544(1)
-; ASM64-DAG:     li {{[0-9]+}}, 64
-; ASM64-DAG:     std 14, 256(1)                          # 8-byte Folded Spill
-; ASM64-DAG:     stfd 14, 400(1)                         # 8-byte Folded Spill
-; ASM64-DAG:     stxvd2x 52, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM64-DAG:     li {{[0-9]+}}, 160
-; ASM64-DAG:     std 25, 344(1)                          # 8-byte Folded Spill
-; ASM64-DAG:     stxvd2x 58, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM64-DAG:     li {{[0-9]+}}, 240
-; ASM64-DAG:     std 31, 392(1)                          # 8-byte Folded Spill
-; ASM64-DAG:     stfd 21, 456(1)                         # 8-byte Folded Spill
-; ASM64-DAG:     stfd 31, 536(1)                         # 8-byte Folded Spill
-; ASM64-DAG:     stxvd2x 63, 1, {{[0-9]+}}               # 16-byte Folded Spill
-; ASM64-DAG:     #APP
-; ASM64-DAG:     #NO_APP
-; ASM64-DAG:     lxvd2x 63, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM64-DAG:     li {{[0-9]+}}, 160
-; ASM64-DAG:     lfd 31, 536(1)                          # 8-byte Folded Reload
-; ASM64-DAG:     lxvd2x 58, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM64-DAG:     li {{[0-9]+}}, 64
-; ASM64-DAG:     lfd 21, 456(1)                          # 8-byte Folded Reload
-; ASM64-DAG:     lxvd2x 52, 1, {{[0-9]+}}                # 16-byte Folded Reload
-; ASM64-DAG:     lfd 14, 400(1)                          # 8-byte Folded Reload
-; ASM64-DAG:     ld 31, 392(1)                           # 8-byte Folded Reload
-; ASM64-DAG:     ld 25, 344(1)                           # 8-byte Folded Reload
-; ASM64-DAG:     ld 14, 256(1)                           # 8-byte Folded Reload
-; ASM64-DAG:     addi 1, 1, 544
-; ASM64:         blr
+; ASM64:            stdu 1, -544(1)
+; ASM64-DAG:        li [[FIXEDSTACK11:[0-9]+]], 64
+; ASM64-DAG:        stxvd2x 52, 1, [[FIXEDSTACK11]]                       # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK10:[0-9]+]], 80
+; ASM64-DAG:        stxvd2x 53, 1, [[FIXEDSTACK10]]                       # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK9:[0-9]+]], 96
+; ASM64-DAG:        stxvd2x 54, 1, [[FIXEDSTACK9]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK8:[0-9]+]], 112
+; ASM64-DAG:        stxvd2x 55, 1, [[FIXEDSTACK8]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK7:[0-9]+]], 128
+; ASM64-DAG:        stxvd2x 56, 1, [[FIXEDSTACK7]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK6:[0-9]+]], 144
+; ASM64-DAG:        stxvd2x 57, 1, [[FIXEDSTACK6]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK5:[0-9]+]], 160
+; ASM64-DAG:        stxvd2x 58, 1, [[FIXEDSTACK5]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK4:[0-9]+]], 176
+; ASM64-DAG:        stxvd2x 59, 1, [[FIXEDSTACK4]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK3:[0-9]+]], 192
+; ASM64-DAG:        stxvd2x 60, 1, [[FIXEDSTACK3]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK2:[0-9]+]], 208
+; ASM64-DAG:        stxvd2x 61, 1, [[FIXEDSTACK2]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK1:[0-9]+]], 224
+; ASM64-DAG:        stxvd2x 62, 1, [[FIXEDSTACK1]]                        # 16-byte Folded Spill
+; ASM64-DAG:        li [[FIXEDSTACK0:[0-9]+]], 240
+; ASM64-DAG:        stxvd2x 63, 1, [[FIXEDSTACK0]]                        # 16-byte Folded Spill
+; ASM64-DAG:        std 14, 256(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 15, 264(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 16, 272(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 17, 280(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 18, 288(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 19, 296(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 20, 304(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 21, 312(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 22, 320(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 23, 328(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 24, 336(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 25, 344(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 26, 352(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 27, 360(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 28, 368(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 29, 376(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 30, 384(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        std 31, 392(1)                          # 8-byte Folded Spill
+; ASM64-DAG:        stfd 14, 400(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 15, 408(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 16, 416(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 17, 424(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 18, 432(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 19, 440(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 20, 448(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 21, 456(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 22, 464(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 23, 472(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 24, 480(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 25, 488(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 26, 496(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 27, 504(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 28, 512(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 29, 520(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 30, 528(1)                         # 8-byte Folded Spill
+; ASM64-DAG:        stfd 31, 536(1)                         # 8-byte Folded Spill
+
+; ASM64:            #APP
+; ASM64-NEXT:       #NO_APP
+
+; ASM64-DAG:        lxvd2x 63, 1, [[FIXEDSTACK0]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK1:[0-9]+]], 224
+; ASM64-DAG:        lxvd2x 62, 1, [[FIXEDSTACK1]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK2:[0-9]+]], 208
+; ASM64-DAG:        lxvd2x 61, 1, [[FIXEDSTACK2]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK3:[0-9]+]], 192
+; ASM64-DAG:        lxvd2x 60, 1, [[FIXEDSTACK3]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK4:[0-9]+]], 176
+; ASM64-DAG:        lxvd2x 59, 1, [[FIXEDSTACK4]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK5:[0-9]+]], 160
+; ASM64-DAG:        lxvd2x 58, 1, [[FIXEDSTACK5]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK6:[0-9]+]], 144
+; ASM64-DAG:        lxvd2x 57, 1, [[FIXEDSTACK6]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK7:[0-9]+]], 128
+; ASM64-DAG:        lxvd2x 56, 1, [[FIXEDSTACK7]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK8:[0-9]+]], 112
+; ASM64-DAG:        lxvd2x 55, 1, [[FIXEDSTACK8]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK9:[0-9]+]], 96
+; ASM64-DAG:        lxvd2x 54, 1, [[FIXEDSTACK9]]                         # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK10:[0-9]+]], 80
+; ASM64-DAG:        lxvd2x 53, 1, [[FIXEDSTACK10]]                        # 16-byte Folded Reload
+; ASM64-DAG:        li [[FIXEDSTACK11:[0-9]+]], 64
+; ASM64-DAG:        lxvd2x 52, 1, [[FIXEDSTACK11]]                        # 16-byte Folded Reload
+; ASM64-DAG:        lfd 31, 536(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 30, 528(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 29, 520(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 28, 512(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 27, 504(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 26, 496(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 25, 488(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 24, 480(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 23, 472(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 22, 464(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 21, 456(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 20, 448(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 19, 440(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 18, 432(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 17, 424(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 16, 416(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 15, 408(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        lfd 14, 400(1)                          # 8-byte Folded Reload
+; ASM64-DAG:        ld 31, 392(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 30, 384(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 29, 376(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 28, 368(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 27, 360(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 26, 352(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 25, 344(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 24, 336(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 23, 328(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 22, 320(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 21, 312(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 20, 304(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 19, 296(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 18, 288(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 17, 280(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 16, 272(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 15, 264(1)                           # 8-byte Folded Reload
+; ASM64-DAG:        ld 14, 256(1)                           # 8-byte Folded Reload
+
+; ASM64:            addi 1, 1, 544
+; ASM64-NEXT:       blr
diff --git a/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll b/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll
index 45ec7357656bcb..9dc06dca3d3b45 100644
--- a/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll
@@ -63,24 +63,34 @@ define dso_local void @vec_regs() {
 ; ASM64:         blr
 
 define dso_local void @fprs_gprs_vecregs() {
-    call void asm sideeffect "", "~{r14},~{r25},~{r31},~{f14},~{f21},~{f31},~{v20},~{v26},~{v31}"()
+    call void asm sideeffect "", "~{r25},~{r28},~{r31},~{f21},~{f25},~{f31},~{v20},~{v26},~{v31}"()
       ret void
 }
 
 ; MIR32-LABEL:   name:            fprs_gprs_vecregs
 
-; MIR32:         fixedStack:
-
-; MIR32:         liveins: $r14, $r25, $r31, $f14, $f21, $f31
+; MIR32:         liveins: $r25, $r26, $r27, $r28, $r29, $r30, $r31, $f21, $f22, $f23, $f24, $f25, $f26, $f27, $f28, $f29, $f30, $f31
 
 ; MIR32-NOT:     STXVD2X killed $v20
 ; MIR32-NOT:     STXVD2X killed $v26
 ; MIR32-NOT:     STXVD2X killed $v31
-; MIR32-DAG:     STW killed $r14, -216, $r1 :: (store (s32) into %fixed-stack.5, align 8)
-; MIR32-DAG:     STW killed $r25, -172, $r1 :: (store (s32) into %fixed-stack.4)
-; MIR32-DAG:     STW killed $r31, -148, $r1 :: (store (s32) into %fixed-stack.3)
-; MIR32-DAG:     STFD killed $f14, -144, $r1 :: (store (s64) into %fixed-stack.2, align 16)
-; MIR32-DAG:     STFD killed $f21, -88, $r1 :: (store (s64) into %fixed-stack.1)
+; MIR32-DAG:     STW killed $r25, -116, $r1 :: (store (s32) into %fixed-stack.17)
+; MIR32-DAG:     STW killed $r26, -112, $r1 :: (store (s32) into %fixed-stack.16, align 8)
+; MIR32-DAG:     STW killed $r27, -108, $r1 :: (store (s32) into %fixed-stack.15)
+; MIR32-DAG:     STW killed $r28, -104, $r1 :: (store (s32) into %fixed-stack.14, align 16)
+; MIR32-DAG:     STW killed $r29, -100, $r1 :: (store (s32) into %fixed-stack.13)
+; MIR32-DAG:     STW killed $r30, -96, $r1 :: (store (s32) into %fixed-stack.12, align 8)
+; MIR32-DAG:     STW killed $r31, -92, $r1 :: (store (s32) into %fixed-stack.11)
+; MIR32-DAG:     STFD killed $f21, -88, $r1 :: (store (s64) into %fixed-stack.10)
+; MIR32-DAG:     STFD killed $f22, -80, $r1 :: (store (s64) into %fixed-stack.9, align 16)
+; MIR32-DAG:     STFD killed $f23, -72, $r1 :: (store (s64) into %fixed-stack.8)
+; MIR32-DAG:     STFD killed $f24, -64, $r1 :: (store (s64) into %fixed-stack.7, align 16)
+; MIR32-DAG:     STFD killed $f25, -56, $r1 :: (store (s64) into %fixed-stack.6)
+; MIR32-DAG:     STFD killed $f26, -48, $r1 :: (store (s64) into %fixed-stack.5, align 16)
+; MIR32-DAG:     STFD killed $f27, -40, $r1 :: (store (s64) into %fixed-stack.4)
+; MIR32-DAG:     STFD killed $f28, -32, $r1 :: (store (s64) into %fixed-stack.3, align 16)
+; MIR32-DAG:     STFD killed $f29, -24, $r1 :: (store (s64) into %fixed-stack.2)
+; MIR32-DAG:     STFD killed $f30, -16, $r1 :: (store (s64) into %fixed-stack.1, align 16)
 ; MIR32-DAG:     STFD killed $f31, -8, $r1 :: (store (s64) into %fixed-stack.0)
 
 ; MIR32-LABEL:   INLINEASM
@@ -88,28 +98,50 @@ define dso_local void @fprs_gprs_vecregs() {
 ; MIR32-NOT:     $v20 = LXVD2X
 ; MIR32-NOT:     $v26 = LXVD2X
 ; MIR32-NOT:     $v31 = LXVD2X
-; MIR32-DAG:     $r14 = LWZ -216, $r1 :: (load (s32) from %fixed-stack.5, align 8)
-; MIR32-DAG:     $r25 = LWZ -172, $r1 :: (load (s32) from %fixed-stack.4)
-; MIR32-DAG:     $r31 = LWZ -148, $r1 :: (load (s32) from %fixed-stack.3)
-; MIR32-DAG:     $f14 = LFD -144, $r1 :: (load (s64) from %fixed-stack.2, align 16)
-; MIR32-DAG:     $f21 = LFD -88, $r1 :: (load (s64) from %fixed-stack.1)
 ; MIR32-DAG:     $f31 = LFD -8, $r1 :: (load (s64) from %fixed-stack.0)
-; MIR32-DAG:     BLR implicit $lr, implicit $rm
+; MIR32-DAG:     $f30 = LFD -16, $r1 :: (load (s64) from %fixed-stack.1, align 16)
+; MIR32-DAG:     $f29 = LFD -24, $r1 :: (load (s64) from %fixed-stack.2)
+; MIR32-DAG:     $f28 = LFD -32, $r1 :: (load (s64) from %fixed-stack.3, align 16)
+; MIR32-DAG:     $f27 = LFD -40, $r1 :: (load (s64) from %fixed-stack.4)
+; MIR32-DAG:     $f26 = LFD -48, $r1 :: (load (s64) from %fixed-stack.5, align 16)
+; MIR32-DAG:     $f25 = LFD -56, $r1 :: (load (s64) from %fixed-stack.6)
+; MIR32-DAG:     $f24 = LFD -64, $r1 :: (load (s64) from %fixed-stack.7, align 16)
+; MIR32-DAG:     $f23 = LFD -72, $r1 :: (load (s64) from %fixed-stack.8)
+; MIR32-DAG:     $f22 = LFD -80, $r1 :: (load (s64) from %fixed-stack.9, align 16)
+; MIR32-DAG:     $f21 = LFD -88, $r1 :: (load (s64) from %fixed-stack.10)
+; MIR32-DAG:     $r31 = LWZ -92, $r1 :: (load (s32) from %fixed-stack.11)
+; MIR32-DAG:     $r30 = LWZ -96, $r1 :: (load (s32) from %fixed-stack.12, align 8)
+; MIR32-DAG:     $r29 = LWZ -100, $r1 :: (load (s32) from %fixed-stack.13)
+; MIR32-DAG:     $r28 = LWZ -104, $r1 :: (load (s32) from %fixed-stack.14, align 16)
+; MIR32-DAG:     $r27 = LWZ -108, $r1 :: (load (s32) from %fixed-stack.15)
+; MIR32-DAG:     $r26 = LWZ -112, $r1 :: (load (s32) from %fixed-stack.16, align 8)
+; MIR32-DAG:     $r25 = LWZ -116, $r1 :: (load (s32) from %fixed-stack.17)
+; MIR32:         BLR implicit $lr, implicit $rm
 
 ; MIR64-LABEL:   name:            fprs_gprs_vecregs
 
-; MIR64:         fixedStack:
-
-; MIR64:         liveins: $x14, $x25, $x31, $f14, $f21, $f31
+; MIR64:         liveins: $x25, $x26, $x27, $x28, $x29, $x30, $x31, $f21, $f22, $f23, $f24, $f25, $f26, $f27, $f28, $f29, $f30, $f31
 
 ; MIR64-NOT:     STXVD2X killed $v20
 ; MIR64-NOT:     STXVD2X killed $v26
 ; MIR64-NOT:     STXVD2X killed $v31
-; MIR64-DAG:     STD killed $x14, -288, $x1 :: (store (s64) into %fixed-stack.5, align 16)
-; MIR64-DAG:     STD killed $x25, -200, $x1 :: (store (s64) into %fixed-stack.4)
-; MIR64-DAG:     STD killed $x31, -152, $x1 :: (store (s64) into %fixed-stack.3)
-; MIR64-DAG:     STFD killed $f14, -144, $x1 :: (store (s64) into %fixed-stack.2, align 16)
-; MIR64-DAG:     STFD killed $f21, -88, $x1 :: (store (s64) into %fixed-stack.1)
+; MIR64-DAG:     STD killed $x25, -144, $x1 :: (store (s64) into %fixed-stack.17)
+; MIR64-DAG:     STD killed $x26, -136, $x1 :: (store (s64) into %fixed-stack.16, align 16)
+; MIR64-DAG:     STD killed $x27, -128, $x1 :: (store (s64) into %fixed-stack.15)
+; MIR64-DAG:     STD killed $x28, -120, $x1 :: (store (s64) into %fixed-stack.14, align 16)
+; MIR64-DAG:     STD killed $x29, -112, $x1 :: (store (s64) into %fixed-stack.13)
+; MIR64-DAG:     STD killed $x30, -104, $x1 :: (store (s64) into %fixed-stack.12, align 16)
+; MIR64-DAG:     STD killed $x31, -96, $x1 :: (store (s64) into %fixed-stack.11)
+; MIR64-DAG:     STFD killed $f21, -88, $x1 :: (store (s64) into %fixed-stack.10)
+; MIR64-DAG:     STFD killed $f22, -80, $x1 :: (store (s64) into %fixed-stack.9, align 16)
+; MIR64-DAG:     STFD killed $f23, -72, $x1 :: (store (s64) into %fixed-stack.8)
+; MIR64-DAG:     STFD killed $f24, -64, $x1 :: (store (s64) into %fixed-stack.7, align 16)
+; MIR64-DAG:     STFD killed $f25, -56, $x1 :: (store (s64) into %fixed-stack.6)
+; MIR64-DAG:     STFD killed $f26, -48, $x1 :: (store (s64) into %fixed-stack.5, align 16)
+; MIR64-DAG:     STFD killed $f27, -40, $x1 :: (store (s64) into %fixed-stack.4)
+; MIR64-DAG:     STFD killed $f28, -32, $x1 :: (store (s64) into %fixed-stack.3, align 16)
+; MIR64-DAG:     STFD killed $f29, -24, $x1 :: (store (s64) into %fixed-stack.2)
+; MIR64-DAG:     STFD killed $f30, -16, $x1 :: (store (s64) into %fixed-stack.1, align 16)
 ; MIR64-DAG:     STFD killed $f31, -8, $x1 :: (store (s64) into %fixed-stack.0)
 
 ; MIR64-LABEL:   INLINEASM
@@ -117,12 +149,25 @@ define dso_local void @fprs_gprs_vecregs() {
 ; MIR64-NOT:     $v20 = LXVD2X
 ; MIR64-NOT:     $v26 = LXVD2X
 ; MIR64-NOT:     $v31 = LXVD2X
-; MIR64-DAG:     $x14 = LD -288, $x1 :: (load (s64) from %fixed-stack.5, align 16)
-; MIR64-DAG:     $x25 = LD -200, $x1 :: (load (s64) from %fixed-stack.4)
-; MIR64-DAG:     $x31 = LD -152, $x1 :: (load (s64) from %fixed-stack.3)
-; MIR64-DAG:     $f14 = LFD -144, $x1 :: (load (s64) from %fixed-stack.2, align 16)
-; MIR64-DAG:     $f21 = LFD -88, $x1 :: (load (s64) from %fixed-stack.1)
 ; MIR64-DAG:     $f31 = LFD -8, $x1 :: (load (s64) from %fixed-stack.0)
+; MIR64-DAG:     $f30 = LFD -16, $x1 :: (load (s64) from %fixed-stack.1, align 16)
+; MIR64-DAG:     $f29 = LFD -24, $x1 :: (load (s64) from %fixed-stack.2)
+; MIR64-DAG:     $f28 = LFD -32, $x1 :: (load (s64) from %fixed-stack.3, align 16)
+; MIR64-DAG:     $f27 = LFD -40, $x1 :: (load (s64) from %fixed-stack.4)
+; MIR64-DAG:     $f26 = LFD -48, $x1 :: (load (s64) from %fixed-stack.5, align 16)
+; MIR64-DAG:     $f25 = LFD -56, $x1 :: (load (s64) from %fixed-stack.6)
+; MIR64-DAG:     $f24 = LFD -64, $x1 :: (load (s64) from %fixed-stack.7, align 16)
+; MIR64-DAG:     $f23 = LFD -72, $x1 :: (load (s64) from %fixed-stack.8)
+; MIR64-DAG:     $f22 = LFD -80, $x1 :: (load (s64) from %fixed-stack.9, align 16)
+; MIR64-DAG:     $f21 = LFD -88, $x1 :: (load (s64) from %fixed-stack.10)
+; MIR64-DAG:     $x31 = LD -96, $x1 :: (load (s64) from %fixed-stack.11)
+; MIR64-DAG:     $x30 = LD -104, $x1 :: (load (s64) from %fixed-stack.12, align 16)
+; MIR64-DAG:     $x29 = LD -112, $x1 :: (load (s64) from %fixed-stack.13)
+; MIR64-DAG:     $x28 = LD -120, $x1 :: (load (s64) from %fixed-stack.14, align 16)
+; MIR64-DAG:     $x27 = LD -128, $x1 :: (load (s64) from %fixed-stack.15)
+; MIR64-DAG:     $x26 = LD -136, $x1 :: (load (s64) from %fixed-stack.16, align 16)
+; MIR64-DAG:     $x25 = LD -144, $x1 :: (load (s64) from %fixed-stack.17)
+
 ; MIR64:         BLR8 implicit $lr8, implicit $rm
 
 ;; We don't have -ppc-full-reg-names on AIX so can't reliably check-not for
@@ -130,38 +175,87 @@ define dso_local void @fprs_gprs_vecregs() {
 
 ; ASM32-LABEL:   .fprs_gprs_vecregs:
 
-; ASM32-DAG:     stw 14, -216(1)                         # 4-byte Folded Spill
-; ASM32-DAG:     stw 25, -172(1)                         # 4-byte Folded Spill
-; ASM32-DAG:     stw 31, -148(1)                         # 4-byte Folded Spill
-; ASM32-DAG:     stfd 14, -144(1)                        # 8-byte Folded Spill
-; ASM32-DAG:     stfd 21, -88(1)                         # 8-byte Folded Spill
-; ASM32-DAG:     stfd 31, -8(1)                          # 8-byte Folded Spill
-; ASM32-DAG:     #APP
-; ASM32-DAG:     #NO_APP
-; ASM32-DAG:     lfd 31, -8(1)                           # 8-byte Folded Reload
-; ASM32-DAG:     lfd 21, -88(1)                          # 8-byte Folded Reload
-; ASM32-DAG:     lfd 14, -144(1)                         # 8-byte Folded Reload
-; ASM32-DAG:     lwz 31, -148(1)                         # 4-byte Folded Reload
-; ASM32-DAG:     lwz 25, -172(1)                         # 4-byte Folded Reload
-; ASM32-DAG:     lwz 14, -216(1)                         # 4-byte Folded Reload
+; ASM32-DAG:   stw 25, -116(1)                         # 4-byte Folded Spill
+; ASM32-DAG:   stw 26, -112(1)                         # 4-byte Folded Spill
+; ASM32-DAG:   stw 27, -108(1)                         # 4-byte Folded Spill
+; ASM32-DAG:   stw 28, -104(1)                         # 4-byte Folded Spill
+; ASM32-DAG:   stw 29, -100(1)                         # 4-byte Folded Spill
+; ASM32-DAG:   stw 30, -96(1)                          # 4-byte Folded Spill
+; ASM32-DAG:   stw 31, -92(1)                          # 4-byte Folded Spill
+; ASM32-DAG:   stfd 21, -88(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 22, -80(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 23, -72(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 24, -64(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 25, -56(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 26, -48(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 27, -40(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 28, -32(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 29, -24(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 30, -16(1)                         # 8-byte Folded Spill
+; ASM32-DAG:   stfd 31, -8(1)                          # 8-byte Folded Spill
+; ASM32:       #APP
+; ASM32-NEXT:  #NO_APP
+; ASM32-DAG:   lfd 31, -8(1)                           # 8-byte Folded Reload
+; ASM32-DAG:   lfd 30, -16(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 29, -24(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 28, -32(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 27, -40(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 26, -48(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 25, -56(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 24, -64(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 23, -72(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 22, -80(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lfd 21, -88(1)                          # 8-byte Folded Reload
+; ASM32-DAG:   lwz 31, -92(1)                          # 4-byte Folded Reload
+; ASM32-DAG:   lwz 30, -96(1)                          # 4-byte Folded Reload
+; ASM32-DAG:   lwz 29, -100(1)                         # 4-byte Folded Reload
+; ASM32-DAG:   lwz 28, -104(1)                         # 4-byte Folded Reload
+; ASM32-DAG:   lwz 27, -108(1)                         # 4-byte Folded Reload
+; ASM32-DAG:   lwz 26, -112(1)                         # 4-byte Folded Reload
+; ASM32-DAG:   lwz 25, -116(1)                         # 4-byte Folded Reload
 ; ASM32:         blr
 
 ; ASM64-LABEL:    .fprs_gprs_vecregs:
 
-; ASM64-DAG:     std 14, -288(1)                         # 8-byte Folded Spill
-; ASM64-DAG:     std 25, -200(1)                         # 8-byte Folded Spill
-; ASM64-DAG:     std 31, -152(1)                         # 8-byte Folded Spill
-; ASM64-DAG:     stfd 14, -144(1)                        # 8-byte Folded Spill
+; ASM64-DAG:     std 25, -144(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 26, -136(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 27, -128(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 28, -120(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 29, -112(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 30, -104(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 31, -96(1)                          # 8-byte Folded Spill
 ; ASM64-DAG:     stfd 21, -88(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 22, -80(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 23, -72(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 24, -64(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 25, -56(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 26, -48(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 27, -40(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 28, -32(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 29, -24(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 30, -16(1)                         # 8-byte Folded Spill
 ; ASM64-DAG:     stfd 31, -8(1)                          # 8-byte Folded Spill
-; ASM64-DAG:     #APP
-; ASM64-DAG:     #NO_APP
+; ASM64:         #APP
+; ASM64-NEXT:    #NO_APP
 ; ASM64-DAG:     lfd 31, -8(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     lfd 30, -16(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 29, -24(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 28, -32(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 27, -40(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 26, -48(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 25, -56(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 24, -64(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 23, -72(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 22, -80(1)                          # 8-byte Folded Reload
 ; ASM64-DAG:     lfd 21, -88(1)                          # 8-byte Folded Reload
-; ASM64-DAG:     lfd 14, -144(1)                         # 8-byte Folded Reload
-; ASM64-DAG:     ld 31, -152(1)                          # 8-byte Folded Reload
-; ASM64-DAG:     ld 25, -200(1)                          # 8-byte Folded Reload
-; ASM64-DAG:     ld 14, -288(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 31, -96(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 30, -104(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 29, -112(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 28, -120(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 27, -128(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 26, -136(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 25, -144(1)                          # 8-byte Folded Reload
+
 ; ASM64:         blr
 
 define dso_local void @all_fprs_and_vecregs() {
diff --git a/llvm/test/CodeGen/PowerPC/aix-csr.ll b/llvm/test/CodeGen/PowerPC/aix-csr.ll
index a9a85c8be5a105..1dadacf1faab78 100644
--- a/llvm/test/CodeGen/PowerPC/aix-csr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-csr.ll
@@ -20,77 +20,260 @@ entry:
 
 ; MIR64:       name:            gprs_only
 ; MIR64-LABEL: fixedStack:
-; MIR64-NEXT:   - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:       callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:       debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:   - { id: 1, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:       callee-saved-register: '$x22', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:       debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:   - { id: 2, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:       callee-saved-register: '$x16', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:       debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 2, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 3, type: spill-slot, offset: -32, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 4, type: spill-slot, offset: -40, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 5, type: spill-slot, offset: -48, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 6, type: spill-slot, offset: -56, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 7, type: spill-slot, offset: -64, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 8, type: spill-slot, offset: -72, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 9, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 10, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x21', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 11, type: spill-slot, offset: -96, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x20', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 12, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x19', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 13, type: spill-slot, offset: -112, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x18', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 14, type: spill-slot, offset: -120, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x17', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT:  - { id: 15, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:      callee-saved-register: '$x16', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:      debug-info-expression: '', debug-info-location: '' }
 ; MIR64-NEXT:  stack:           []
 
 ; MIR32:       name:            gprs_only
 ; MIR32-LABEL: fixedStack:
-; MIR32:        - { id: 0, type: spill-slot, offset: -8, size: 4, alignment: 8, stack-id: default,
-; MIR32-NEXT:       callee-saved-register: '$r30', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:       debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:   - { id: 1, type: spill-slot, offset: -40, size: 4, alignment: 8, stack-id: default,
-; MIR32-NEXT:       callee-saved-register: '$r22', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:       debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:   - { id: 2, type: spill-slot, offset: -64, size: 4, alignment: 16, stack-id: default,
-; MIR32-NEXT:       callee-saved-register: '$r16', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:       debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 2, type: spill-slot, offset: -12, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 3, type: spill-slot, offset: -16, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 4, type: spill-slot, offset: -20, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 5, type: spill-slot, offset: -24, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 6, type: spill-slot, offset: -28, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 7, type: spill-slot, offset: -32, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 8, type: spill-slot, offset: -36, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 9, type: spill-slot, offset: -40, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 10, type: spill-slot, offset: -44, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r21', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 11, type: spill-slot, offset: -48, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r20', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 12, type: spill-slot, offset: -52, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r19', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 13, type: spill-slot, offset: -56, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r18', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 14, type: spill-slot, offset: -60, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r17', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 15, type: spill-slot, offset: -64, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r16', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
 ; MIR32-NEXT:  stack:           []
 
 
-; MIR64: liveins: $x3, $x16, $x22, $x30
-
-; MIR64-DAG: STD killed $x16, -128, $x1 :: (store (s64) into %fixed-stack.2, align 16)
-; MIR64-DAG: STD killed $x22, -80, $x1 :: (store (s64) into %fixed-stack.1, align 16)
-; MIR64-DAG: STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.0, align 16)
+; MIR64: liveins: $x3, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
+
+; MIR64-DAG:  STD killed $x16, -128, $x1 :: (store (s64) into %fixed-stack.15, align 16)
+; MIR64-DAG:  STD killed $x17, -120, $x1 :: (store (s64) into %fixed-stack.14)
+; MIR64-DAG:  STD killed $x18, -112, $x1 :: (store (s64) into %fixed-stack.13, align 16)
+; MIR64-DAG:  STD killed $x19, -104, $x1 :: (store (s64) into %fixed-stack.12)
+; MIR64-DAG:  STD killed $x20, -96, $x1 :: (store (s64) into %fixed-stack.11, align 16)
+; MIR64-DAG:  STD killed $x21, -88, $x1 :: (store (s64) into %fixed-stack.10)
+; MIR64-DAG:  STD killed $x22, -80, $x1 :: (store (s64) into %fixed-stack.9, align 16)
+; MIR64-DAG:  STD killed $x23, -72, $x1 :: (store (s64) into %fixed-stack.8)
+; MIR64-DAG:  STD killed $x24, -64, $x1 :: (store (s64) into %fixed-stack.7, align 16)
+; MIR64-DAG:  STD killed $x25, -56, $x1 :: (store (s64) into %fixed-stack.6)
+; MIR64-DAG:  STD killed $x26, -48, $x1 :: (store (s64) into %fixed-stack.5, align 16)
+; MIR64-DAG:  STD killed $x27, -40, $x1 :: (store (s64) into %fixed-stack.4)
+; MIR64-DAG:  STD killed $x28, -32, $x1 :: (store (s64) into %fixed-stack.3, align 16)
+; MIR64-DAG:  STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.2)
+; MIR64-DAG:  STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.1, align 16)
+; MIR64-DAG:  STD killed $x31, -8, $x1 :: (store (s64) into %fixed-stack.0)
 
 ; MIR64:     INLINEASM
 
-; MIR64-DAG: $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.0, align 16)
-; MIR64-DAG: $x22 = LD -80, $x1 :: (load (s64) from %fixed-stack.1, align 16)
-; MIR64-DAG: $x16 = LD -128, $x1 :: (load (s64) from %fixed-stack.2, align 16)
-; MIR64:     BLR8 implicit $lr8, implicit $rm, implicit $x3
-
-
-; MIR32: liveins: $r3, $r16, $r22, $r30
 
-; MIR32-DAG: STW killed $r16, -64, $r1 :: (store (s32) into %fixed-stack.2, align 16)
-; MIR32-DAG: STW killed $r22, -40, $r1 :: (store (s32) into %fixed-stack.1, align 8)
-; MIR32-DAG: STW killed $r30, -8, $r1 :: (store (s32) into %fixed-stack.0, align 8)
+; MIR64-DAG:    $x31 = LD -8, $x1 :: (load (s64) from %fixed-stack.0)
+; MIR64-DAG:    $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.1, align 16)
+; MIR64-DAG:    $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.2)
+; MIR64-DAG:    $x28 = LD -32, $x1 :: (load (s64) from %fixed-stack.3, align 16)
+; MIR64-DAG:    $x27 = LD -40, $x1 :: (load (s64) from %fixed-stack.4)
+; MIR64-DAG:    $x26 = LD -48, $x1 :: (load (s64) from %fixed-stack.5, align 16)
+; MIR64-DAG:    $x25 = LD -56, $x1 :: (load (s64) from %fixed-stack.6)
+; MIR64-DAG:    $x24 = LD -64, $x1 :: (load (s64) from %fixed-stack.7, align 16)
+; MIR64-DAG:    $x23 = LD -72, $x1 :: (load (s64) from %fixed-stack.8)
+; MIR64-DAG:    $x22 = LD -80, $x1 :: (load (s64) from %fixed-stack.9, align 16)
+; MIR64-DAG:    $x21 = LD -88, $x1 :: (load (s64) from %fixed-stack.10)
+; MIR64-DAG:    $x20 = LD -96, $x1 :: (load (s64) from %fixed-stack.11, align 16)
+; MIR64-DAG:    $x19 = LD -104, $x1 :: (load (s64) from %fixed-stack.12)
+; MIR64-DAG:    $x18 = LD -112, $x1 :: (load (s64) from %fixed-stack.13, align 16)
+; MIR64-DAG:    $x17 = LD -120, $x1 :: (load (s64) from %fixed-stack.14)
+; MIR64-DAG:    $x16 = LD -128, $x1 :: (load (s64) from %fixed-stack.15, align 16)
+; MIR64:        BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+
+; MIR32:  liveins: $r3, $r16, $r17, $r18, $r19, $r20, $r21, $r22, $r23, $r24, $r25, $r26, $r27, $r28, $r29, $r30, $r31
+
+; MIR32-DAG:  STW killed $r16, -64, $r1 :: (store (s32) into %fixed-stack.15, align 16)
+; MIR32-DAG:  STW killed $r17, -60, $r1 :: (store (s32) into %fixed-stack.14)
+; MIR32-DAG:  STW killed $r18, -56, $r1 :: (store (s32) into %fixed-stack.13, align 8)
+; MIR32-DAG:  STW killed $r19, -52, $r1 :: (store (s32) into %fixed-stack.12)
+; MIR32-DAG:  STW killed $r20, -48, $r1 :: (store (s32) into %fixed-stack.11, align 16)
+; MIR32-DAG:  STW killed $r21, -44, $r1 :: (store (s32) into %fixed-stack.10)
+; MIR32-DAG:  STW killed $r22, -40, $r1 :: (store (s32) into %fixed-stack.9, align 8)
+; MIR32-DAG:  STW killed $r23, -36, $r1 :: (store (s32) into %fixed-stack.8)
+; MIR32-DAG:  STW killed $r24, -32, $r1 :: (store (s32) into %fixed-stack.7, align 16)
+; MIR32-DAG:  STW killed $r25, -28, $r1 :: (store (s32) into %fixed-stack.6)
+; MIR32-DAG:  STW killed $r26, -24, $r1 :: (store (s32) into %fixed-stack.5, align 8)
+; MIR32-DAG:  STW killed $r27, -20, $r1 :: (store (s32) into %fixed-stack.4)
+; MIR32-DAG:  STW killed $r28, -16, $r1 :: (store (s32) into %fixed-stack.3, align 16)
+; MIR32-DAG:  STW killed $r29, -12, $r1 :: (store (s32) into %fixed-stack.2)
+; MIR32-DAG:  STW killed $r30, -8, $r1 :: (store (s32) into %fixed-stack.1, align 8)
+; MIR32-DAG:  STW killed $r31, -4, $r1 :: (store (s32) into %fixed-stack.0)
 
-; MIR32:     INLINEASM
+; MIR32:      INLINEASM
 
-; MIR32-DAG: $r30 = LWZ -8, $r1 :: (load (s32) from %fixed-stack.0, align 8)
-; MIR32-DAG: $r22 = LWZ -40, $r1 :: (load (s32) from %fixed-stack.1, align 8)
-; MIR32-DAG: $r16 = LWZ -64, $r1 :: (load (s32) from %fixed-stack.2, align 16)
-; MIR32:     BLR implicit $lr, implicit $rm, implicit $r3
+; MIR32-DAG:  $r31 = LWZ -4, $r1 :: (load (s32) from %fixed-stack.0)
+; MIR32-DAG:  $r30 = LWZ -8, $r1 :: (load (s32) from %fixed-stack.1, align 8)
+; MIR32-DAG:  $r29 = LWZ -12, $r1 :: (load (s32) from %fixed-stack.2)
+; MIR32-DAG:  $r28 = LWZ -16, $r1 :: (load (s32) from %fixed-stack.3, align 16)
+; MIR32-DAG:  $r27 = LWZ -20, $r1 :: (load (s32) from %fixed-stack.4)
+; MIR32-DAG:  $r26 = LWZ -24, $r1 :: (load (s32) from %fixed-stack.5, align 8)
+; MIR32-DAG:  $r25 = LWZ -28, $r1 :: (load (s32) from %fixed-stack.6)
+; MIR32-DAG:  $r24 = LWZ -32, $r1 :: (load (s32) from %fixed-stack.7, align 16)
+; MIR32-DAG:  $r23 = LWZ -36, $r1 :: (load (s32) from %fixed-stack.8)
+; MIR32-DAG:  $r22 = LWZ -40, $r1 :: (load (s32) from %fixed-stack.9, align 8)
+; MIR32-DAG:  $r21 = LWZ -44, $r1 :: (load (s32) from %fixed-stack.10)
+; MIR32-DAG:  $r20 = LWZ -48, $r1 :: (load (s32) from %fixed-stack.11, align 16)
+; MIR32-DAG:  $r19 = LWZ -52, $r1 :: (load (s32) from %fixed-stack.12)
+; MIR32-DAG:  $r18 = LWZ -56, $r1 :: (load (s32) from %fixed-stack.13, align 8)
+; MIR32-DAG:  $r17 = LWZ -60, $r1 :: (load (s32) from %fixed-stack.14)
+; MIR32-DAG:  $r16 = LWZ -64, $r1 :: (load (s32) from %fixed-stack.15, align 16)
+; MIR32:      BLR implicit $lr, implicit $rm, implicit $r3
 
 
 ; ASM64-LABEL: .gprs_only:
-; ASM64-DAG:      std 16, -128(1)                 # 8-byte Folded Spill
-; ASM64-DAG:      std 22, -80(1)                  # 8-byte Folded Spill
-; ASM64-DAG:      std 30, -16(1)                  # 8-byte Folded Spill
-; ASM64:          #APP
-; ASM64-DAG:      ld 30, -16(1)                   # 8-byte Folded Reload
-; ASM64-DAG:      ld 22, -80(1)                   # 8-byte Folded Reload
-; ASM64-DAG:      ld 16, -128(1)                  # 8-byte Folded Reload
+; ASM64-DAG:     std 16, -128(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 17, -120(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 18, -112(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 19, -104(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     std 20, -96(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 21, -88(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 22, -80(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 23, -72(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 24, -64(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 25, -56(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 26, -48(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 27, -40(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 28, -32(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 29, -24(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 30, -16(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 31, -8(1)                           # 8-byte Folded Spill
+; ASM64:         #APP
+; ASM64-DAG:     ld 31, -8(1)                            # 8-byte Folded Reload
+; ASM64-DAG:     ld 30, -16(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 29, -24(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 28, -32(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 27, -40(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 26, -48(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 25, -56(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 24, -64(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 23, -72(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 22, -80(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 21, -88(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 20, -96(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 19, -104(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 18, -112(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 17, -120(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 16, -128(1)                          # 8-byte Folded Reload
 ; ASM64:          blr
 
 ; ASM32-LABEL: .gprs_only:
-; ASM32-DAG:     stw 16, -64(1)                  # 4-byte Folded Spill
-; ASM32-DAG:     stw 22, -40(1)                  # 4-byte Folded Spill
-; ASM32-DAG:     stw 30, -8(1)                   # 4-byte Folded Spill
+; ASM32-DAG:     stw 16, -64(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 17, -60(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 18, -56(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 19, -52(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 20, -48(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 21, -44(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 22, -40(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 23, -36(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 24, -32(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 25, -28(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 26, -24(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 27, -20(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 28, -16(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 29, -12(1)                          # 4-byte Folded Spill
+; ASM32-DAG:     stw 30, -8(1)                           # 4-byte Folded Spill
+; ASM32-DAG:     stw 31, -4(1)                           # 4-byte Folded Spill
 ; ASM32:         #APP
-; ASM32-DAG:     lwz 30, -8(1)                   # 4-byte Folded Reload
-; ASM32-DAG:     lwz 22, -40(1)                  # 4-byte Folded Reload
-; ASM32-DAG:     lwz 16, -64(1)                  # 4-byte Folded Reload
+; ASM32-DAG:     lwz 31, -4(1)                           # 4-byte Folded Reload
+; ASM32-DAG:     lwz 30, -8(1)                           # 4-byte Folded Reload
+; ASM32-DAG:     lwz 29, -12(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 28, -16(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 27, -20(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 26, -24(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 25, -28(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 24, -32(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 23, -36(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 22, -40(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 21, -44(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 20, -48(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 19, -52(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 18, -56(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 17, -60(1)                          # 4-byte Folded Reload
+; ASM32-DAG:     lwz 16, -64(1)                          # 4-byte Folded Reload
 ; ASM32-DAG:     blr
 
 
@@ -104,112 +287,402 @@ define dso_local double @fprs_and_gprs(i32 signext %i) {
 
 ; MIR64:       name:            fprs_and_gprs
 ; MIR64-LABEL: fixedStack:
-; MIR64-NEXT:    - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 2, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 3, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 4, type: spill-slot, offset: -152, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$x31', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 5, type: spill-slot, offset: -200, size: 8, alignment: 8, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$x25', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR64-NEXT:    - { id: 6, type: spill-slot, offset: -288, size: 8, alignment: 16, stack-id: default,
-; MIR64-NEXT:        callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '',
-; MIR64-NEXT:        debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 2, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 3, type: spill-slot, offset: -32, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 4, type: spill-slot, offset: -40, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 5, type: spill-slot, offset: -48, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 6, type: spill-slot, offset: -56, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 7, type: spill-slot, offset: -64, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 8, type: spill-slot, offset: -72, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 9, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 10, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 11, type: spill-slot, offset: -96, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f20', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 12, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 13, type: spill-slot, offset: -112, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f18', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 14, type: spill-slot, offset: -120, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f17', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 15, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f16', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 16, type: spill-slot, offset: -136, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f15', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 17, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 18, type: spill-slot, offset: -152, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x31', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 19, type: spill-slot, offset: -160, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 20, type: spill-slot, offset: -168, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 21, type: spill-slot, offset: -176, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x28', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 22, type: spill-slot, offset: -184, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x27', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 23, type: spill-slot, offset: -192, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x26', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 24, type: spill-slot, offset: -200, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x25', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 25, type: spill-slot, offset: -208, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x24', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 26, type: spill-slot, offset: -216, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x23', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 27, type: spill-slot, offset: -224, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x22', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 28, type: spill-slot, offset: -232, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x21', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 29, type: spill-slot, offset: -240, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x20', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 30, type: spill-slot, offset: -248, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x19', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 31, type: spill-slot, offset: -256, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x18', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 32, type: spill-slot, offset: -264, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x17', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 33, type: spill-slot, offset: -272, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x16', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 34, type: spill-slot, offset: -280, size: 8, alignment: 8, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x15', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
+; MIR64-NEXT: - { id: 35, type: spill-slot, offset: -288, size: 8, alignment: 16, stack-id: default,
+; MIR64-NEXT:     callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '',
+; MIR64-NEXT:     debug-info-expression: '', debug-info-location: '' }
 ; MIR64-NEXT:  stack:           []
 
 ; MIR32:       name:            fprs_and_gprs
 ; MIR32-LABEL: fixedStack:
-; MIR32-NEXT:    - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 2, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 3, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 4, type: spill-slot, offset: -148, size: 4, alignment: 4, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 5, type: spill-slot, offset: -172, size: 4, alignment: 4, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r25', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 6, type: spill-slot, offset: -216, size: 4, alignment: 8, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
-; MIR32-NEXT:    - { id: 7, type: spill-slot, offset: -220, size: 4, alignment: 4, stack-id: default,
-; MIR32-NEXT:        callee-saved-register: '$r13', callee-saved-restored: true, debug-info-variable: '',
-; MIR32-NEXT:        debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 0, type: spill-slot, offset: -8, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 2, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 3, type: spill-slot, offset: -32, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 4, type: spill-slot, offset: -40, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 5, type: spill-slot, offset: -48, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 6, type: spill-slot, offset: -56, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 7, type: spill-slot, offset: -64, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 8, type: spill-slot, offset: -72, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 9, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 10, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f21', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 11, type: spill-slot, offset: -96, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f20', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 12, type: spill-slot, offset: -104, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f19', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 13, type: spill-slot, offset: -112, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f18', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 14, type: spill-slot, offset: -120, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f17', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 15, type: spill-slot, offset: -128, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f16', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 16, type: spill-slot, offset: -136, size: 8, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f15', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 17, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$f14', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 18, type: spill-slot, offset: -148, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 19, type: spill-slot, offset: -152, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r30', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 20, type: spill-slot, offset: -156, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r29', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 21, type: spill-slot, offset: -160, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r28', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 22, type: spill-slot, offset: -164, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r27', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 23, type: spill-slot, offset: -168, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r26', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 24, type: spill-slot, offset: -172, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r25', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 25, type: spill-slot, offset: -176, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r24', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 26, type: spill-slot, offset: -180, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r23', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 27, type: spill-slot, offset: -184, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r22', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 28, type: spill-slot, offset: -188, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r21', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 29, type: spill-slot, offset: -192, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r20', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 30, type: spill-slot, offset: -196, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r19', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 31, type: spill-slot, offset: -200, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r18', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 32, type: spill-slot, offset: -204, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r17', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 33, type: spill-slot, offset: -208, size: 4, alignment: 16, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r16', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 34, type: spill-slot, offset: -212, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r15', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 35, type: spill-slot, offset: -216, size: 4, alignment: 8, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
+; MIR32-NEXT:  - { id: 36, type: spill-slot, offset: -220, size: 4, alignment: 4, stack-id: default,
+; MIR32-NEXT:      callee-saved-register: '$r13', callee-saved-restored: true, debug-info-variable: '',
+; MIR32-NEXT:      debug-info-expression: '', debug-info-location: '' }
 ; MIR32-NEXT:  stack:           []
 
 
-; MIR64: liveins: $x3, $x14, $x25, $x31, $f14, $f19, $f21, $f31
+; MIR64: liveins: $x3, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31, $f14, $f15, $f16, $f17, $f18, $f19, $f20, $f21, $f22, $f23, $f24, $f25, $f26, $f27, $f28, $f29, $f30, $f31
 
 ; MIR64:       $x0 = MFLR8 implicit $lr8
 ; MIR64-NEXT:  $x1 = STDU $x1, -400, $x1
 ; MIR64-NEXT:  STD killed $x0, 416, $x1
-; MIR64-DAG:   STD killed $x14, 112, $x1 :: (store (s64) into %fixed-stack.6, align 16)
-; MIR64-DAG:   STD killed $x25, 200, $x1 :: (store (s64) into %fixed-stack.5)
-; MIR64-DAG:   STD killed $x31, 248, $x1 :: (store (s64) into %fixed-stack.4)
-; MIR64-DAG:   STFD killed $f14, 256, $x1 :: (store (s64) into %fixed-stack.3, align 16)
-; MIR64-DAG:   STFD killed $f19, 296, $x1 :: (store (s64) into %fixed-stack.2)
-; MIR64-DAG:   STFD killed $f21, 312, $x1 :: (store (s64) into %fixed-stack.1)
+; MIR64-DAG:   STD killed $x14, 112, $x1 :: (store (s64) into %fixed-stack.35, align 16)
+; MIR64-DAG:   STD killed $x15, 120, $x1 :: (store (s64) into %fixed-stack.34)
+; MIR64-DAG:   STD killed $x16, 128, $x1 :: (store (s64) into %fixed-stack.33, align 16)
+; MIR64-DAG:   STD killed $x17, 136, $x1 :: (store (s64) into %fixed-stack.32)
+; MIR64-DAG:   STD killed $x18, 144, $x1 :: (store (s64) into %fixed-stack.31, align 16)
+; MIR64-DAG:   STD killed $x19, 152, $x1 :: (store (s64) into %fixed-stack.30)
+; MIR64-DAG:   STD killed $x20, 160, $x1 :: (store (s64) into %fixed-stack.29, align 16)
+; MIR64-DAG:   STD killed $x21, 168, $x1 :: (store (s64) into %fixed-stack.28)
+; MIR64-DAG:   STD killed $x22, 176, $x1 :: (store (s64) into %fixed-stack.27, align 16)
+; MIR64-DAG:   STD killed $x23, 184, $x1 :: (store (s64) into %fixed-stack.26)
+; MIR64-DAG:   STD killed $x24, 192, $x1 :: (store (s64) into %fixed-stack.25, align 16)
+; MIR64-DAG:   STD killed $x25, 200, $x1 :: (store (s64) into %fixed-stack.24)
+; MIR64-DAG:   STD killed $x26, 208, $x1 :: (store (s64) into %fixed-stack.23, align 16)
+; MIR64-DAG:   STD killed $x27, 216, $x1 :: (store (s64) into %fixed-stack.22)
+; MIR64-DAG:   STD killed $x28, 224, $x1 :: (store (s64) into %fixed-stack.21, align 16)
+; MIR64-DAG:   STD killed $x29, 232, $x1 :: (store (s64) into %fixed-stack.20)
+; MIR64-DAG:   STD killed $x30, 240, $x1 :: (store (s64) into %fixed-stack.19, align 16)
+; MIR64-DAG:   STD killed $x31, 248, $x1 :: (store (s64) into %fixed-stack.18)
+; MIR64-DAG:   STFD killed $f14, 256, $x1 :: (store (s64) into %fixed-stack.17, align 16)
+; MIR64-DAG:   STFD killed $f15, 264, $x1 :: (store (s64) into %fixed-stack.16)
+; MIR64-DAG:   STFD killed $f16, 272, $x1 :: (store (s64) into %fixed-stack.15, align 16)
+; MIR64-DAG:   STFD killed $f17, 280, $x1 :: (store (s64) into %fixed-stack.14)
+; MIR64-DAG:   STFD killed $f18, 288, $x1 :: (store (s64) into %fixed-stack.13, align 16)
+; MIR64-DAG:   STFD killed $f19, 296, $x1 :: (store (s64) into %fixed-stack.12)
+; MIR64-DAG:   STFD killed $f20, 304, $x1 :: (store (s64) into %fixed-stack.11, align 16)
+; MIR64-DAG:   STFD killed $f21, 312, $x1 :: (store (s64) into %fixed-stack.10)
+; MIR64-DAG:   STFD killed $f22, 320, $x1 :: (store (s64) into %fixed-stack.9, align 16)
+; MIR64-DAG:   STFD killed $f23, 328, $x1 :: (store (s64) into %fixed-stack.8)
+; MIR64-DAG:   STFD killed $f24, 336, $x1 :: (store (s64) into %fixed-stack.7, align 16)
+; MIR64-DAG:   STFD killed $f25, 344, $x1 :: (store (s64) into %fixed-stack.6)
+; MIR64-DAG:   STFD killed $f26, 352, $x1 :: (store (s64) into %fixed-stack.5, align 16)
+; MIR64-DAG:   STFD killed $f27, 360, $x1 :: (store (s64) into %fixed-stack.4)
+; MIR64-DAG:   STFD killed $f28, 368, $x1 :: (store (s64) into %fixed-stack.3, align 16)
+; MIR64-DAG:   STFD killed $f29, 376, $x1 :: (store (s64) into %fixed-stack.2)
+; MIR64-DAG:   STFD killed $f30, 384, $x1 :: (store (s64) into %fixed-stack.1, align 16)
 ; MIR64-DAG:   STFD killed $f31, 392, $x1 :: (store (s64) into %fixed-stack.0)
 
 ; MIR64:       INLINEASM
 ; MIR64-NEXT:  BL8_NOP
 
 ; MIR64-DAG:   $f31 = LFD 392, $x1 :: (load (s64) from %fixed-stack.0)
-; MIR64-DAG:   $f21 = LFD 312, $x1 :: (load (s64) from %fixed-stack.1)
-; MIR64-DAG:   $f19 = LFD 296, $x1 :: (load (s64) from %fixed-stack.2)
-; MIR64-DAG:   $f14 = LFD 256, $x1 :: (load (s64) from %fixed-stack.3, align 16)
-; MIR64-DAG:   $x31 = LD 248, $x1 :: (load (s64) from %fixed-stack.4)
-; MIR64-DAG:   $x25 = LD 200, $x1 :: (load (s64) from %fixed-stack.5)
-; MIR64-DAG:   $x14 = LD 112, $x1 :: (load (s64) from %fixed-stack.6, align 16)
+; MIR64-DAG:   $f30 = LFD 384, $x1 :: (load (s64) from %fixed-stack.1, align 16)
+; MIR64-DAG:   $f29 = LFD 376, $x1 :: (load (s64) from %fixed-stack.2)
+; MIR64-DAG:   $f28 = LFD 368, $x1 :: (load (s64) from %fixed-stack.3, align 16)
+; MIR64-DAG:   $f27 = LFD 360, $x1 :: (load (s64) from %fixed-stack.4)
+; MIR64-DAG:   $f26 = LFD 352, $x1 :: (load (s64) from %fixed-stack.5, align 16)
+; MIR64-DAG:   $f25 = LFD 344, $x1 :: (load (s64) from %fixed-stack.6)
+; MIR64-DAG:   $f24 = LFD 336, $x1 :: (load (s64) from %fixed-stack.7, align 16)
+; MIR64-DAG:   $f23 = LFD 328, $x1 :: (load (s64) from %fixed-stack.8)
+; MIR64-DAG:   $f22 = LFD 320, $x1 :: (load (s64) from %fixed-stack.9, align 16)
+; MIR64-DAG:   $f21 = LFD 312, $x1 :: (load (s64) from %fixed-stack.10)
+; MIR64-DAG:   $f20 = LFD 304, $x1 :: (load (s64) from %fixed-stack.11, align 16)
+; MIR64-DAG:   $f19 = LFD 296, $x1 :: (load (s64) from %fixed-stack.12)
+; MIR64-DAG:   $f18 = LFD 288, $x1 :: (load (s64) from %fixed-stack.13, align 16)
+; MIR64-DAG:   $f17 = LFD 280, $x1 :: (load (s64) from %fixed-stack.14)
+; MIR64-DAG:   $f16 = LFD 272, $x1 :: (load (s64) from %fixed-stack.15, align 16)
+; MIR64-DAG:   $f15 = LFD 264, $x1 :: (load (s64) from %fixed-stack.16)
+; MIR64-DAG:   $f14 = LFD 256, $x1 :: (load (s64) from %fixed-stack.17, align 16)
+; MIR64-DAG:   $x31 = LD 248, $x1 :: (load (s64) from %fixed-stack.18)
+; MIR64-DAG:   $x30 = LD 240, $x1 :: (load (s64) from %fixed-stack.19, align 16)
+; MIR64-DAG:   $x29 = LD 232, $x1 :: (load (s64) from %fixed-stack.20)
+; MIR64-DAG:   $x28 = LD 224, $x1 :: (load (s64) from %fixed-stack.21, align 16)
+; MIR64-DAG:   $x27 = LD 216, $x1 :: (load (s64) from %fixed-stack.22)
+; MIR64-DAG:   $x26 = LD 208, $x1 :: (load (s64) from %fixed-stack.23, align 16)
+; MIR64-DAG:   $x25 = LD 200, $x1 :: (load (s64) from %fixed-stack.24)
+; MIR64-DAG:   $x24 = LD 192, $x1 :: (load (s64) from %fixed-stack.25, align 16)
+; MIR64-DAG:   $x23 = LD 184, $x1 :: (load (s64) from %fixed-stack.26)
+; MIR64-DAG:   $x22 = LD 176, $x1 :: (load (s64) from %fixed-stack.27, align 16)
+; MIR64-DAG:   $x21 = LD 168, $x1 :: (load (s64) from %fixed-stack.28)
+; MIR64-DAG:   $x20 = LD 160, $x1 :: (load (s64) from %fixed-stack.29, align 16)
+; MIR64-DAG:   $x19 = LD 152, $x1 :: (load (s64) from %fixed-stack.30)
+; MIR64-DAG:   $x18 = LD 144, $x1 :: (load (s64) from %fixed-stack.31, align 16)
+; MIR64-DAG:   $x17 = LD 136, $x1 :: (load (s64) from %fixed-stack.32)
+; MIR64-DAG:   $x16 = LD 128, $x1 :: (load (s64) from %fixed-stack.33, align 16)
+; MIR64-DAG:   $x15 = LD 120, $x1 :: (load (s64) from %fixed-stack.34)
+; MIR64-DAG:   $x14 = LD 112, $x1 :: (load (s64) from %fixed-stack.35, align 16)
+
 ; MIR64:       $x1 = ADDI8 $x1, 400
 ; MIR64-NEXT:  $x0 = LD 16, $x1
 ; MIR64-NEXT:  MTLR8 $x0, implicit-def $lr8
 ; MIR64-NEXT:  BLR8 implicit $lr8, implicit $rm, implicit $f1
 
-
-; MIR32: liveins: $r3, $r13, $r14, $r25, $r31, $f14, $f19, $f21, $f31
+; MIR32: liveins: $r3, $r13, $r14, $r15, $r16, $r17, $r18, $r19, $r20, $r21, $r22, $r23, $r24, $r25, $r26, $r27, $r28, $r29, $r30, $r31, $f14, $f15, $f16, $f17, $f18, $f19, $f20, $f21, $f22, $f23, $f24, $f25, $f26, $f27, $f28, $f29, $f30, $f31
 
 ; MIR32:      $r0 = MFLR implicit $lr
 ; MIR32-NEXT: $r1 = STWU $r1, -288, $r1
 ; MIR32-NEXT: STW killed $r0, 296, $r1
-; MIR32-DAG:  STW killed $r13, 68, $r1 :: (store (s32) into %fixed-stack.7)
-; MIR32-DAG:  STW killed $r14, 72, $r1 :: (store (s32) into %fixed-stack.6, align 8)
-; MIR32-DAG:  STW killed $r25, 116, $r1 :: (store (s32) into %fixed-stack.5)
-; MIR32-DAG:  STW killed $r31, 140, $r1 :: (store (s32) into %fixed-stack.4)
-; MIR32-DAG:  STFD killed $f14, 144, $r1 :: (store (s64) into %fixed-stack.3, align 16)
-; MIR32-DAG:  STFD killed $f19, 184, $r1 :: (store (s64) into %fixed-stack.2)
-; MIR32-DAG:  STFD killed $f21, 200, $r1 :: (store (s64) into %fixed-stack.1)
+; MIR32-DAG:  STW killed $r13, 68, $r1 :: (store (s32) into %fixed-stack.36)
+; MIR32-DAG:  STW killed $r14, 72, $r1 :: (store (s32) into %fixed-stack.35, align 8)
+; MIR32-DAG:  STW killed $r15, 76, $r1 :: (store (s32) into %fixed-stack.34)
+; MIR32-DAG:  STW killed $r16, 80, $r1 :: (store (s32) into %fixed-stack.33, align 16)
+; MIR32-DAG:  STW killed $r17, 84, $r1 :: (store (s32) into %fixed-stack.32)
+; MIR32-DAG:  STW killed $r18, 88, $r1 :: (store (s32) into %fixed-stack.31, align 8)
+; MIR32-DAG:  STW killed $r19, 92, $r1 :: (store (s32) into %fixed-stack.30)
+; MIR32-DAG:  STW killed $r20, 96, $r1 :: (store (s32) into %fixed-stack.29, align 16)
+; MIR32-DAG:  STW killed $r21, 100, $r1 :: (store (s32) into %fixed-stack.28)
+; MIR32-DAG:  STW killed $r22, 104, $r1 :: (store (s32) into %fixed-stack.27, align 8)
+; MIR32-DAG:  STW killed $r23, 108, $r1 :: (store (s32) into %fixed-stack.26)
+; MIR32-DAG:  STW killed $r24, 112, $r1 :: (store (s32) into %fixed-stack.25, align 16)
+; MIR32-DAG:  STW killed $r25, 116, $r1 :: (store (s32) into %fixed-stack.24)
+; MIR32-DAG:  STW killed $r26, 120, $r1 :: (store (s32) into %fixed-stack.23, align 8)
+; MIR32-DAG:  STW killed $r27, 124, $r1 :: (store (s32) into %fixed-stack.22)
+; MIR32-DAG:  STW killed $r28, 128, $r1 :: (store (s32) into %fixed-stack.21, align 16)
+; MIR32-DAG:  STW killed $r29, 132, $r1 :: (store (s32) into %fixed-stack.20)
+; MIR32-DAG:  STW killed $r30, 136, $r1 :: (store (s32) into %fixed-stack.19, align 8)
+; MIR32-DAG:  STW killed $r31, 140, $r1 :: (store (s32) into %fixed-stack.18)
+; MIR32-DAG:  STFD killed $f14, 144, $r1 :: (store (s64) into %fixed-stack.17, align 16)
+; MIR32-DAG:  STFD killed $f15, 152, $r1 :: (store (s64) into %fixed-stack.16)
+; MIR32-DAG:  STFD killed $f16, 160, $r1 :: (store (s64) into %fixed-stack.15, align 16)
+; MIR32-DAG:  STFD killed $f17, 168, $r1 :: (store (s64) into %fixed-stack.14)
+; MIR32-DAG:  STFD killed $f18, 176, $r1 :: (store (s64) into %fixed-stack.13, align 16)
+; MIR32-DAG:  STFD killed $f19, 184, $r1 :: (store (s64) into %fixed-stack.12)
+; MIR32-DAG:  STFD killed $f20, 192, $r1 :: (store (s64) into %fixed-stack.11, align 16)
+; MIR32-DAG:  STFD killed $f21, 200, $r1 :: (store (s64) into %fixed-stack.10)
+; MIR32-DAG:  STFD killed $f22, 208, $r1 :: (store (s64) into %fixed-stack.9, align 16)
+; MIR32-DAG:  STFD killed $f23, 216, $r1 :: (store (s64) into %fixed-stack.8)
+; MIR32-DAG:  STFD killed $f24, 224, $r1 :: (store (s64) into %fixed-stack.7, align 16)
+; MIR32-DAG:  STFD killed $f25, 232, $r1 :: (store (s64) into %fixed-stack.6)
+; MIR32-DAG:  STFD killed $f26, 240, $r1 :: (store (s64) into %fixed-stack.5, align 16)
+; MIR32-DAG:  STFD killed $f27, 248, $r1 :: (store (s64) into %fixed-stack.4)
+; MIR32-DAG:  STFD killed $f28, 256, $r1 :: (store (s64) into %fixed-stack.3, align 16)
+; MIR32-DAG:  STFD killed $f29, 264, $r1 :: (store (s64) into %fixed-stack.2)
+; MIR32-DAG:  STFD killed $f30, 272, $r1 :: (store (s64) into %fixed-stack.1, align 16)
 ; MIR32-DAG:  STFD killed $f31, 280, $r1 :: (store (s64) into %fixed-stack.0)
 
 ; MIR32:      INLINEASM
 ; MIR32:      BL_NOP
 
 ; MIR32-DAG:  $f31 = LFD 280, $r1 :: (load (s64) from %fixed-stack.0)
-; MIR32-DAG:  $f21 = LFD 200, $r1 :: (load (s64) from %fixed-stack.1)
-; MIR32-DAG:  $f19 = LFD 184, $r1 :: (load (s64) from %fixed-stack.2)
-; MIR32-DAG:  $f14 = LFD 144, $r1 :: (load (s64) from %fixed-stack.3, align 16)
-; MIR32-DAG:  $r31 = LWZ 140, $r1 :: (load (s32) from %fixed-stack.4)
-; MIR32-DAG:  $r25 = LWZ 116, $r1 :: (load (s32) from %fixed-stack.5)
-; MIR32-DAG:  $r14 = LWZ 72, $r1 :: (load (s32) from %fixed-stack.6, align 8)
-; MIR32-DAG:  $r13 = LWZ 68, $r1 :: (load (s32) from %fixed-stack.7)
+; MIR32-DAG:  $f30 = LFD 272, $r1 :: (load (s64) from %fixed-stack.1, align 16)
+; MIR32-DAG:  $f29 = LFD 264, $r1 :: (load (s64) from %fixed-stack.2)
+; MIR32-DAG:  $f28 = LFD 256, $r1 :: (load (s64) from %fixed-stack.3, align 16)
+; MIR32-DAG:  $f27 = LFD 248, $r1 :: (load (s64) from %fixed-stack.4)
+; MIR32-DAG:  $f26 = LFD 240, $r1 :: (load (s64) from %fixed-stack.5, align 16)
+; MIR32-DAG:  $f25 = LFD 232, $r1 :: (load (s64) from %fixed-stack.6)
+; MIR32-DAG:  $f24 = LFD 224, $r1 :: (load (s64) from %fixed-stack.7, align 16)
+; MIR32-DAG:  $f23 = LFD 216, $r1 :: (load (s64) from %fixed-stack.8)
+; MIR32-DAG:  $f22 = LFD 208, $r1 :: (load (s64) from %fixed-stack.9, align 16)
+; MIR32-DAG:  $f21 = LFD 200, $r1 :: (load (s64) from %fixed-stack.10)
+; MIR32-DAG:  $f20 = LFD 192, $r1 :: (load (s64) from %fixed-stack.11, align 16)
+; MIR32-DAG:  $f19 = LFD 184, $r1 :: (load (s64) from %fixed-stack.12)
+; MIR32-DAG:  $f18 = LFD 176, $r1 :: (load (s64) from %fixed-stack.13, align 16)
+; MIR32-DAG:  $f17 = LFD 168, $r1 :: (load (s64) from %fixed-stack.14)
+; MIR32-DAG:  $f16 = LFD 160, $r1 :: (load (s64) from %fixed-stack.15, align 16)
+; MIR32-DAG:  $f15 = LFD 152, $r1 :: (load (s64) from %fixed-stack.16)
+; MIR32-DAG:  $f14 = LFD 144, $r1 :: (load (s64) from %fixed-stack.17, align 16)
+; MIR32-DAG:  $r31 = LWZ 140, $r1 :: (load (s32) from %fixed-stack.18)
+; MIR32-DAG:  $r30 = LWZ 136, $r1 :: (load (s32) from %fixed-stack.19, align 8)
+; MIR32-DAG:  $r29 = LWZ 132, $r1 :: (load (s32) from %fixed-stack.20)
+; MIR32-DAG:  $r28 = LWZ 128, $r1 :: (load (s32) from %fixed-stack.21, align 16)
+; MIR32-DAG:  $r27 = LWZ 124, $r1 :: (load (s32) from %fixed-stack.22)
+; MIR32-DAG:  $r26 = LWZ 120, $r1 :: (load (s32) from %fixed-stack.23, align 8)
+; MIR32-DAG:  $r25 = LWZ 116, $r1 :: (load (s32) from %fixed-stack.24)
+; MIR32-DAG:  $r24 = LWZ 112, $r1 :: (load (s32) from %fixed-stack.25, align 16)
+; MIR32-DAG:  $r23 = LWZ 108, $r1 :: (load (s32) from %fixed-stack.26)
+; MIR32-DAG:  $r22 = LWZ 104, $r1 :: (load (s32) from %fixed-stack.27, align 8)
+; MIR32-DAG:  $r21 = LWZ 100, $r1 :: (load (s32) from %fixed-stack.28)
+; MIR32-DAG:  $r20 = LWZ 96, $r1 :: (load (s32) from %fixed-stack.29, align 16)
+; MIR32-DAG:  $r19 = LWZ 92, $r1 :: (load (s32) from %fixed-stack.30)
+; MIR32-DAG:  $r18 = LWZ 88, $r1 :: (load (s32) from %fixed-stack.31, align 8)
+; MIR32-DAG:  $r17 = LWZ 84, $r1 :: (load (s32) from %fixed-stack.32)
+; MIR32-DAG:  $r16 = LWZ 80, $r1 :: (load (s32) from %fixed-stack.33, align 16)
+; MIR32-DAG:  $r15 = LWZ 76, $r1 :: (load (s32) from %fixed-stack.34)
+; MIR32-DAG:  $r14 = LWZ 72, $r1 :: (load (s32) from %fixed-stack.35, align 8)
+; MIR32-DAG:  $r13 = LWZ 68, $r1 :: (load (s32) from %fixed-stack.36)
 ; MIR32:      $r1 = ADDI $r1, 288
 ; MIR32-NEXT: $r0 = LWZ 8, $r1
 ; MIR32-NEXT: MTLR $r0, implicit-def $lr
@@ -219,23 +692,81 @@ define dso_local double @fprs_and_gprs(i32 signext %i) {
 ; ASM64:         mflr 0
 ; ASM64-NEXT:    stdu 1, -400(1)
 ; ASM64-NEXT:    std 0, 416(1)
-; ASM64-DAG:     std 14, 112(1)                  # 8-byte Folded Spill
-; ASM64-DAG:     std 25, 200(1)                  # 8-byte Folded Spill
-; ASM64-DAG:     std 31, 248(1)                  # 8-byte Folded Spill
-; ASM64-DAG:     stfd 14, 256(1)                 # 8-byte Folded Spill
-; ASM64-DAG:     stfd 19, 296(1)                 # 8-byte Folded Spill
-; ASM64-DAG:     stfd 21, 312(1)                 # 8-byte Folded Spill
-; ASM64-DAG:     stfd 31, 392(1)                 # 8-byte Folded Spill
+; ASM64-DAG:     std 14, 112(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 15, 120(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 16, 128(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 17, 136(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 18, 144(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 19, 152(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 20, 160(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 21, 168(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 22, 176(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 23, 184(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 24, 192(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 25, 200(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 26, 208(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 27, 216(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 28, 224(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 29, 232(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 30, 240(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     std 31, 248(1)                          # 8-byte Folded Spill
+; ASM64-DAG:     stfd 14, 256(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 15, 264(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 16, 272(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 17, 280(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 18, 288(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 19, 296(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 20, 304(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 21, 312(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 22, 320(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 23, 328(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 24, 336(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 25, 344(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 26, 352(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 27, 360(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 28, 368(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 29, 376(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 30, 384(1)                         # 8-byte Folded Spill
+; ASM64-DAG:     stfd 31, 392(1)                         # 8-byte Folded Spill
 
 ; ASM64:         bl .dummy
+; ASM64-DAG:     lfd 31, 392(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 30, 384(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 29, 376(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 28, 368(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 27, 360(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 26, 352(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 25, 344(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 24, 336(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 23, 328(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 22, 320(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 21, 312(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 20, 304(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 19, 296(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 18, 288(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 17, 280(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 16, 272(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 15, 264(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     lfd 14, 256(1)                          # 8-byte Folded Reload
+; ASM64-DAG:     ld 31, 248(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 30, 240(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 29, 232(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 28, 224(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 27, 216(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 26, 208(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 25, 200(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 24, 192(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 23, 184(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 22, 176(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 21, 168(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 20, 160(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 19, 152(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 18, 144(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 17, 136(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 16, 128(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 15, 120(1)                           # 8-byte Folded Reload
+; ASM64-DAG:     ld 14, 112(1)                           # 8-byte Folded Reload
 
-; ASM64-DAG:     lfd 31, 392(1)                  # 8-byte Folded Reload
-; ASM64-DAG:     lfd 21, 312(1)                  # 8-byte Folded Reload
-; ASM64-DAG:     lfd 19, 296(1)                  # 8-byte Folded Reload
-; ASM64-DAG:     lfd 14, 256(1)                  # 8-byte Folded Reload
-; ASM64-DAG:     ld 31, 248(1)                   # 8-byte Folded Reload
-; ASM64-DAG:     ld 25, 200(1)                   # 8-byte Folded Reload
-; ASM64-DAG:     ld 14, 112(1)                   # 8-byte Folded Reload
 ; ASM64:         addi 1, 1, 400
 ; ASM64-NEXT:    ld 0, 16(1)
 ; ASM64-NEXT:    mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/aix-spills-for-eh.ll b/llvm/test/CodeGen/PowerPC/aix-spills-for-eh.ll
new file mode 100644
index 00000000000000..73004e87587311
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-spills-for-eh.ll
@@ -0,0 +1,301 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -mattr=+altivec -verify-machineinstrs --vec-extabi \
+; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc-unknown-aix < %s  | FileCheck %s --check-prefix 32BIT
+
+; RUN: llc -mcpu=pwr9 -mattr=+altivec -verify-machineinstrs --vec-extabi \
+; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64-unknown-aix < %s | FileCheck %s --check-prefix 64BIT
+
+@_ZTIi = external constant ptr
+
+; Function Attrs: uwtable
+define dso_local signext i32 @_Z5test2iPPKc(i32 signext %argc, ptr nocapture readnone %argv) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
+; 32BIT-LABEL: _Z5test2iPPKc:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    mflr r0
+; 32BIT-NEXT:    stwu r1, -464(r1)
+; 32BIT-NEXT:    stw r0, 472(r1)
+; 32BIT-NEXT:    stw r30, 320(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    li r30, 0
+; 32BIT-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r31, 324(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    mr r31, r3
+; 32BIT-NEXT:    stw r14, 256(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r15, 260(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r16, 264(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r17, 268(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stw r18, 272(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r19, 276(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r20, 280(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r21, 284(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stw r22, 288(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r23, 292(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r24, 296(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r25, 300(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stw r26, 304(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
+; 32BIT-NEXT:    stw r27, 308(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stw r28, 312(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stw r29, 316(r1) # 4-byte Folded Spill
+; 32BIT-NEXT:    stfd f15, 328(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f16, 336(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f17, 344(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f18, 352(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f19, 360(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f20, 368(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f21, 376(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f22, 384(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f23, 392(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f24, 400(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f25, 408(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f26, 416(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f27, 424(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f28, 432(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f29, 440(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f30, 448(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    stfd f31, 456(r1) # 8-byte Folded Spill
+; 32BIT-NEXT:    #APP
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    #NO_APP
+; 32BIT-NEXT:  L..tmp0:
+; 32BIT-NEXT:    bl ._Z4testi[PR]
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:  L..tmp1:
+; 32BIT-NEXT:  L..BB0_1: # %return
+; 32BIT-NEXT:    lxv v31, 240(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v30, 224(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v29, 208(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v28, 192(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    mr r3, r30
+; 32BIT-NEXT:    lxv v27, 176(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v26, 160(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v25, 144(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v24, 128(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v23, 112(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v22, 96(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v21, 80(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lxv v20, 64(r1) # 16-byte Folded Reload
+; 32BIT-NEXT:    lfd f31, 456(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f30, 448(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f29, 440(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f28, 432(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lwz r31, 324(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r30, 320(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r29, 316(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lfd f27, 424(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lwz r28, 312(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r27, 308(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r26, 304(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lfd f26, 416(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lwz r25, 300(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r24, 296(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r23, 292(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lfd f25, 408(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lwz r22, 288(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r21, 284(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r20, 280(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lfd f24, 400(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lwz r19, 276(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r18, 272(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r17, 268(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lfd f23, 392(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lwz r16, 264(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r15, 260(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lwz r14, 256(r1) # 4-byte Folded Reload
+; 32BIT-NEXT:    lfd f22, 384(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f21, 376(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f20, 368(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f19, 360(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f18, 352(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f17, 344(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f16, 336(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    lfd f15, 328(r1) # 8-byte Folded Reload
+; 32BIT-NEXT:    addi r1, r1, 464
+; 32BIT-NEXT:    lwz r0, 8(r1)
+; 32BIT-NEXT:    mtlr r0
+; 32BIT-NEXT:    blr
+; 32BIT-NEXT:  L..BB0_2: # %lpad
+; 32BIT-NEXT:  L..tmp2:
+; 32BIT-NEXT:    bl .__cxa_begin_catch[PR]
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    lwz r3, 0(r3)
+; 32BIT-NEXT:    add r30, r3, r31
+; 32BIT-NEXT:    bl .__cxa_end_catch[PR]
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    b L..BB0_1
+;
+; 64BIT-LABEL: _Z5test2iPPKc:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    mflr r0
+; 64BIT-NEXT:    stdu r1, -592(r1)
+; 64BIT-NEXT:    std r0, 608(r1)
+; 64BIT-NEXT:    std r30, 440(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    li r30, 0
+; 64BIT-NEXT:    stxv v20, 112(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    stxv v21, 128(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r31, 448(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    mr r31, r3
+; 64BIT-NEXT:    std r14, 312(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v22, 144(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r15, 320(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v23, 160(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r16, 328(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v24, 176(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r17, 336(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    std r18, 344(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v25, 192(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r19, 352(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v26, 208(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r20, 360(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v27, 224(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r21, 368(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    std r22, 376(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v28, 240(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r23, 384(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v29, 256(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r24, 392(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v30, 272(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r25, 400(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    std r26, 408(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stxv v31, 288(r1) # 16-byte Folded Spill
+; 64BIT-NEXT:    std r27, 416(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    std r28, 424(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    std r29, 432(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f15, 456(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f16, 464(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f17, 472(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f18, 480(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f19, 488(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f20, 496(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f21, 504(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f22, 512(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f23, 520(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f24, 528(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f25, 536(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f26, 544(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f27, 552(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f28, 560(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f29, 568(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f30, 576(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    stfd f31, 584(r1) # 8-byte Folded Spill
+; 64BIT-NEXT:    #APP
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    #NO_APP
+; 64BIT-NEXT:  L..tmp0:
+; 64BIT-NEXT:    bl ._Z4testi[PR]
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:  L..tmp1:
+; 64BIT-NEXT:  L..BB0_1: # %return
+; 64BIT-NEXT:    lxv v31, 288(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v30, 272(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v29, 256(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v28, 240(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    extsw r3, r30
+; 64BIT-NEXT:    lxv v27, 224(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v26, 208(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v25, 192(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v24, 176(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v23, 160(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v22, 144(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v21, 128(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lxv v20, 112(r1) # 16-byte Folded Reload
+; 64BIT-NEXT:    lfd f31, 584(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f30, 576(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f29, 568(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f28, 560(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r31, 448(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r30, 440(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r29, 432(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f27, 552(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r28, 424(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r27, 416(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r26, 408(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f26, 544(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r25, 400(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r24, 392(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r23, 384(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f25, 536(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r22, 376(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r21, 368(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r20, 360(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f24, 528(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r19, 352(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r18, 344(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r17, 336(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f23, 520(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r16, 328(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r15, 320(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    ld r14, 312(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f22, 512(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f21, 504(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f20, 496(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f19, 488(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f18, 480(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f17, 472(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f16, 464(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    lfd f15, 456(r1) # 8-byte Folded Reload
+; 64BIT-NEXT:    addi r1, r1, 592
+; 64BIT-NEXT:    ld r0, 16(r1)
+; 64BIT-NEXT:    mtlr r0
+; 64BIT-NEXT:    blr
+; 64BIT-NEXT:  L..BB0_2: # %lpad
+; 64BIT-NEXT:  L..tmp2:
+; 64BIT-NEXT:    bl .__cxa_begin_catch[PR]
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    lwz r3, 0(r3)
+; 64BIT-NEXT:    add r30, r3, r31
+; 64BIT-NEXT:    bl .__cxa_end_catch[PR]
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    b L..BB0_1
+entry:
+  tail call void asm sideeffect "nop", "~{r14},~{f15},~{v20}"()
+  %call = invoke signext i32 @_Z4testi(i32 signext %argc)
+          to label %return unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { ptr, i32 }
+          catch ptr @_ZTIi
+  %1 = extractvalue { ptr, i32 } %0, 1
+  %2 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #3
+  %matches = icmp eq i32 %1, %2
+  br i1 %matches, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad
+  %3 = extractvalue { ptr, i32 } %0, 0
+  %4 = tail call ptr @__cxa_begin_catch(ptr %3) #3
+  %5 = load i32, ptr %4, align 4
+  %add = add nsw i32 %5, %argc
+  tail call void @__cxa_end_catch()
+  br label %return
+
+return:                                           ; preds = %entry, %catch
+  %retval.0 = phi i32 [ %add, %catch ], [ 0, %entry ]
+  ret i32 %retval.0
+
+eh.resume:                                        ; preds = %lpad
+  resume { ptr, i32 } %0
+}
+
+declare signext i32 @_Z4testi(i32 signext) local_unnamed_addr
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(ptr)
+
+declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr
+
+declare void @__cxa_end_catch() local_unnamed_addr
+
+attributes #0 = { uwtable }
diff --git a/llvm/test/CodeGen/PowerPC/aix32-crsave.mir b/llvm/test/CodeGen/PowerPC/aix32-crsave.mir
index cf51f79c7e9893..73736d6d5353fb 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-crsave.mir
+++ b/llvm/test/CodeGen/PowerPC/aix32-crsave.mir
@@ -18,23 +18,33 @@ body:             |
     BLR implicit $lr, implicit $rm, implicit $r3
 
     ; CHECK-LABEL:  fixedStack:
-    ; CHECK-NEXT:   - { id: 0, type: spill-slot, offset: -12, size: 4, alignment: 4, stack-id: default,
+    ; CHECK-NEXT:   - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: default, 
+    ; CHECK-NEXT:       callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '', 
+    ; CHECK-NEXT:       debug-info-expression: '', debug-info-location: '' }
+    ; CHECK-NEXT:   - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 8, stack-id: default, 
+    ; CHECK-NEXT:       callee-saved-register: '$r30', callee-saved-restored: true, debug-info-variable: '', 
+    ; CHECK-NEXT:       debug-info-expression: '', debug-info-location: '' }
+    ; CHECK-NEXT:   - { id: 2, type: spill-slot, offset: -12, size: 4, alignment: 4, stack-id: default,
     ; CHECK-NEXT:       callee-saved-register: '$r29', callee-saved-restored: true, debug-info-variable: '',
     ; CHECK-NEXT:       debug-info-expression: '', debug-info-location: '' }
-    ; CHECK-NEXT:   - { id: 1, type: default, offset: 4, size: 4, alignment: 4, stack-id: default,
+    ; CHECK-NEXT:   - { id: 3, type: default, offset: 4, size: 4, alignment: 4, stack-id: default,
     ; CHECK-NEXT:       isImmutable: true, isAliased: false, callee-saved-register: '$cr4',
     ; CHECK-NEXT:       callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
     ; CHECK-NEXT:       debug-info-location: '' }
     ; CHECK-LABEL:  stack:
 
     ; CHECK:      bb.0.entry:
-    ; CHECK-NEXT:  liveins: $r3, $r29, $cr2, $cr4
+    ; CHECK-NEXT:  liveins: $r3, $r29, $r30, $r31, $cr2, $cr4
 
     ; CHECK:      $r12 = MFCR implicit killed $cr2, implicit killed $cr4
     ; CHECK-NEXT: STW killed $r12, 4, $r1
-    ; CHECK-NEXT: STW killed $r29, -12, $r1 :: (store (s32) into %fixed-stack.0)
+    ; CHECK-NEXT: STW killed $r29, -12, $r1 :: (store (s32) into %fixed-stack.2)
+    ; CHECK-NEXT: STW killed $r30, -8, $r1 :: (store (s32) into %fixed-stack.1, align 8)
+    ; CHECK-NEXT: STW killed $r31, -4, $r1 :: (store (s32) into %fixed-stack.0)
 
-    ; CHECK:      $r29 = LWZ -12, $r1 :: (load (s32) from %fixed-stack.0)
+    ; CHECK:      $r31 = LWZ -4, $r1 :: (load (s32) from %fixed-stack.0)
+    ; CHECK-NEXT: $r30 = LWZ -8, $r1 :: (load (s32) from %fixed-stack.1, align 8)
+    ; CHECK-NEXT: $r29 = LWZ -12, $r1 :: (load (s32) from %fixed-stack.2)
     ; CHECK-NEXT: $r12 = LWZ 4, $r1
     ; CHECK-NEXT: $cr2 = MTOCRF $r12
     ; CHECK-NEXT: $cr4 = MTOCRF killed $r12
@@ -49,14 +59,14 @@ liveins:
 body:             |
   bb.0.entry:
     liveins: $r3
-    renamable $r14 = ANDI_rec killed renamable $r3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    renamable $r31 = ANDI_rec killed renamable $r3, 1, implicit-def dead $cr0, implicit-def $cr0gt
     renamable $cr3lt = COPY $cr0gt
-    renamable $r3 = COPY $r14
+    renamable $r3 = COPY $r31
     BLR implicit $lr, implicit $rm, implicit $r3
 
     ; CHECK-LABEL: fixedStack:
-    ; CHECK-NEXT:  - { id: 0, type: spill-slot, offset: -72, size: 4, alignment: 8, stack-id: default,
-    ; CHECK-NEXT:      callee-saved-register: '$r14', callee-saved-restored: true, debug-info-variable: '',
+    ; CHECK-NEXT:  - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: default,
+    ; CHECK-NEXT:      callee-saved-register: '$r31', callee-saved-restored: true, debug-info-variable: '',
     ; CHECK-NEXT:      debug-info-expression: '', debug-info-location: '' }
     ; CHECK-NEXT:  - { id: 1, type: default, offset: 4, size: 4, alignment: 4, stack-id: default,
     ; CHECK-NEXT:      isImmutable: true, isAliased: false, callee-saved-register: '$cr3',
@@ -65,12 +75,12 @@ body:             |
     ; CHECK-LABEL: stack:
 
     ; CHECK:      bb.0.entry:
-    ; CHECK-NEXT:   liveins: $r3, $r14, $cr3
+    ; CHECK-NEXT:   liveins: $r3, $r31, $cr3
 
     ; CHECK:      $r12 = MFCR implicit killed $cr3
     ; CHECK-NEXT: STW killed $r12, 4, $r1
-    ; CHECK-NEXT: STW killed $r14, -72, $r1 :: (store (s32) into %fixed-stack.0, align 8)
+    ; CHECK-NEXT: STW killed $r31, -4, $r1 :: (store (s32) into %fixed-stack.0)
 
-    ; CHECK:      $r14 = LWZ -72, $r1 :: (load (s32) from %fixed-stack.0, align 8)
+    ; CHECK:      $r31 = LWZ -4, $r1 :: (load (s32) from %fixed-stack.0)
     ; CHECK-NEXT: $r12 = LWZ 4, $r1
     ; CHECK-NEXT: $cr3 = MTOCRF killed $r12
diff --git a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
index f22aeffdbb466a..412cb758ad602c 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
@@ -31,7 +31,7 @@
 ; After the prologue is set.
 ; DISABLE: cmpw 3, 4
 ; DISABLE-32: stw 0,
-; DISABLE-64-AIX: std 0, 
+; DISABLE-64-AIX: std 0,
 ; DISABLE-NEXT: bge 0, {{.*}}[[EXIT_LABEL:BB[0-9_]+]]
 ;
 ; Store %a on the stack
@@ -421,14 +421,14 @@ entry:
 ; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
 ;
 ; Prologue code.
-; Make sure we save the CSR used in the inline asm: r14
+; Make sure we save the CSR used in the inline asm: r31
 ; ENABLE-DAG: li [[IV:[0-9]+]], 10
-; ENABLE-64-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
-; ENABLE-32-DAG: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
+; ENABLE-64-DAG: std 31, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
+; ENABLE-32-DAG: stw 31, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
 ;
 ; DISABLE: cmplwi 3, 0
-; DISABLE-64-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
-; DISABLE-32-NEXT: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
+; DISABLE-64-NEXT: std 31, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
+; DISABLE-32-NEXT: stw 31, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
 ; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
 ; DISABLE: li [[IV:[0-9]+]], 10
 ;
@@ -437,20 +437,20 @@ entry:
 ;
 ; CHECK: {{.*}}[[LOOP_LABEL:BB[0-9_]+]]: # %for.body
 ; Inline asm statement.
-; CHECK: addi 14, 14, 1
+; CHECK: addi 31, 14, 1
 ; CHECK: bdnz {{.*}}[[LOOP_LABEL]]
 ;
 ; Epilogue code.
 ; CHECK: li 3, 0
-; CHECK-64-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
-; CHECK-32-DAG: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
+; CHECK-64-DAG: ld 31, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
+; CHECK-32-DAG: lwz 31, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
 ; CHECK-DAG: nop
 ; CHECK: blr
 ;
 ; CHECK: [[ELSE_LABEL]]
 ; CHECK-NEXT: slwi 3, 4, 1
-; DISABLE-64-NEXT: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
-; DISABLE-32-NEXT: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
+; DISABLE-64-NEXT: ld 31, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
+; DISABLE-32-NEXT: lwz 31, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
 ; CHECK-NEXT: blr
 define i32 @inlineAsm(i32 %cond, i32 %N) {
 entry:
@@ -463,7 +463,7 @@ for.preheader:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
-  tail call void asm "addi 14, 14, 1", "~{r14}"()
+  tail call void asm "addi 31, 14, 1", "~{r31}"()
   %inc = add nuw nsw i32 %i.03, 1
   %exitcond = icmp eq i32 %inc, 10
   br i1 %exitcond, label %for.exit, label %for.body
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir
index f4af2ad21a5675..196ad134bfa555 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir
+++ b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir
@@ -1,15 +1,15 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple powerpc64le-unknown-linux-gnu -x mir -mcpu=pwr8 -mattr=-altivec \
-# RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \
-# RUN: FileCheck %s --check-prefixes=CHECK,SAVEONE
+# RUN: -run-pass=prologepilog --verify-machineinstrs %s -o - | \
+# RUN: FileCheck %s --check-prefix=SAVEONE
 
 # RUN: llc -mtriple powerpc64-unknown-linux-gnu -x mir -mcpu=pwr7 -mattr=-altivec \
-# RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \
-# RUN: FileCheck %s --check-prefixes=CHECK,SAVEALL
-
+# RUN: -run-pass=prologepilog --verify-machineinstrs %s -o - | \
+# RUN: FileCheck %s --check-prefix=SAVEALL
 
 # RUN: llc -mtriple powerpc64-unknown-aix-xcoff -x mir -mcpu=pwr4 -mattr=-altivec \
-# RUN: -run-pass=prologepilog --verify-machineinstrs < %s | \
-# RUN: FileCheck %s --check-prefixes=CHECK,SAVEALL
+# RUN: -run-pass=prologepilog --verify-machineinstrs %s -o - | \
+# RUN: FileCheck %s --check-prefix=SAVEALL
 
 ---
 name:            CRAllSave
@@ -20,33 +20,39 @@ liveins:
 body:             |
   bb.0.entry:
     liveins: $x3
-    renamable $x29 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    ; SAVEONE-LABEL: name: CRAllSave
+    ; SAVEONE: liveins: $x3, $cr2, $cr4
+    ; SAVEONE-NEXT: {{  $}}
+    ; SAVEONE-NEXT: $x12 = MFCR8 implicit killed $cr2, implicit killed $cr4
+    ; SAVEONE-NEXT: STW8 killed $x12, 8, $x1
+    ; SAVEONE-NEXT: renamable $x3 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    ; SAVEONE-NEXT: renamable $cr2lt = COPY $cr0gt
+    ; SAVEONE-NEXT: renamable $cr4lt = COPY $cr0gt
+    ; SAVEONE-NEXT: $x12 = LWZ8 8, $x1
+    ; SAVEONE-NEXT: $cr2 = MTOCRF8 $x12
+    ; SAVEONE-NEXT: $cr4 = MTOCRF8 killed $x12
+    ; SAVEONE-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
+    ;
+    ; SAVEALL-LABEL: name: CRAllSave
+    ; SAVEALL: liveins: $x3, $cr2, $cr4
+    ; SAVEALL-NEXT: {{  $}}
+    ; SAVEALL-NEXT: $x12 = MFCR8 implicit killed $cr2, implicit killed $cr4
+    ; SAVEALL-NEXT: STW8 killed $x12, 8, $x1
+    ; SAVEALL-NEXT: renamable $x3 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    ; SAVEALL-NEXT: renamable $cr2lt = COPY $cr0gt
+    ; SAVEALL-NEXT: renamable $cr4lt = COPY $cr0gt
+    ; SAVEALL-NEXT: $x12 = LWZ8 8, $x1
+    ; SAVEALL-NEXT: $cr2 = MTOCRF8 $x12
+    ; SAVEALL-NEXT: $cr4 = MTOCRF8 killed $x12
+    ; SAVEALL-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
+    renamable $x3 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
     renamable $cr2lt = COPY $cr0gt
     renamable $cr4lt = COPY $cr0gt
-    renamable $x3 = COPY $x29
     BLR8 implicit $lr8, implicit $rm, implicit $x3
 
-    ; CHECK-LABEL: fixedStack:
-    ; CHECK-NEXT:     - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, stack-id: default,
-    ; CHECK-NEXT:         callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '',
-    ; CHECK-NEXT:         debug-info-expression: '', debug-info-location: '' }
-    ; CHECK-NEXT:     - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default,
-    ; CHECK-NEXT:         isImmutable: true, isAliased: false, callee-saved-register: '$cr4',
-    ; CHECK-NEXT:         callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
-    ; CHECK-NEXT:         debug-info-location: '' }
-    ; CHECK-LABEL:  stack:
 
-    ; Verify the proper live-ins have been added in the prologue.
-    ; CHECK:    liveins: $x3, $x29, $cr2, $cr4
 
-    ; CHECK:     $x12 = MFCR8 implicit killed $cr2, implicit killed $cr4
-    ; CHECK-DAG: STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.0)
-    ; CHECK-DAG: STW8 killed $x12, 8, $x1
 
-    ; CHECK:     $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.0)
-    ; CHECK:     $x12 = LWZ8 8, $x1
-    ; CHECK:     $cr2 = MTOCRF8 $x12
-    ; CHECK:     $cr4 = MTOCRF8 killed $x12
 
 ...
 ---
@@ -58,37 +64,36 @@ liveins:
 body:             |
   bb.0.entry:
     liveins: $x3
-    renamable $x14 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    ; SAVEONE-LABEL: name: CR2Save
+    ; SAVEONE: liveins: $x3, $cr2
+    ; SAVEONE-NEXT: {{  $}}
+    ; SAVEONE-NEXT: $x12 = MFOCRF8 killed $cr2
+    ; SAVEONE-NEXT: STW8 killed $x12, 8, $x1
+    ; SAVEONE-NEXT: renamable $x3 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    ; SAVEONE-NEXT: renamable $cr2lt = COPY $cr0gt
+    ; SAVEONE-NEXT: $x12 = LWZ8 8, $x1
+    ; SAVEONE-NEXT: $cr2 = MTOCRF8 killed $x12
+    ; SAVEONE-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
+    ;
+    ; SAVEALL-LABEL: name: CR2Save
+    ; SAVEALL: liveins: $x3, $cr2
+    ; SAVEALL-NEXT: {{  $}}
+    ; SAVEALL-NEXT: $x12 = MFCR8 implicit killed $cr2
+    ; SAVEALL-NEXT: STW8 killed $x12, 8, $x1
+    ; SAVEALL-NEXT: renamable $x3 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
+    ; SAVEALL-NEXT: renamable $cr2lt = COPY $cr0gt
+    ; SAVEALL-NEXT: $x12 = LWZ8 8, $x1
+    ; SAVEALL-NEXT: $cr2 = MTOCRF8 killed $x12
+    ; SAVEALL-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
+    renamable $x3 = ANDI8_rec killed renamable $x3, 1, implicit-def dead $cr0, implicit-def $cr0gt
     renamable $cr2lt = COPY $cr0gt
-    renamable $x3 = COPY $x14
     BLR8 implicit $lr8, implicit $rm, implicit $x3
 
-    ; CHECK-LABEL: CR2Save
 
-    ; CHECK-LABEL: fixedStack:
-    ; CHECK-NEXT:   - { id: 0, type: spill-slot, offset: -144, size: 8, alignment: 16, stack-id: default,
-    ; CHECK-NEXT:       callee-saved-register: '$x14', callee-saved-restored: true, debug-info-variable: '',
-    ; CHECK-NEXT:       debug-info-expression: '', debug-info-location: '' }
-    ; CHECK-NEXT:   - { id: 1, type: default, offset: 8, size: 4, alignment: 8, stack-id: default,
-    ; CHECK-NEXT:       isImmutable: true, isAliased: false, callee-saved-register: '$cr2',
-    ; CHECK-NEXT:       callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
-    ; CHECK-NEXT:       debug-info-location: '' }
-    ; CHECK-LABEL:  stack:
 
-    ; Verify the proper live-ins have been added in the prologue.
-    ; CHECK:    liveins: $x3, $x14, $cr2
 
-    ; ELF V2 ABI allows saving only the clobbered cr fields,
-    ; whereas the other ABIs do not.
-    ; SAVEONE:     $x12 = MFOCRF8 killed $cr2
-    ; SAVEALL:     $x12 = MFCR8 implicit killed $cr2
 
-    ; CHECK-DAG: STD killed $x14, -144, $x1 :: (store (s64) into %fixed-stack.0, align 16)
-    ; CHECK-DAG: STW8 killed $x12, 8, $x1
 
-    ; CHECK:     $x14 = LD -144, $x1 :: (load (s64) from %fixed-stack.0, align 16)
-    ; CHECK:     $x12 = LWZ8 8, $x1
-    ; CHECK:     $cr2 = MTOCRF8 killed $x12
 
 
 ...

From 082c81ae4ab9db6bb0acd52098288223dd58501a Mon Sep 17 00:00:00 2001
From: Florian Hahn 
Date: Tue, 7 May 2024 21:31:40 +0100
Subject: [PATCH 21/64] [LV] Properly extend versioned constant strides.

We only version unknown strides to 1. If the original type is i1, then
the sign of the extension matters. Properly extend the stride value
before replacing it.

Fixes https://github.com/llvm/llvm-project/issues/91369.
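
For intuition, here is a minimal standalone sketch (not part of this patch,
and relying only on LLVM's public APInt API) of why the extension kind
matters for an i1 stride: the single set bit becomes -1 under sign extension
but 1 under zero extension, which is exactly the difference between a SExt
user and a ZExt user of the versioned stride.

```
// Minimal sketch; build against LLVMSupport.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::APInt Stride(/*numBits=*/1, /*val=*/1); // an i1 stride with value 1
  llvm::APInt SExt = Stride.sext(16);           // all ones: -1 as i16
  llvm::APInt ZExt = Stride.zext(16);           // 0x0001:    1 as i16
  llvm::outs() << "sext: " << SExt.getSExtValue()   // prints -1
               << ", zext: " << ZExt.getZExtValue() // prints 1
               << "\n";
  return 0;
}
```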
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp             | 6 ++++--
 .../LoopVectorize/version-stride-with-integer-casts.ll      | 3 +--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3be0102bea3e34..261933966b74b4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8841,8 +8841,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
       VPValue *StrideVPV = Plan->getLiveIn(U);
       if (!StrideVPV)
         continue;
-      VPValue *CI = Plan->getOrAddLiveIn(ConstantInt::get(
-          U->getType(), ScevStride->getAPInt().getSExtValue()));
+      unsigned BW = U->getType()->getScalarSizeInBits();
+      APInt C = isa<SExtInst>(U) ? ScevStride->getAPInt().sext(BW)
+                                 : ScevStride->getAPInt().zext(BW);
+      VPValue *CI = Plan->getOrAddLiveIn(ConstantInt::get(U->getType(), C));
       StrideVPV->replaceAllUsesWith(CI);
     }
   }
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 45745f85de95c7..45596169da3cc0 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -415,7 +415,6 @@ exit:
 
 ; Test case to make sure that uses of versioned strides of type i1 are properly
 ; extended. From https://github.com/llvm/llvm-project/issues/91369.
-; FIXME: Currently miscompiled.
 define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-LABEL: define void @zext_of_i1_stride(
 ; CHECK-SAME: i1 [[G:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -441,7 +440,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, ptr [[TMP5]], align 2
+; CHECK-NEXT:    store <4 x i16> <i16 1, i16 1, i16 1, i16 1>, ptr [[TMP5]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

From 0b50d095bccbd47c77e5ad2b03b09b41b696c4a0 Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Tue, 7 May 2024 16:44:00 -0400
Subject: [PATCH 22/64] [AMDGPU] Don't optimize agpr phis if the operand
 doesn't have subreg use (#91267)

If the operand doesn't have any subreg use, the optimization could
generate a `V_ACCVGPR_READ_B32_e64` with the wrong register class. The
following example demonstrates the issue.

Input MIR:

```
bb.0:
  %0:sgpr_32 = S_MOV_B32 0
  %1:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %0:sgpr_32, %subreg.sub1, %0:sgpr_32, %subreg.sub2, %0:sgpr_32, %subreg.sub3
  %2:vreg_128 = COPY %1:sgpr_128
  %3:areg_128 = COPY %2:vreg_128, implicit $exec

bb.1:
  %4:areg_128 = PHI %3:areg_128, %bb.0, %6:areg_128, %bb.1
  %5:areg_128 = PHI %3:areg_128, %bb.0, %7:areg_128, %bb.1
  ...
```

Output of current implementation:

```
bb.0:
  %0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
  %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
  %2:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
  %3:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
  %4:areg_128 = REG_SEQUENCE %0:agpr_32, %subreg.sub0, %1:agpr_32, %subreg.sub1, %2:agpr_32, %subreg.sub2, %3:agpr_32, %subreg.sub3
  %5:vreg_128 = V_ACCVGPR_READ_B32_e64 %4:areg_128, implicit $exec
  %6:areg_128 = COPY %5:vreg_128

bb.1:
  %7:areg_128 = PHI %6:areg_128, %bb.0, %9:areg_128, %bb.1
  %8:areg_128 = PHI %6:areg_128, %bb.0, %10:areg_128, %bb.1
  ...
```

The problem is the generated `V_ACCVGPR_READ_B32_e64` instruction:
the operand `%4:areg_128` is not a valid source operand for it.

In this patch, we don't count non-subreg uses, because
`V_ACCVGPR_READ_B32_e64` cannot handle operands that are not 32 bits wide.

Fixes: SWDEV-459556
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp   |  2 +
 llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir | 83 ++++++++++++++++++++-
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index cb448aaafa4c08..5c411a0955878f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -2106,6 +2106,8 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
 
     for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
       MachineOperand &PhiMO = MI.getOperand(K);
+      if (!PhiMO.getSubReg())
+        continue;
       RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO);
     }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
index a32b3d0f1e6b35..e94546fd5e8a51 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
@@ -465,7 +465,6 @@ body: |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   S_ENDPGM 0
   bb.0:
-    ; Tests that tryOptimizeAGPRPhis kicks in for GFX908.
     liveins: $sgpr0, $scc
     successors: %bb.1
 
@@ -715,3 +714,85 @@ body: |
   bb.3:
     S_ENDPGM 0
 ...
+
+---
+name:            skip_optimize_agpr_phi_without_subreg_use
+tracksRegLiveness: true
+body:             |
+  ; GFX908-LABEL: name: skip_optimize_agpr_phi_without_subreg_use
+  ; GFX908: bb.0:
+  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX908-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; GFX908-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX908-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX908-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX908-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.1:
+  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI:%[0-9]+]]:areg_128_align2 = PHI [[REG_SEQUENCE]], %bb.0, %7, %bb.1
+  ; GFX908-NEXT:   [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   S_ENDPGM 0
+  ;
+  ; GFX90A-LABEL: name: skip_optimize_agpr_phi_without_subreg_use
+  ; GFX90A: bb.0:
+  ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX90A-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; GFX90A-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX90A-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX90A-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX90A-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.1:
+  ; GFX90A-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI:%[0-9]+]]:areg_128_align2 = PHI [[REG_SEQUENCE]], %bb.0, %7, %bb.1
+  ; GFX90A-NEXT:   [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.2:
+  ; GFX90A-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $scc
+    successors: %bb.1
+
+    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %1:sgpr_32 = S_MOV_B32 0
+    %2:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3
+    %3:vreg_128 = COPY %2
+    %4:sreg_64 = S_MOV_B64 0
+    %5:areg_128_align2 = COPY %3, implicit $exec
+
+  bb.1:
+    liveins: $scc
+    successors: %bb.1, %bb.2
+
+    %9:areg_128_align2 = PHI %5, %bb.0, %10, %bb.1
+    %11:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 %0:vgpr_32, %0:vgpr_32, %9:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
+    %12:vgpr_32 = COPY %11.sub3
+    %13:vgpr_32 = COPY %11.sub2
+    %14:vgpr_32 = COPY %11.sub1
+    %15:vgpr_32 = COPY %11.sub0
+    %10:areg_128_align2 = COPY %11, implicit $exec
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+  bb.2:
+    S_ENDPGM 0
+
+...

From 272ea28bdec93b33527dc54edbdef8f43c51df47 Mon Sep 17 00:00:00 2001
From: Adrian Prantl 
Date: Tue, 7 May 2024 12:57:43 -0700
Subject: [PATCH 23/64] Remove else-after-break (NFC)

---
 lldb/source/Expression/UserExpression.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp
index 5658426c88912d..06fdb7007cedef 100644
--- a/lldb/source/Expression/UserExpression.cpp
+++ b/lldb/source/Expression/UserExpression.cpp
@@ -308,17 +308,16 @@ UserExpression::Evaluate(ExecutionContext &exe_ctx,
           diagnostic_manager.Clear();
           user_expression_sp = fixed_expression_sp;
           break;
+        }
+        // The fixed expression also didn't parse. Let's check for any new
+        // fixits we could try.
+        if (!fixed_expression_sp->GetFixedText().empty()) {
+          *fixed_expression = fixed_expression_sp->GetFixedText().str();
         } else {
-          // The fixed expression also didn't parse. Let's check for any new
-          // Fix-Its we could try.
-          if (!fixed_expression_sp->GetFixedText().empty()) {
-            *fixed_expression = fixed_expression_sp->GetFixedText().str();
-          } else {
-            // Fixed expression didn't compile without a fixit, don't retry and
-            // don't tell the user about it.
-            fixed_expression->clear();
-            break;
-          }
+          // Fixed expression didn't compile without a fixit, don't retry and
+          // don't tell the user about it.
+          fixed_expression->clear();
+          break;
         }
       }
     }

From 8c4d7989c2b4a7e251afc3b13002611646de90b6 Mon Sep 17 00:00:00 2001
From: Adrian Prantl 
Date: Tue, 7 May 2024 12:58:20 -0700
Subject: [PATCH 24/64] Add a missing check for nullptr

This can't happen with Clang, but I've seen a crash report from the
Swift plugin where it did.

rdar://126564844
---
 lldb/source/Expression/UserExpression.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp
index 06fdb7007cedef..b78f439957673f 100644
--- a/lldb/source/Expression/UserExpression.cpp
+++ b/lldb/source/Expression/UserExpression.cpp
@@ -300,6 +300,8 @@ UserExpression::Evaluate(ExecutionContext &exe_ctx,
             target->GetUserExpressionForLanguage(
                 fixed_expression->c_str(), full_prefix, language, desired_type,
                 options, ctx_obj, error));
+        if (!fixed_expression_sp)
+          break;
         DiagnosticManager fixed_diagnostic_manager;
         parse_success = fixed_expression_sp->Parse(
             fixed_diagnostic_manager, exe_ctx, execution_policy,

From a70ad96b3cc5275246f7f007d1892bb867b75bc0 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Tue, 7 May 2024 13:45:58 -0700
Subject: [PATCH 25/64] [AMDGPU] Fix condition in VOP3_Real_Base. NFCI.
 (#91373)

---
 llvm/lib/Target/AMDGPU/VOPInstructions.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 74988ac634c31e..d974aacd7d4585 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1410,7 +1410,7 @@ multiclass VOP3_Real_Base op, string opName = NAME,
       def _e64#Gen.Suffix :
         VOP3_Real_Gen,
         VOP3FP8OpSel_dst_bytesel_gfx11_gfx12;
-    } if ps.Pfl.HasOpSel then {
+    } else if ps.Pfl.HasOpSel then {
       def _e64#Gen.Suffix :
         VOP3_Real_Gen,
         VOP3OpSel_gfx11_gfx12;

From 2ad6917c4c524576405f2146424911fd9adb3528 Mon Sep 17 00:00:00 2001
From: Ellis Hoag 
Date: Tue, 7 May 2024 13:55:44 -0700
Subject: [PATCH 26/64] [modules] Accept equivalent module caches from
 different symlink (#90925)

Use `VFS.equivalent()`, which follows symlinks, to check if two module
cache paths are equivalent. This prevents a PCH error when building from
a different path that is a symlink of the original.

```
error: PCH was compiled with module cache path '/home/foo/blah/ModuleCache/2IBP1TNT8OR8D', but the path is currently '/data/users/foo/blah/ModuleCache/2IBP1TNT8OR8D'
1 error generated.
```
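
For reference, a minimal usage sketch of the `FileSystem::equivalent()` helper
added below (this is not part of the patch and assumes the patch is applied;
the two paths are hypothetical, with one being a symlink to the other, so the
real VFS resolves them to the same directory):

```
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS =
      llvm::vfs::getRealFileSystem();
  // Hypothetical paths: /home/foo/blah is a symlink to /data/users/foo/blah,
  // so status() resolves both names to the same underlying directory.
  llvm::ErrorOr<bool> Eq = FS->equivalent("/home/foo/blah/ModuleCache",
                                          "/data/users/foo/blah/ModuleCache");
  if (Eq && *Eq)
    llvm::outs() << "module cache paths are equivalent\n";
  return 0;
}
```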
---
 clang/lib/Serialization/ASTReader.cpp         | 53 ++++++++++---------
 clang/test/Modules/module-symlink.m           | 14 +++++
 llvm/include/llvm/Support/VirtualFileSystem.h |  4 ++
 llvm/lib/Support/VirtualFileSystem.cpp        | 10 ++++
 4 files changed, 55 insertions(+), 26 deletions(-)
 create mode 100644 clang/test/Modules/module-symlink.m

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index b4b2f999d22597..856c743086c516 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -829,36 +829,37 @@ bool SimpleASTReaderListener::ReadPreprocessorOptions(
                                   OptionValidateNone);
 }
 
-/// Check the header search options deserialized from the control block
-/// against the header search options in an existing preprocessor.
+/// Check that the specified and the existing module cache paths are equivalent.
 ///
 /// \param Diags If non-null, produce diagnostics for any mismatches incurred.
-static bool checkHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
-                                     StringRef SpecificModuleCachePath,
-                                     StringRef ExistingModuleCachePath,
-                                     DiagnosticsEngine *Diags,
-                                     const LangOptions &LangOpts,
-                                     const PreprocessorOptions &PPOpts) {
-  if (LangOpts.Modules) {
-    if (SpecificModuleCachePath != ExistingModuleCachePath &&
-        !PPOpts.AllowPCHWithDifferentModulesCachePath) {
-      if (Diags)
-        Diags->Report(diag::err_pch_modulecache_mismatch)
-          << SpecificModuleCachePath << ExistingModuleCachePath;
-      return true;
-    }
-  }
-
-  return false;
+/// \returns true when the module cache paths differ.
+static bool checkModuleCachePath(llvm::vfs::FileSystem &VFS,
+                                 StringRef SpecificModuleCachePath,
+                                 StringRef ExistingModuleCachePath,
+                                 DiagnosticsEngine *Diags,
+                                 const LangOptions &LangOpts,
+                                 const PreprocessorOptions &PPOpts) {
+  if (!LangOpts.Modules || PPOpts.AllowPCHWithDifferentModulesCachePath ||
+      SpecificModuleCachePath == ExistingModuleCachePath)
+    return false;
+  auto EqualOrErr =
+      VFS.equivalent(SpecificModuleCachePath, ExistingModuleCachePath);
+  if (EqualOrErr && *EqualOrErr)
+    return false;
+  if (Diags)
+    Diags->Report(diag::err_pch_modulecache_mismatch)
+        << SpecificModuleCachePath << ExistingModuleCachePath;
+  return true;
 }
 
 bool PCHValidator::ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
                                            StringRef SpecificModuleCachePath,
                                            bool Complain) {
-  return checkHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
-                                  PP.getHeaderSearchInfo().getModuleCachePath(),
-                                  Complain ? &Reader.Diags : nullptr,
-                                  PP.getLangOpts(), PP.getPreprocessorOpts());
+  return checkModuleCachePath(Reader.getFileManager().getVirtualFileSystem(),
+                              SpecificModuleCachePath,
+                              PP.getHeaderSearchInfo().getModuleCachePath(),
+                              Complain ? &Reader.Diags : nullptr,
+                              PP.getLangOpts(), PP.getPreprocessorOpts());
 }
 
 void PCHValidator::ReadCounter(const ModuleFile &M, unsigned Value) {
@@ -5376,9 +5377,9 @@ namespace {
     bool ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
                                  StringRef SpecificModuleCachePath,
                                  bool Complain) override {
-      return checkHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
-                                      ExistingModuleCachePath, nullptr,
-                                      ExistingLangOpts, ExistingPPOpts);
+      return checkModuleCachePath(
+          FileMgr.getVirtualFileSystem(), SpecificModuleCachePath,
+          ExistingModuleCachePath, nullptr, ExistingLangOpts, ExistingPPOpts);
     }
 
     bool ReadPreprocessorOptions(const PreprocessorOptions &PPOpts,
diff --git a/clang/test/Modules/module-symlink.m b/clang/test/Modules/module-symlink.m
new file mode 100644
index 00000000000000..efdaf3db0dfef2
--- /dev/null
+++ b/clang/test/Modules/module-symlink.m
@@ -0,0 +1,14 @@
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules-cache-path=%t/modules -fmodules -fimplicit-module-maps -I %S/Inputs -emit-pch -o %t.pch %s -verify
+
+// RUN: ln -s %t/modules %t/modules.symlink
+// RUN: %clang_cc1 -fmodules-cache-path=%t/modules.symlink -fmodules -fimplicit-module-maps -I %S/Inputs -include-pch %t.pch %s -verify
+// RUN: not %clang_cc1 -fmodules-cache-path=%t/modules.dne -fmodules -fimplicit-module-maps -I %S/Inputs -include-pch %t.pch %s -verify
+
+// expected-no-diagnostics
+
+@import ignored_macros;
+
+struct Point p;
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 49e67e7555a0db..a1e38de74dfcc4 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -320,6 +320,10 @@ class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem>,
   ///          platform-specific error_code.
   virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
 
+  /// \returns true if \p A and \p B represent the same file, or an error or
+  /// false if they do not.
+  llvm::ErrorOr<bool> equivalent(const Twine &A, const Twine &B);
+
   enum class PrintType { Summary, Contents, RecursiveContents };
   void print(raw_ostream &OS, PrintType Type = PrintType::Contents,
              unsigned IndentLevel = 0) const {
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 152fcfe695b297..54b9c38f760955 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -151,6 +151,16 @@ bool FileSystem::exists(const Twine &Path) {
   return Status && Status->exists();
 }
 
+llvm::ErrorOr<bool> FileSystem::equivalent(const Twine &A, const Twine &B) {
+  auto StatusA = status(A);
+  if (!StatusA)
+    return StatusA.getError();
+  auto StatusB = status(B);
+  if (!StatusB)
+    return StatusB.getError();
+  return StatusA->equivalent(*StatusB);
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void FileSystem::dump() const { print(dbgs(), PrintType::RecursiveContents); }
 #endif

From bc8a42762057d7036f6871211e62b1c3efb2738a Mon Sep 17 00:00:00 2001
From: Marian Buschsieweke 
Date: Tue, 7 May 2024 22:58:13 +0200
Subject: [PATCH 27/64] [MSP430][Clang] Update list of MCUs (#91258)

This updates the list of MSP430 MCUs from TI's devices.csv, obtained from [1] under the "Header and Support Files" link. A simple Python script was used to generate this list and is included as well.

[1]: https://www.ti.com/tool/MSP430-GCC-OPENSOURCE#downloads
---
 clang/include/clang/Basic/MSP430Target.def    | 583 +++++++++++++++---
 .../Basic/Target/MSP430/gen-msp430-def.py     | 126 ++++
 2 files changed, 626 insertions(+), 83 deletions(-)
 create mode 100755 clang/include/clang/Basic/Target/MSP430/gen-msp430-def.py

diff --git a/clang/include/clang/Basic/MSP430Target.def b/clang/include/clang/Basic/MSP430Target.def
index 7a10be1d54c8d6..8fd44e3ba8e719 100644
--- a/clang/include/clang/Basic/MSP430Target.def
+++ b/clang/include/clang/Basic/MSP430Target.def
@@ -8,6 +8,10 @@
 //
 // This file defines the MSP430 devices and their features.
 //
+// Generated from TI's devices.csv in version 1.212 using the script in
+// Target/MSP430/gen-msp430-def.py - use this tool rather than adding
+// new MCUs by hand.
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef MSP430_MCU_FEAT
@@ -24,7 +28,19 @@ MSP430_MCU("msp430c112")
 MSP430_MCU("msp430c1121")
 MSP430_MCU("msp430c1331")
 MSP430_MCU("msp430c1351")
+MSP430_MCU("msp430c311s")
+MSP430_MCU("msp430c312")
+MSP430_MCU("msp430c313")
+MSP430_MCU("msp430c314")
+MSP430_MCU("msp430c315")
+MSP430_MCU("msp430c323")
+MSP430_MCU("msp430c325")
+MSP430_MCU("msp430c412")
+MSP430_MCU("msp430c413")
 MSP430_MCU("msp430e112")
+MSP430_MCU("msp430e313")
+MSP430_MCU("msp430e315")
+MSP430_MCU("msp430e325")
 MSP430_MCU("msp430f110")
 MSP430_MCU("msp430f1101")
 MSP430_MCU("msp430f1101a")
@@ -44,7 +60,6 @@ MSP430_MCU("msp430f135")
 MSP430_MCU("msp430f155")
 MSP430_MCU("msp430f156")
 MSP430_MCU("msp430f157")
-MSP430_MCU("msp430p112")
 MSP430_MCU("msp430f2001")
 MSP430_MCU("msp430f2011")
 MSP430_MCU("msp430f2002")
@@ -64,6 +79,58 @@ MSP430_MCU("msp430f2272")
 MSP430_MCU("msp430f2234")
 MSP430_MCU("msp430f2254")
 MSP430_MCU("msp430f2274")
+MSP430_MCU("msp430f412")
+MSP430_MCU("msp430f413")
+MSP430_MCU("msp430f415")
+MSP430_MCU("msp430f417")
+MSP430_MCU("msp430f4132")
+MSP430_MCU("msp430f4152")
+MSP430_MCU("msp430f435")
+MSP430_MCU("msp430f436")
+MSP430_MCU("msp430f437")
+MSP430_MCU("msp430f4351")
+MSP430_MCU("msp430f4361")
+MSP430_MCU("msp430f4371")
+MSP430_MCU("msp430fe423")
+MSP430_MCU("msp430fe425")
+MSP430_MCU("msp430fe427")
+MSP430_MCU("msp430fe423a")
+MSP430_MCU("msp430fe425a")
+MSP430_MCU("msp430fe427a")
+MSP430_MCU("msp430fe4232")
+MSP430_MCU("msp430fe4242")
+MSP430_MCU("msp430fe4252")
+MSP430_MCU("msp430fe4272")
+MSP430_MCU("msp430f4250")
+MSP430_MCU("msp430f4260")
+MSP430_MCU("msp430f4270")
+MSP430_MCU("msp430fg4250")
+MSP430_MCU("msp430fg4260")
+MSP430_MCU("msp430fg4270")
+MSP430_MCU("msp430fw423")
+MSP430_MCU("msp430fw425")
+MSP430_MCU("msp430fw427")
+MSP430_MCU("msp430fw428")
+MSP430_MCU("msp430fw429")
+MSP430_MCU("msp430fg437")
+MSP430_MCU("msp430fg438")
+MSP430_MCU("msp430fg439")
+MSP430_MCU("msp430f438")
+MSP430_MCU("msp430f439")
+MSP430_MCU("msp430f477")
+MSP430_MCU("msp430f478")
+MSP430_MCU("msp430f479")
+MSP430_MCU("msp430fg477")
+MSP430_MCU("msp430fg478")
+MSP430_MCU("msp430fg479")
+MSP430_MCU("msp430p112")
+MSP430_MCU("msp430p313")
+MSP430_MCU("msp430p315")
+MSP430_MCU("msp430p315s")
+MSP430_MCU("msp430p325")
+MSP430_MCU("msp430l092")
+MSP430_MCU("msp430c091")
+MSP430_MCU("msp430c092")
 MSP430_MCU("msp430g2211")
 MSP430_MCU("msp430g2201")
 MSP430_MCU("msp430g2111")
@@ -115,68 +182,32 @@ MSP430_MCU("msp430g2855")
 MSP430_MCU("msp430g2955")
 MSP430_MCU("msp430g2230")
 MSP430_MCU("msp430g2210")
-MSP430_MCU("msp430c311s")
-MSP430_MCU("msp430c312")
-MSP430_MCU("msp430c313")
-MSP430_MCU("msp430c314")
-MSP430_MCU("msp430c315")
-MSP430_MCU("msp430c323")
-MSP430_MCU("msp430c325")
-MSP430_MCU("msp430c412")
-MSP430_MCU("msp430c413")
-MSP430_MCU("msp430e313")
-MSP430_MCU("msp430e315")
-MSP430_MCU("msp430e325")
-MSP430_MCU("msp430p313")
-MSP430_MCU("msp430p315")
-MSP430_MCU("msp430p315s")
-MSP430_MCU("msp430p325")
-MSP430_MCU("msp430f412")
-MSP430_MCU("msp430f413")
-MSP430_MCU("msp430f415")
-MSP430_MCU("msp430f417")
-MSP430_MCU("msp430f4132")
-MSP430_MCU("msp430f4152")
-MSP430_MCU("msp430f435")
-MSP430_MCU("msp430f436")
-MSP430_MCU("msp430f437")
-MSP430_MCU("msp430f4351")
-MSP430_MCU("msp430f4361")
-MSP430_MCU("msp430f4371")
-MSP430_MCU("msp430fe423")
-MSP430_MCU("msp430fe425")
-MSP430_MCU("msp430fe427")
-MSP430_MCU("msp430fe423a")
-MSP430_MCU("msp430fe425a")
-MSP430_MCU("msp430fe427a")
-MSP430_MCU("msp430fe4232")
-MSP430_MCU("msp430fe4242")
-MSP430_MCU("msp430fe4252")
-MSP430_MCU("msp430fe4272")
-MSP430_MCU("msp430f4250")
-MSP430_MCU("msp430f4260")
-MSP430_MCU("msp430f4270")
-MSP430_MCU("msp430fg4250")
-MSP430_MCU("msp430fg4260")
-MSP430_MCU("msp430fg4270")
-MSP430_MCU("msp430fw423")
-MSP430_MCU("msp430fw425")
-MSP430_MCU("msp430fw427")
-MSP430_MCU("msp430fw428")
-MSP430_MCU("msp430fw429")
-MSP430_MCU("msp430fg437")
-MSP430_MCU("msp430fg438")
-MSP430_MCU("msp430fg439")
-MSP430_MCU("msp430f438")
-MSP430_MCU("msp430f439")
-MSP430_MCU("msp430f477")
-MSP430_MCU("msp430f478")
-MSP430_MCU("msp430f479")
-MSP430_MCU("msp430fg477")
-MSP430_MCU("msp430fg478")
-MSP430_MCU("msp430fg479")
+MSP430_MCU("rf430frl152h")
+MSP430_MCU("rf430frl153h")
+MSP430_MCU("rf430frl154h")
+MSP430_MCU("rf430frl152h_rom")
+MSP430_MCU("rf430frl153h_rom")
+MSP430_MCU("rf430frl154h_rom")
+MSP430_MCU("msp430fr4131")
+MSP430_MCU("msp430fr4132")
+MSP430_MCU("msp430fr4133")
+MSP430_MCU("msp430fr2032")
+MSP430_MCU("msp430fr2033")
+MSP430_MCU("msp430fr2110")
+MSP430_MCU("msp430fr2111")
+MSP430_MCU("msp430fr2310")
+MSP430_MCU("msp430fr2311")
+MSP430_MCU("msp430fr2100")
+MSP430_MCU("msp430fr2000")
 
 // With 16-bit hardware multiplier
+MSP430_MCU_FEAT("msp430c336", "16bit")
+MSP430_MCU_FEAT("msp430c337", "16bit")
+MSP430_MCU_FEAT("msp430cg4616", "16bit")
+MSP430_MCU_FEAT("msp430cg4617", "16bit")
+MSP430_MCU_FEAT("msp430cg4618", "16bit")
+MSP430_MCU_FEAT("msp430cg4619", "16bit")
+MSP430_MCU_FEAT("msp430e337", "16bit")
 MSP430_MCU_FEAT("msp430f147", "16bit")
 MSP430_MCU_FEAT("msp430f148", "16bit")
 MSP430_MCU_FEAT("msp430f149", "16bit")
@@ -189,21 +220,6 @@ MSP430_MCU_FEAT("msp430f169", "16bit")
 MSP430_MCU_FEAT("msp430f1610", "16bit")
 MSP430_MCU_FEAT("msp430f1611", "16bit")
 MSP430_MCU_FEAT("msp430f1612", "16bit")
-MSP430_MCU_FEAT("msp430c336", "16bit")
-MSP430_MCU_FEAT("msp430c337", "16bit")
-MSP430_MCU_FEAT("msp430e337", "16bit")
-MSP430_MCU_FEAT("msp430p337", "16bit")
-MSP430_MCU_FEAT("msp430f423", "16bit")
-MSP430_MCU_FEAT("msp430f425", "16bit")
-MSP430_MCU_FEAT("msp430f427", "16bit")
-MSP430_MCU_FEAT("msp430f423a", "16bit")
-MSP430_MCU_FEAT("msp430f425a", "16bit")
-MSP430_MCU_FEAT("msp430f427a", "16bit")
-MSP430_MCU_FEAT("msp430f4481", "16bit")
-MSP430_MCU_FEAT("msp430f4491", "16bit")
-MSP430_MCU_FEAT("msp430f447", "16bit")
-MSP430_MCU_FEAT("msp430f448", "16bit")
-MSP430_MCU_FEAT("msp430f449", "16bit")
 MSP430_MCU_FEAT("msp430f2330", "16bit")
 MSP430_MCU_FEAT("msp430f2350", "16bit")
 MSP430_MCU_FEAT("msp430f2370", "16bit")
@@ -216,12 +232,38 @@ MSP430_MCU_FEAT("msp430f2410", "16bit")
 MSP430_MCU_FEAT("msp430f2471", "16bit")
 MSP430_MCU_FEAT("msp430f2481", "16bit")
 MSP430_MCU_FEAT("msp430f2491", "16bit")
-MSP430_MCU_FEAT("msp430i2020", "16bit")
-MSP430_MCU_FEAT("msp430i2021", "16bit")
-MSP430_MCU_FEAT("msp430i2030", "16bit")
-MSP430_MCU_FEAT("msp430i2031", "16bit")
-MSP430_MCU_FEAT("msp430i2040", "16bit")
-MSP430_MCU_FEAT("msp430i2041", "16bit")
+MSP430_MCU_FEAT("msp430f2416", "16bit")
+MSP430_MCU_FEAT("msp430f2417", "16bit")
+MSP430_MCU_FEAT("msp430f2418", "16bit")
+MSP430_MCU_FEAT("msp430f2419", "16bit")
+MSP430_MCU_FEAT("msp430f2616", "16bit")
+MSP430_MCU_FEAT("msp430f2617", "16bit")
+MSP430_MCU_FEAT("msp430f2618", "16bit")
+MSP430_MCU_FEAT("msp430f2619", "16bit")
+MSP430_MCU_FEAT("msp430f423", "16bit")
+MSP430_MCU_FEAT("msp430f425", "16bit")
+MSP430_MCU_FEAT("msp430f427", "16bit")
+MSP430_MCU_FEAT("msp430f423a", "16bit")
+MSP430_MCU_FEAT("msp430f425a", "16bit")
+MSP430_MCU_FEAT("msp430f427a", "16bit")
+MSP430_MCU_FEAT("msp430f4481", "16bit")
+MSP430_MCU_FEAT("msp430f4491", "16bit")
+MSP430_MCU_FEAT("msp430f447", "16bit")
+MSP430_MCU_FEAT("msp430f448", "16bit")
+MSP430_MCU_FEAT("msp430f449", "16bit")
+MSP430_MCU_FEAT("msp430f46161", "16bit")
+MSP430_MCU_FEAT("msp430f46171", "16bit")
+MSP430_MCU_FEAT("msp430f46181", "16bit")
+MSP430_MCU_FEAT("msp430f46191", "16bit")
+MSP430_MCU_FEAT("msp430f4616", "16bit")
+MSP430_MCU_FEAT("msp430f4617", "16bit")
+MSP430_MCU_FEAT("msp430f4618", "16bit")
+MSP430_MCU_FEAT("msp430f4619", "16bit")
+MSP430_MCU_FEAT("msp430fg4616", "16bit")
+MSP430_MCU_FEAT("msp430fg4617", "16bit")
+MSP430_MCU_FEAT("msp430fg4618", "16bit")
+MSP430_MCU_FEAT("msp430fg4619", "16bit")
+MSP430_MCU_FEAT("msp430p337", "16bit")
 MSP430_MCU_FEAT("msp430afe221", "16bit")
 MSP430_MCU_FEAT("msp430afe231", "16bit")
 MSP430_MCU_FEAT("msp430afe251", "16bit")
@@ -231,12 +273,387 @@ MSP430_MCU_FEAT("msp430afe252", "16bit")
 MSP430_MCU_FEAT("msp430afe223", "16bit")
 MSP430_MCU_FEAT("msp430afe233", "16bit")
 MSP430_MCU_FEAT("msp430afe253", "16bit")
+MSP430_MCU_FEAT("msp430i2020", "16bit")
+MSP430_MCU_FEAT("msp430i2021", "16bit")
+MSP430_MCU_FEAT("msp430i2030", "16bit")
+MSP430_MCU_FEAT("msp430i2031", "16bit")
+MSP430_MCU_FEAT("msp430i2040", "16bit")
+MSP430_MCU_FEAT("msp430i2041", "16bit")
 
-// With 32 Bit Hardware Multiplier
+// With 32-bit hardware multiplier
 MSP430_MCU_FEAT("msp430f4783", "32bit")
 MSP430_MCU_FEAT("msp430f4793", "32bit")
 MSP430_MCU_FEAT("msp430f4784", "32bit")
 MSP430_MCU_FEAT("msp430f4794", "32bit")
+MSP430_MCU_FEAT("msp430f47126", "32bit")
+MSP430_MCU_FEAT("msp430f47127", "32bit")
+MSP430_MCU_FEAT("msp430f47163", "32bit")
+MSP430_MCU_FEAT("msp430f47173", "32bit")
+MSP430_MCU_FEAT("msp430f47183", "32bit")
+MSP430_MCU_FEAT("msp430f47193", "32bit")
+MSP430_MCU_FEAT("msp430f47166", "32bit")
+MSP430_MCU_FEAT("msp430f47176", "32bit")
+MSP430_MCU_FEAT("msp430f47186", "32bit")
+MSP430_MCU_FEAT("msp430f47196", "32bit")
+MSP430_MCU_FEAT("msp430f47167", "32bit")
+MSP430_MCU_FEAT("msp430f47177", "32bit")
+MSP430_MCU_FEAT("msp430f47187", "32bit")
+MSP430_MCU_FEAT("msp430f47197", "32bit")
+MSP430_MCU_FEAT("msp430f5418", "32bit")
+MSP430_MCU_FEAT("msp430f5419", "32bit")
+MSP430_MCU_FEAT("msp430f5435", "32bit")
+MSP430_MCU_FEAT("msp430f5436", "32bit")
+MSP430_MCU_FEAT("msp430f5437", "32bit")
+MSP430_MCU_FEAT("msp430f5438", "32bit")
+MSP430_MCU_FEAT("msp430f5418a", "32bit")
+MSP430_MCU_FEAT("msp430f5419a", "32bit")
+MSP430_MCU_FEAT("msp430f5435a", "32bit")
+MSP430_MCU_FEAT("msp430f5436a", "32bit")
+MSP430_MCU_FEAT("msp430f5437a", "32bit")
+MSP430_MCU_FEAT("msp430f5438a", "32bit")
+MSP430_MCU_FEAT("msp430f5212", "32bit")
+MSP430_MCU_FEAT("msp430f5213", "32bit")
+MSP430_MCU_FEAT("msp430f5214", "32bit")
+MSP430_MCU_FEAT("msp430f5217", "32bit")
+MSP430_MCU_FEAT("msp430f5218", "32bit")
+MSP430_MCU_FEAT("msp430f5219", "32bit")
+MSP430_MCU_FEAT("msp430f5222", "32bit")
+MSP430_MCU_FEAT("msp430f5223", "32bit")
+MSP430_MCU_FEAT("msp430f5224", "32bit")
+MSP430_MCU_FEAT("msp430f5227", "32bit")
+MSP430_MCU_FEAT("msp430f5228", "32bit")
+MSP430_MCU_FEAT("msp430f5229", "32bit")
+MSP430_MCU_FEAT("msp430f5232", "32bit")
+MSP430_MCU_FEAT("msp430f5234", "32bit")
+MSP430_MCU_FEAT("msp430f5237", "32bit")
+MSP430_MCU_FEAT("msp430f5239", "32bit")
+MSP430_MCU_FEAT("msp430f5242", "32bit")
+MSP430_MCU_FEAT("msp430f5244", "32bit")
+MSP430_MCU_FEAT("msp430f5247", "32bit")
+MSP430_MCU_FEAT("msp430f5249", "32bit")
+MSP430_MCU_FEAT("msp430f5304", "32bit")
+MSP430_MCU_FEAT("msp430f5308", "32bit")
+MSP430_MCU_FEAT("msp430f5309", "32bit")
+MSP430_MCU_FEAT("msp430f5310", "32bit")
+MSP430_MCU_FEAT("msp430f5340", "32bit")
+MSP430_MCU_FEAT("msp430f5341", "32bit")
+MSP430_MCU_FEAT("msp430f5342", "32bit")
+MSP430_MCU_FEAT("msp430f5324", "32bit")
+MSP430_MCU_FEAT("msp430f5325", "32bit")
+MSP430_MCU_FEAT("msp430f5326", "32bit")
+MSP430_MCU_FEAT("msp430f5327", "32bit")
+MSP430_MCU_FEAT("msp430f5328", "32bit")
+MSP430_MCU_FEAT("msp430f5329", "32bit")
+MSP430_MCU_FEAT("msp430f5500", "32bit")
+MSP430_MCU_FEAT("msp430f5501", "32bit")
+MSP430_MCU_FEAT("msp430f5502", "32bit")
+MSP430_MCU_FEAT("msp430f5503", "32bit")
+MSP430_MCU_FEAT("msp430f5504", "32bit")
+MSP430_MCU_FEAT("msp430f5505", "32bit")
+MSP430_MCU_FEAT("msp430f5506", "32bit")
+MSP430_MCU_FEAT("msp430f5507", "32bit")
+MSP430_MCU_FEAT("msp430f5508", "32bit")
+MSP430_MCU_FEAT("msp430f5509", "32bit")
+MSP430_MCU_FEAT("msp430f5510", "32bit")
+MSP430_MCU_FEAT("msp430f5513", "32bit")
+MSP430_MCU_FEAT("msp430f5514", "32bit")
+MSP430_MCU_FEAT("msp430f5515", "32bit")
+MSP430_MCU_FEAT("msp430f5517", "32bit")
+MSP430_MCU_FEAT("msp430f5519", "32bit")
+MSP430_MCU_FEAT("msp430f5521", "32bit")
+MSP430_MCU_FEAT("msp430f5522", "32bit")
+MSP430_MCU_FEAT("msp430f5524", "32bit")
+MSP430_MCU_FEAT("msp430f5525", "32bit")
+MSP430_MCU_FEAT("msp430f5526", "32bit")
+MSP430_MCU_FEAT("msp430f5527", "32bit")
+MSP430_MCU_FEAT("msp430f5528", "32bit")
+MSP430_MCU_FEAT("msp430f5529", "32bit")
+MSP430_MCU_FEAT("cc430f5133", "32bit")
+MSP430_MCU_FEAT("cc430f5135", "32bit")
+MSP430_MCU_FEAT("cc430f5137", "32bit")
+MSP430_MCU_FEAT("cc430f6125", "32bit")
+MSP430_MCU_FEAT("cc430f6126", "32bit")
+MSP430_MCU_FEAT("cc430f6127", "32bit")
+MSP430_MCU_FEAT("cc430f6135", "32bit")
+MSP430_MCU_FEAT("cc430f6137", "32bit")
+MSP430_MCU_FEAT("cc430f5123", "32bit")
+MSP430_MCU_FEAT("cc430f5125", "32bit")
+MSP430_MCU_FEAT("cc430f5143", "32bit")
+MSP430_MCU_FEAT("cc430f5145", "32bit")
+MSP430_MCU_FEAT("cc430f5147", "32bit")
+MSP430_MCU_FEAT("cc430f6143", "32bit")
+MSP430_MCU_FEAT("cc430f6145", "32bit")
+MSP430_MCU_FEAT("cc430f6147", "32bit")
+MSP430_MCU_FEAT("msp430f5333", "32bit")
+MSP430_MCU_FEAT("msp430f5335", "32bit")
+MSP430_MCU_FEAT("msp430f5336", "32bit")
+MSP430_MCU_FEAT("msp430f5338", "32bit")
+MSP430_MCU_FEAT("msp430f5630", "32bit")
+MSP430_MCU_FEAT("msp430f5631", "32bit")
+MSP430_MCU_FEAT("msp430f5632", "32bit")
+MSP430_MCU_FEAT("msp430f5633", "32bit")
+MSP430_MCU_FEAT("msp430f5634", "32bit")
+MSP430_MCU_FEAT("msp430f5635", "32bit")
+MSP430_MCU_FEAT("msp430f5636", "32bit")
+MSP430_MCU_FEAT("msp430f5637", "32bit")
+MSP430_MCU_FEAT("msp430f5638", "32bit")
+MSP430_MCU_FEAT("msp430f6433", "32bit")
+MSP430_MCU_FEAT("msp430f6435", "32bit")
+MSP430_MCU_FEAT("msp430f6436", "32bit")
+MSP430_MCU_FEAT("msp430f6438", "32bit")
+MSP430_MCU_FEAT("msp430f6630", "32bit")
+MSP430_MCU_FEAT("msp430f6631", "32bit")
+MSP430_MCU_FEAT("msp430f6632", "32bit")
+MSP430_MCU_FEAT("msp430f6633", "32bit")
+MSP430_MCU_FEAT("msp430f6634", "32bit")
+MSP430_MCU_FEAT("msp430f6635", "32bit")
+MSP430_MCU_FEAT("msp430f6636", "32bit")
+MSP430_MCU_FEAT("msp430f6637", "32bit")
+MSP430_MCU_FEAT("msp430f6638", "32bit")
+MSP430_MCU_FEAT("msp430f5358", "32bit")
+MSP430_MCU_FEAT("msp430f5359", "32bit")
+MSP430_MCU_FEAT("msp430f5658", "32bit")
+MSP430_MCU_FEAT("msp430f5659", "32bit")
+MSP430_MCU_FEAT("msp430f6458", "32bit")
+MSP430_MCU_FEAT("msp430f6459", "32bit")
+MSP430_MCU_FEAT("msp430f6658", "32bit")
+MSP430_MCU_FEAT("msp430f6659", "32bit")
+MSP430_MCU_FEAT("msp430fg6425", "32bit")
+MSP430_MCU_FEAT("msp430fg6426", "32bit")
+MSP430_MCU_FEAT("msp430fg6625", "32bit")
+MSP430_MCU_FEAT("msp430fg6626", "32bit")
+MSP430_MCU_FEAT("msp430f5131", "32bit")
+MSP430_MCU_FEAT("msp430f5151", "32bit")
+MSP430_MCU_FEAT("msp430f5171", "32bit")
+MSP430_MCU_FEAT("msp430f5132", "32bit")
+MSP430_MCU_FEAT("msp430f5152", "32bit")
+MSP430_MCU_FEAT("msp430f5172", "32bit")
+MSP430_MCU_FEAT("msp430f6720", "32bit")
+MSP430_MCU_FEAT("msp430f6721", "32bit")
+MSP430_MCU_FEAT("msp430f6723", "32bit")
+MSP430_MCU_FEAT("msp430f6724", "32bit")
+MSP430_MCU_FEAT("msp430f6725", "32bit")
+MSP430_MCU_FEAT("msp430f6726", "32bit")
+MSP430_MCU_FEAT("msp430f6730", "32bit")
+MSP430_MCU_FEAT("msp430f6731", "32bit")
+MSP430_MCU_FEAT("msp430f6733", "32bit")
+MSP430_MCU_FEAT("msp430f6734", "32bit")
+MSP430_MCU_FEAT("msp430f6735", "32bit")
+MSP430_MCU_FEAT("msp430f6736", "32bit")
+MSP430_MCU_FEAT("msp430f67621", "32bit")
+MSP430_MCU_FEAT("msp430f67641", "32bit")
+MSP430_MCU_FEAT("msp430f6720a", "32bit")
+MSP430_MCU_FEAT("msp430f6721a", "32bit")
+MSP430_MCU_FEAT("msp430f6723a", "32bit")
+MSP430_MCU_FEAT("msp430f6724a", "32bit")
+MSP430_MCU_FEAT("msp430f6725a", "32bit")
+MSP430_MCU_FEAT("msp430f6726a", "32bit")
+MSP430_MCU_FEAT("msp430f6730a", "32bit")
+MSP430_MCU_FEAT("msp430f6731a", "32bit")
+MSP430_MCU_FEAT("msp430f6733a", "32bit")
+MSP430_MCU_FEAT("msp430f6734a", "32bit")
+MSP430_MCU_FEAT("msp430f6735a", "32bit")
+MSP430_MCU_FEAT("msp430f6736a", "32bit")
+MSP430_MCU_FEAT("msp430f67621a", "32bit")
+MSP430_MCU_FEAT("msp430f67641a", "32bit")
+MSP430_MCU_FEAT("msp430f67451", "32bit")
+MSP430_MCU_FEAT("msp430f67651", "32bit")
+MSP430_MCU_FEAT("msp430f67751", "32bit")
+MSP430_MCU_FEAT("msp430f67461", "32bit")
+MSP430_MCU_FEAT("msp430f67661", "32bit")
+MSP430_MCU_FEAT("msp430f67761", "32bit")
+MSP430_MCU_FEAT("msp430f67471", "32bit")
+MSP430_MCU_FEAT("msp430f67671", "32bit")
+MSP430_MCU_FEAT("msp430f67771", "32bit")
+MSP430_MCU_FEAT("msp430f67481", "32bit")
+MSP430_MCU_FEAT("msp430f67681", "32bit")
+MSP430_MCU_FEAT("msp430f67781", "32bit")
+MSP430_MCU_FEAT("msp430f67491", "32bit")
+MSP430_MCU_FEAT("msp430f67691", "32bit")
+MSP430_MCU_FEAT("msp430f67791", "32bit")
+MSP430_MCU_FEAT("msp430f6745", "32bit")
+MSP430_MCU_FEAT("msp430f6765", "32bit")
+MSP430_MCU_FEAT("msp430f6775", "32bit")
+MSP430_MCU_FEAT("msp430f6746", "32bit")
+MSP430_MCU_FEAT("msp430f6766", "32bit")
+MSP430_MCU_FEAT("msp430f6776", "32bit")
+MSP430_MCU_FEAT("msp430f6747", "32bit")
+MSP430_MCU_FEAT("msp430f6767", "32bit")
+MSP430_MCU_FEAT("msp430f6777", "32bit")
+MSP430_MCU_FEAT("msp430f6748", "32bit")
+MSP430_MCU_FEAT("msp430f6768", "32bit")
+MSP430_MCU_FEAT("msp430f6778", "32bit")
+MSP430_MCU_FEAT("msp430f6749", "32bit")
+MSP430_MCU_FEAT("msp430f6769", "32bit")
+MSP430_MCU_FEAT("msp430f6779", "32bit")
+MSP430_MCU_FEAT("msp430f67451a", "32bit")
+MSP430_MCU_FEAT("msp430f67651a", "32bit")
+MSP430_MCU_FEAT("msp430f67751a", "32bit")
+MSP430_MCU_FEAT("msp430f67461a", "32bit")
+MSP430_MCU_FEAT("msp430f67661a", "32bit")
+MSP430_MCU_FEAT("msp430f67761a", "32bit")
+MSP430_MCU_FEAT("msp430f67471a", "32bit")
+MSP430_MCU_FEAT("msp430f67671a", "32bit")
+MSP430_MCU_FEAT("msp430f67771a", "32bit")
+MSP430_MCU_FEAT("msp430f67481a", "32bit")
+MSP430_MCU_FEAT("msp430f67681a", "32bit")
+MSP430_MCU_FEAT("msp430f67781a", "32bit")
+MSP430_MCU_FEAT("msp430f67491a", "32bit")
+MSP430_MCU_FEAT("msp430f67691a", "32bit")
+MSP430_MCU_FEAT("msp430f67791a", "32bit")
+MSP430_MCU_FEAT("msp430f6745a", "32bit")
+MSP430_MCU_FEAT("msp430f6765a", "32bit")
+MSP430_MCU_FEAT("msp430f6775a", "32bit")
+MSP430_MCU_FEAT("msp430f6746a", "32bit")
+MSP430_MCU_FEAT("msp430f6766a", "32bit")
+MSP430_MCU_FEAT("msp430f6776a", "32bit")
+MSP430_MCU_FEAT("msp430f6747a", "32bit")
+MSP430_MCU_FEAT("msp430f6767a", "32bit")
+MSP430_MCU_FEAT("msp430f6777a", "32bit")
+MSP430_MCU_FEAT("msp430f6748a", "32bit")
+MSP430_MCU_FEAT("msp430f6768a", "32bit")
+MSP430_MCU_FEAT("msp430f6778a", "32bit")
+MSP430_MCU_FEAT("msp430f6749a", "32bit")
+MSP430_MCU_FEAT("msp430f6769a", "32bit")
+MSP430_MCU_FEAT("msp430f6779a", "32bit")
+MSP430_MCU_FEAT("msp430fr5720", "32bit")
+MSP430_MCU_FEAT("msp430fr5721", "32bit")
+MSP430_MCU_FEAT("msp430fr5722", "32bit")
+MSP430_MCU_FEAT("msp430fr5723", "32bit")
+MSP430_MCU_FEAT("msp430fr5724", "32bit")
+MSP430_MCU_FEAT("msp430fr5725", "32bit")
+MSP430_MCU_FEAT("msp430fr5726", "32bit")
+MSP430_MCU_FEAT("msp430fr5727", "32bit")
+MSP430_MCU_FEAT("msp430fr5728", "32bit")
+MSP430_MCU_FEAT("msp430fr5729", "32bit")
+MSP430_MCU_FEAT("msp430fr5730", "32bit")
+MSP430_MCU_FEAT("msp430fr5731", "32bit")
+MSP430_MCU_FEAT("msp430fr5732", "32bit")
+MSP430_MCU_FEAT("msp430fr5733", "32bit")
+MSP430_MCU_FEAT("msp430fr5734", "32bit")
+MSP430_MCU_FEAT("msp430fr5735", "32bit")
+MSP430_MCU_FEAT("msp430fr5736", "32bit")
+MSP430_MCU_FEAT("msp430fr5737", "32bit")
+MSP430_MCU_FEAT("msp430fr5738", "32bit")
+MSP430_MCU_FEAT("msp430fr5739", "32bit")
+MSP430_MCU_FEAT("msp430bt5190", "32bit")
+MSP430_MCU_FEAT("msp430fr5857", "32bit")
+MSP430_MCU_FEAT("msp430fr5858", "32bit")
+MSP430_MCU_FEAT("msp430fr5859", "32bit")
+MSP430_MCU_FEAT("msp430fr5847", "32bit")
+MSP430_MCU_FEAT("msp430fr58471", "32bit")
+MSP430_MCU_FEAT("msp430fr5848", "32bit")
+MSP430_MCU_FEAT("msp430fr5849", "32bit")
+MSP430_MCU_FEAT("msp430fr5867", "32bit")
+MSP430_MCU_FEAT("msp430fr58671", "32bit")
+MSP430_MCU_FEAT("msp430fr5868", "32bit")
+MSP430_MCU_FEAT("msp430fr5869", "32bit")
+MSP430_MCU_FEAT("msp430fr5957", "32bit")
+MSP430_MCU_FEAT("msp430fr5958", "32bit")
+MSP430_MCU_FEAT("msp430fr5959", "32bit")
+MSP430_MCU_FEAT("msp430fr5947", "32bit")
+MSP430_MCU_FEAT("msp430fr59471", "32bit")
+MSP430_MCU_FEAT("msp430fr5948", "32bit")
+MSP430_MCU_FEAT("msp430fr5949", "32bit")
+MSP430_MCU_FEAT("msp430fr5967", "32bit")
+MSP430_MCU_FEAT("msp430fr5968", "32bit")
+MSP430_MCU_FEAT("msp430fr5969", "32bit")
+MSP430_MCU_FEAT("msp430fr59691", "32bit")
+MSP430_MCU_FEAT("rf430f5175", "32bit")
+MSP430_MCU_FEAT("rf430f5155", "32bit")
+MSP430_MCU_FEAT("rf430f5144", "32bit")
+MSP430_MCU_FEAT("msp430fr69271", "32bit")
+MSP430_MCU_FEAT("msp430fr68791", "32bit")
+MSP430_MCU_FEAT("msp430fr69791", "32bit")
+MSP430_MCU_FEAT("msp430fr6927", "32bit")
+MSP430_MCU_FEAT("msp430fr6928", "32bit")
+MSP430_MCU_FEAT("msp430fr6877", "32bit")
+MSP430_MCU_FEAT("msp430fr6977", "32bit")
+MSP430_MCU_FEAT("msp430fr6879", "32bit")
+MSP430_MCU_FEAT("msp430fr6979", "32bit")
+MSP430_MCU_FEAT("msp430fr58891", "32bit")
+MSP430_MCU_FEAT("msp430fr68891", "32bit")
+MSP430_MCU_FEAT("msp430fr59891", "32bit")
+MSP430_MCU_FEAT("msp430fr69891", "32bit")
+MSP430_MCU_FEAT("msp430fr5887", "32bit")
+MSP430_MCU_FEAT("msp430fr5888", "32bit")
+MSP430_MCU_FEAT("msp430fr5889", "32bit")
+MSP430_MCU_FEAT("msp430fr6887", "32bit")
+MSP430_MCU_FEAT("msp430fr6888", "32bit")
+MSP430_MCU_FEAT("msp430fr6889", "32bit")
+MSP430_MCU_FEAT("msp430fr5986", "32bit")
+MSP430_MCU_FEAT("msp430fr5987", "32bit")
+MSP430_MCU_FEAT("msp430fr5988", "32bit")
+MSP430_MCU_FEAT("msp430fr5989", "32bit")
+MSP430_MCU_FEAT("msp430fr6987", "32bit")
+MSP430_MCU_FEAT("msp430fr6988", "32bit")
+MSP430_MCU_FEAT("msp430fr6989", "32bit")
+MSP430_MCU_FEAT("msp430fr5922", "32bit")
+MSP430_MCU_FEAT("msp430fr5870", "32bit")
+MSP430_MCU_FEAT("msp430fr5970", "32bit")
+MSP430_MCU_FEAT("msp430fr5872", "32bit")
+MSP430_MCU_FEAT("msp430fr5972", "32bit")
+MSP430_MCU_FEAT("msp430fr6820", "32bit")
+MSP430_MCU_FEAT("msp430fr6920", "32bit")
+MSP430_MCU_FEAT("msp430fr6822", "32bit")
+MSP430_MCU_FEAT("msp430fr6922", "32bit")
+MSP430_MCU_FEAT("msp430fr6870", "32bit")
+MSP430_MCU_FEAT("msp430fr6970", "32bit")
+MSP430_MCU_FEAT("msp430fr6872", "32bit")
+MSP430_MCU_FEAT("msp430fr6972", "32bit")
+MSP430_MCU_FEAT("msp430fr59221", "32bit")
+MSP430_MCU_FEAT("msp430fr58721", "32bit")
+MSP430_MCU_FEAT("msp430fr59721", "32bit")
+MSP430_MCU_FEAT("msp430fr68221", "32bit")
+MSP430_MCU_FEAT("msp430fr69221", "32bit")
+MSP430_MCU_FEAT("msp430fr68721", "32bit")
+MSP430_MCU_FEAT("msp430fr69721", "32bit")
+MSP430_MCU_FEAT("msp430sl5438a", "32bit")
+MSP430_MCU_FEAT("msp430fr2433", "32bit")
+MSP430_MCU_FEAT("msp430fr2532", "32bit")
+MSP430_MCU_FEAT("msp430fr2533", "32bit")
+MSP430_MCU_FEAT("msp430fr2632", "32bit")
+MSP430_MCU_FEAT("msp430fr2633", "32bit")
+MSP430_MCU_FEAT("msp430f5252", "32bit")
+MSP430_MCU_FEAT("msp430f5253", "32bit")
+MSP430_MCU_FEAT("msp430f5254", "32bit")
+MSP430_MCU_FEAT("msp430f5255", "32bit")
+MSP430_MCU_FEAT("msp430f5256", "32bit")
+MSP430_MCU_FEAT("msp430f5257", "32bit")
+MSP430_MCU_FEAT("msp430f5258", "32bit")
+MSP430_MCU_FEAT("msp430f5259", "32bit")
+MSP430_MCU_FEAT("msp430fr5962", "32bit")
+MSP430_MCU_FEAT("msp430fr5964", "32bit")
+MSP430_MCU_FEAT("msp430fr5992", "32bit")
+MSP430_MCU_FEAT("msp430fr5994", "32bit")
+MSP430_MCU_FEAT("msp430fr59941", "32bit")
+MSP430_MCU_FEAT("msp430fr2355", "32bit")
+MSP430_MCU_FEAT("msp430fr2155", "32bit")
+MSP430_MCU_FEAT("msp430fr2353", "32bit")
+MSP430_MCU_FEAT("msp430fr2153", "32bit")
+MSP430_MCU_FEAT("msp430fr2522", "32bit")
+MSP430_MCU_FEAT("msp430fr2512", "32bit")
+MSP430_MCU_FEAT("msp430fr2422", "32bit")
+MSP430_MCU_FEAT("msp430fr2676", "32bit")
+MSP430_MCU_FEAT("msp430fr2476", "32bit")
+MSP430_MCU_FEAT("msp430fr2675", "32bit")
+MSP430_MCU_FEAT("msp430fr2673", "32bit")
+MSP430_MCU_FEAT("msp430fr2475", "32bit")
+MSP430_MCU_FEAT("msp430fr2672", "32bit")
+MSP430_MCU_FEAT("msp430fr6043", "32bit")
+MSP430_MCU_FEAT("msp430fr5043", "32bit")
+MSP430_MCU_FEAT("msp430fr6041", "32bit")
+MSP430_MCU_FEAT("msp430fr60431", "32bit")
+MSP430_MCU_FEAT("msp430fr5041", "32bit")
+MSP430_MCU_FEAT("msp430fr50431", "32bit")
+MSP430_MCU_FEAT("msp430fr6005", "32bit")
+MSP430_MCU_FEAT("msp430fr6047", "32bit")
+MSP430_MCU_FEAT("msp430fr6037", "32bit")
+MSP430_MCU_FEAT("msp430fr6045", "32bit")
+MSP430_MCU_FEAT("msp430fr60471", "32bit")
+MSP430_MCU_FEAT("msp430fr6035", "32bit")
+MSP430_MCU_FEAT("msp430fr6007", "32bit")
+MSP430_MCU_FEAT("msp430fr60371", "32bit")
 
 // Generic MCUs
 MSP430_MCU("msp430i2xxgeneric")
diff --git a/clang/include/clang/Basic/Target/MSP430/gen-msp430-def.py b/clang/include/clang/Basic/Target/MSP430/gen-msp430-def.py
new file mode 100755
index 00000000000000..3ae6fdd9d5c650
--- /dev/null
+++ b/clang/include/clang/Basic/Target/MSP430/gen-msp430-def.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+"""
+Script to generate MSP430 definitions from TI's devices.csv
+
+Download the devices.csv from [1] using the link "Header and Support Files".
+
+[1]: https://www.ti.com/tool/MSP430-GCC-OPENSOURCE#downloads
+"""
+import csv
+import sys
+
+DEVICE_COLUMN = 0
+MULTIPLIER_COLUMN = 3
+
+MULTIPLIER_SW = "0"
+MULTIPLIER_HW_16 = ("1", "2")
+MULTIPLIER_HW_32 = ("4", "8")
+
+PREFIX = """//===--- MSP430Target.def - MSP430 Feature/Processor Database----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MSP430 devices and their features.
+//
+// Generated from TI's devices.csv in version {} using the script in
+// Target/MSP430/gen-msp430-def.py - use this tool rather than adding
+// new MCUs by hand.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCU_FEAT
+#define MSP430_MCU_FEAT(NAME, HWMULT) MSP430_MCU(NAME)
+#endif
+
+#ifndef MSP430_MCU
+#define MSP430_MCU(NAME)
+#endif
+
+"""
+
+SUFFIX = """
+// Generic MCUs
+MSP430_MCU("msp430i2xxgeneric")
+
+#undef MSP430_MCU
+#undef MSP430_MCU_FEAT
+"""
+
+
+def csv2def(csv_path, def_path):
+    """
+    Parse the devices.csv file at the given path, generate the definitions and
+    write them to the given path.
+
+    :param csv_path: Path to the devices.csv to parse
+    :type csv_path: str
+    :param def_path: Path to the output file to write the definitions to
+    "type def_path: str
+    """
+
+    mcus_multiplier_sw = []
+    mcus_multiplier_hw_16 = []
+    mcus_multiplier_hw_32 = []
+    version = "unknown"
+
+    with open(csv_path) as csv_file:
+        csv_reader = csv.reader(csv_file)
+        while True:
+            row = next(csv_reader)
+            if len(row) < MULTIPLIER_COLUMN:
+                continue
+
+            if row[DEVICE_COLUMN] == "# Device Name":
+                assert row[MULTIPLIER_COLUMN] == "MPY_TYPE", "File format changed"
+                break
+
+            if row[0] == "Version:":
+                version = row[1]
+
+        for row in csv_reader:
+            if row[DEVICE_COLUMN].endswith("generic"):
+                continue
+            if row[MULTIPLIER_COLUMN] == MULTIPLIER_SW:
+                mcus_multiplier_sw.append(row[DEVICE_COLUMN])
+            elif row[MULTIPLIER_COLUMN] in MULTIPLIER_HW_16:
+                mcus_multiplier_hw_16.append(row[DEVICE_COLUMN])
+            elif row[MULTIPLIER_COLUMN] in MULTIPLIER_HW_32:
+                mcus_multiplier_hw_32.append(row[DEVICE_COLUMN])
+            else:
+                assert 0, "Unknown multiplier type"
+
+    with open(def_path, "w") as def_file:
+        def_file.write(PREFIX.format(version))
+
+        for mcu in mcus_multiplier_sw:
+            def_file.write(f'MSP430_MCU("{mcu}")\n')
+
+        def_file.write("\n// With 16-bit hardware multiplier\n")
+
+        for mcu in mcus_multiplier_hw_16:
+            def_file.write(f'MSP430_MCU_FEAT("{mcu}", "16bit")\n')
+
+        def_file.write("\n// With 32-bit hardware multiplier\n")
+
+        for mcu in mcus_multiplier_hw_32:
+            def_file.write(f'MSP430_MCU_FEAT("{mcu}", "32bit")\n')
+
+        def_file.write(SUFFIX)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        sys.exit(f"Usage: {sys.argv[0]}  ")
+
+    csv2def(sys.argv[1], sys.argv[2])

From 2a3903fa0e88d7149df11aa37d4ba87c5e5f0913 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Tue, 7 May 2024 14:20:13 -0700
Subject: [PATCH 28/64] [AMDGPU] Prevent FMINIMUM and FMAXIMUM from being fully
 scalarized (#91378)

This is the same logic as with FMINNUM_IEEE/FMAXNUM_IEEE.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp    |  8 +++-
 llvm/test/CodeGen/AMDGPU/fmaximum.ll         | 39 ++++++++++++--------
 llvm/test/CodeGen/AMDGPU/fminimum.ll         | 39 ++++++++++++--------
 llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll |  8 ++--
 llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll |  8 ++--
 5 files changed, 63 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ed41c10b50d323..33bdd6195a0408 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -854,9 +854,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   if (Subtarget->hasPrefetch())
     setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
-  if (Subtarget->hasIEEEMinMax())
+  if (Subtarget->hasIEEEMinMax()) {
     setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
                        {MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
+    setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
+                       {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
+                       Custom);
+  }
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN,
                      {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
@@ -5821,6 +5825,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FMUL:
   case ISD::FMINNUM_IEEE:
   case ISD::FMAXNUM_IEEE:
+  case ISD::FMINIMUM:
+  case ISD::FMAXIMUM:
   case ISD::UADDSAT:
   case ISD::USUBSAT:
   case ISD::SADDSAT:
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
index dd685a6169d843..87ac95a1cd7390 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
@@ -148,23 +148,35 @@ define amdgpu_ps <2 x half> @test_fmaximum_v2f16_ss(<2 x half> inreg %a, <2 x ha
 }
 
 define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b) {
-; GCN-LABEL: test_fmaximum_v3f16_vv:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_pk_maximum_f16 v0, v0, v2
-; GCN-NEXT:    v_maximum_f16 v1, v1, v3
-; GCN-NEXT:    ; return to shader part epilog
+; GFX12-SDAG-LABEL: test_fmaximum_v3f16_vv:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    v_pk_maximum_f16 v0, v0, v2
+; GFX12-SDAG-NEXT:    v_pk_maximum_f16 v1, v1, v3
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: test_fmaximum_v3f16_vv:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    v_pk_maximum_f16 v0, v0, v2
+; GFX12-GISEL-NEXT:    v_maximum_f16 v1, v1, v3
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
   %val = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
   ret <3 x half> %val
 }
 
 define amdgpu_ps <3 x half> @test_fmaximum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) {
-; GCN-LABEL: test_fmaximum_v3f16_ss:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_pk_maximum_f16 v0, s0, s2
-; GCN-NEXT:    s_maximum_f16 s0, s1, s3
-; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GCN-NEXT:    v_mov_b32_e32 v1, s0
-; GCN-NEXT:    ; return to shader part epilog
+; GFX12-SDAG-LABEL: test_fmaximum_v3f16_ss:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    v_pk_maximum_f16 v0, s0, s2
+; GFX12-SDAG-NEXT:    v_pk_maximum_f16 v1, s1, s3
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: test_fmaximum_v3f16_ss:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    v_pk_maximum_f16 v0, s0, s2
+; GFX12-GISEL-NEXT:    s_maximum_f16 s0, s1, s3
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, s0
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
   %val = call <3 x half> @llvm.maximum.v3f16(<3 x half> %a, <3 x half> %b)
   ret <3 x half> %val
 }
@@ -306,6 +318,3 @@ declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
 declare double @llvm.maximum.f64(double, double)
 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
 declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll
index 2b3cc4fd73858c..45f6bff10f45ee 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll
@@ -148,23 +148,35 @@ define amdgpu_ps <2 x half> @test_fminimum_v2f16_ss(<2 x half> inreg %a, <2 x ha
 }
 
 define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b) {
-; GCN-LABEL: test_fminimum_v3f16_vv:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_pk_minimum_f16 v0, v0, v2
-; GCN-NEXT:    v_minimum_f16 v1, v1, v3
-; GCN-NEXT:    ; return to shader part epilog
+; GFX12-SDAG-LABEL: test_fminimum_v3f16_vv:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v0, v0, v2
+; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v1, v1, v3
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: test_fminimum_v3f16_vv:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    v_pk_minimum_f16 v0, v0, v2
+; GFX12-GISEL-NEXT:    v_minimum_f16 v1, v1, v3
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
   %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
   ret <3 x half> %val
 }
 
 define amdgpu_ps <3 x half> @test_fminimum_v3f16_ss(<3 x half> inreg %a, <3 x half> inreg %b) {
-; GCN-LABEL: test_fminimum_v3f16_ss:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_pk_minimum_f16 v0, s0, s2
-; GCN-NEXT:    s_minimum_f16 s0, s1, s3
-; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GCN-NEXT:    v_mov_b32_e32 v1, s0
-; GCN-NEXT:    ; return to shader part epilog
+; GFX12-SDAG-LABEL: test_fminimum_v3f16_ss:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v0, s0, s2
+; GFX12-SDAG-NEXT:    v_pk_minimum_f16 v1, s1, s3
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: test_fminimum_v3f16_ss:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    v_pk_minimum_f16 v0, s0, s2
+; GFX12-GISEL-NEXT:    s_minimum_f16 s0, s1, s3
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, s0
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
   %val = call <3 x half> @llvm.minimum.v3f16(<3 x half> %a, <3 x half> %b)
   ret <3 x half> %val
 }
@@ -306,6 +318,3 @@ declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
 declare double @llvm.minimum.f64(double, double)
 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
 declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX12-GISEL: {{.*}}
-; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index c49e6a9a9f25cf..c476208ed8f4e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -1794,7 +1794,7 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
-; GFX12-NEXT:    v_maximum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
@@ -2013,7 +2013,7 @@ define <3 x half> @v_maximum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
-; GFX12-NEXT:    v_maximum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
@@ -2163,7 +2163,7 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
-; GFX12-NEXT:    v_maximum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
@@ -2260,7 +2260,7 @@ define <3 x half> @v_maximum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_maximum_f16 v0, v0, v2
-; GFX12-NEXT:    v_maximum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_maximum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan nsz <3 x half> @llvm.maximum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index 7281c3fd64d466..66f3a48b13ee58 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -1461,7 +1461,7 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
-; GFX12-NEXT:    v_minimum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
@@ -1635,7 +1635,7 @@ define <3 x half> @v_minimum_v3f16__nnan(<3 x half> %src0, <3 x half> %src1) {
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
-; GFX12-NEXT:    v_minimum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
@@ -1740,7 +1740,7 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
-; GFX12-NEXT:    v_minimum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call nsz <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op
@@ -1792,7 +1792,7 @@ define <3 x half> @v_minimum_v3f16__nnan_nsz(<3 x half> %src0, <3 x half> %src1)
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_pk_minimum_f16 v0, v0, v2
-; GFX12-NEXT:    v_minimum_f16 v1, v1, v3
+; GFX12-NEXT:    v_pk_minimum_f16 v1, v1, v3
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan nsz <3 x half> @llvm.minimum.v3f16(<3 x half> %src0, <3 x half> %src1)
   ret <3 x half> %op

From a5044e6d505deb79f1b00bb39d11096d29b9c910 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan 
Date: Tue, 7 May 2024 17:36:58 -0400
Subject: [PATCH 29/64] [libc] fix typo due to futex renaming (#91379)

---
 libc/src/__support/threads/linux/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index b277c2a37f2d0f..9bee30206f1b9f 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -27,7 +27,7 @@ add_header_library(
   HDRS
     mutex.h
   DEPENDS
-    .futex
+    .futex_utils
     libc.src.__support.threads.mutex_common
 )
 

From ccf765cfd578c4ea4f710386e19cb8d1ef1859ce Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 7 May 2024 15:01:15 -0700
Subject: [PATCH 30/64] [compiler-rt][ctx_profile] Add the instrumented
 contextual profiling APIs (#89838)

APIs for contextual profiling. `ContextNode` is the call context-specific counter buffer. `ContextRoot` is associated with those functions that constitute roots into interesting call graphs; it is the object off which we hang the `Arena`s used for allocating `ContextNode`s, as well as the `ContextNode` corresponding to such a function. Graphs of `ContextNode`s are accessible by one thread at a time.

(Tracking Issue: #89287, more details in the RFC referenced there)
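
For orientation, a hand-written sketch of how instrumented code is expected to
drive these APIs (it mirrors the unit tests; the include path, function names,
GUIDs and counter/callsite counts are made-up illustrations, and in practice
LLVM emits the equivalent IR rather than C++):

  #include "CtxInstrProfiling.h"
  using namespace __ctx_profile;

  ContextRoot Root; // normally a zero-initialized global emitted by LLVM
  void Callee();

  void Entrypoint() {
    // Entry BB of an entrypoint.
    ContextNode *Ctx = __llvm_ctx_profile_start_context(
        &Root, /*Guid=*/1, /*Counters=*/10, /*Callsites=*/4);
    // Lowering of callsite #2: publish the expected callee and the slot in
    // this context's subcontexts vector, then call. (The real lowering also
    // handles the scratch-context case via index 1, omitted here.)
    __llvm_ctx_profile_expected_callee[0] = reinterpret_cast<void *>(&Callee);
    __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
    Callee();
    __llvm_ctx_profile_release_context(&Root);
  }

  void Callee() {
    // Entry BB of a non-entrypoint: fetch (or get a scratch) context.
    ContextNode *Ctx = __llvm_ctx_profile_get_context(
        reinterpret_cast<void *>(&Callee), /*Guid=*/2, /*NrCounters=*/3,
        /*NrCallsites=*/1);
    ++Ctx->counters()[1]; // what an llvm.instrprof.increment lowers to
  }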
---
 .../lib/ctx_profile/CtxInstrProfiling.cpp     | 283 +++++++++++++++++-
 .../lib/ctx_profile/CtxInstrProfiling.h       | 208 +++++++++++++
 .../tests/CtxInstrProfilingTest.cpp           | 192 ++++++++++++
 3 files changed, 681 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index 7620ce92f7ebde..68bfe5c1ae6144 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -10,20 +10,115 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_dense_map.h"
+#include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_thread_safety.h"
+#include "sanitizer_common/sanitizer_vector.h"
 
 #include <assert.h>
 
 using namespace __ctx_profile;
 
+namespace {
+// Keep track of all the context roots we actually saw, so we can then traverse
+// them when the user asks for the profile in __llvm_ctx_profile_fetch
+__sanitizer::SpinMutex AllContextsMutex;
+SANITIZER_GUARDED_BY(AllContextsMutex)
+__sanitizer::Vector<ContextRoot *> AllContextRoots;
+
+// utility to taint a pointer by setting the LSB. There is an assumption
+// throughout that the addresses of contexts are even (really, they should be
+// align(8), but "even"-ness is the minimum assumption)
+// "scratch contexts" are buffers that we return in certain cases - they are
+// large enough to allow for memory safe counter access, but they don't link
+// subcontexts below them (the runtime recognizes them and enforces that)
+ContextNode *markAsScratch(const ContextNode *Ctx) {
+  return reinterpret_cast<ContextNode *>(reinterpret_cast<uint64_t>(Ctx) | 1);
+}
+
+// Used when getting the data from TLS. We don't *really* need to reset, but
+// it's a simpler system if we do.
+template <typename T> inline T consume(T &V) {
+  auto R = V;
+  V = {0};
+  return R;
+}
+
+// We allocate at least kBuffSize Arena pages. The scratch buffer is also that
+// large.
+constexpr size_t kPower = 20;
+constexpr size_t kBuffSize = 1 << kPower;
+
+// Highly unlikely we need more than kBuffSize for a context.
+size_t getArenaAllocSize(size_t Needed) {
+  if (Needed >= kBuffSize)
+    return 2 * Needed;
+  return kBuffSize;
+}
+
+// verify the structural integrity of the context
+bool validate(const ContextRoot *Root) {
+  // all contexts should be laid out in some arena page. Go over each arena
+  // allocated for this Root, and jump over contained contexts based on
+  // self-reported sizes.
+  __sanitizer::DenseMap<uint64_t, bool> ContextStartAddrs;
+  for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
+    const auto *Pos = Mem->start();
+    while (Pos < Mem->pos()) {
+      const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
+      if (!ContextStartAddrs.insert({reinterpret_cast<uint64_t>(Ctx), true})
+               .second)
+        return false;
+      Pos += Ctx->size();
+    }
+  }
+
+  // Now traverse the contexts again the same way, but validate all non-null
+  // subcontext addresses appear in the set computed above.
+  for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
+    const auto *Pos = Mem->start();
+    while (Pos < Mem->pos()) {
+      const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
+      for (uint32_t I = 0; I < Ctx->callsites_size(); ++I)
+        for (auto *Sub = Ctx->subContexts()[I]; Sub; Sub = Sub->next())
+          if (!ContextStartAddrs.find(reinterpret_cast<uint64_t>(Sub)))
+            return false;
+
+      Pos += Ctx->size();
+    }
+  }
+  return true;
+}
+} // namespace
+
+// the scratch buffer - what we give when we can't produce a real context (the
+// scratch isn't "real" in that it's expected to be clobbered carelessly - we
+// don't read it). The other important thing is that the callees from a scratch
+// context also get a scratch context.
+// Eventually this can be replaced with per-function buffers, a'la the typical
+// (flat) instrumented FDO buffers. The clobbering aspect won't apply there, but
+// the part about determining the nature of the subcontexts does.
+__thread char __Buffer[kBuffSize] = {0};
+
+#define TheScratchContext                                                      \
+  markAsScratch(reinterpret_cast<ContextNode *>(__Buffer))
+
+// init the TLSes
+__thread void *volatile __llvm_ctx_profile_expected_callee[2] = {nullptr,
+                                                                 nullptr};
+__thread ContextNode **volatile __llvm_ctx_profile_callsite[2] = {0, 0};
+
+__thread ContextRoot *volatile __llvm_ctx_profile_current_context_root =
+    nullptr;
+
 // FIXME(mtrofin): use malloc / mmap instead of sanitizer common APIs to reduce
 // the dependency on the latter.
 Arena *Arena::allocateNewArena(size_t Size, Arena *Prev) {
   assert(!Prev || Prev->Next == nullptr);
-  Arena *NewArena =
-      new (__sanitizer::InternalAlloc(Size + sizeof(Arena))) Arena(Size);
+  Arena *NewArena = new (__sanitizer::InternalAlloc(
+      Size + sizeof(Arena), /*cache=*/nullptr, /*alignment=*/ExpectedAlignment))
+      Arena(Size);
   if (Prev)
     Prev->Next = NewArena;
   return NewArena;
@@ -38,3 +133,187 @@ void Arena::freeArenaList(Arena *&A) {
   }
   A = nullptr;
 }
+
+inline ContextNode *ContextNode::alloc(char *Place, GUID Guid,
+                                       uint32_t NrCounters,
+                                       uint32_t NrCallsites,
+                                       ContextNode *Next) {
+  assert(reinterpret_cast(Place) % ExpectedAlignment == 0);
+  return new (Place) ContextNode(Guid, NrCounters, NrCallsites, Next);
+}
+
+void ContextNode::reset() {
+  // FIXME(mtrofin): this is std::memset, which we can probably use if we
+  // drop/reduce the dependency on sanitizer_common.
+  for (uint32_t I = 0; I < NrCounters; ++I)
+    counters()[I] = 0;
+  for (uint32_t I = 0; I < NrCallsites; ++I)
+    for (auto *Next = subContexts()[I]; Next; Next = Next->Next)
+      Next->reset();
+}
+
+// If this is the first time we hit a callsite with this (Guid) particular
+// callee, we need to allocate.
+ContextNode *getCallsiteSlow(uint64_t Guid, ContextNode **InsertionPoint,
+                             uint32_t NrCounters, uint32_t NrCallsites) {
+  auto AllocSize = ContextNode::getAllocSize(NrCounters, NrCallsites);
+  auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
+  char *AllocPlace = Mem->tryBumpAllocate(AllocSize);
+  if (!AllocPlace) {
+    // if we failed to allocate on the current arena, allocate a new arena,
+    // and place it on __llvm_ctx_profile_current_context_root->CurrentMem so we
+    // find it from now on for other cases when we need to getCallsiteSlow.
+    // Note that allocateNewArena will link the allocated memory in the list of
+    // Arenas.
+    __llvm_ctx_profile_current_context_root->CurrentMem = Mem =
+        Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
+    AllocPlace = Mem->tryBumpAllocate(AllocSize);
+  }
+  auto *Ret = ContextNode::alloc(AllocPlace, Guid, NrCounters, NrCallsites,
+                                 *InsertionPoint);
+  *InsertionPoint = Ret;
+  return Ret;
+}
+
+ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
+                                            uint32_t NrCounters,
+                                            uint32_t NrCallsites) {
+  // fast "out" if we're not even doing contextual collection.
+  if (!__llvm_ctx_profile_current_context_root)
+    return TheScratchContext;
+
+  // also fast "out" if the caller is scratch. We can see if it's scratch by
+  // looking at the interior pointer into the subcontexts vector that the caller
+  // provided; if the context is scratch, so is that interior pointer
+  // (because all the address calculations are using even values. Or more
+  // precisely, aligned - 8 values)
+  auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
+  if (!CallsiteContext || isScratch(CallsiteContext))
+    return TheScratchContext;
+
+  // if the callee isn't the expected one, return scratch.
+  // Signal handler(s) could have been invoked at any point in the execution.
+  // Should that have happened, and had it (the handler) been built with
+  // instrumentation, its __llvm_ctx_profile_get_context would have failed here.
+  // Its sub call graph would have then populated
+  // __llvm_ctx_profile_{expected_callee | callsite} at index 1.
+  // The normal call graph may be impacted in that, if the signal handler
+  // happened somewhere before we read the TLS here, we'd see the TLS reset and
+  // we'd also fail here. That would just mean we would lose counter values for
+  // the normal subgraph, this time around. That should be very unlikely, but if
+  // it happens too frequently, we should be able to detect discrepancies in
+  // entry counts (caller-callee). At the moment, the design goes on the
+  // assumption that this is so infrequent, though, that it's not worth doing more
+  // for that case.
+  auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
+  if (ExpectedCallee != Callee)
+    return TheScratchContext;
+
+  auto *Callsite = *CallsiteContext;
+  // in the case of indirect calls, we will have all seen targets forming a
+  // linked list here. Find the one corresponding to this callee.
+  while (Callsite && Callsite->guid() != Guid) {
+    Callsite = Callsite->next();
+  }
+  auto *Ret = Callsite ? Callsite
+                       : getCallsiteSlow(Guid, CallsiteContext, NrCounters,
+                                         NrCallsites);
+  if (Ret->callsites_size() != NrCallsites ||
+      Ret->counters_size() != NrCounters)
+    __sanitizer::Printf("[ctxprof] Returned ctx differs from what's asked: "
+                        "Context: %p, Asked: %lu %u %u, Got: %lu %u %u \n",
+                        Ret, Guid, NrCallsites, NrCounters, Ret->guid(),
+                        Ret->callsites_size(), Ret->counters_size());
+  Ret->onEntry();
+  return Ret;
+}
+
+// This should be called once for a Root. Allocate the first arena, set up the
+// first context.
+void setupContext(ContextRoot *Root, GUID Guid, uint32_t NrCounters,
+                  uint32_t NrCallsites) {
+  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+      &AllContextsMutex);
+  // Re-check - we got here without having had taken a lock.
+  if (Root->FirstMemBlock)
+    return;
+  const auto Needed = ContextNode::getAllocSize(NrCounters, NrCallsites);
+  auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
+  Root->FirstMemBlock = M;
+  Root->CurrentMem = M;
+  Root->FirstNode = ContextNode::alloc(M->tryBumpAllocate(Needed), Guid,
+                                       NrCounters, NrCallsites);
+  AllContextRoots.PushBack(Root);
+}
+
+ContextNode *__llvm_ctx_profile_start_context(
+    ContextRoot *Root, GUID Guid, uint32_t Counters,
+    uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  if (!Root->FirstMemBlock) {
+    setupContext(Root, Guid, Counters, Callsites);
+  }
+  if (Root->Taken.TryLock()) {
+    __llvm_ctx_profile_current_context_root = Root;
+    Root->FirstNode->onEntry();
+    return Root->FirstNode;
+  }
+  // If this thread couldn't take the lock, return scratch context.
+  __llvm_ctx_profile_current_context_root = nullptr;
+  return TheScratchContext;
+}
+
+void __llvm_ctx_profile_release_context(ContextRoot *Root)
+    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  if (__llvm_ctx_profile_current_context_root) {
+    __llvm_ctx_profile_current_context_root = nullptr;
+    Root->Taken.Unlock();
+  }
+}
+
+void __llvm_ctx_profile_start_collection() {
+  size_t NrMemUnits = 0;
+  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+      &AllContextsMutex);
+  for (uint32_t I = 0; I < AllContextRoots.Size(); ++I) {
+    auto *Root = AllContextRoots[I];
+    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> Lock(
+        &Root->Taken);
+    for (auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next())
+      ++NrMemUnits;
+
+    Root->FirstNode->reset();
+  }
+  __sanitizer::Printf("[ctxprof] Initial NrMemUnits: %zu \n", NrMemUnits);
+}
+
+bool __llvm_ctx_profile_fetch(
+    void *Data, bool (*Writer)(void *W, const __ctx_profile::ContextNode &)) {
+  assert(Writer);
+  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+      &AllContextsMutex);
+
+  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I) {
+    auto *Root = AllContextRoots[I];
+    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> TakenLock(
+        &Root->Taken);
+    if (!validate(Root)) {
+      __sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
+      return false;
+    }
+    if (!Writer(Data, *Root->FirstNode))
+      return false;
+  }
+  return true;
+}
+
+void __llvm_ctx_profile_free() {
+  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+      &AllContextsMutex);
+  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+    for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+      auto *C = A;
+      A = A->next();
+      __sanitizer::InternalFree(C);
+    }
+  AllContextRoots.Reset();
+}
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index c1789c32a64c25..8c4be5d8a23a78 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -9,9 +9,16 @@
 #ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
 #define CTX_PROFILE_CTXINSTRPROFILING_H_
 
+#include "sanitizer_common/sanitizer_mutex.h"
 #include 
 
 namespace __ctx_profile {
+using GUID = uint64_t;
+static constexpr size_t ExpectedAlignment = 8;
+// We really depend on this, see further below. We currently support x86_64.
+// When we want to support other archs, we need to trace the places Alignment is
+// used and adjust accordingly.
+static_assert(sizeof(void *) == ExpectedAlignment);
 
 /// Arena (bump allocator) forming a linked list. Intentionally not thread safe.
 /// Allocation and de-allocation happen using sanitizer APIs. We make that
@@ -51,5 +58,206 @@ class Arena final {
   const uint64_t Size;
 };
 
+// The memory available for allocation follows the Arena header, and we expect
+// it to be thus aligned.
+static_assert(alignof(Arena) == ExpectedAlignment);
+
+/// The contextual profile is a directed tree where each node has one parent. A
+/// node (ContextNode) corresponds to a function activation. The root of the
+/// tree is at a function that was marked as entrypoint to the compiler. A node
+/// stores counter values for edges and a vector of subcontexts. These are the
+/// contexts of callees. The index in the subcontext vector corresponds to the
+/// index of the callsite (as was instrumented via llvm.instrprof.callsite). At
+/// that index we find a linked list, potentially empty, of ContextNodes. Direct
+/// calls will have 0 or 1 values in the linked list, but indirect callsites may
+/// have more.
+///
+/// The ContextNode has a fixed sized header describing it - the GUID of the
+/// function, the size of the counter and callsite vectors. It is also an
+/// (intrusive) linked list for the purposes of the indirect call case above.
+///
+/// Allocation is expected to happen on an Arena. The allocation lays out inline
+/// the counter and subcontexts vectors. The class offers APIs to correctly
+/// reference the latter.
+///
+/// The layout is as follows:
+///
+/// [[declared fields][counters vector][vector of ptrs to subcontexts]]
+///
+/// See also documentation on the counters and subContexts members below.
+///
+/// The structure of the ContextNode is known to LLVM, because LLVM needs to:
+///   (1) increment counts, and
+///   (2) form a GEP for the position in the subcontext list of a callsite
+/// This means changes to LLVM contextual profile lowering and changes here
+/// must be coupled.
+/// Note: the header content isn't interesting to LLVM (other than its size)
+///
+/// Part of contextual collection is the notion of "scratch contexts". These are
+/// buffers that are "large enough" to allow for memory-safe acceses during
+/// counter increments - meaning the counter increment code in LLVM doesn't need
+/// to be concerned with memory safety. Their subcontexts never get populated,
+/// though. The runtime code here produces and recognizes them.
+class ContextNode final {
+  const GUID Guid;
+  ContextNode *const Next;
+  const uint32_t NrCounters;
+  const uint32_t NrCallsites;
+
+public:
+  ContextNode(GUID Guid, uint32_t NrCounters, uint32_t NrCallsites,
+              ContextNode *Next = nullptr)
+      : Guid(Guid), Next(Next), NrCounters(NrCounters),
+        NrCallsites(NrCallsites) {}
+  static inline ContextNode *alloc(char *Place, GUID Guid, uint32_t NrCounters,
+                                   uint32_t NrCallsites,
+                                   ContextNode *Next = nullptr);
+
+  static inline size_t getAllocSize(uint32_t NrCounters, uint32_t NrCallsites) {
+    return sizeof(ContextNode) + sizeof(uint64_t) * NrCounters +
+           sizeof(ContextNode *) * NrCallsites;
+  }
+
+  // The counters vector starts right after the static header.
+  uint64_t *counters() {
+    ContextNode *addr_after = &(this[1]);
+    return reinterpret_cast<uint64_t *>(addr_after);
+  }
+
+  uint32_t counters_size() const { return NrCounters; }
+  uint32_t callsites_size() const { return NrCallsites; }
+
+  const uint64_t *counters() const {
+    return const_cast<ContextNode *>(this)->counters();
+  }
+
+  // The subcontexts vector starts right after the end of the counters vector.
+  ContextNode **subContexts() {
+    return reinterpret_cast<ContextNode **>(&(counters()[NrCounters]));
+  }
+
+  ContextNode *const *subContexts() const {
+    return const_cast<ContextNode *>(this)->subContexts();
+  }
+
+  GUID guid() const { return Guid; }
+  ContextNode *next() { return Next; }
+
+  size_t size() const { return getAllocSize(NrCounters, NrCallsites); }
+
+  void reset();
+
+  // since we go through the runtime to get a context back to LLVM, in the entry
+  // basic block, might as well handle incrementing the entry basic block
+  // counter.
+  void onEntry() { ++counters()[0]; }
+
+  uint64_t entrycount() const { return counters()[0]; }
+};
+
+// Verify maintenance to ContextNode doesn't change this invariant, which makes
+// sure the inlined vectors are appropriately aligned.
+static_assert(alignof(ContextNode) == ExpectedAlignment);
+
+/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
+/// with allocating and zero-initializing the global value (as in, GlobalValue)
+/// for it.
+struct ContextRoot {
+  ContextNode *FirstNode = nullptr;
+  Arena *FirstMemBlock = nullptr;
+  Arena *CurrentMem = nullptr;
+  // This is init-ed by the static zero initializer in LLVM.
+  // Taken is used to ensure only one thread traverses the contextual graph -
+  // either to read it or to write it. On server side, the same entrypoint will
+  // be entered by numerous threads, but over time, the profile aggregated by
+  // collecting sequentially on one thread at a time is expected to converge to
+  // the aggregate profile that may have been observable on all the threads.
+  // Note that this is node-by-node aggregation, i.e. summing counters of nodes
+  // at the same position in the graph, not flattening.
+  // Threads that cannot lock Taken (fail TryLock) are given a "scratch context"
+  // - a buffer they can clobber, safely from a memory access perspective.
+  //
+  // Note about "scratch"-ness: we currently ignore the data written in them
+  // (which is anyway clobbered). The design allows for that not to be the case -
+  // because "scratch"-ness is first and foremost about not trying to build
+  // subcontexts, and is captured by tainting the pointer value (pointer to the
+  // memory treated as context), but right now, we drop that info.
+  //
+  // We could consider relaxing the requirement of more than one thread
+  // entering by holding a few context trees per entrypoint and then aggregating
+  // them (as explained above) at the end of the profile collection - it's a
+  // tradeoff between collection time and memory use: higher precision can be
+  // obtained with either less concurrent collections but more collection time,
+  // or with more concurrent collections (==more memory) and less collection
+  // time. Note that concurrent collection does happen for different
+  // entrypoints, regardless.
+  ::__sanitizer::StaticSpinMutex Taken;
+
+  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
+  // instrumentation lowering side because it is responsible for allocating and
+  // zero-initializing ContextRoots.
+  static_assert(sizeof(Taken) == 1);
+};
+
+/// This API is exposed for testing. See the APIs below about the contract with
+/// LLVM.
+inline bool isScratch(const void *Ctx) {
+  return (reinterpret_cast<uint64_t>(Ctx) & 1);
+}
+
 } // namespace __ctx_profile
+
+extern "C" {
+
+// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
+// position 0 is used when the current context isn't scratch, 1 when it is. They
+// are volatile because of signal handlers - we mean to specifically control
+// when the data is loaded.
+//
+/// TLS where LLVM stores the pointer of the called value, as part of lowering a
+/// llvm.instrprof.callsite
+extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
+/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
+/// corresponds to the callsite being lowered.
+extern __thread __ctx_profile::ContextNode *
+    *volatile __llvm_ctx_profile_callsite[2];
+
+// __llvm_ctx_profile_current_context_root is exposed for unit testing,
+// otherwise it's only used internally by compiler-rt/ctx_profile.
+extern __thread __ctx_profile::ContextRoot
+    *volatile __llvm_ctx_profile_current_context_root;
+
+/// called by LLVM in the entry BB of an "entry point" function. The returned
+/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
+__ctx_profile::ContextNode *
+__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
+                                 __ctx_profile::GUID Guid, uint32_t Counters,
+                                 uint32_t Callsites);
+
+/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
+/// entry point function.
+void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
+
+/// called for any other function than entry points, in the entry BB of such
+/// function. Same consideration about LSB of returned value as .._start_context
+__ctx_profile::ContextNode *
+__llvm_ctx_profile_get_context(void *Callee, __ctx_profile::GUID Guid,
+                               uint32_t NrCounters, uint32_t NrCallsites);
+
+/// Prepares for collection. Currently this resets counter values but preserves
+/// internal context tree structure.
+void __llvm_ctx_profile_start_collection();
+
+/// Completely free allocated memory.
+void __llvm_ctx_profile_free();
+
+/// Used to obtain the profile. The Writer is called for each root ContextNode,
+/// with the ContextRoot::Taken taken. The Writer is responsible for traversing
+/// the structure underneath.
+/// The Writer's first parameter plays the role of closure for Writer, and is
+/// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
+/// The second parameter is the root of a context tree.
+bool __llvm_ctx_profile_fetch(
+    void *Data, bool (*Writer)(void *, const __ctx_profile::ContextNode &));
+}
 #endif // CTX_PROFILE_CTXINSTRPROFILING_H_
diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index 44f37d25763206..f6ebe6ab2e50c5 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -1,8 +1,17 @@
 #include "../CtxInstrProfiling.h"
 #include "gtest/gtest.h"
+#include <thread>
 
 using namespace __ctx_profile;
 
+class ContextTest : public ::testing::Test {
+  void SetUp() override { memset(&Root, 0, sizeof(ContextRoot)); }
+  void TearDown() override { __llvm_ctx_profile_free(); }
+
+public:
+  ContextRoot Root;
+};
+
 TEST(ArenaTest, Basic) {
   Arena *A = Arena::allocateNewArena(1024);
   EXPECT_EQ(A->size(), 1024U);
@@ -20,3 +29,186 @@ TEST(ArenaTest, Basic) {
   Arena::freeArenaList(A);
   EXPECT_EQ(A, nullptr);
 }
+
+TEST_F(ContextTest, Basic) {
+  auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
+  ASSERT_NE(Ctx, nullptr);
+  EXPECT_NE(Root.CurrentMem, nullptr);
+  EXPECT_EQ(Root.FirstMemBlock, Root.CurrentMem);
+  EXPECT_EQ(Ctx->size(), sizeof(ContextNode) + 10 * sizeof(uint64_t) +
+                             4 * sizeof(ContextNode *));
+  EXPECT_EQ(Ctx->counters_size(), 10U);
+  EXPECT_EQ(Ctx->callsites_size(), 4U);
+  EXPECT_EQ(__llvm_ctx_profile_current_context_root, &Root);
+  Root.Taken.CheckLocked();
+  EXPECT_FALSE(Root.Taken.TryLock());
+  __llvm_ctx_profile_release_context(&Root);
+  EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
+  EXPECT_TRUE(Root.Taken.TryLock());
+  Root.Taken.Unlock();
+}
+
+TEST_F(ContextTest, Callsite) {
+  auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
+  int FakeCalleeAddress = 0;
+  const bool IsScratch = isScratch(Ctx);
+  EXPECT_FALSE(IsScratch);
+  // This is the sequence the caller performs - it's the lowering of the
+  // instrumentation of the callsite "2". "2" is arbitrary here.
+  __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
+  __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
+  // This is what the callee does
+  auto *Subctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
+  // We expect the subcontext to be appropriately placed and dimensioned
+  EXPECT_EQ(Ctx->subContexts()[2], Subctx);
+  EXPECT_EQ(Subctx->counters_size(), 3U);
+  EXPECT_EQ(Subctx->callsites_size(), 1U);
+  // We reset these in _get_context.
+  EXPECT_EQ(__llvm_ctx_profile_expected_callee[0], nullptr);
+  EXPECT_EQ(__llvm_ctx_profile_callsite[0], nullptr);
+
+  EXPECT_EQ(Subctx->size(), sizeof(ContextNode) + 3 * sizeof(uint64_t) +
+                                1 * sizeof(ContextNode *));
+  __llvm_ctx_profile_release_context(&Root);
+}
+
+TEST_F(ContextTest, ScratchNoCollection) {
+  EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
+  int FakeCalleeAddress = 0;
+  // this would be the very first function executing this. the TLS is empty,
+  // too.
+  auto *Ctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
+  // We never entered a context (_start_context was never called) - so the
+  // returned context must be scratch.
+  EXPECT_TRUE(isScratch(Ctx));
+}
+
+TEST_F(ContextTest, ScratchDuringCollection) {
+  auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
+  int FakeCalleeAddress = 0;
+  int OtherFakeCalleeAddress = 0;
+  __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
+  __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
+  auto *Subctx =
+      __llvm_ctx_profile_get_context(&OtherFakeCalleeAddress, 2, 3, 1);
+  // We expected a different callee - so return scratch. It mimics what happens
+  // in the case of a signal handler - in this case, OtherFakeCalleeAddress is
+  // the signal handler.
+  EXPECT_TRUE(isScratch(Subctx));
+  EXPECT_EQ(__llvm_ctx_profile_expected_callee[0], nullptr);
+  EXPECT_EQ(__llvm_ctx_profile_callsite[0], nullptr);
+
+  int ThirdFakeCalleeAddress = 0;
+  __llvm_ctx_profile_expected_callee[1] = &ThirdFakeCalleeAddress;
+  __llvm_ctx_profile_callsite[1] = &Subctx->subContexts()[0];
+
+  auto *Subctx2 =
+      __llvm_ctx_profile_get_context(&ThirdFakeCalleeAddress, 3, 0, 0);
+  // We again expect scratch because the '0' position is where the runtime
+  // looks, so it doesn't matter the '1' position is populated correctly.
+  EXPECT_TRUE(isScratch(Subctx2));
+
+  __llvm_ctx_profile_expected_callee[0] = &ThirdFakeCalleeAddress;
+  __llvm_ctx_profile_callsite[0] = &Subctx->subContexts()[0];
+  auto *Subctx3 =
+      __llvm_ctx_profile_get_context(&ThirdFakeCalleeAddress, 3, 0, 0);
+  // We expect scratch here, too, because the value placed in
+  // __llvm_ctx_profile_callsite is scratch
+  EXPECT_TRUE(isScratch(Subctx3));
+
+  __llvm_ctx_profile_release_context(&Root);
+}
+
+TEST_F(ContextTest, NeedMoreMemory) {
+  auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
+  int FakeCalleeAddress = 0;
+  const bool IsScratch = isScratch(Ctx);
+  EXPECT_FALSE(IsScratch);
+  const auto *CurrentMem = Root.CurrentMem;
+  __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
+  __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
+  // Allocate a massive subcontext to force new arena allocation
+  auto *Subctx =
+      __llvm_ctx_profile_get_context(&FakeCalleeAddress, 3, 1 << 20, 1);
+  EXPECT_EQ(Ctx->subContexts()[2], Subctx);
+  EXPECT_NE(CurrentMem, Root.CurrentMem);
+  EXPECT_NE(Root.CurrentMem, nullptr);
+}
+
+TEST_F(ContextTest, ConcurrentRootCollection) {
+  std::atomic<int> NonScratch = 0;
+  std::atomic<int> Executions = 0;
+
+  __sanitizer::Semaphore GotCtx;
+
+  auto Entrypoint = [&]() {
+    ++Executions;
+    auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
+    GotCtx.Post();
+    const bool IS = isScratch(Ctx);
+    NonScratch += (!IS);
+    if (!IS) {
+      GotCtx.Wait();
+      GotCtx.Wait();
+    }
+    __llvm_ctx_profile_release_context(&Root);
+  };
+  std::thread T1(Entrypoint);
+  std::thread T2(Entrypoint);
+  T1.join();
+  T2.join();
+  EXPECT_EQ(NonScratch, 1);
+  EXPECT_EQ(Executions, 2);
+}
+
+TEST_F(ContextTest, Dump) {
+  auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
+  int FakeCalleeAddress = 0;
+  __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
+  __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
+  auto *Subctx = __llvm_ctx_profile_get_context(&FakeCalleeAddress, 2, 3, 1);
+  (void)Subctx;
+  __llvm_ctx_profile_release_context(&Root);
+
+  struct Writer {
+    ContextRoot *const Root;
+    const size_t Entries;
+    bool State = false;
+    Writer(ContextRoot *Root, size_t Entries) : Root(Root), Entries(Entries) {}
+
+    bool write(const ContextNode &Node) {
+      EXPECT_FALSE(Root->Taken.TryLock());
+      EXPECT_EQ(Node.guid(), 1);
+      EXPECT_EQ(Node.counters()[0], Entries);
+      EXPECT_EQ(Node.counters_size(), 10);
+      EXPECT_EQ(Node.callsites_size(), 4);
+      EXPECT_EQ(Node.subContexts()[0], nullptr);
+      EXPECT_EQ(Node.subContexts()[1], nullptr);
+      EXPECT_NE(Node.subContexts()[2], nullptr);
+      EXPECT_EQ(Node.subContexts()[3], nullptr);
+      const auto &SN = *Node.subContexts()[2];
+      EXPECT_EQ(SN.guid(), 2);
+      EXPECT_EQ(SN.counters()[0], Entries);
+      EXPECT_EQ(SN.counters_size(), 3);
+      EXPECT_EQ(SN.callsites_size(), 1);
+      EXPECT_EQ(SN.subContexts()[0], nullptr);
+      State = true;
+      return true;
+    }
+  };
+  Writer W(&Root, 1);
+  EXPECT_FALSE(W.State);
+  __llvm_ctx_profile_fetch(&W, [](void *W, const ContextNode &Node) -> bool {
+    return reinterpret_cast<Writer *>(W)->write(Node);
+  });
+  EXPECT_TRUE(W.State);
+
+  // this resets all counters but not the internal structure.
+  __llvm_ctx_profile_start_collection();
+  Writer W2(&Root, 0);
+  EXPECT_FALSE(W2.State);
+  __llvm_ctx_profile_fetch(&W2, [](void *W, const ContextNode &Node) -> bool {
+    return reinterpret_cast<Writer *>(W)->write(Node);
+  });
+  EXPECT_TRUE(W2.State);
+}

From 8fc68879badc2dc83e8b9a575992af285d4a1057 Mon Sep 17 00:00:00 2001
From: Ryosuke Niwa 
Date: Tue, 7 May 2024 15:16:29 -0700
Subject: [PATCH 31/64] Fix a typo in webkit.NoUncountedMemberChecker. (#91402)

Co-authored-by: Brianna Fan 
---
 .../Checkers/WebKit/NoUncountedMembersChecker.cpp               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp
index c753ed84a700cd..69a0eb3086ab72 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp
@@ -34,7 +34,7 @@ class NoUncountedMemberChecker
 public:
   NoUncountedMemberChecker()
       : Bug(this,
-            "Member variable is a raw-poiner/reference to reference-countable "
+            "Member variable is a raw-pointer/reference to reference-countable "
             "type",
             "WebKit coding guidelines") {}
 

From ff0c5ccbe8879ccad9cb3548b69b114872c33ebb Mon Sep 17 00:00:00 2001
From: Maksim Panchenko 
Date: Tue, 7 May 2024 16:05:10 -0700
Subject: [PATCH 32/64] [BOLT] Add a test for BOLT-reserved space in a binary
 (#91399)

Test case for #90300.
---
 bolt/test/runtime/bolt-reserved.cpp | 40 +++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 bolt/test/runtime/bolt-reserved.cpp

diff --git a/bolt/test/runtime/bolt-reserved.cpp b/bolt/test/runtime/bolt-reserved.cpp
new file mode 100644
index 00000000000000..5e93b4f7c3d40b
--- /dev/null
+++ b/bolt/test/runtime/bolt-reserved.cpp
@@ -0,0 +1,40 @@
+// REQUIRES: system-linux
+
+/*
+ * Check that llvm-bolt uses reserved space in a binary for allocating
+ * new sections.
+ */
+
+// RUN: %clang %s -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt.exe 2>&1 | FileCheck %s
+// RUN: %t.bolt.exe
+
+// CHECK: BOLT-INFO: using reserved space
+
+/*
+ * Check that llvm-bolt detects a condition when the reserved space is
+ * not enough for allocating new sections.
+ */
+
+// RUN: %clang %s -o %t.exe -Wl,--no-eh-frame-hdr -Wl,-q -DTINY
+// RUN: not llvm-bolt %t.exe -o %t.bolt.exe 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK-TINY
+
+// CHECK-TINY: BOLT-ERROR: reserved space (1 byte) is smaller than required
+
+#ifdef TINY
+#define RSIZE "1"
+#else
+#define RSIZE "8192 * 1024"
+#endif
+
+asm(".pushsection .text \n\
+       .globl __bolt_reserved_start \n\
+       .type __bolt_reserved_start, @object \n\
+       __bolt_reserved_start: \n\
+       .space " RSIZE " \n\
+       .globl __bolt_reserved_end \n\
+       __bolt_reserved_end: \n\
+     .popsection");
+
+int main() { return 0; }

From 54401b43494a57baae9d3663cd7c694b040ef01c Mon Sep 17 00:00:00 2001
From: Prathamesh Tagore <63031630+meshtag@users.noreply.github.com>
Date: Wed, 8 May 2024 04:49:55 +0530
Subject: [PATCH 33/64] [mlir][memref.expand_shape] Add verifier check to
 ensure correct output_shape is provided by user (#91245)

The verifier was not checking the case where the user-provided shape in
output_shape differs from the one inferred from the output type. Fix
this.
---
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 10 ++++++++++
 mlir/test/Dialect/MemRef/invalid.mlir    | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 393f73dc65cd8d..78201ae29cd9bd 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -2353,6 +2353,16 @@ LogicalResult ExpandShapeOp::verify() {
            << " dynamic dims while output_shape has " << getOutputShape().size()
            << " values";
 
+  // Verify if provided output shapes are in agreement with output type.
+  DenseI64ArrayAttr staticOutputShapes = getStaticOutputShapeAttr();
+  ArrayRef resShape = getResult().getType().getShape();
+  unsigned staticShapeNum = 0;
+
+  for (auto [pos, shape] : llvm::enumerate(resShape))
+    if (!ShapedType::isDynamic(shape) &&
+        shape != staticOutputShapes[staticShapeNum++])
+      emitOpError("invalid output shape provided at pos ") << pos;
+
   return success();
 }
 
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 70c96aad9555ef..0f533cb95a0ca9 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -1103,3 +1103,14 @@ func.func @subview_invalid_strides_rank_reduction(%m: memref<7x22x333x4444xi32>)
       : memref<7x22x333x4444xi32> to memref<7x11x4444xi32>
   return
 }
+
+// -----
+
+func.func @expand_shape_invalid_output_shape(
+    %arg0: memref<30x20xf32, strided<[4000, 2], offset: 100>>) {
+  // expected-error @+1 {{invalid output shape provided at pos 2}}
+  %0 = memref.expand_shape %arg0 [[0, 1], [2]] output_shape [2, 15, 21] :
+      memref<30x20xf32, strided<[4000, 2], offset: 100>>
+      into memref<2x15x20xf32, strided<[60000, 4000, 2], offset: 100>>
+  return
+}

From c0d9efd35d6a44258466349a7ba3a10c693b8c9c Mon Sep 17 00:00:00 2001
From: Youngsuk Kim 
Date: Tue, 7 May 2024 19:20:26 -0400
Subject: [PATCH 34/64] [llvm][NVPTX] Remove outdated comments (NFC) (#91409)

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 142dd64ddea9dc..393fa29ff0516e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2497,10 +2497,6 @@ defm FSetNE : FSET_FORMAT;
 defm FSetNUM : FSET_FORMAT;
 defm FSetNAN : FSET_FORMAT;
 
-// FIXME: What is this doing here?  Can it be deleted?
-// def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
-//                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
 def SDTDeclareParamProfile :
   SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
 def SDTDeclareScalarParamProfile :

From 3f37397c959a85f4cad91b655ea03a5d2450ab38 Mon Sep 17 00:00:00 2001
From: Max Winkler 
Date: Tue, 7 May 2024 19:46:19 -0400
Subject: [PATCH 35/64] [clang][CodeGen] Fix MSVC ABI for classes with a
 deleted copy assignment operator (#90547)

For global functions and static methods, the MSVC ABI returns
structs/classes with a deleted copy assignment operator indirectly.
From local testing, this ABI holds true for all currently supported
architectures, including ARM64EC.
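
The two class shapes this affects are, roughly (a sketch mirroring the new
tests, assuming an MSVC target such as `x86_64-windows-msvc`):
```cpp
// Copy assignment explicitly deleted: now returned indirectly (sret).
struct ExplicitlyDeleted {
  ExplicitlyDeleted &operator=(const ExplicitlyDeleted &) = delete;
  int i;
};

// Copy assignment implicitly deleted because of the reference member:
// also returned indirectly.
struct ImplicitlyDeleted {
  int &r;
};

ExplicitlyDeleted f1(); // callee writes the result through a hidden pointer
ImplicitlyDeleted f2(); // same: no longer returned in a register
```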
---
 clang/docs/ReleaseNotes.rst                   |  3 +
 clang/lib/CodeGen/MicrosoftCXXABI.cpp         | 22 ++++-
 .../test/CodeGen/x64-microsoft-arguments.cpp  | 92 +++++++++++++++++++
 3 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGen/x64-microsoft-arguments.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index cc3108bf41d680..106b1e6f99454c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -76,6 +76,9 @@ ABI Changes in This Version
   returning a class in a register. This affects some uses of std::pair.
   (#GH86384).
 
+- Fixed Microsoft calling convention when returning classes that have a deleted
+  copy assignment operator. Such a class should be returned indirectly.
+
 AST Dumping Potentially Breaking Changes
 ----------------------------------------
 
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index d47927745759e1..e4f798f6a97d97 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1122,7 +1122,22 @@ static bool isTrivialForMSVC(const CXXRecordDecl *RD, QualType Ty,
   //   No base classes
   //   No virtual functions
   // Additionally, we need to ensure that there is a trivial copy assignment
-  // operator, a trivial destructor and no user-provided constructors.
+  // operator, a trivial destructor, no user-provided constructors and no
+  // deleted copy assignment operator.
+
+  // We need to cover two cases when checking for a deleted copy assignment
+  // operator.
+  //
+  // struct S { int& r; };
+  // The above will have an implicit copy assignment operator that is deleted
+  // and there will not be a `CXXMethodDecl` for the copy assignment operator.
+  // This is handled by the `needsImplicitCopyAssignment()` check below.
+  //
+  // struct S { S& operator=(const S&) = delete; int i; };
+  // The above will not have an implicit copy assignment operator that is
+  // deleted but there is a deleted `CXXMethodDecl` for the declared copy
+  // assignment operator. This is handled by the `isDeleted()` check below.
+
   if (RD->hasProtectedFields() || RD->hasPrivateFields())
     return false;
   if (RD->getNumBases() > 0)
@@ -1131,6 +1146,8 @@ static bool isTrivialForMSVC(const CXXRecordDecl *RD, QualType Ty,
     return false;
   if (RD->hasNonTrivialCopyAssignment())
     return false;
+  if (RD->needsImplicitCopyAssignment() && !RD->hasSimpleCopyAssignment())
+    return false;
   for (const Decl *D : RD->decls()) {
     if (auto *Ctor = dyn_cast(D)) {
       if (Ctor->isUserProvided())
@@ -1138,6 +1155,9 @@ static bool isTrivialForMSVC(const CXXRecordDecl *RD, QualType Ty,
     } else if (auto *Template = dyn_cast(D)) {
       if (isa(Template->getTemplatedDecl()))
         return false;
+    } else if (auto *MethodDecl = dyn_cast(D)) {
+      if (MethodDecl->isCopyAssignmentOperator() && MethodDecl->isDeleted())
+        return false;
     }
   }
   if (RD->hasNonTrivialDestructor())
diff --git a/clang/test/CodeGen/x64-microsoft-arguments.cpp b/clang/test/CodeGen/x64-microsoft-arguments.cpp
new file mode 100644
index 00000000000000..c666c92ad2db25
--- /dev/null
+++ b/clang/test/CodeGen/x64-microsoft-arguments.cpp
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -ffreestanding -emit-llvm -O0 \
+// RUN: -x c++ -o - %s | FileCheck %s
+
+int global_i = 0;
+
+// Pass and return object with a reference type (pass directly, return indirectly).
+// CHECK: define dso_local void @"?f1@@YA?AUS1@@XZ"(ptr dead_on_unwind noalias writable sret(%struct.S1) align 8 {{.*}})
+// CHECK: call void @"?func1@@YA?AUS1@@U1@@Z"(ptr dead_on_unwind writable sret(%struct.S1) align 8 {{.*}}, i64 {{.*}})
+struct S1 {
+  int& r;
+};
+
+S1 func1(S1 x);
+S1 f1() {
+  S1 x{ global_i };
+  return func1(x);
+}
+
+// Pass and return object with a reference type within an inner struct (pass directly, return indirectly).
+// CHECK: define dso_local void @"?f2@@YA?AUS2@@XZ"(ptr dead_on_unwind noalias writable sret(%struct.S2) align 8 {{.*}})
+// CHECK: call void @"?func2@@YA?AUS2@@U1@@Z"(ptr dead_on_unwind writable sret(%struct.S2) align 8 {{.*}}, i64 {{.*}})
+struct Inner {
+  int& r;
+};
+
+struct S2 {
+  Inner i;
+};
+
+S2 func2(S2 x);
+S2 f2() {
+  S2 x{ { global_i } };
+  return func2(x);
+}
+
+// Pass and return object with a reference type (pass directly, return indirectly).
+// CHECK: define dso_local void @"?f3@@YA?AUS3@@XZ"(ptr dead_on_unwind noalias writable sret(%struct.S3) align 8 {{.*}})
+// CHECK: call void @"?func3@@YA?AUS3@@U1@@Z"(ptr dead_on_unwind writable sret(%struct.S3) align 8 {{.*}}, i64 {{.*}})
+struct S3 {
+  const int& r;
+};
+
+S3 func3(S3 x);
+S3 f3() {
+  S3 x{ global_i };
+  return func3(x);
+}
+
+// Pass and return object with a reference type within an inner struct (pass directly, return indirectly).
+// CHECK: define dso_local void @"?f4@@YA?AUS4@@XZ"(ptr dead_on_unwind noalias writable sret(%struct.S4) align 8 {{.*}})
+// CHECK: call void @"?func4@@YA?AUS4@@U1@@Z"(ptr dead_on_unwind writable sret(%struct.S4) align 8 {{.*}}, i64 {{.*}})
+struct InnerConst {
+  const int& r;
+};
+
+struct S4 {
+  InnerConst i;
+};
+
+S4 func4(S4 x);
+S4 f4() {
+  S4 x{ { global_i } };
+  return func4(x);
+}
+
+// Pass and return an object with an explicitly deleted copy assignment operator (pass directly, return indirectly).
+// CHECK: define dso_local void @"?f5@@YA?AUS5@@XZ"(ptr dead_on_unwind noalias writable sret(%struct.S5) align 4 {{.*}})
+// CHECK: call void @"?func5@@YA?AUS5@@U1@@Z"(ptr dead_on_unwind writable sret(%struct.S5) align 4 {{.*}}, i32 {{.*}})
+struct S5 {
+  S5& operator=(const S5&) = delete;
+  int i;
+};
+
+S5 func5(S5 x);
+S5 f5() {
+  S5 x{ 1 };
+  return func5(x);
+}
+
+// Pass and return an object with an explicitly defaulted copy assignment operator that is implicitly deleted (pass directly, return indirectly).
+// CHECK: define dso_local void @"?f6@@YA?AUS6@@XZ"(ptr dead_on_unwind noalias writable sret(%struct.S6) align 8 {{.*}})
+// CHECK: call void @"?func6@@YA?AUS6@@U1@@Z"(ptr dead_on_unwind writable sret(%struct.S6) align 8 {{.*}}, i64 {{.*}})
+struct S6 {
+  S6& operator=(const S6&) = default;
+  int& i;
+};
+
+S6 func6(S6 x);
+S6 f6() {
+  S6 x{ global_i };
+  return func6(x);
+}

From 04d0a691af9e116f651d233c5689863f614d3adf Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Tue, 7 May 2024 16:56:45 -0700
Subject: [PATCH 36/64] [ELF] Fix --compress-debug-sections=zstd when zlib is
 disabled

---
 lld/ELF/Options.td         | 2 +-
 lld/ELF/OutputSections.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 73a4f9662a561f..b9e05a4b1fd5cf 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -68,7 +68,7 @@ defm compress_debug_sections:
   MetaVarName<"[none,zlib,zstd]">;
 
 defm compress_sections: EEq<"compress-sections",
-  "Compress output sections that match the glob and do not have the SHF_ALLOC flag."
+  "Compress output sections that match the glob and do not have the SHF_ALLOC flag. "
   "The compression level is  (if specified) or a default speed-focused level">,
   MetaVarName<"={none,zlib,zstd}[:level]">;
 
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 2dbbff06a89087..9c667241360f6b 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -438,10 +438,10 @@ template  void OutputSection::maybeCompress() {
     compressed.type = ELFCOMPRESS_ZLIB;
     compressed.checksum = checksum;
   }
+#endif
 
   compressed.shards = std::move(shardsOut);
   flags |= SHF_COMPRESSED;
-#endif
 }
 
 static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) {

From 77c5cea78eac3f20d0ba79f5892235e5aac82603 Mon Sep 17 00:00:00 2001
From: Krystian Stasiowski 
Date: Tue, 7 May 2024 20:04:57 -0400
Subject: [PATCH 37/64] [Clang][Sema] Explicit template arguments are not
 substituted into the exception specification of a function (#90760)

[temp.deduct.general] p6 states:
> At certain points in the template argument deduction process it is
necessary to take a function type that makes use of template parameters
and replace those template parameters with the corresponding template
arguments.
This is done at the beginning of template argument deduction when any
explicitly specified template arguments are substituted into the
function type, and again at the end of template argument deduction when
any template arguments that were deduced or obtained from default
arguments are substituted.

[temp.deduct.general] p7 goes on to say:
> The _deduction substitution loci_ are
> - the function type outside of the _noexcept-specifier_,
> - the explicit-specifier,
> - the template parameter declarations, and
> - the template argument list of a partial specialization
>
> The substitution occurs in all types and expressions that are used in
the deduction substitution loci. [...]

Consider the following:
```cpp
struct A
{
    static constexpr bool x = true;
};

template
void f(T, U) noexcept(T::x); // #1

template
void f(T, U*) noexcept(T::y); // #2

template<>
void f(A, int*) noexcept; // clang currently accepts, GCC and EDG reject
```

Currently, `Sema::SubstituteExplicitTemplateArguments` will substitute
into the _noexcept-specifier_ when deducing template arguments from a
function declaration or when deducing template arguments for taking the
address of a function template (and the substitution is treated as a
SFINAE context). In the above example, `#1` is selected as the primary
template because substitution of the explicit template arguments into
the _noexcept-specifier_ of `#2` failed, which resulted in the candidate
being ignored.

This behavior is incorrect ([temp.deduct.general] note 4 says as much), and
this patch corrects it by deferring all substitution into the
_noexcept-specifier_ until it is instantiated.

As part of the necessary changes to make this patch work, the
instantiation of the exception specification of a function template
specialization when taking the address of a function template is changed
to only occur for the function selected by overload resolution per
[except.spec] p13.1 (as opposed to being instantiated for every candidate).
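
The effect can be seen in a condensed version of the `N3` test added to
`p13.cpp` below:
```cpp
struct A {
  static constexpr bool x = true;
};

template<typename T, typename U>
void g(T, U) noexcept(T::y);  // #1: T::y does not exist for A, but this
                              // noexcept-specifier is never instantiated

template<typename T, typename U>
void g(T, U*) noexcept(T::x); // #2: selected by overload resolution

// Deduction succeeds for both candidates; only the exception specification of
// the selected candidate (#2) is instantiated, so this is well-formed.
void (*p)(A, int*) = g;
```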
---
 clang/docs/ReleaseNotes.rst                   |  2 ++
 clang/lib/Sema/SemaInit.cpp                   | 24 ++++++++++----
 clang/lib/Sema/SemaTemplateDeduction.cpp      | 32 ++-----------------
 clang/test/CXX/drs/dr13xx.cpp                 | 13 +++-----
 clang/test/CXX/except/except.spec/p13.cpp     | 27 ++++++++++++++++
 clang/test/CXX/temp/temp.deduct/p7.cpp        | 14 ++++++++
 .../SemaCXX/cxx1z-noexcept-function-type.cpp  |  4 +--
 clang/test/SemaTemplate/temp_arg_type.cpp     | 10 +++---
 8 files changed, 75 insertions(+), 51 deletions(-)
 create mode 100644 clang/test/CXX/temp/temp.deduct/p7.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 106b1e6f99454c..c4a9501ca15cfe 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -691,6 +691,8 @@ Bug Fixes to C++ Support
 - Fix an assertion failure when parsing an invalid members of an anonymous class. (#GH85447)
 - Fixed a misuse of ``UnresolvedLookupExpr`` for ill-formed templated expressions. Fixes (#GH48673), (#GH63243)
   and (#GH88832).
+- Clang now defers all substitution into the exception specification of a function template specialization
+  until the noexcept-specifier is instantiated.
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 7d9eaf6720461d..c8049ae581f843 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -6576,12 +6576,12 @@ void InitializationSequence::InitializeFrom(Sema &S,
 
     AddPassByIndirectCopyRestoreStep(DestType, ShouldCopy);
   } else if (ICS.isBad()) {
-    DeclAccessPair dap;
-    if (isLibstdcxxPointerReturnFalseHack(S, Entity, Initializer)) {
+    if (isLibstdcxxPointerReturnFalseHack(S, Entity, Initializer))
       AddZeroInitializationStep(Entity.getType());
-    } else if (Initializer->getType() == Context.OverloadTy &&
-               !S.ResolveAddressOfOverloadedFunction(Initializer, DestType,
-                                                     false, dap))
+    else if (DeclAccessPair Found;
+             Initializer->getType() == Context.OverloadTy &&
+             !S.ResolveAddressOfOverloadedFunction(Initializer, DestType,
+                                                   /*Complain=*/false, Found))
       SetFailed(InitializationSequence::FK_AddressOfOverloadFailed);
     else if (Initializer->getType()->isFunctionType() &&
              isExprAnUnaddressableFunction(S, Initializer))
@@ -9641,6 +9641,8 @@ bool InitializationSequence::Diagnose(Sema &S,
   if (!Failed())
     return false;
 
+  QualType DestType = Entity.getType();
+
   // When we want to diagnose only one element of a braced-init-list,
   // we need to factor it out.
   Expr *OnlyArg;
@@ -9650,11 +9652,21 @@ bool InitializationSequence::Diagnose(Sema &S,
       OnlyArg = List->getInit(0);
     else
       OnlyArg = Args[0];
+
+    if (OnlyArg->getType() == S.Context.OverloadTy) {
+      DeclAccessPair Found;
+      if (FunctionDecl *FD = S.ResolveAddressOfOverloadedFunction(
+              OnlyArg, DestType.getNonReferenceType(), /*Complain=*/false,
+              Found)) {
+        if (Expr *Resolved =
+                S.FixOverloadedFunctionReference(OnlyArg, Found, FD).get())
+          OnlyArg = Resolved;
+      }
+    }
   }
   else
     OnlyArg = nullptr;
 
-  QualType DestType = Entity.getType();
   switch (Failure) {
   case FK_TooManyInitsForReference:
     // FIXME: Customize for the initialized entity?
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 9f9e4422827173..dcaea4a77bffd2 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -1323,13 +1323,11 @@ bool Sema::isSameOrCompatibleFunctionType(QualType P, QualType A) {
     return Context.hasSameType(P, A);
 
   // Noreturn and noexcept adjustment.
-  QualType AdjustedParam;
-  if (IsFunctionConversion(P, A, AdjustedParam))
-    return Context.hasSameType(AdjustedParam, A);
+  if (QualType AdjustedParam; IsFunctionConversion(P, A, AdjustedParam))
+    P = AdjustedParam;
 
   // FIXME: Compatible calling conventions.
-
-  return Context.hasSameType(P, A);
+  return Context.hasSameFunctionTypeIgnoringExceptionSpec(P, A);
 }
 
 /// Get the index of the first template parameter that was originally from the
@@ -3509,23 +3507,6 @@ TemplateDeductionResult Sema::SubstituteExplicitTemplateArguments(
   if (FunctionType) {
     auto EPI = Proto->getExtProtoInfo();
     EPI.ExtParameterInfos = ExtParamInfos.getPointerOrNull(ParamTypes.size());
-
-    // In C++1z onwards, exception specifications are part of the function type,
-    // so substitution into the type must also substitute into the exception
-    // specification.
-    SmallVector ExceptionStorage;
-    if (getLangOpts().CPlusPlus17 &&
-        SubstExceptionSpec(
-            Function->getLocation(), EPI.ExceptionSpec, ExceptionStorage,
-            getTemplateInstantiationArgs(
-                FunctionTemplate, nullptr, /*Final=*/true,
-                /*Innermost=*/SugaredExplicitArgumentList->asArray(),
-                /*RelativeToPrimary=*/false,
-                /*Pattern=*/nullptr,
-                /*ForConstraintInstantiation=*/false,
-                /*SkipForSpecialization=*/true)))
-      return TemplateDeductionResult::SubstitutionFailure;
-
     *FunctionType = BuildFunctionType(ResultType, ParamTypes,
                                       Function->getLocation(),
                                       Function->getDeclName(),
@@ -4705,13 +4686,6 @@ TemplateDeductionResult Sema::DeduceTemplateArguments(
                                                Info.getLocation()))
     return TemplateDeductionResult::MiscellaneousDeductionFailure;
 
-  auto *SpecializationFPT =
-      Specialization->getType()->castAs();
-  if (IsAddressOfFunction && getLangOpts().CPlusPlus17 &&
-      isUnresolvedExceptionSpec(SpecializationFPT->getExceptionSpecType()) &&
-      !ResolveExceptionSpec(Info.getLocation(), SpecializationFPT))
-    return TemplateDeductionResult::MiscellaneousDeductionFailure;
-
   // Adjust the exception specification of the argument to match the
   // substituted and resolved type we just formed. (Calling convention and
   // noreturn can't be dependent, so we don't actually need this for them
diff --git a/clang/test/CXX/drs/dr13xx.cpp b/clang/test/CXX/drs/dr13xx.cpp
index dad82c4e2829f0..a334b6d01acf51 100644
--- a/clang/test/CXX/drs/dr13xx.cpp
+++ b/clang/test/CXX/drs/dr13xx.cpp
@@ -281,13 +281,10 @@ namespace cwg1330 { // cwg1330: 4 c++11
   decltype(f()) f2; // #cwg1330-f-char
   bool f3 = noexcept(f()); /// #cwg1330-f-float
 #endif
-  // In C++17 onwards, substituting explicit template arguments into the
-  // function type substitutes into the exception specification (because it's
-  // part of the type). In earlier languages, we don't notice there's a problem
-  // until we've already started to instantiate.
   template int f(); // #cwg1330-f-short
-  // since-cxx17-error@-1 {{explicit instantiation of 'f' does not refer to a function template, variable template, member function, member class, or static data member}}
-  //   since-cxx17-note@#cwg1330-f {{candidate template ignored: substitution failure [with T = short]: type 'short' cannot be used prior to '::' because it has no members}}
+  // since-cxx17-error@#cwg1330-f {{type 'short' cannot be used prior to '::' because it has no members}}
+  //   since-cxx17-note@#cwg1330-f {{in instantiation of exception specification for 'f' requested here}}
+  //   since-cxx17-note@#cwg1330-f-short {{in instantiation of function template specialization 'cwg1330::f' requested here}}
 
   template struct C {
     C() throw(typename T::type); // #cwg1330-C
@@ -500,7 +497,7 @@ namespace cwg1359 { // cwg1359: 3.5
   union B { constexpr B() = default; int a; }; // #cwg1359-B
   // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr before C++23}}
   union C { constexpr C() = default; int a, b; }; // #cwg1359-C
-  // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}} 
+  // cxx11-17-error@-1 {{defaulted definition of default constructor cannot be marked constexpr}}
   struct X { constexpr X() = default; union {}; };
   // since-cxx11-error@-1 {{declaration does not declare anything}}
   struct Y { constexpr Y() = default; union { int a; }; }; // #cwg1359-Y
@@ -720,7 +717,7 @@ struct A {
 } // namespace cwg1397
 
 namespace cwg1399 { // cwg1399: dup 1388
-  template void f(T..., int, T...) {} // #cwg1399-f 
+  template void f(T..., int, T...) {} // #cwg1399-f
   // cxx98-error@-1 {{variadic templates are a C++11 extension}}
   void g() {
     f(0);
diff --git a/clang/test/CXX/except/except.spec/p13.cpp b/clang/test/CXX/except/except.spec/p13.cpp
index 61cdb74f21ec51..29390c277c5203 100644
--- a/clang/test/CXX/except/except.spec/p13.cpp
+++ b/clang/test/CXX/except/except.spec/p13.cpp
@@ -72,3 +72,30 @@ template<>
 void f(A, int***); // expected-error {{'f' is missing exception specification 'noexcept'}}
 
 }
+
+namespace N3 {
+
+template
+void f(T, U) noexcept(T::y); // #1
+
+template // #2
+void f(T, U*) noexcept(T::x);
+
+// Deduction should succeed for both candidates, and #2 should be selected by overload resolution.
+// Only the exception specification of #2 should be instantiated.
+void (*x)(A, int*) = f;
+}
+
+namespace N4 {
+
+template
+void f(T, U) noexcept(T::x); // #1
+
+template
+void f(T, U*) noexcept(T::y); // #2
+// expected-error@-1 {{no member named 'y' in 'A'}}
+
+// Deduction should succeed for both candidates, and #2 should be selected by overload resolution.
+// Only the exception specification of #2 should be instantiated.
+void (*x)(A, int*) = f; // expected-note {{in instantiation of exception specification for 'f' requested here}}
+}
diff --git a/clang/test/CXX/temp/temp.deduct/p7.cpp b/clang/test/CXX/temp/temp.deduct/p7.cpp
new file mode 100644
index 00000000000000..cf6d17fc51ac95
--- /dev/null
+++ b/clang/test/CXX/temp/temp.deduct/p7.cpp
@@ -0,0 +1,14 @@
+// RUN:  %clang_cc1 -verify %s
+
+struct A {
+  static constexpr bool x = true;
+};
+
+template
+void f(T, U) noexcept(T::x);
+
+template
+void f(T, U*) noexcept(T::y); // expected-error {{no member named 'y' in 'A'}}
+
+template<>
+void f(A, int*); // expected-note {{in instantiation of exception specification}}
diff --git a/clang/test/SemaCXX/cxx1z-noexcept-function-type.cpp b/clang/test/SemaCXX/cxx1z-noexcept-function-type.cpp
index 5e56f19477d6ca..c8204c21523a37 100644
--- a/clang/test/SemaCXX/cxx1z-noexcept-function-type.cpp
+++ b/clang/test/SemaCXX/cxx1z-noexcept-function-type.cpp
@@ -18,7 +18,7 @@ template void redecl3() throw(B); // expected-error {{do
 
 typedef int I;
 template void redecl4(I) noexcept(B);
-template void redecl4(I) noexcept(B); // expected-note {{could not match 'void (I) noexcept(false)' (aka 'void (int) noexcept(false)') against 'void (int) noexcept'}}
+template void redecl4(I) noexcept(B);
 
 void (*init_with_exact_type_a)(int) noexcept = redecl4;
 void (*init_with_mismatched_type_a)(int) = redecl4;
@@ -27,7 +27,7 @@ using DeducedType_a = decltype(deduce_auto_from_noexcept_function_ptr_a);
 using DeducedType_a = void (*)(int) noexcept;
 
 void (*init_with_exact_type_b)(int) = redecl4;
-void (*init_with_mismatched_type_b)(int) noexcept = redecl4; // expected-error {{does not match required type}}
+void (*init_with_mismatched_type_b)(int) noexcept = redecl4; // expected-error {{cannot initialize a variable of type}}
 auto deduce_auto_from_noexcept_function_ptr_b = redecl4;
 using DeducedType_b = decltype(deduce_auto_from_noexcept_function_ptr_b);
 using DeducedType_b = void (*)(int);
diff --git a/clang/test/SemaTemplate/temp_arg_type.cpp b/clang/test/SemaTemplate/temp_arg_type.cpp
index 9069f63e0224fe..cdbcf281125efd 100644
--- a/clang/test/SemaTemplate/temp_arg_type.cpp
+++ b/clang/test/SemaTemplate/temp_arg_type.cpp
@@ -11,7 +11,7 @@ A<0> *a1; // expected-error{{template argument for template type parameter must
 A *a2; // expected-error{{use of class template 'A' requires template arguments}}
 
 A *a3;
-A *a4; 
+A *a4;
 A *a5;
 A > *a6;
 
@@ -95,15 +95,13 @@ namespace deduce_noexcept {
   template void dep() noexcept(true); // expected-error {{does not refer to a function template}}
   template void dep() noexcept(false); // expected-error {{does not refer to a function template}}
 
-  // FIXME: It's also not clear whether this should be valid: do we substitute
-  // into the function type (including the exception specification) or not?
-  template typename T::type1 f() noexcept(T::a);
-  template typename T::type2 f() noexcept(T::b) {}
+  template typename T::type1 f() noexcept(T::a); // expected-note {{candidate}}
+  template typename T::type2 f() noexcept(T::b) {} // expected-note {{candidate}}
   struct X {
     static constexpr bool b = true;
     using type1 = void;
     using type2 = void;
   };
-  template void f();
+  template void f(); // expected-error {{partial ordering for explicit instantiation of 'f' is ambiguous}}
 }
 #endif

From d4cf20ca37160cb062a9db773d0e6255d6bbc31a Mon Sep 17 00:00:00 2001
From: Krystian Stasiowski 
Date: Tue, 7 May 2024 20:09:19 -0400
Subject: [PATCH 38/64] [Clang][Sema] Don't set instantiated from function when
 rewriting operator<=> (#91339)

The following snippet causes a crash:
```
template
struct A
{
    bool operator<=>(const A&) const requires true = default;
};

bool f(A a)
{
    return a != A();
}
```
This occurs because, during the rewrite from `operator<=>` to
`operator==`, the "pattern" `operator<=>` function is recorded as the
instantiated-from function for the newly created `operator==` function.
This is obviously incorrect, and this patch fixes it.
---
 .../clangd/unittests/FindTargetTests.cpp      |  5 +---
 .../clangd/unittests/HoverTests.cpp           |  4 +--
 clang/docs/ReleaseNotes.rst                   |  2 ++
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  | 24 +++++++++--------
 .../class.compare.default/p4.cpp              | 27 ++++++++++++++++---
 5 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
index 94437857cecca6..0b2273f0a9a6e3 100644
--- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
@@ -642,10 +642,7 @@ TEST_F(TargetDeclTest, RewrittenBinaryOperator) {
     bool x = (Foo(1) [[!=]] Foo(2));
   )cpp";
   EXPECT_DECLS("CXXRewrittenBinaryOperator",
-               {"std::strong_ordering operator<=>(const Foo &) const = default",
-                Rel::TemplatePattern},
-               {"bool operator==(const Foo &) const noexcept = default",
-                Rel::TemplateInstantiation});
+               {"bool operator==(const Foo &) const noexcept = default"});
 }
 
 TEST_F(TargetDeclTest, FunctionTemplate) {
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index 28df24f34827c0..d9e97e5215a261 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -3091,7 +3091,7 @@ TEST(Hover, All) {
             HI.NamespaceScope = "";
             HI.Definition =
                 "bool operator==(const Foo &) const noexcept = default";
-            HI.Documentation = "Foo spaceship";
+            HI.Documentation = "";
           }},
   };
 
@@ -3894,7 +3894,7 @@ TEST(Hover, SpaceshipTemplateNoCrash) {
   TU.ExtraArgs.push_back("-std=c++20");
   auto AST = TU.build();
   auto HI = getHover(AST, T.point(), format::getLLVMStyle(), nullptr);
-  EXPECT_EQ(HI->Documentation, "Foo bar baz");
+  EXPECT_EQ(HI->Documentation, "");
 }
 
 TEST(Hover, ForwardStructNoCrash) {
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c4a9501ca15cfe..c8ef2e8d614a7d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -693,6 +693,8 @@ Bug Fixes to C++ Support
   and (#GH88832).
 - Clang now defers all substitution into the exception specification of a function template specialization
   until the noexcept-specifier is instantiated.
+- Fix a crash when an implicitly declared ``operator==`` function with a trailing requires-clause has its
+  constraints compared to that of another declaration.
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index d544cfac55ba36..fde2d920c785ec 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -2269,16 +2269,18 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
                             TemplateArgumentList::CreateCopy(SemaRef.Context,
                                                              Innermost),
                                                 /*InsertPos=*/nullptr);
-  } else if (isFriend && D->isThisDeclarationADefinition()) {
-    // Do not connect the friend to the template unless it's actually a
-    // definition. We don't want non-template functions to be marked as being
-    // template instantiations.
-    Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
-  } else if (!isFriend) {
-    // If this is not a function template, and this is not a friend (that is,
-    // this is a locally declared function), save the instantiation relationship
-    // for the purposes of constraint instantiation.
-    Function->setInstantiatedFromDecl(D);
+  } else if (FunctionRewriteKind == RewriteKind::None) {
+    if (isFriend && D->isThisDeclarationADefinition()) {
+      // Do not connect the friend to the template unless it's actually a
+      // definition. We don't want non-template functions to be marked as being
+      // template instantiations.
+      Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
+    } else if (!isFriend) {
+      // If this is not a function template, and this is not a friend (that is,
+      // this is a locally declared function), save the instantiation
+      // relationship for the purposes of constraint instantiation.
+      Function->setInstantiatedFromDecl(D);
+    }
   }
 
   if (isFriend) {
@@ -2669,7 +2671,7 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
                          TemplateArgumentList::CreateCopy(SemaRef.Context,
                                                           Innermost),
                                               /*InsertPos=*/nullptr);
-  } else if (!isFriend) {
+  } else if (!isFriend && FunctionRewriteKind == RewriteKind::None) {
     // Record that this is an instantiation of a member function.
     Method->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
   }
diff --git a/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp b/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp
index 534c3b34d8832a..53a8bfc9a4f4a8 100644
--- a/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp
+++ b/clang/test/CXX/class/class.compare/class.compare.default/p4.cpp
@@ -18,14 +18,22 @@ namespace std {
 
 namespace N {
   struct A {
-    friend constexpr std::strong_ordering operator<=>(const A&, const A&) = default;
+    friend constexpr std::strong_ordering operator<=>(const A&, const A&) = default; // expected-note 2{{declared here}}
   };
 
-  constexpr bool (*test_a_not_found)(const A&, const A&) = &operator==; // expected-error {{undeclared}}
+  constexpr std::strong_ordering (*test_a_threeway_not_found)(const A&, const A&) = &operator<=>; // expected-error {{undeclared}}
+
+  constexpr std::strong_ordering operator<=>(const A&, const A&) noexcept;
+  constexpr std::strong_ordering (*test_a_threeway)(const A&, const A&) = &operator<=>;
+  static_assert(!(*test_a_threeway)(A(), A())); // expected-error {{static assertion expression is not an integral constant expression}}
+                                               // expected-note@-1 {{undefined function 'operator<=>' cannot be used in a constant expression}}
+
+  constexpr bool (*test_a_equal_not_found)(const A&, const A&) = &operator==; // expected-error {{undeclared}}
 
   constexpr bool operator==(const A&, const A&) noexcept;
-  constexpr bool (*test_a)(const A&, const A&) noexcept = &operator==;
-  static_assert((*test_a)(A(), A()));
+  constexpr bool (*test_a_equal)(const A&, const A&) noexcept = &operator==;
+  static_assert((*test_a_equal)(A(), A())); // expected-error {{static assertion expression is not an integral constant expression}}
+                                            // expected-note@-1 {{undefined function 'operator==' cannot be used in a constant expression}}
 }
 
 struct B1 {
@@ -161,3 +169,14 @@ struct non_constexpr_type {
 
 my_struct obj; // cxx2a-note {{in instantiation of template class 'GH61238::my_struct' requested here}}
 }
+
+namespace Constrained {
+  template
+  struct A {
+    std::strong_ordering operator<=>(const A&) const requires true = default;
+  };
+
+  bool f(A a) {
+    return a != A();
+  }
+}

From 83f3b1cb480b41e3347035aff14fd4bc2ba21d24 Mon Sep 17 00:00:00 2001
From: Yinying Li 
Date: Tue, 7 May 2024 20:28:39 -0400
Subject: [PATCH 39/64] [mlir][sparse] Add verification for explicit/implicit
 value (#90111)

1. Verify that the type of the explicit/implicit value matches the tensor
element type.
2. Verify that the implicit value can only be zero (see the sketch below).
3. Verify that the explicit/implicit value is numeric.
4. Fix the type change issue caused by SparseTensorType(enc).
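
The zero-only rule boils down to inspecting the attribute value; a simplified
sketch (attribute class names are assumed to be the standard MLIR and Complex
dialect ones) is:
```cpp
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/IR/BuiltinAttributes.h"

// Simplified sketch of the implicitVal rule: only a numerically zero float,
// integer, or complex attribute is accepted as the implicit value.
static bool isZeroImplicitValue(mlir::Attribute impVal) {
  if (auto f = llvm::dyn_cast<mlir::FloatAttr>(impVal))
    return !f.getValue().isNonZero();
  if (auto i = llvm::dyn_cast<mlir::IntegerAttr>(impVal))
    return i.getValue().isZero();
  if (auto c = llvm::dyn_cast<mlir::complex::NumberAttr>(impVal))
    return !c.getReal().isNonZero() && !c.getImag().isNonZero();
  return false; // non-numeric attributes are rejected by a separate check
}
```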
---
 .../Dialect/SparseTensor/IR/SparseTensor.h    | 13 +++
 .../SparseTensor/IR/SparseTensorAttrDefs.td   | 15 ++++
 .../SparseTensor/IR/SparseTensorType.h        | 25 +-----
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 78 +++++++++++------
 .../SparseTensor/invalid_encoding.mlir        | 85 +++++++++++++++++++
 5 files changed, 169 insertions(+), 47 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index b182b4c72b9535..3cf81d2e58f21c 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -41,6 +41,19 @@ using Level = uint64_t;
 /// including the value `ShapedType::kDynamic` (for shapes).
 using Size = int64_t;
 
+/// A simple structure that encodes a range of levels in the sparse tensors
+/// that forms a COO segment.
+struct COOSegment {
+  std::pair lvlRange; // [low, high)
+  bool isSoA;
+
+  bool isAoS() const { return !isSoA; }
+  bool isSegmentStart(Level l) const { return l == lvlRange.first; }
+  bool inSegment(Level l) const {
+    return l >= lvlRange.first && l < lvlRange.second;
+  }
+};
+
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index eefa4c71bbd2ca..53dd8e39438cc6 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -502,9 +502,24 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     //
     // Helper function to translate between level/dimension space.
     //
+
     SmallVector translateShape(::mlir::ArrayRef srcShape, ::mlir::sparse_tensor::CrdTransDirectionKind) const;
     ValueRange translateCrds(::mlir::OpBuilder &builder, ::mlir::Location loc, ::mlir::ValueRange crds, ::mlir::sparse_tensor::CrdTransDirectionKind) const;
 
+    //
+    // COO methods.
+    //
+
+    /// Returns the starting level of this sparse tensor type for a
+    /// trailing COO region that spans **at least** two levels. If
+    /// no such COO region is found, then returns the level-rank.
+    ///
+    /// DEPRECATED: use getCOOSegment instead;
+    Level getAoSCOOStart() const;
+
+    /// Returns a list of COO segments in the sparse tensor types.
+    SmallVector getCOOSegments() const;
+
     //
     // Printing methods.
     //
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
index ea3d8013b45671..a154d7fa5fb6e5 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
@@ -18,19 +18,6 @@
 namespace mlir {
 namespace sparse_tensor {
 
-/// A simple structure that encodes a range of levels in the sparse tensors that
-/// forms a COO segment.
-struct COOSegment {
-  std::pair lvlRange; // [low, high)
-  bool isSoA;
-
-  bool isAoS() const { return !isSoA; }
-  bool isSegmentStart(Level l) const { return l == lvlRange.first; }
-  bool inSegment(Level l) const {
-    return l >= lvlRange.first && l < lvlRange.second;
-  }
-};
-
 //===----------------------------------------------------------------------===//
 /// A wrapper around `RankedTensorType`, which has three goals:
 ///
@@ -73,12 +60,6 @@ class SparseTensorType {
       : SparseTensorType(
             RankedTensorType::get(stp.getShape(), stp.getElementType(), enc)) {}
 
-  // TODO: remove?
-  SparseTensorType(SparseTensorEncodingAttr enc)
-      : SparseTensorType(RankedTensorType::get(
-            SmallVector(enc.getDimRank(), ShapedType::kDynamic),
-            Float32Type::get(enc.getContext()), enc)) {}
-
   SparseTensorType &operator=(const SparseTensorType &) = delete;
   SparseTensorType(const SparseTensorType &) = default;
 
@@ -369,13 +350,15 @@ class SparseTensorType {
   /// no such COO region is found, then returns the level-rank.
   ///
   /// DEPRECATED: use getCOOSegment instead;
-  Level getAoSCOOStart() const;
+  Level getAoSCOOStart() const { return getEncoding().getAoSCOOStart(); };
 
   /// Returns [un]ordered COO type for this sparse tensor type.
   RankedTensorType getCOOType(bool ordered) const;
 
   /// Returns a list of COO segments in the sparse tensor types.
-  SmallVector getCOOSegments() const;
+  SmallVector getCOOSegments() const {
+    return getEncoding().getCOOSegments();
+  }
 
 private:
   // These two must be const, to ensure coherence of the memoized fields.
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index de3d3006ebaac5..4cc6ee971d4a3e 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -104,7 +104,7 @@ void StorageLayout::foreachField(
         callback) const {
   const auto lvlTypes = enc.getLvlTypes();
   const Level lvlRank = enc.getLvlRank();
-  SmallVector cooSegs = SparseTensorType(enc).getCOOSegments();
+  SmallVector cooSegs = enc.getCOOSegments();
   FieldIndex fieldIdx = kDataFieldStartingIdx;
 
   ArrayRef cooSegsRef = cooSegs;
@@ -211,7 +211,7 @@ StorageLayout::getFieldIndexAndStride(SparseTensorFieldKind kind,
   unsigned stride = 1;
   if (kind == SparseTensorFieldKind::CrdMemRef) {
     assert(lvl.has_value());
-    const Level cooStart = SparseTensorType(enc).getAoSCOOStart();
+    const Level cooStart = enc.getAoSCOOStart();
     const Level lvlRank = enc.getLvlRank();
     if (lvl.value() >= cooStart && lvl.value() < lvlRank) {
       lvl = cooStart;
@@ -912,46 +912,53 @@ LogicalResult SparseTensorEncodingAttr::verifyEncoding(
     return emitError()
            << "dimension-rank mismatch between encoding and tensor shape: "
            << getDimRank() << " != " << dimRank;
+  if (auto expVal = getExplicitVal()) {
+    Type attrType = llvm::dyn_cast(expVal).getType();
+    if (attrType != elementType) {
+      return emitError() << "explicit value type mismatch between encoding and "
+                         << "tensor element type: " << attrType
+                         << " != " << elementType;
+    }
+  }
+  if (auto impVal = getImplicitVal()) {
+    Type attrType = llvm::dyn_cast(impVal).getType();
+    if (attrType != elementType) {
+      return emitError() << "implicit value type mismatch between encoding and "
+                         << "tensor element type: " << attrType
+                         << " != " << elementType;
+    }
+    // Currently, we only support zero as the implicit value.
+    auto impFVal = llvm::dyn_cast(impVal);
+    auto impIntVal = llvm::dyn_cast(impVal);
+    auto impComplexVal = llvm::dyn_cast(impVal);
+    if ((impFVal && impFVal.getValue().isNonZero()) ||
+        (impIntVal && !impIntVal.getValue().isZero()) ||
+        (impComplexVal && (impComplexVal.getImag().isNonZero() ||
+                           impComplexVal.getReal().isNonZero()))) {
+      return emitError() << "implicit value must be zero";
+    }
+  }
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// SparseTensorType Methods.
-//===----------------------------------------------------------------------===//
-
-bool mlir::sparse_tensor::SparseTensorType::isCOOType(Level startLvl,
-                                                      bool isUnique) const {
-  if (!hasEncoding())
-    return false;
-  if (!isCompressedLvl(startLvl) && !isLooseCompressedLvl(startLvl))
-    return false;
-  for (Level l = startLvl + 1; l < lvlRank; ++l)
-    if (!isSingletonLvl(l))
-      return false;
-  // If isUnique is true, then make sure that the last level is unique,
-  // that is, when lvlRank == 1, the only compressed level is unique,
-  // and when lvlRank > 1, the last singleton is unique.
-  return !isUnique || isUniqueLvl(lvlRank - 1);
-}
-
-Level mlir::sparse_tensor::SparseTensorType::getAoSCOOStart() const {
+Level mlir::sparse_tensor::SparseTensorEncodingAttr::getAoSCOOStart() const {
   SmallVector coo = getCOOSegments();
   assert(coo.size() == 1 || coo.empty());
   if (!coo.empty() && coo.front().isAoS()) {
     return coo.front().lvlRange.first;
   }
-  return lvlRank;
+  return getLvlRank();
 }
 
 SmallVector
-mlir::sparse_tensor::SparseTensorType::getCOOSegments() const {
+mlir::sparse_tensor::SparseTensorEncodingAttr::getCOOSegments() const {
   SmallVector ret;
-  if (!hasEncoding() || lvlRank <= 1)
+  if (getLvlRank() <= 1)
     return ret;
 
   ArrayRef lts = getLvlTypes();
   Level l = 0;
-  while (l < lvlRank) {
+  while (l < getLvlRank()) {
     auto lt = lts[l];
     if (lt.isa()) {
       auto cur = lts.begin() + l;
@@ -975,6 +982,25 @@ mlir::sparse_tensor::SparseTensorType::getCOOSegments() const {
   return ret;
 }
 
+//===----------------------------------------------------------------------===//
+// SparseTensorType Methods.
+//===----------------------------------------------------------------------===//
+
+bool mlir::sparse_tensor::SparseTensorType::isCOOType(Level startLvl,
+                                                      bool isUnique) const {
+  if (!hasEncoding())
+    return false;
+  if (!isCompressedLvl(startLvl) && !isLooseCompressedLvl(startLvl))
+    return false;
+  for (Level l = startLvl + 1; l < lvlRank; ++l)
+    if (!isSingletonLvl(l))
+      return false;
+  // If isUnique is true, then make sure that the last level is unique,
+  // that is, when lvlRank == 1, the only compressed level is unique,
+  // and when lvlRank > 1, the last singleton is unique.
+  return !isUnique || isUniqueLvl(lvlRank - 1);
+}
+
 RankedTensorType
 mlir::sparse_tensor::SparseTensorType::getCOOType(bool ordered) const {
   SmallVector lvlTypes;
diff --git a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
index 8096c010ac935a..a3f72bd3ae971c 100644
--- a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
@@ -443,3 +443,88 @@ func.func private @NOutOfM(%arg0: tensor) {
 func.func private @NOutOfM(%arg0: tensor) {
   return
 }
+
+// -----
+
+#CSR_ExpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  explicitVal = 1 : i32,
+  implicitVal = 0.0 : f32
+}>
+
+// expected-error@+1 {{explicit value type mismatch between encoding and tensor element type: 'i32' != 'f32'}}
+func.func private @sparse_csr(tensor)
+
+// -----
+
+#CSR_ImpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  explicitVal = 1 : i32,
+  implicitVal = 0.0 : f32
+}>
+
+// expected-error@+1 {{implicit value type mismatch between encoding and tensor element type: 'f32' != 'i32'}}
+func.func private @sparse_csr(tensor)
+
+// -----
+
+// expected-error@+1 {{expected a numeric value for explicitVal}}
+#CSR_ExpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  explicitVal = "str"
+}>
+func.func private @sparse_csr(tensor)
+
+// -----
+
+// expected-error@+1 {{expected a numeric value for implicitVal}}
+#CSR_ImpType = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  implicitVal = "str"
+}>
+func.func private @sparse_csr(tensor)
+
+// -----
+
+#CSR_ImpVal = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  implicitVal = 1 : i32
+}>
+
+// expected-error@+1 {{implicit value must be zero}}
+func.func private @sparse_csr(tensor)
+
+// -----
+
+#CSR_ImpVal = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 32,
+  crdWidth = 32,
+  implicitVal = 1.0 : f32
+}>
+
+// expected-error@+1 {{implicit value must be zero}}
+func.func private @sparse_csr(tensor)
+
+// -----
+
+#CSR_OnlyOnes = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  posWidth = 64,
+  crdWidth = 64,
+  explicitVal = #complex.number<:f32 1.0, 0.0>,
+  implicitVal = #complex.number<:f32 1.0, 0.0>
+}>
+
+// expected-error@+1 {{implicit value must be zero}}
+func.func private @sparse_csr(tensor, #CSR_OnlyOnes>)

From 34ae2265e88c8a04350de5a244d0d888e74a8388 Mon Sep 17 00:00:00 2001
From: Krystian Stasiowski 
Date: Tue, 7 May 2024 21:41:33 -0400
Subject: [PATCH 40/64] [Clang][Sema] Improve support for explicit
 specializations of constrained member functions & member function templates
 (#88963)

Consider the following snippet from the discussion of CWG2847 on the core reflector:
```
template
concept C = sizeof(T) <= sizeof(long);

template
struct A
{
    template
    void f(U) requires C; // #1, declares a function template

    void g() requires C; // #2, declares a function

    template<>
    void f(char);  // #3, an explicit specialization of a function template that declares a function
};

template<>
template
void A::f(U) requires C; // #4, an explicit specialization of a function template that declares a function template

template<>
template<>
void A::f(int); // #5, an explicit specialization of a function template that declares a function

template<>
void A::g(); // #6, an explicit specialization of a function that declares a function
```

A number of problems exist:
- Clang rejects `#4` because the trailing _requires-clause_ has `U`
substituted with the wrong template parameter depth when
`Sema::AreConstraintExpressionsEqual` is called to determine whether it
matches the trailing _requires-clause_ of the implicitly instantiated
function template.
- Clang rejects `#5` because the function template specialization
instantiated from `A::f` has a trailing _requires-clause_, but `#5`
does not (nor can it have one as it isn't a templated function).
- Clang rejects `#6` for the same reasons it rejects `#5`.

This patch resolves these issues by making the following changes:
- To fix `#4`, `Sema::AreConstraintExpressionsEqual` is passed
`FunctionTemplateDecl`s when comparing the trailing _requires-clauses_
of `#4` and the function template instantiated from `#1`.
- To fix `#5` and `#6`, the trailing _requires-clauses_ are not compared
for explicit specializations that declare functions.

In addition to these changes, `CheckMemberSpecialization` now considers
constraint satisfaction/constraint partial ordering when determining
which member function is specialized by an explicit specialization of a
member function for an implicit instantiation of a class template (we
previously would select the first function that has the same type as the
explicit specialization). With constraints taken into consideration, we
match EDG's behavior for these declarations.
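
For illustration, a hypothetical case (not taken from the new tests) where the
`CheckMemberSpecialization` change matters:
```cpp
#include <type_traits>

template<typename T>
concept Scalar = std::is_scalar_v<T>;

template<typename T>
concept Arithmetic = Scalar<T> && std::is_arithmetic_v<T>;

template<typename T>
struct B {
  void h() requires Scalar<T>;     // less constrained
  void h() requires Arithmetic<T>; // more constrained
};

// Both overloads of B<int>::h have type void() and satisfied constraints;
// constraint partial ordering now determines that the Arithmetic overload is
// the member being explicitly specialized.
template<>
void B<int>::h() {}
```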
---
 clang/docs/ReleaseNotes.rst                   |  4 +
 .../clang/Basic/DiagnosticSemaKinds.td        |  5 ++
 clang/include/clang/Sema/Sema.h               |  3 +
 clang/lib/Sema/SemaConcept.cpp                |  2 +-
 clang/lib/Sema/SemaOverload.cpp               | 72 +++++-------------
 clang/lib/Sema/SemaTemplate.cpp               | 57 ++++++++++----
 clang/lib/Sema/SemaTemplateDeduction.cpp      | 32 ++++++++
 clang/lib/Sema/SemaTemplateInstantiate.cpp    |  7 ++
 .../temp/temp.spec/temp.expl.spec/p14-23.cpp  | 60 +++++++++++++++
 .../CXX/temp/temp.spec/temp.expl.spec/p8.cpp  | 74 +++++++++++++++++++
 10 files changed, 248 insertions(+), 68 deletions(-)
 create mode 100644 clang/test/CXX/temp/temp.spec/temp.expl.spec/p14-23.cpp
 create mode 100644 clang/test/CXX/temp/temp.spec/temp.expl.spec/p8.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c8ef2e8d614a7d..0f9728c00e6483 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -695,6 +695,10 @@ Bug Fixes to C++ Support
   until the noexcept-specifier is instantiated.
 - Fix a crash when an implicitly declared ``operator==`` function with a trailing requires-clause has its
   constraints compared to that of another declaration.
+- Fix a bug where explicit specializations of member functions/function templates would have substitution
+  performed incorrectly when checking constraints. Fixes (#GH90349).
+- Clang now allows constrained member functions to be explicitly specialized for an implicit instantiation
+  of a class template.
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9a0bae9c216de9..9317ae675c72be 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5437,6 +5437,11 @@ def note_function_template_spec_matched : Note<
 def err_function_template_partial_spec : Error<
     "function template partial specialization is not allowed">;
 
+def err_function_member_spec_ambiguous : Error<
+    "ambiguous member function specialization %q0 of %q1">;
+def note_function_member_spec_matched : Note<
+    "member function specialization matches %0">;
+
 // C++ Template Instantiation
 def err_template_recursion_depth_exceeded : Error<
   "recursive template instantiation exceeded maximum depth of %0">,
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a80ac6dbc76137..ddb3de2b66023c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9739,6 +9739,9 @@ class Sema final : public SemaBase {
                      const PartialDiagnostic &CandidateDiag,
                      bool Complain = true, QualType TargetType = QualType());
 
+  FunctionDecl *getMoreConstrainedFunction(FunctionDecl *FD1,
+                                           FunctionDecl *FD2);
+
   ///@}
 
   //
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index e00c972602829e..7bfec4e11f7aab 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -811,7 +811,7 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
   // this may happen while we're comparing two templates' constraint
   // equivalence.
   LocalInstantiationScope ScopeForParameters(S);
-  if (auto *FD = llvm::dyn_cast(DeclInfo.getDecl()))
+  if (auto *FD = DeclInfo.getDecl()->getAsFunction())
     for (auto *PVD : FD->parameters())
       ScopeForParameters.InstantiatedLocal(PVD, PVD);
 
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index a416df2e97c439..f173300b5c96cf 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1303,6 +1303,8 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
   if (New->isMSVCRTEntryPoint())
     return false;
 
+  NamedDecl *OldDecl = Old;
+  NamedDecl *NewDecl = New;
   FunctionTemplateDecl *OldTemplate = Old->getDescribedFunctionTemplate();
   FunctionTemplateDecl *NewTemplate = New->getDescribedFunctionTemplate();
 
@@ -1347,6 +1349,8 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
   // references to non-instantiated entities during constraint substitution.
   // GH78101.
   if (NewTemplate) {
+    OldDecl = OldTemplate;
+    NewDecl = NewTemplate;
     // C++ [temp.over.link]p4:
     //   The signature of a function template consists of its function
     //   signature, its return type and its template parameter list. The names
@@ -1506,13 +1510,14 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
     }
   }
 
-  if (!UseOverrideRules) {
+  if (!UseOverrideRules &&
+      New->getTemplateSpecializationKind() != TSK_ExplicitSpecialization) {
     Expr *NewRC = New->getTrailingRequiresClause(),
          *OldRC = Old->getTrailingRequiresClause();
     if ((NewRC != nullptr) != (OldRC != nullptr))
       return true;
-
-    if (NewRC && !SemaRef.AreConstraintExpressionsEqual(Old, OldRC, New, NewRC))
+    if (NewRC &&
+        !SemaRef.AreConstraintExpressionsEqual(OldDecl, OldRC, NewDecl, NewRC))
       return true;
   }
 
@@ -10695,29 +10700,10 @@ bool clang::isBetterOverloadCandidate(
   //   -— F1 and F2 are non-template functions with the same
   //      parameter-type-lists, and F1 is more constrained than F2 [...],
   if (!Cand1IsSpecialization && !Cand2IsSpecialization &&
-      sameFunctionParameterTypeLists(S, Cand1, Cand2)) {
-    FunctionDecl *Function1 = Cand1.Function;
-    FunctionDecl *Function2 = Cand2.Function;
-    if (FunctionDecl *MF = Function1->getInstantiatedFromMemberFunction())
-      Function1 = MF;
-    if (FunctionDecl *MF = Function2->getInstantiatedFromMemberFunction())
-      Function2 = MF;
-
-    const Expr *RC1 = Function1->getTrailingRequiresClause();
-    const Expr *RC2 = Function2->getTrailingRequiresClause();
-    if (RC1 && RC2) {
-      bool AtLeastAsConstrained1, AtLeastAsConstrained2;
-      if (S.IsAtLeastAsConstrained(Function1, RC1, Function2, RC2,
-                                   AtLeastAsConstrained1) ||
-          S.IsAtLeastAsConstrained(Function2, RC2, Function1, RC1,
-                                   AtLeastAsConstrained2))
-        return false;
-      if (AtLeastAsConstrained1 != AtLeastAsConstrained2)
-        return AtLeastAsConstrained1;
-    } else if (RC1 || RC2) {
-      return RC1 != nullptr;
-    }
-  }
+      sameFunctionParameterTypeLists(S, Cand1, Cand2) &&
+      S.getMoreConstrainedFunction(Cand1.Function, Cand2.Function) ==
+          Cand1.Function)
+    return true;
 
   //   -- F1 is a constructor for a class D, F2 is a constructor for a base
   //      class B of D, and for all arguments the corresponding parameters of
@@ -13385,25 +13371,6 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
           static_cast<int>(CUDA().IdentifyPreference(Caller, FD2));
   };
 
-  auto CheckMoreConstrained = [&](FunctionDecl *FD1,
-                                  FunctionDecl *FD2) -> std::optional<bool> {
-    if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
-      FD1 = MF;
-    if (FunctionDecl *MF = FD2->getInstantiatedFromMemberFunction())
-      FD2 = MF;
-    SmallVector<const Expr *, 3> AC1, AC2;
-    FD1->getAssociatedConstraints(AC1);
-    FD2->getAssociatedConstraints(AC2);
-    bool AtLeastAsConstrained1, AtLeastAsConstrained2;
-    if (IsAtLeastAsConstrained(FD1, AC1, FD2, AC2, AtLeastAsConstrained1))
-      return std::nullopt;
-    if (IsAtLeastAsConstrained(FD2, AC2, FD1, AC1, AtLeastAsConstrained2))
-      return std::nullopt;
-    if (AtLeastAsConstrained1 == AtLeastAsConstrained2)
-      return std::nullopt;
-    return AtLeastAsConstrained1;
-  };
-
   // Don't use the AddressOfResolver because we're specifically looking for
   // cases where we have one overload candidate that lacks
   // enable_if/pass_object_size/...
@@ -13440,15 +13407,14 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
       }
       // FD has the same CUDA prefernece than Result. Continue check
       // constraints.
-      std::optional<bool> MoreConstrainedThanPrevious =
-          CheckMoreConstrained(FD, Result);
-      if (!MoreConstrainedThanPrevious) {
-        IsResultAmbiguous = true;
-        AmbiguousDecls.push_back(FD);
+      FunctionDecl *MoreConstrained = getMoreConstrainedFunction(FD, Result);
+      if (MoreConstrained != FD) {
+        if (!MoreConstrained) {
+          IsResultAmbiguous = true;
+          AmbiguousDecls.push_back(FD);
+        }
         continue;
       }
-      if (!*MoreConstrainedThanPrevious)
-        continue;
       // FD is more constrained - replace Result with it.
     }
     FoundBetter();
@@ -13467,7 +13433,7 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) {
       // constraints.
       if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0)
         continue;
-      if (!CheckMoreConstrained(Skipped, Result))
+      if (!getMoreConstrainedFunction(Skipped, Result))
         return nullptr;
     }
     Pair = DAP;
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 5c72270ff15047..7e57fa0696725a 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -10339,24 +10339,53 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
   if (Previous.empty()) {
     // Nowhere to look anyway.
   } else if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Member)) {
+    SmallVector<FunctionDecl *> Candidates;
+    bool Ambiguous = false;
     for (LookupResult::iterator I = Previous.begin(), E = Previous.end();
            I != E; ++I) {
-      NamedDecl *D = (*I)->getUnderlyingDecl();
-      if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
-        QualType Adjusted = Function->getType();
-        if (!hasExplicitCallingConv(Adjusted))
-          Adjusted = adjustCCAndNoReturn(Adjusted, Method->getType());
-        // This doesn't handle deduced return types, but both function
-        // declarations should be undeduced at this point.
-        if (Context.hasSameType(Adjusted, Method->getType())) {
-          FoundInstantiation = *I;
-          Instantiation = Method;
-          InstantiatedFrom = Method->getInstantiatedFromMemberFunction();
-          MSInfo = Method->getMemberSpecializationInfo();
-          break;
-        }
+      CXXMethodDecl *Method =
+          dyn_cast<CXXMethodDecl>((*I)->getUnderlyingDecl());
+      if (!Method)
+        continue;
+      QualType Adjusted = Function->getType();
+      if (!hasExplicitCallingConv(Adjusted))
+        Adjusted = adjustCCAndNoReturn(Adjusted, Method->getType());
+      // This doesn't handle deduced return types, but both function
+      // declarations should be undeduced at this point.
+      if (!Context.hasSameType(Adjusted, Method->getType()))
+        continue;
+      if (ConstraintSatisfaction Satisfaction;
+          Method->getTrailingRequiresClause() &&
+          (CheckFunctionConstraints(Method, Satisfaction,
+                                    /*UsageLoc=*/Member->getLocation(),
+                                    /*ForOverloadResolution=*/true) ||
+           !Satisfaction.IsSatisfied))
+        continue;
+      Candidates.push_back(Method);
+      FunctionDecl *MoreConstrained =
+          Instantiation ? getMoreConstrainedFunction(
+                              Method, cast<FunctionDecl>(Instantiation))
+                        : Method;
+      if (!MoreConstrained) {
+        Ambiguous = true;
+        continue;
+      }
+      if (MoreConstrained == Method) {
+        Ambiguous = false;
+        FoundInstantiation = *I;
+        Instantiation = Method;
+        InstantiatedFrom = Method->getInstantiatedFromMemberFunction();
+        MSInfo = Method->getMemberSpecializationInfo();
       }
     }
+    if (Ambiguous) {
+      Diag(Member->getLocation(), diag::err_function_member_spec_ambiguous)
+          << Member << (InstantiatedFrom ? InstantiatedFrom : Instantiation);
+      for (FunctionDecl *Candidate : Candidates)
+        Diag(Candidate->getLocation(), diag::note_function_member_spec_matched)
+            << Candidate;
+      return true;
+    }
   } else if (isa<VarDecl>(Member)) {
     VarDecl *PrevVar;
     if (Previous.isSingleResult() &&
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index dcaea4a77bffd2..fe7e35d8415106 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -5852,6 +5852,38 @@ UnresolvedSetIterator Sema::getMostSpecialized(
   return SpecEnd;
 }
 
+/// Returns the more constrained function according to the rules of
+/// partial ordering by constraints (C++ [temp.constr.order]).
+///
+/// \param FD1 the first function
+///
+/// \param FD2 the second function
+///
+/// \returns the more constrained function. If neither function is
+/// more constrained, returns NULL.
+FunctionDecl *Sema::getMoreConstrainedFunction(FunctionDecl *FD1,
+                                               FunctionDecl *FD2) {
+  assert(!FD1->getDescribedTemplate() && !FD2->getDescribedTemplate() &&
+         "not for function templates");
+  FunctionDecl *F1 = FD1;
+  if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction())
+    F1 = MF;
+  FunctionDecl *F2 = FD2;
+  if (FunctionDecl *MF = FD2->getInstantiatedFromMemberFunction())
+    F2 = MF;
+  llvm::SmallVector<const Expr *, 3> AC1, AC2;
+  F1->getAssociatedConstraints(AC1);
+  F2->getAssociatedConstraints(AC2);
+  bool AtLeastAsConstrained1, AtLeastAsConstrained2;
+  if (IsAtLeastAsConstrained(F1, AC1, F2, AC2, AtLeastAsConstrained1))
+    return nullptr;
+  if (IsAtLeastAsConstrained(F2, AC2, F1, AC1, AtLeastAsConstrained2))
+    return nullptr;
+  if (AtLeastAsConstrained1 == AtLeastAsConstrained2)
+    return nullptr;
+  return AtLeastAsConstrained1 ? FD1 : FD2;
+}
+
 /// Determine whether one partial specialization, P1, is at least as
 /// specialized than another, P2.
 ///
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 3a9fd906b7af86..07626058c79776 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -275,6 +275,13 @@ Response HandleFunction(Sema &SemaRef, const FunctionDecl *Function,
                                      TemplateArgs->asArray(),
                                      /*Final=*/false);
 
+    if (RelativeToPrimary &&
+        (Function->getTemplateSpecializationKind() ==
+             TSK_ExplicitSpecialization ||
+         (Function->getFriendObjectKind() &&
+          !Function->getPrimaryTemplate()->getFriendObjectKind())))
+      return Response::UseNextDecl(Function);
+
     // If this function was instantiated from a specialized member that is
     // a function template, we're done.
     assert(Function->getPrimaryTemplate() && "No function template?");
diff --git a/clang/test/CXX/temp/temp.spec/temp.expl.spec/p14-23.cpp b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p14-23.cpp
new file mode 100644
index 00000000000000..dc17cea99d4351
--- /dev/null
+++ b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p14-23.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c++20 -verify %s
+
+template<int I>
+concept C = I >= 4;
+
+template<int I>
+concept D = I < 8;
+
+template<int I>
+struct A {
+  constexpr static int f() { return 0; }
+  constexpr static int f() requires C<I> && D<I> { return 1; }
+  constexpr static int f() requires C<I> { return 2; }
+
+  constexpr static int g() requires C<I> { return 0; } // #candidate-0
+  constexpr static int g() requires D<I> { return 1; } // #candidate-1
+
+  constexpr static int h() requires C<I> { return 0; } // expected-note {{member declaration nearly matches}}
+};
+
+template<>
+constexpr int A<2>::f() { return 3; }
+
+template<>
+constexpr int A<4>::f() { return 4; }
+
+template<>
+constexpr int A<8>::f() { return 5; }
+
+static_assert(A<3>::f() == 0);
+static_assert(A<5>::f() == 1);
+static_assert(A<9>::f() == 2);
+static_assert(A<2>::f() == 3);
+static_assert(A<4>::f() == 4);
+static_assert(A<8>::f() == 5);
+
+template<>
+constexpr int A<0>::g() { return 2; }
+
+template<>
+constexpr int A<8>::g() { return 3; }
+
+template<>
+constexpr int A<6>::g() { return 4; } // expected-error {{ambiguous member function specialization 'A<6>::g' of 'A::g'}}
+                                      // expected-note@#candidate-0 {{member function specialization matches 'g'}}
+                                      // expected-note@#candidate-1 {{member function specialization matches 'g'}}
+
+static_assert(A<9>::g() == 0);
+static_assert(A<1>::g() == 1);
+static_assert(A<0>::g() == 2);
+static_assert(A<8>::g() == 3);
+
+template<>
+constexpr int A<4>::h() { return 1; }
+
+template<>
+constexpr int A<0>::h() { return 2; } // expected-error {{out-of-line definition of 'h' does not match any declaration in 'A<0>'}}
+
+static_assert(A<5>::h() == 0);
+static_assert(A<4>::h() == 1);
diff --git a/clang/test/CXX/temp/temp.spec/temp.expl.spec/p8.cpp b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p8.cpp
new file mode 100644
index 00000000000000..87e10d10e4b453
--- /dev/null
+++ b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p8.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+template
+concept C = sizeof(T) <= sizeof(long);
+
+template
+struct A {
+  template
+  void f(U) requires C;
+
+  void g() requires C;
+
+  template
+  void h(U) requires C;
+
+  constexpr int i() requires C {
+    return 0;
+  }
+
+  constexpr int i() requires C && true {
+    return 1;
+  }
+
+  template<>
+  void f(char);
+};
+
+template<>
+template
+void A::f(U) requires C;
+
+template<>
+template
+void A::h(U) requires C;
+
+template<>
+template<>
+void A::f(int);
+
+template<>
+void A::g();
+
+template<>
+constexpr int A::i() {
+  return 2;
+}
+
+static_assert(A().i() == 2);
+
+template
+struct D {
+  template
+  static constexpr int f(U);
+
+  template
+  static constexpr int f(U) requires (sizeof(T) == 1);
+
+  template<>
+  constexpr int f(int) {
+    return 1;
+  }
+};
+
+template<>
+template
+constexpr int D::f(U) requires (sizeof(signed char) == 1) {
+  return 0;
+}
+
+static_assert(D::f(0) == 1);
+static_assert(D::f(0) == 1);
+static_assert(D::f(0) == 1);
+static_assert(D::f(0.0) == 0);

From 584253c4e2f788f870488fc32193b52d67ddaccc Mon Sep 17 00:00:00 2001
From: Benji Smith <6193112+Benjins@users.noreply.github.com>
Date: Tue, 7 May 2024 21:59:53 -0400
Subject: [PATCH 41/64] [C API] Add getters and build function for CallBr
 (#91154)

This adds LLVMBuildCallBr to create CallBr instructions, and getters for
the CallBr-specific data. The remainder of its data, e.g.
arguments/function, can be accessed using existing getters.
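
For illustration only (not part of this patch), a minimal sketch of how the
new builder and getters fit together; Builder, FnTy, Fn, DefaultBB,
IndirectBB, and Arg0 are assumed to have been created already through the
existing C API:

  LLVMBasicBlockRef IndirectDests[] = {IndirectBB};
  LLVMValueRef Args[] = {Arg0};
  /* Build a callbr with one indirect destination and no operand bundles. */
  LLVMValueRef CallBr =
      LLVMBuildCallBr(Builder, FnTy, Fn, DefaultBB, IndirectDests,
                      /*NumIndirectDests=*/1, Args, /*NumArgs=*/1,
                      /*Bundles=*/NULL, /*NumBundles=*/0, "");
  /* Read the destinations back through the new getters. */
  LLVMBasicBlockRef RoundTripDefault = LLVMGetCallBrDefaultDest(CallBr);
  (void)RoundTripDefault; /* same block as DefaultBB */
  for (unsigned I = 0, N = LLVMGetCallBrNumIndirectDests(CallBr); I < N; ++I)
    IndirectDests[I] = LLVMGetCallBrIndirectDest(CallBr, I);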
---
 llvm/docs/ReleaseNotes.rst        |  7 ++++++
 llvm/include/llvm-c/Core.h        | 28 ++++++++++++++++++++++
 llvm/lib/IR/Core.cpp              | 35 +++++++++++++++++++++++++++
 llvm/test/Bindings/llvm-c/echo.ll | 26 ++++++++++++++++++++
 llvm/tools/llvm-c-test/echo.cpp   | 40 +++++++++++++++++++++++++++++++
 5 files changed, 136 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 9deae46d0233d2..26f1d33f68009d 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -168,6 +168,13 @@ Changes to the C API
 
 * Added ``LLVMCreateConstantRangeAttribute`` function for creating ConstantRange Attributes.
 
+* Added the following functions for creating and accessing data for CallBr instructions:
+
+  * ``LLVMBuildCallBr``
+  * ``LLVMGetCallBrDefaultDest``
+  * ``LLVMGetCallBrNumIndirectDests``
+  * ``LLVMGetCallBrIndirectDest``
+
 Changes to the CodeGen infrastructure
 -------------------------------------
 
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index ba02ca48257532..9d09546513f0ed 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -3737,6 +3737,28 @@ void LLVMSetNormalDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B);
  */
 void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B);
 
+/**
+ * Get the default destination of a CallBr instruction.
+ *
+ * @see llvm::CallBrInst::getDefaultDest()
+ */
+LLVMBasicBlockRef LLVMGetCallBrDefaultDest(LLVMValueRef CallBr);
+
+/**
+ * Get the number of indirect destinations of a CallBr instruction.
+ *
+ * @see llvm::CallBrInst::getNumIndirectDests()
+ *
+ */
+unsigned LLVMGetCallBrNumIndirectDests(LLVMValueRef CallBr);
+
+/**
+ * Get the indirect destination of a CallBr instruction at the given index.
+ *
+ * @see llvm::CallBrInst::getIndirectDest()
+ */
+LLVMBasicBlockRef LLVMGetCallBrIndirectDest(LLVMValueRef CallBr, unsigned Idx);
+
 /**
  * @}
  */
@@ -4023,6 +4045,12 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef, LLVMValueRef V,
                              LLVMBasicBlockRef Else, unsigned NumCases);
 LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
                                  unsigned NumDests);
+LLVMValueRef LLVMBuildCallBr(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+                             LLVMBasicBlockRef DefaultDest,
+                             LLVMBasicBlockRef *IndirectDests,
+                             unsigned NumIndirectDests, LLVMValueRef *Args,
+                             unsigned NumArgs, LLVMOperandBundleRef *Bundles,
+                             unsigned NumBundles, const char *Name);
 LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Fn,
                               LLVMValueRef *Args, unsigned NumArgs,
                               LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 1b84527d5e8741..df90b883411231 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -47,6 +47,10 @@ using namespace llvm;
 
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OperandBundleDef, LLVMOperandBundleRef)
 
+inline BasicBlock **unwrap(LLVMBasicBlockRef *BBs) {
+  return reinterpret_cast<BasicBlock **>(BBs);
+}
+
 #define DEBUG_TYPE "ir"
 
 void llvm::initializeCore(PassRegistry &Registry) {
@@ -3031,6 +3035,18 @@ void LLVMSetUnwindDest(LLVMValueRef Invoke, LLVMBasicBlockRef B) {
   unwrap(Invoke)->setUnwindDest(unwrap(B));
 }
 
+LLVMBasicBlockRef LLVMGetCallBrDefaultDest(LLVMValueRef CallBr) {
+  return wrap(unwrap(CallBr)->getDefaultDest());
+}
+
+unsigned LLVMGetCallBrNumIndirectDests(LLVMValueRef CallBr) {
+  return unwrap(CallBr)->getNumIndirectDests();
+}
+
+LLVMBasicBlockRef LLVMGetCallBrIndirectDest(LLVMValueRef CallBr, unsigned Idx) {
+  return wrap(unwrap(CallBr)->getIndirectDest(Idx));
+}
+
 /*--.. Operations on terminators ...........................................--*/
 
 unsigned LLVMGetNumSuccessors(LLVMValueRef Term) {
@@ -3258,6 +3274,25 @@ LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
   return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
 }
 
+LLVMValueRef LLVMBuildCallBr(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+                             LLVMBasicBlockRef DefaultDest,
+                             LLVMBasicBlockRef *IndirectDests,
+                             unsigned NumIndirectDests, LLVMValueRef *Args,
+                             unsigned NumArgs, LLVMOperandBundleRef *Bundles,
+                             unsigned NumBundles, const char *Name) {
+
+  SmallVector<OperandBundleDef, 8> OBs;
+  for (auto *Bundle : ArrayRef(Bundles, NumBundles)) {
+    OperandBundleDef *OB = unwrap(Bundle);
+    OBs.push_back(*OB);
+  }
+
+  return wrap(unwrap(B)->CreateCallBr(
+      unwrap<FunctionType>(Ty), unwrap(Fn), unwrap(DefaultDest),
+      ArrayRef(unwrap(IndirectDests), NumIndirectDests),
+      ArrayRef(unwrap(Args), NumArgs), OBs, Name));
+}
+
 LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
                               LLVMValueRef *Args, unsigned NumArgs,
                               LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
diff --git a/llvm/test/Bindings/llvm-c/echo.ll b/llvm/test/Bindings/llvm-c/echo.ll
index 953a16b7e624e1..bb5fae0dcd12ee 100644
--- a/llvm/test/Bindings/llvm-c/echo.ll
+++ b/llvm/test/Bindings/llvm-c/echo.ll
@@ -348,6 +348,32 @@ define void @test_func_prologue_data_01() prologue %func_prolog_struct <{ i8 235
   ret void
 }
 
+
+define void @test_call_br_01(i32 %input) {
+entry:
+  callbr void asm "nop", "r,!i"(i32 %input) to label %bb_01 [label %bb_02]
+
+bb_01:
+  ret void
+bb_02:
+  ret void
+}
+
+define void @test_call_br_02(i32 %input0, i32 %input1) {
+entry:
+  ; Multiple indirect destinations, operand bundles, and arguments
+  callbr void asm "nop", "r,r,!i,!i"(i32 %input0, i32 %input1)
+    ["op0"(i32 %input1), "op1"(label %bb_02)]
+    to label %bb_01 [label %bb_03, label %bb_02]
+
+bb_01:
+  ret void
+bb_02:
+  ret void
+bb_03:
+  ret void
+}
+
 !llvm.dbg.cu = !{!0, !2}
 !llvm.module.flags = !{!3}
 
diff --git a/llvm/tools/llvm-c-test/echo.cpp b/llvm/tools/llvm-c-test/echo.cpp
index 347863638849ce..518716168c4235 100644
--- a/llvm/tools/llvm-c-test/echo.cpp
+++ b/llvm/tools/llvm-c-test/echo.cpp
@@ -570,6 +570,46 @@ struct FunCloner {
           LLVMDisposeOperandBundle(Bundle);
         break;
       }
+      case LLVMCallBr: {
+        LLVMTypeRef FnTy = CloneType(LLVMGetCalledFunctionType(Src));
+        LLVMValueRef Fn = CloneValue(LLVMGetCalledValue(Src));
+
+        LLVMBasicBlockRef DefaultDest =
+            DeclareBB(LLVMGetCallBrDefaultDest(Src));
+
+        // Clone indirect destinations
+        SmallVector<LLVMBasicBlockRef> IndirectDests;
+        unsigned IndirectDestCount = LLVMGetCallBrNumIndirectDests(Src);
+        for (unsigned i = 0; i < IndirectDestCount; ++i)
+          IndirectDests.push_back(DeclareBB(LLVMGetCallBrIndirectDest(Src, i)));
+
+        // Clone input arguments
+        SmallVector<LLVMValueRef> Args;
+        unsigned ArgCount = LLVMGetNumArgOperands(Src);
+        for (unsigned i = 0; i < ArgCount; ++i)
+          Args.push_back(CloneValue(LLVMGetOperand(Src, i)));
+
+        // Clone operand bundles
+        SmallVector<LLVMOperandBundleRef> Bundles;
+        unsigned BundleCount = LLVMGetNumOperandBundles(Src);
+        for (unsigned i = 0; i < BundleCount; ++i) {
+          auto Bundle = LLVMGetOperandBundleAtIndex(Src, i);
+          Bundles.push_back(CloneOB(Bundle));
+          LLVMDisposeOperandBundle(Bundle);
+        }
+
+        Dst = LLVMBuildCallBr(Builder, FnTy, Fn, DefaultDest,
+                              IndirectDests.data(), IndirectDests.size(),
+                              Args.data(), Args.size(), Bundles.data(),
+                              Bundles.size(), Name);
+
+        CloneAttrs(Src, Dst);
+
+        for (auto Bundle : Bundles)
+          LLVMDisposeOperandBundle(Bundle);
+
+        break;
+      }
       case LLVMUnreachable:
         Dst = LLVMBuildUnreachable(Builder);
         break;

From c4e5a8a4d3ef0948384d9411ea1e44fc113e5b5c Mon Sep 17 00:00:00 2001
From: Aart Bik 
Date: Tue, 7 May 2024 19:01:36 -0700
Subject: [PATCH 42/64] [mlir][sparse] support 'batch' dimensions in
 sparse_tensor.print (#91411)

---
 .../Transforms/SparseTensorCodegen.cpp        | 12 ++-
 .../Transforms/SparseTensorRewriting.cpp      | 66 ++++++++++-------
 .../SparseTensor/CPU/sparse_pack_d.mlir       | 12 +--
 .../SparseTensor/CPU/sparse_print_3d.mlir     | 74 +++++++++++++++++++
 4 files changed, 130 insertions(+), 34 deletions(-)
 create mode 100755 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print_3d.mlir

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index d9b203a8864885..164e722c45dba8 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -417,11 +417,17 @@ static void genEndInsert(OpBuilder &builder, Location loc,
 /// Generates a subview into the sizes.
 static Value genSliceToSize(OpBuilder &builder, Location loc, Value mem,
                             Value sz) {
-  auto elemTp = llvm::cast<MemRefType>(mem.getType()).getElementType();
+  auto memTp = llvm::cast<MemRefType>(mem.getType());
+  // For higher-dimensional memrefs, we assume that the innermost
+  // dimension is always of the right size.
+  // TODO: generate complex truncating view here too?
+  if (memTp.getRank() > 1)
+    return mem;
+  // Truncate linear memrefs to given size.
   return builder
       .create<memref::SubViewOp>(
-          loc, MemRefType::get({ShapedType::kDynamic}, elemTp), mem,
-          ValueRange{}, ValueRange{sz}, ValueRange{},
+          loc, MemRefType::get({ShapedType::kDynamic}, memTp.getElementType()),
+          mem, ValueRange{}, ValueRange{sz}, ValueRange{},
           ArrayRef{0},                    // static offset
           ArrayRef{ShapedType::kDynamic}, // dynamic size
           ArrayRef{1})                    // static stride
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 7d469198a653cc..025fd3331ba890 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -785,45 +785,61 @@ struct PrintRewriter : public OpRewritePattern<PrintOp> {
   }
 
 private:
-  // Helper to print contents of a single memref. Note that for the "push_back"
-  // vectors, this prints the full capacity, not just the size. This is done
-  // on purpose, so that clients see how much storage has been allocated in
-  // total. Contents of the extra capacity in the buffer may be uninitialized
-  // (unless the flag enable-buffer-initialization is set to true).
+  // Helper to print contents of a single memref. For "push_back" vectors,
+  // we assume that the previous getters for pos/crd/val have added a
+  // slice-to-size view to make sure we just print the size and not the
+  // full capacity.
   //
-  // Generates code to print:
+  // Generates code to print (1-dim or higher):
   //    ( a0, a1, ... )
   static void printContents(PatternRewriter &rewriter, Location loc,
                             Value vec) {
+    auto shape = cast<MemRefType>(vec.getType()).getShape();
+    SmallVector<Value> idxs;
+    printContentsLevel(rewriter, loc, vec, 0, shape, idxs);
+    rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::NewLine);
+  }
+
+  // Helper to the helper.
+  static void printContentsLevel(PatternRewriter &rewriter, Location loc,
+                                 Value vec, unsigned i, ArrayRef<int64_t> shape,
+                                 SmallVectorImpl<Value> &idxs) {
     // Open bracket.
     rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Open);
-    // For loop over elements.
+    // Generate for loop.
     auto zero = constantIndex(rewriter, loc, 0);
-    auto size = rewriter.create<memref::DimOp>(loc, vec, zero);
+    auto index = constantIndex(rewriter, loc, i);
+    auto size = rewriter.create<memref::DimOp>(loc, vec, index);
     auto step = constantIndex(rewriter, loc, 1);
     auto forOp = rewriter.create<scf::ForOp>(loc, zero, size, step);
+    idxs.push_back(forOp.getInductionVar());
     rewriter.setInsertionPointToStart(forOp.getBody());
-    auto idx = forOp.getInductionVar();
-    auto val = rewriter.create<memref::LoadOp>(loc, vec, idx);
-    if (llvm::isa<ComplexType>(val.getType())) {
-      // Since the vector dialect does not support complex types in any op,
-      // we split those into (real, imag) pairs here.
-      Value real = rewriter.create<complex::ReOp>(loc, val);
-      Value imag = rewriter.create<complex::ImOp>(loc, val);
-      rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Open);
-      rewriter.create<vector::PrintOp>(loc, real,
-                                       vector::PrintPunctuation::Comma);
-      rewriter.create<vector::PrintOp>(loc, imag,
-                                       vector::PrintPunctuation::Close);
-      rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Comma);
+    if (i < shape.size() - 1) {
+      // Enter deeper loop nest.
+      printContentsLevel(rewriter, loc, vec, i + 1, shape, idxs);
     } else {
-      rewriter.create<vector::PrintOp>(loc, val,
-                                       vector::PrintPunctuation::Comma);
+      // Actual contents printing.
+      auto val = rewriter.create<memref::LoadOp>(loc, vec, idxs);
+      if (llvm::isa<ComplexType>(val.getType())) {
+        // Since the vector dialect does not support complex types in any op,
+        // we split those into (real, imag) pairs here.
+        Value real = rewriter.create<complex::ReOp>(loc, val);
+        Value imag = rewriter.create<complex::ImOp>(loc, val);
+        rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Open);
+        rewriter.create<vector::PrintOp>(loc, real,
+                                         vector::PrintPunctuation::Comma);
+        rewriter.create<vector::PrintOp>(loc, imag,
+                                         vector::PrintPunctuation::Close);
+        rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Comma);
+      } else {
+        rewriter.create<vector::PrintOp>(loc, val,
+                                         vector::PrintPunctuation::Comma);
+      }
     }
+    idxs.pop_back();
     rewriter.setInsertionPointAfter(forOp);
-    // Close bracket and end of line.
+    // Close bracket.
     rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Close);
-    rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::NewLine);
   }
 
   // Helper method to print run-time lvl/dim sizes.
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
index 20ae7e86285cce..467a77f30777a2 100755
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
@@ -29,7 +29,7 @@
   crdWidth = 32
 }>
 
-#BatchedCSR = #sparse_tensor.encoding<{
+#DenseCSR = #sparse_tensor.encoding<{
   map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 : compressed),
   posWidth = 64,
   crdWidth = 32
@@ -42,7 +42,7 @@
 }>
 
 //
-// Test assembly operation with CCC, batched-CSR and CSR-dense.
+// Test assembly operation with CCC, dense-CSR and CSR-dense.
 //
 module {
   //
@@ -77,7 +77,7 @@ module {
         tensor<6xi64>, tensor<8xi32>), tensor<8xf32> to tensor<4x3x2xf32, #CCC>
 
     //
-    // Setup BatchedCSR.
+    // Setup DenseCSR.
     //
 
     %data1 = arith.constant dense<
@@ -88,7 +88,7 @@ module {
     %crd1 = arith.constant dense<
        [ 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1]> : tensor<16xi32>
 
-    %s1 = sparse_tensor.assemble (%pos1, %crd1), %data1 : (tensor<13xi64>, tensor<16xi32>), tensor<16xf32> to tensor<4x3x2xf32, #BatchedCSR>
+    %s1 = sparse_tensor.assemble (%pos1, %crd1), %data1 : (tensor<13xi64>, tensor<16xi32>), tensor<16xf32> to tensor<4x3x2xf32, #DenseCSR>
 
     //
     // Setup CSRDense.
@@ -137,7 +137,7 @@ module {
     // CHECK-NEXT: ----
     //
     sparse_tensor.print %s0 : tensor<4x3x2xf32, #CCC>
-    sparse_tensor.print %s1 : tensor<4x3x2xf32, #BatchedCSR>
+    sparse_tensor.print %s1 : tensor<4x3x2xf32, #DenseCSR>
     sparse_tensor.print %s2 : tensor<4x3x2xf32, #CSRDense>
 
     // TODO: This check is no longer needed once the codegen path uses the
@@ -148,7 +148,7 @@ module {
       // sparse_tensor.assemble copies buffers when running with the runtime
       // library. Deallocations are not needed when running in codegen mode.
       bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
-      bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
+      bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #DenseCSR>
       bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
     }
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print_3d.mlir
new file mode 100755
index 00000000000000..98dee304fa511d
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print_3d.mlir
@@ -0,0 +1,74 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+//  do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
+// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// TODO: make this work with libgen
+
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+
+#BatchedCSR = #sparse_tensor.encoding<{
+  map = (d0, d1, d2) -> (d0 : batch, d1 : dense, d2 : compressed)
+}>
+
+module {
+
+  //
+  // Main driver that tests 3-D sparse tensor printing.
+  //
+  func.func @main() {
+
+    %pos = arith.constant dense<
+      [[ 0, 8, 16, 24, 32],
+       [ 0, 8, 16, 24, 32]]
+    > : tensor<2x5xindex>
+
+    %crd = arith.constant dense<
+      [[0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7],
+       [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]]
+    > : tensor<2x32xindex>
+
+    %val = arith.constant dense<
+      [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.,
+        12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22.,
+        23., 24., 25., 26., 27., 28., 29., 30., 31., 32.],
+       [33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43.,
+        44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54.,
+        55., 56., 57., 58., 59., 60., 61., 62., 63., 64.]]
+    > : tensor<2x32xf64>
+
+    %X = sparse_tensor.assemble (%pos, %crd), %val
+      : (tensor<2x5xindex>, tensor<2x32xindex>), tensor<2x32xf64> to tensor<2x4x8xf64, #BatchedCSR>
+
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 32
+    // CHECK-NEXT: dim = ( 2, 4, 8 )
+    // CHECK-NEXT: lvl = ( 2, 4, 8 )
+    // CHECK-NEXT: pos[2] : ( ( 0, 8, 16, 24, 32,  )( 0, 8, 16, 24, 32,  ) )
+    // CHECK-NEXT: crd[2] : ( ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,  )
+    // CHECK-SAME:            ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,  ) )
+    // CHECK-NEXT: values : ( ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,  )
+    // CHECK-SAME:            ( 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,  ) )
+    // CHECK-NEXT: ----
+    sparse_tensor.print %X : tensor<2x4x8xf64, #BatchedCSR>
+
+    return
+  }
+}

From 2dade0041a62b192e9bde24ae6bbe6208f027523 Mon Sep 17 00:00:00 2001
From: Jinsong Ji 
Date: Tue, 7 May 2024 19:02:10 -0700
Subject: [PATCH 43/64] [Analysis] Attribute Range should not prevent tail call
 optimization (#91122)

- Remove the Range attribute when comparing caller/callee attributes for
  tail call eligibility
- Add tests for tail calls with the range attribute
---
 llvm/lib/CodeGen/Analysis.cpp                 |  7 +--
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  7 +--
 llvm/test/CodeGen/X86/tailcall-range.ll       | 53 +++++++++++++++++++
 3 files changed, 61 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/tailcall-range.ll

diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index af7643d93591f7..e693cdbd0ccc1c 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -593,9 +593,10 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
 
   // Following attributes are completely benign as far as calling convention
   // goes, they shouldn't affect whether the call is a tail call.
-  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
-                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
-                           Attribute::NonNull, Attribute::NoUndef}) {
+  for (const auto &Attr :
+       {Attribute::Alignment, Attribute::Dereferenceable,
+        Attribute::DereferenceableOrNull, Attribute::NoAlias,
+        Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) {
     CallerAttrs.removeAttribute(Attr);
     CalleeAttrs.removeAttribute(Attr);
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 336d89fbcf638e..9ec3ac4f99915e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -62,9 +62,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
   // the return. Ignore following attributes because they don't affect the
   // call sequence.
   AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
-  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
-                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
-                           Attribute::NonNull, Attribute::NoUndef})
+  for (const auto &Attr :
+       {Attribute::Alignment, Attribute::Dereferenceable,
+        Attribute::DereferenceableOrNull, Attribute::NoAlias,
+        Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
     CallerAttrs.removeAttribute(Attr);
 
   if (CallerAttrs.hasAttributes())
diff --git a/llvm/test/CodeGen/X86/tailcall-range.ll b/llvm/test/CodeGen/X86/tailcall-range.ll
new file mode 100644
index 00000000000000..6ae7405ebc4a98
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tailcall-range.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+
+define range(i32 0, 2) i32 @foo(ptr %this) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    retq
+entry:
+  %call = load volatile i1, ptr %this, align 1
+  %spec.select = zext i1 %call to i32
+  ret i32 %spec.select
+}
+
+define range(i32 0, 2) i32 @bar(ptr %this) {
+; CHECK-LABEL: bar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp foo@PLT # TAILCALL
+entry:
+  %ret = musttail call i32 @foo(ptr null)
+  ret i32 %ret
+}
+
+declare i64 @llvm.llround.f32(float) nounwind readnone
+define range(i64 0, 8) i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llroundf@PLT # TAILCALL
+entry:
+  %ret = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %ret
+}
+
+declare i32 @callee()
+
+define range(i32 0, 2) i32 @func_with_range_attr() {
+; CHECK-LABEL: func_with_range_attr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp callee@PLT # TAILCALL
+entry:
+  %ret = musttail call i32 @callee()
+  ret i32 %ret
+}
+
+define i32 @call_with_range_attr() {
+; CHECK-LABEL: call_with_range_attr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp callee@PLT # TAILCALL
+entry:
+  %ret = musttail call range(i32 0, 2) i32 @callee()
+  ret i32 %ret
+}

From 7098cd215b95286794d9e0c822e8323ad0509750 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Wed, 8 May 2024 11:02:00 +0900
Subject: [PATCH 44/64] [NFC] Add myself as code owner for llvm/IR/Core.cpp

In practice I end up reviewing most changes to the C API.
---
 .github/CODEOWNERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 0f178df1d18f8c..ad81bf1684b6cd 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -23,6 +23,7 @@
 /llvm/lib/Analysis/ScalarEvolution.cpp @nikic
 /llvm/lib/Analysis/ValueTracking.cpp @nikic
 /llvm/lib/IR/ConstantRange.cpp @nikic
+/llvm/lib/IR/Core.cpp @nikic
 /llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @nikic
 /llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @nikic
 /llvm/lib/Transforms/InstCombine/ @nikic

From d085b42cbbefe79a41113abcd2b1e1f2a203acef Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Wed, 8 May 2024 10:04:09 +0800
Subject: [PATCH 45/64] [InstSimplify] Do not simplify freeze in
 `simplifyWithOpReplaced` (#91215)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See the LangRef:
> All uses of a value returned by the same ‘freeze’ instruction are
guaranteed to always observe the same value, while different ‘freeze’
instructions may yield different values.

It is incorrect to replace freezes with the simplified value.

Proof:
https://alive2.llvm.org/ce/z/3Dn9Cd
https://alive2.llvm.org/ce/z/Qyh5h6

Fixes https://github.com/llvm/llvm-project/issues/91178
---
 llvm/lib/Analysis/InstructionSimplify.cpp  |  4 +++
 llvm/test/Transforms/InstCombine/icmp.ll   | 15 ++++++++++
 llvm/test/Transforms/InstCombine/select.ll | 32 ++++++++++++++++++++++
 llvm/test/Transforms/PGOProfile/chr.ll     |  7 +++--
 4 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4061dae83c10f3..37a7259a5cd021 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4312,6 +4312,10 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
   if (match(I, m_Intrinsic<Intrinsic::is_constant>()))
     return nullptr;
 
+  // Don't simplify freeze.
+  if (isa<FreezeInst>(I))
+    return nullptr;
+
   // Replace Op with RepOp in instruction operands.
   SmallVector<Value *, 8> NewOps;
   bool AnyReplaced = false;
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 31093c7ca1036c..2d786c8f48833d 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -5183,3 +5183,18 @@ entry:
   %cmp = icmp eq i8 %add2, %add1
   ret i1 %cmp
 }
+
+define i1 @icmp_freeze_sext(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_freeze_sext(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp uge i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP1_FR:%.*]] = freeze i1 [[CMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[Y]], 0
+; CHECK-NEXT:    [[CMP2:%.*]] = or i1 [[TMP1]], [[CMP1_FR]]
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+  %cmp1 = icmp uge i16 %x, %y
+  %ext = sext i1 %cmp1 to i16
+  %ext.fr = freeze i16 %ext
+  %cmp2 = icmp uge i16 %ext.fr, %y
+  ret i1 %cmp2
+}
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 2efe2742ca4916..2ade6faa99be3f 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -4580,3 +4580,35 @@ define i32 @sequence_select_with_same_cond_extra_use(i1 %c1, i1 %c2){
   %s3 = select i1 %c1, i32 789, i32 %s2
   ret i32 %s3
 }
+
+define i8 @test_replace_freeze_multiuse(i1 %x, i8 %y) {
+; CHECK-LABEL: @test_replace_freeze_multiuse(
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[X:%.*]] to i8
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i8 [[EXT]], [[Y:%.*]]
+; CHECK-NEXT:    [[SHL_FR:%.*]] = freeze i8 [[SHL]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[X]], i8 0, i8 [[SHL_FR]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SHL_FR]], [[SEL]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %ext = zext i1 %x to i8
+  %shl = shl nuw i8 %ext, %y
+  %shl.fr = freeze i8 %shl
+  %sel = select i1 %x, i8 0, i8 %shl.fr
+  %add = add i8 %shl.fr, %sel
+  ret i8 %add
+}
+
+define i8 @test_replace_freeze_oneuse(i1 %x, i8 %y) {
+; CHECK-LABEL: @test_replace_freeze_oneuse(
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[X:%.*]] to i8
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i8 [[EXT]], [[Y:%.*]]
+; CHECK-NEXT:    [[SHL_FR:%.*]] = freeze i8 [[SHL]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[X]], i8 0, i8 [[SHL_FR]]
+; CHECK-NEXT:    ret i8 [[SEL]]
+;
+  %ext = zext i1 %x to i8
+  %shl = shl nuw i8 %ext, %y
+  %shl.fr = freeze i8 %shl
+  %sel = select i1 %x, i8 0, i8 %shl.fr
+  ret i8 %sel
+}
diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index 0551a171091ca7..38e8f8536a19c0 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -1298,11 +1298,12 @@ define i32 @test_chr_14(ptr %i, ptr %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Z_FR:%.*]] = freeze i32 [[Z:%.*]]
 ; CHECK-NEXT:    [[I0:%.*]] = load i32, ptr [[I:%.*]], align 4
-; CHECK-NEXT:    [[V1:%.*]] = icmp eq i32 [[Z_FR]], 1
-; CHECK-NEXT:    br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
+; CHECK-NEXT:    [[V1_NOT:%.*]] = icmp eq i32 [[Z_FR]], 1
+; CHECK-NEXT:    br i1 [[V1_NOT]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
 ; CHECK:       entry.split.nonchr:
+; CHECK-NEXT:    [[PRED_FR:%.*]] = freeze i1 [[PRED:%.*]]
 ; CHECK-NEXT:    [[V0:%.*]] = icmp eq i32 [[Z_FR]], 0
-; CHECK-NEXT:    [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
+; CHECK-NEXT:    [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED_FR]]
 ; CHECK-NEXT:    br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
 ; CHECK:       bb0.nonchr:
 ; CHECK-NEXT:    call void @foo()

From bb01b89cda71fe1594a87f81b3f3c01f66fcac59 Mon Sep 17 00:00:00 2001
From: Ryosuke Niwa 
Date: Tue, 7 May 2024 19:10:50 -0700
Subject: [PATCH 46/64] [analyzer] Ignore system headers in WebKit checkers.
 (#91103)

---
 .../WebKit/UncountedCallArgsChecker.cpp         |  3 +++
 .../WebKit/UncountedLocalVarsChecker.cpp        |  3 +++
 .../Checkers/WebKit/mock-system-header.h        | 17 +++++++++++++++++
 .../Checkers/WebKit/uncounted-local-vars.cpp    | 11 +++++++++++
 .../Checkers/WebKit/uncounted-members.cpp       | 10 ++++++++++
 .../Checkers/WebKit/uncounted-obj-arg.cpp       |  6 ++++++
 6 files changed, 50 insertions(+)
 create mode 100644 clang/test/Analysis/Checkers/WebKit/mock-system-header.h

diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
index 0f40ecc7ba3000..9a178a690ff24b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
@@ -150,6 +150,9 @@ class UncountedCallArgsChecker
   bool shouldSkipCall(const CallExpr *CE) const {
     const auto *Callee = CE->getDirectCallee();
 
+    if (BR->getSourceManager().isInSystemHeader(CE->getExprLoc()))
+      return true;
+
     if (Callee && TFA.isTrivial(Callee))
       return true;
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp
index 6036ad58cf253c..98a73810b7afca 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp
@@ -230,6 +230,9 @@ class UncountedLocalVarsChecker
     if (!V->isLocalVarDecl())
       return true;
 
+    if (BR->getSourceManager().isInSystemHeader(V->getLocation()))
+      return true;
+
     return false;
   }
 
diff --git a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
new file mode 100644
index 00000000000000..a1d30957b19cbe
--- /dev/null
+++ b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
@@ -0,0 +1,17 @@
+#pragma clang system_header
+
+template <typename CreateFunction>
+void callMethod(CreateFunction createFunction) {
+  createFunction()->method();
+}
+
+template <typename T, typename CreateFunction>
+inline void localVar(CreateFunction createFunction) {
+  T* obj = createFunction();
+  obj->method();
+}
+
+template <typename T>
+struct MemberVariable {
+    T* obj { nullptr };
+};
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp
index 00673e91f471ea..8da1dc557a5a3a 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedLocalVarsChecker -verify %s
 
 #include "mock-types.h"
+#include "mock-system-header.h"
 
 void someFunction();
 
@@ -187,3 +188,13 @@ void bar() {
 }
 
 } // namespace ignore_for_if
+
+namespace ignore_system_headers {
+
+RefCountable *provide_ref_ctnbl();
+
+void system_header() {
+  localVar<RefCountable>(provide_ref_ctnbl);
+}
+
+} // ignore_system_headers
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp
index 108d5effdd2e8d..bca7b3bad3a155 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=webkit.NoUncountedMemberChecker -verify %s
 
 #include "mock-types.h"
+#include "mock-system-header.h"
 
 namespace members {
   struct Foo {
@@ -50,3 +51,12 @@ namespace ignore_unions {
 
   void forceTmplToInstantiate(RefPtr) {}
 }
+
+namespace ignore_system_header {
+
+void foo(RefCountable* t) {
+  MemberVariable<RefCountable> var { t };
+  var.obj->method();
+}
+
+} // ignore_system_header
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
index 63a68a994a5c64..e75d42b9f14915 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s
 
 #include "mock-types.h"
+#include "mock-system-header.h"
 
 void WTFBreakpointTrap();
 void WTFCrashWithInfo(int, const char*, const char*, int);
@@ -147,6 +148,7 @@ class RefCounted {
   void ref() const;
   void deref() const;
 
+  void method();
   void someFunction();
   int otherFunction();
 
@@ -399,3 +401,7 @@ void someFunction(const RefCounted&);
 void test2() {
     someFunction(*object());
 }
+
+void system_header() {
+  callMethod(object);
+}

From 0af448b71116ae93eae1cb9c3121cb94be076fc3 Mon Sep 17 00:00:00 2001
From: Menooker 
Date: Wed, 8 May 2024 10:14:52 +0800
Subject: [PATCH 47/64] [MLIR][Bufferization] BufferResultsToOutParams: Add an
 option to eliminate AllocOp and avoid Copy (#90011)

Add an option hoist-static-allocs to remove the unnecessary memref.alloc
and memref.copy after this pass, when the memref in the ReturnOp is
allocated by memref.alloc and is statically shaped. Instead, the pass
replaces the uses of the locally allocated memref with the memref in the
out argument.
By default, BufferResultsToOutParams emits a memcpy operation to copy
the originally returned memref into the output argument memref. This is
inefficient when the source of the memcpy (the returned memref in the
original ReturnOp) comes from a local AllocOp; the pass can instead use
the output argument memref in place of the locally allocated one, so
hoist-static-allocs avoids both the allocation and the memory movement.
This option matters for performance-sensitive applications that require
the BufferResultsToOutParams pass for a caller-owned output buffer
calling convention. A usage sketch follows below.
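
A minimal sketch (not part of this change) of enabling the option when the
transform is driven from C++; module stands for an mlir::ModuleOp the caller
already owns:

  mlir::bufferization::BufferResultsToOutParamsOpts options;
  options.hoistStaticAllocs = true; // the option added by this change
  // Rewrites memref results into out-params; statically shaped results that
  // come from memref.alloc now reuse the out argument instead of a copy.
  if (mlir::failed(mlir::bufferization::promoteBufferResultsToOutParams(
          module, options)))
    module.emitError("buffer-results-to-out-params failed");

From the command line the same behavior is exercised through
buffer-results-to-out-params{hoist-static-allocs}, as in the new test.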
---
 .../Dialect/Bufferization/Transforms/Passes.h |  4 ++
 .../Bufferization/Transforms/Passes.td        |  9 +++++
 .../Transforms/BufferResultsToOutParams.cpp   | 21 ++++++++---
 .../buffer-results-to-out-params-elim.mlir    | 37 +++++++++++++++++++
 4 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 mlir/test/Transforms/buffer-results-to-out-params-elim.mlir

diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index a729bc99b987cd..459c252b707121 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -166,6 +166,10 @@ struct BufferResultsToOutParamsOpts {
   /// If true, the pass adds a "bufferize.result" attribute to each output
   /// parameter.
   bool addResultAttribute = false;
+
+  /// If true, the pass eliminates the memref.alloc and memcpy if the returned
+  /// memref is allocated in the current function.
+  bool hoistStaticAllocs = false;
 };
 
 /// Creates a pass that converts memref function results to out-params.
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 1303dc2c9ae10f..75ce85c9128c94 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -315,11 +315,20 @@ def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">
     The main issue with this pass (and the out-param calling convention) is that
     buffers for results need to be allocated in the caller. This currently only
     works for static shaped memrefs.
+
+    If the hoist-static-allocs option is on, the pass tries to eliminate the
+    allocation for the returned memref and avoid the memory-copy if possible.
+    This optimization applies on the returned memref which has static shape and
+    is allocated by memref.alloc in the function. It will use the memref given
+    in function argument to replace the allocated memref.
   }];
   let options = [
     Option<"addResultAttribute", "add-result-attr", "bool",
        /*default=*/"false",
        "Add the attribute 'bufferize.result' to all output parameters.">,
+    Option<"hoistStaticAllocs", "hoist-static-allocs",
+       "bool", /*default=*/"false",
+       "Hoist static allocations to call sites.">,
   ];
   let constructor = "mlir::bufferization::createBufferResultsToOutParamsPass()";
   let dependentDialects = ["memref::MemRefDialect"];
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index a2222e169c4d64..a5f01eadb21343 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -107,7 +107,8 @@ updateFuncOp(func::FuncOp func,
 // the given out-params.
 static LogicalResult updateReturnOps(func::FuncOp func,
                                      ArrayRef<BlockArgument> appendedEntryArgs,
-                                     MemCpyFn memCpyFn) {
+                                     MemCpyFn memCpyFn,
+                                     bool hoistStaticAllocs) {
   auto res = func.walk([&](func::ReturnOp op) {
     SmallVector<Value> copyIntoOutParams;
     SmallVector<Value> keepAsReturnOperands;
@@ -118,10 +119,15 @@ static LogicalResult updateReturnOps(func::FuncOp func,
         keepAsReturnOperands.push_back(operand);
     }
     OpBuilder builder(op);
-    for (auto t : llvm::zip(copyIntoOutParams, appendedEntryArgs)) {
-      if (failed(
-              memCpyFn(builder, op.getLoc(), std::get<0>(t), std::get<1>(t))))
-        return WalkResult::interrupt();
+    for (auto [orig, arg] : llvm::zip(copyIntoOutParams, appendedEntryArgs)) {
+      if (hoistStaticAllocs && isa<memref::AllocOp>(orig.getDefiningOp()) &&
+          orig.getType().cast<MemRefType>().hasStaticShape()) {
+        orig.replaceAllUsesWith(arg);
+        orig.getDefiningOp()->erase();
+      } else {
+        if (failed(memCpyFn(builder, op.getLoc(), orig, arg)))
+          return WalkResult::interrupt();
+      }
     }
     builder.create<func::ReturnOp>(op.getLoc(), keepAsReturnOperands);
     op.erase();
@@ -212,7 +218,8 @@ LogicalResult mlir::bufferization::promoteBufferResultsToOutParams(
       return success();
     };
     if (failed(updateReturnOps(func, appendedEntryArgs,
-                               options.memCpyFn.value_or(defaultMemCpyFn)))) {
+                               options.memCpyFn.value_or(defaultMemCpyFn),
+                               options.hoistStaticAllocs))) {
       return failure();
     }
   }
@@ -233,6 +240,8 @@ struct BufferResultsToOutParamsPass
     // Convert from pass options in tablegen to BufferResultsToOutParamsOpts.
     if (addResultAttribute)
       options.addResultAttribute = true;
+    if (hoistStaticAllocs)
+      options.hoistStaticAllocs = true;
 
     if (failed(bufferization::promoteBufferResultsToOutParams(getOperation(),
                                                               options)))
diff --git a/mlir/test/Transforms/buffer-results-to-out-params-elim.mlir b/mlir/test/Transforms/buffer-results-to-out-params-elim.mlir
new file mode 100644
index 00000000000000..f77dbfaa6cb11e
--- /dev/null
+++ b/mlir/test/Transforms/buffer-results-to-out-params-elim.mlir
@@ -0,0 +1,37 @@
+// RUN: mlir-opt -allow-unregistered-dialect -p 'builtin.module(buffer-results-to-out-params{hoist-static-allocs})'  %s | FileCheck %s
+
+// CHECK-LABEL:   func @basic(
+// CHECK-SAME:                %[[ARG:.*]]: memref<8x64xf32>) {
+// CHECK-NOT:        memref.alloc()
+// CHECK:           "test.source"(%[[ARG]])  : (memref<8x64xf32>) -> ()
+// CHECK:           return
+// CHECK:         }
+func.func @basic() -> (memref<8x64xf32>) {
+  %b = memref.alloc() : memref<8x64xf32>
+  "test.source"(%b)  : (memref<8x64xf32>) -> ()
+  return %b : memref<8x64xf32>
+}
+
+// CHECK-LABEL:   func @basic_no_change(
+// CHECK-SAME:                %[[ARG:.*]]: memref) {
+// CHECK:           %[[RESULT:.*]] = "test.source"() : () -> memref
+// CHECK:           memref.copy %[[RESULT]], %[[ARG]]  : memref to memref
+// CHECK:           return
+// CHECK:         }
+func.func @basic_no_change() -> (memref) {
+  %0 = "test.source"() : () -> (memref)
+  return %0 : memref
+}
+
+// CHECK-LABEL:   func @basic_dynamic(
+// CHECK-SAME:                %[[D:.*]]: index, %[[ARG:.*]]: memref) {
+// CHECK:           %[[RESULT:.*]] = memref.alloc(%[[D]]) : memref
+// CHECK:           "test.source"(%[[RESULT]])  : (memref) -> ()
+// CHECK:           memref.copy %[[RESULT]], %[[ARG]]
+// CHECK:           return
+// CHECK:         }
+func.func @basic_dynamic(%d: index) -> (memref) {
+  %b = memref.alloc(%d) : memref
+  "test.source"(%b)  : (memref) -> ()
+  return %b : memref
+}
\ No newline at end of file

From 1c8c2fdd289075d6ef448f60db9dd30caf7f78df Mon Sep 17 00:00:00 2001
From: Jie Fu 
Date: Wed, 8 May 2024 10:38:34 +0800
Subject: [PATCH 48/64] [mlir] Fix -Wdeprecated-declarations in
 BufferResultsToOutParams.cpp (NFC)

/llvm-project/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp:124:26:
error: 'cast' is deprecated: Use mlir::cast<U>() instead [-Werror,-Wdeprecated-declarations]
  124 |           orig.getType().cast<MemRefType>().hasStaticShape()) {
      |
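
For reference, a hedged sketch of the preferred free-function form used by the
fix below (names come from the surrounding code):

  // Deprecated member form:  orig.getType().cast<MemRefType>()
  // Preferred free function: mlir::cast<MemRefType>(orig.getType())
  mlir::MemRefType memTy = mlir::cast<mlir::MemRefType>(orig.getType());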
---
 .../Bufferization/Transforms/BufferResultsToOutParams.cpp       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index a5f01eadb21343..b19636adaa69e6 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -121,7 +121,7 @@ static LogicalResult updateReturnOps(func::FuncOp func,
     OpBuilder builder(op);
     for (auto [orig, arg] : llvm::zip(copyIntoOutParams, appendedEntryArgs)) {
       if (hoistStaticAllocs && isa<memref::AllocOp>(orig.getDefiningOp()) &&
-          orig.getType().cast<MemRefType>().hasStaticShape()) {
+          mlir::cast<MemRefType>(orig.getType()).hasStaticShape()) {
         orig.replaceAllUsesWith(arg);
         orig.getDefiningOp()->erase();
       } else {

From 31b45a9d0d91cc3a78446ee379abc6f2a3000065 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com>
Date: Tue, 7 May 2024 22:54:15 -0400
Subject: [PATCH 49/64] [clang][hlsl] Add tan intrinsic part 1 (#90276)

This change implements part of #87367's investigation into supporting
IEEE math operations as intrinsics, which was discussed in this RFC:
https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

If you want an overarching view of how this will all connect see:
https://github.com/llvm/llvm-project/pull/90088

Changes:
- `clang/docs/LanguageExtensions.rst` - Document the new elementwise tan
builtin.
- `clang/include/clang/Basic/Builtins.td` - Implement the tan builtin.
- `clang/lib/CodeGen/CGBuiltin.cpp` - Invoke the tan intrinsic on uses
of the builtin.
- `clang/lib/Headers/hlsl/hlsl_intrinsics.h` - Associate the tan builtin
with the equivalent HLSL APIs.
- `clang/lib/Sema/SemaChecking.cpp` - Add generic sema checks as well as
HLSL-specific sema checks to the tan builtin.
- `llvm/include/llvm/IR/Intrinsics.td` - Create the tan intrinsic.
- `llvm/docs/LangRef.rst` - Document the tan intrinsic.
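
As a rough usage sketch (not part of the diff), the builtin takes one
floating-point scalar or vector operand and lowers to the matching
`llvm.tan.*` intrinsic; the `float4` typedef mirrors the one used in the
codegen tests:

```
typedef float float4 __attribute__((ext_vector_type(4)));

float scalar_tan(float x) {
  return __builtin_elementwise_tan(x);   // lowers to @llvm.tan.f32
}

float4 vector_tan(float4 v) {
  return __builtin_elementwise_tan(v);   // lowers to @llvm.tan.v4f32
}
```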
---
 clang/docs/LanguageExtensions.rst             |  1 +
 clang/include/clang/Basic/Builtins.td         |  6 ++
 clang/lib/CodeGen/CGBuiltin.cpp               |  4 +-
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      | 23 ++++++++
 clang/lib/Sema/SemaChecking.cpp               |  2 +
 .../test/CodeGen/builtins-elementwise-math.c  | 16 +++++
 .../CodeGen/strictfp-elementwise-bulitins.cpp | 10 ++++
 clang/test/CodeGenHLSL/builtins/tan.hlsl      | 59 +++++++++++++++++++
 clang/test/Sema/aarch64-sve-vector-trig-ops.c | 42 +++++++------
 clang/test/Sema/builtins-elementwise-math.c   | 21 +++++++
 clang/test/Sema/riscv-rvv-vector-trig-ops.c   | 44 ++++++++------
 .../SemaCXX/builtins-elementwise-math.cpp     |  7 +++
 .../BuiltIns/half-float-only-errors.hlsl      |  1 +
 llvm/docs/LangRef.rst                         | 37 ++++++++++++
 llvm/include/llvm/IR/Intrinsics.td            |  1 +
 15 files changed, 236 insertions(+), 38 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/tan.hlsl

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c2e90f4e7d587a..3627a780886a0a 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -656,6 +656,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_ceil(T x)           return the smallest integral value greater than or equal to x    floating point types
  T __builtin_elementwise_sin(T x)            return the sine of x interpreted as an angle in radians          floating point types
  T __builtin_elementwise_cos(T x)            return the cosine of x interpreted as an angle in radians        floating point types
+ T __builtin_elementwise_tan(T x)            return the tangent of x interpreted as an angle in radians       floating point types
  T __builtin_elementwise_floor(T x)          return the largest integral value less than or equal to x        floating point types
  T __builtin_elementwise_log(T x)            return the natural logarithm of x                                floating point types
  T __builtin_elementwise_log2(T x)           return the base 2 logarithm of x                                 floating point types
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index de721a87b3341d..11982af3fa609b 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1326,6 +1326,12 @@ def ElementwiseSqrt : Builtin {
   let Prototype = "void(...)";
 }
 
+def ElementwiseTan : Builtin {
+  let Spellings = ["__builtin_elementwise_tan"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwiseTrunc : Builtin {
   let Spellings = ["__builtin_elementwise_trunc"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e8a6bd050e17ea..4b03b8b0e093ed 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3822,7 +3822,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_sin:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
-
+  case Builtin::BI__builtin_elementwise_tan:
+    return RValue::get(
+        emitUnaryBuiltin(*this, E, llvm::Intrinsic::tan, "elt.tan"));
   case Builtin::BI__builtin_elementwise_trunc:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 06409c6fc77417..3390f0962f67d0 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1441,6 +1441,29 @@ float3 sqrt(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt)
 float4 sqrt(float4);
 
+//===----------------------------------------------------------------------===//
+// tan builtins
+//===----------------------------------------------------------------------===//
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half tan(half);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half2 tan(half2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half3 tan(half3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half4 tan(half4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float tan(float);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float2 tan(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float3 tan(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float4 tan(float4);
+
 //===----------------------------------------------------------------------===//
 // trunc builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3179d542b1f926..e8e74467208c73 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3047,6 +3047,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
+  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
@@ -5677,6 +5678,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_elementwise_roundeven:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
+  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc: {
     if (CheckFloatOrHalfRepresentations(this, TheCall))
       return true;
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 1c667e5bff1eab..1b5466abd347d7 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -604,6 +604,22 @@ void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_sqrt(vf1);
 }
 
+void test_builtin_elementwise_tan(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_tan(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.tan.f32(float [[F1]])
+  f2 = __builtin_elementwise_tan(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.tan.f64(double [[D1]])
+  d2 = __builtin_elementwise_tan(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.tan.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_tan(vf1);
+}
+
 void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                     float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
index fdf865ebbe8911..c72d5949916911 100644
--- a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
+++ b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -187,6 +187,16 @@ float4 strict_elementwise_sqrt(float4 a) {
   return __builtin_elementwise_sqrt(a);
 }
 
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_tanDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_TAN]]
+//
+float4 strict_elementwise_tan(float4 a) {
+  return __builtin_elementwise_tan(a);
+}
+
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl
new file mode 100644
index 00000000000000..aa542fac226d0b
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/tan.hlsl
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// CHECK-LABEL: test_tan_half
+// NATIVE_HALF: call half @llvm.tan.f16
+// NO_HALF: call float @llvm.tan.f32
+half test_tan_half ( half p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_half2
+// NATIVE_HALF: call <2 x half> @llvm.tan.v2f16
+// NO_HALF: call <2 x float> @llvm.tan.v2f32
+half2 test_tan_half2 ( half2 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_half3
+// NATIVE_HALF: call <3 x half> @llvm.tan.v3f16
+// NO_HALF: call <3 x float> @llvm.tan.v3f32
+half3 test_tan_half3 ( half3 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_half4
+// NATIVE_HALF: call <4 x half> @llvm.tan.v4f16
+// NO_HALF: call <4 x float> @llvm.tan.v4f32
+half4 test_tan_half4 ( half4 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_float
+// CHECK: call float @llvm.tan.f32
+float test_tan_float ( float p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_float2
+// CHECK: call <2 x float> @llvm.tan.v2f32
+float2 test_tan_float2 ( float2 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_float3
+// CHECK: call <3 x float> @llvm.tan.v3f32
+float3 test_tan_float3 ( float3 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK-LABEL: test_tan_float4
+// CHECK: call <4 x float> @llvm.tan.v4f32
+float4 test_tan_float4 ( float4 p0 ) {
+  return tan ( p0 );
+}
diff --git a/clang/test/Sema/aarch64-sve-vector-trig-ops.c b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
index 70832e77fdf225..6863f32b59489d 100644
--- a/clang/test/Sema/aarch64-sve-vector-trig-ops.c
+++ b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
@@ -1,18 +1,24 @@
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve \
-// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sve.h>
-
-
-svfloat32_t test_sin_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_sin(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_cos(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve \
+// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+
+svfloat32_t test_sin_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_sin(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_cos(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 2e05337273ee41..2e4319d158e7a0 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -626,6 +626,27 @@ void test_builtin_elementwise_sqrt(int i, float f, double d, float4 v, int3 iv,
   // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_tan(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+  struct Foo s = __builtin_elementwise_tan(f);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_tan();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_tan(i);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_tan(f, f);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_tan(u);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_tan(uv);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
 void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_trunc(f);
diff --git a/clang/test/Sema/riscv-rvv-vector-trig-ops.c b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
index 9879b3ca4be6e7..459582fe283938 100644
--- a/clang/test/Sema/riscv-rvv-vector-trig-ops.c
+++ b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
@@ -1,19 +1,25 @@
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +v -target-feature +zfh -target-feature +zvfh \
-// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
-// REQUIRES: riscv-registered-target
-
-#include <riscv_vector.h>
-
-
-vfloat32mf2_t test_sin_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_sin(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
-
-vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_cos(v);
-  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
-}
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
+// RUN:   -target-feature +v -target-feature +zfh -target-feature +zvfh \
+// RUN:   -disable-O0-optnone -o - -fsyntax-only %s -verify
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+
+vfloat32mf2_t test_sin_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_sin(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_cos(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
+
+vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
index 44a44ab055e997..499f2795ddb272 100644
--- a/clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -111,6 +111,13 @@ void test_builtin_elementwise_sin() {
   static_assert(!is_const::value);
 }
 
+void test_builtin_elementwise_tan() {
+  const float a = 42.0;
+  float b = 42.3;
+  static_assert(!is_const::value);
+  static_assert(!is_const::value);
+}
+
 void test_builtin_elementwise_sqrt() {
   const float a = 42.0;
   float b = 42.3;
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
index ef0928f8fef0d6..4089188134d321 100644
--- a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
@@ -9,6 +9,7 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_sin
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_sqrt
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_roundeven
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tan
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_trunc
 
 double2 test_double_builtin(double2 p0) {
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ff0fc55860de94..cc7094116b8bc8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15272,6 +15272,43 @@ trapping or setting ``errno``.
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
+'``llvm.tan.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.tan`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.tan.f32(float  %Val)
+      declare double    @llvm.tan.f64(double %Val)
+      declare x86_fp80  @llvm.tan.f80(x86_fp80  %Val)
+      declare fp128     @llvm.tan.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.tan.*``' intrinsics return the tangent of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating-point numbers of the same type.
+
+Semantics:
+""""""""""
+
+Return the same value as a corresponding libm '``tan``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
 '``llvm.pow.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 28116e5316c96b..29143123193b9e 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1025,6 +1025,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>;
   def int_sin  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_cos  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_tan  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_pow  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
                            [LLVMMatchType<0>, LLVMMatchType<0>]>;
   def int_log  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;

From b438a817bd863699715116ee7d85b454f3289c08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jhonatan=20Cl=C3=A9to?=
 <52751492+cl3to@users.noreply.github.com>
Date: Wed, 8 May 2024 00:21:32 -0300
Subject: [PATCH 50/64] [Offload] Fix dataDelete op for TARGET_ALLOC_HOST
 memory type (#91134)

Summary:
The `GenericDeviceTy::dataDelete` method doesn't verify the
`TargetAllocTy` of the device pointer. Because of this, it may
use the `MemoryManager` to free the ptr. However, the
`TARGET_ALLOC_HOST` and `TARGET_ALLOC_SHARED` types are not allocated
using the `MemoryManager` in the `GenericDeviceTy::dataAlloc` method.
Since the `MemoryManager` uses the `DeviceAllocatorTy::free` operation
without specifying the type of the ptr, some plugins may use incorrect
operations to free ptrs of certain types. In particular, this bug causes
the CUDA plugin to use the `cuMemFree` operation on ptrs of type
`TARGET_ALLOC_HOST`, resulting in an unchecked error, as shown in the
output snippet of the test
`offload/test/api/omp_host_pinned_memory_alloc.c`:

```
omptarget --> Notifying about an unmapping: HstPtr=0x00007c6114200000
omptarget --> Call to llvm_omp_target_free_host for device 0 and address 0x00007c6114200000
omptarget --> Call to omp_get_num_devices returning 1
omptarget --> Call to omp_get_initial_device returning 1
PluginInterface --> MemoryManagerTy::free: target memory 0x00007c6114200000.
PluginInterface --> Cannot find its node. Delete it on device directly.
TARGET CUDA RTL --> Failure to free memory: Error in cuMemFree[Host]: invalid argument
omptarget --> omp_target_free deallocated device ptr
```

This patch fixes the issue by checking the device pointer's allocation type
and calling the appropriate free operation for each type.
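
A rough reproducer of the affected path, loosely modeled on
`offload/test/api/omp_host_pinned_memory_alloc.c`; the extern declarations
below are written out as assumptions for the sketch rather than taken from
the patch:

```
#include <omp.h>
#include <stddef.h>

// Assumed LLVM offload extension entry points for host-pinned memory.
extern "C" void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
extern "C" void llvm_omp_target_free_host(void *Ptr, int DeviceNum);

int main() {
  int Dev = omp_get_default_device();
  void *Pinned = llvm_omp_target_alloc_host(1 << 20, Dev);
  // Before this patch, freeing a TARGET_ALLOC_HOST pointer could be routed
  // through the MemoryManager and end up in cuMemFree on the CUDA plugin.
  llvm_omp_target_free_host(Pinned, Dev);
  return 0;
}
```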
---
 .../common/src/PluginInterface.cpp            | 26 ++++++++++++++-----
 offload/src/omptarget.cpp                     |  4 ++-
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fdb..8de93ba17a560b 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1348,13 +1348,27 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
     return Plugin::success();
 
   int Res;
-  if (MemoryManager)
-    Res = MemoryManager->free(TgtPtr);
-  else
+  switch (Kind) {
+  case TARGET_ALLOC_DEFAULT:
+  case TARGET_ALLOC_DEVICE_NON_BLOCKING:
+  case TARGET_ALLOC_DEVICE:
+    if (MemoryManager) {
+      Res = MemoryManager->free(TgtPtr);
+      if (Res)
+        return Plugin::error(
+            "Failure to deallocate device pointer %p via memory manager",
+            TgtPtr);
+      break;
+    }
+    [[fallthrough]];
+  case TARGET_ALLOC_HOST:
+  case TARGET_ALLOC_SHARED:
     Res = free(TgtPtr, Kind);
-
-  if (Res)
-    return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);
+    if (Res)
+      return Plugin::error(
+          "Failure to deallocate device pointer %p via device deallocator",
+          TgtPtr);
+  }
 
   // Unregister deallocated pinned memory buffer if the type is host memory.
   if (Kind == TARGET_ALLOC_HOST)
diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp
index 803e941fe83822..5d5c6b05051bd5 100644
--- a/offload/src/omptarget.cpp
+++ b/offload/src/omptarget.cpp
@@ -461,7 +461,9 @@ void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
   if (!DeviceOrErr)
     FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
 
-  DeviceOrErr->deleteData(DevicePtr, Kind);
+  if (DeviceOrErr->deleteData(DevicePtr, Kind) == OFFLOAD_FAIL)
+    FATAL_MESSAGE(DeviceNum, "%s", "Failed to deallocate device ptr");
+
   DP("omp_target_free deallocated device ptr\n");
 }
 

From f4d2f7a3b7984795d61ff45daf37c76bf3fc8604 Mon Sep 17 00:00:00 2001
From: Liao Chunyu 
Date: Wed, 8 May 2024 11:22:16 +0800
Subject: [PATCH 51/64] [RISCV] Codegen support for XCVbi extension (#89719)

spec:
https://github.com/openhwgroup/cv32e40p/blob/master/docs/source/instruction_set_extensions.rst#immediate-branching-operations

Contributors: @CharKeaney, @jeremybennett, @lewis-revill,
@NandniJamnadas,
@PaoloS02, @simonpcook, @xingmingjie, @realqhc, @PhilippvK, @melonedo
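
For context, a small source-level sketch of the shapes these new patterns
target (illustrative only; the committed xcvbi.ll test exercises the same
IR directly):

```
// With the XCVbi patterns enabled on RV32, comparisons against a 5-bit
// immediate like these can select cv.beqimm/cv.bneimm and the new
// Select_GPR_Using_CC_Imm pseudo.
int branch_eq_imm(int a) {
  return (a == 5) ? 1 : 0;   // candidate for cv.beqimm
}

int select_ne_imm(int a, int x, int y) {
  return (a != 0) ? y : x;   // candidate for a cv.bneimm-based select
}
```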
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  26 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |  23 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |   4 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |  26 ++
 .../RISCV/RISCVRedundantCopyElimination.cpp   |   6 +-
 llvm/test/CodeGen/RISCV/xcvbi.ll              | 248 ++++++++++++++++++
 6 files changed, 315 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/xcvbi.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2818e1911ee556..3536eb4c0ba4b9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17663,6 +17663,7 @@ static bool isSelectPseudo(MachineInstr &MI) {
   default:
     return false;
   case RISCV::Select_GPR_Using_CC_GPR:
+  case RISCV::Select_GPR_Using_CC_Imm:
   case RISCV::Select_FPR16_Using_CC_GPR:
   case RISCV::Select_FPR16INX_Using_CC_GPR:
   case RISCV::Select_FPR32_Using_CC_GPR:
@@ -17846,7 +17847,9 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   // is checked here and handled by a separate function -
   // EmitLoweredCascadedSelect.
   Register LHS = MI.getOperand(1).getReg();
-  Register RHS = MI.getOperand(2).getReg();
+  Register RHS;
+  if (MI.getOperand(2).isReg())
+    RHS = MI.getOperand(2).getReg();
   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
 
   SmallVector<MachineInstr *, 4> SelectDebugValues;
@@ -17855,8 +17858,9 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
 
   MachineInstr *LastSelectPseudo = &MI;
   auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
-  if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
-      Next->getOpcode() == MI.getOpcode() &&
+  if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
+       MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
+      Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
       Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
       Next->getOperand(5).isKill()) {
     return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
@@ -17868,6 +17872,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
       continue;
     if (isSelectPseudo(*SequenceMBBI)) {
       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
+          !SequenceMBBI->getOperand(2).isReg() ||
           SequenceMBBI->getOperand(2).getReg() != RHS ||
           SequenceMBBI->getOperand(3).getImm() != CC ||
           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
@@ -17917,10 +17922,16 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   HeadMBB->addSuccessor(TailMBB);
 
   // Insert appropriate branch.
-  BuildMI(HeadMBB, DL, TII.getBrCond(CC))
-    .addReg(LHS)
-    .addReg(RHS)
-    .addMBB(TailMBB);
+  if (MI.getOperand(2).isImm())
+    BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
+        .addReg(LHS)
+        .addImm(MI.getOperand(2).getImm())
+        .addMBB(TailMBB);
+  else
+    BuildMI(HeadMBB, DL, TII.getBrCond(CC))
+        .addReg(LHS)
+        .addReg(RHS)
+        .addMBB(TailMBB);
 
   // IfFalseMBB just falls through to TailMBB.
   IfFalseMBB->addSuccessor(TailMBB);
@@ -18166,6 +18177,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
            "ReadCounterWide is only to be used on riscv32");
     return emitReadCounterWidePseudo(MI, BB);
   case RISCV::Select_GPR_Using_CC_GPR:
+  case RISCV::Select_GPR_Using_CC_Imm:
   case RISCV::Select_FPR16_Using_CC_GPR:
   case RISCV::Select_FPR16INX_Using_CC_GPR:
   case RISCV::Select_FPR32_Using_CC_GPR:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8cb9a40a98bcd8..444b9076005c22 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -833,6 +833,10 @@ static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
   switch (Opc) {
   default:
     return RISCVCC::COND_INVALID;
+  case RISCV::CV_BEQIMM:
+    return RISCVCC::COND_EQ;
+  case RISCV::CV_BNEIMM:
+    return RISCVCC::COND_NE;
   case RISCV::BEQ:
     return RISCVCC::COND_EQ;
   case RISCV::BNE:
@@ -863,14 +867,14 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
   Cond.push_back(LastInst.getOperand(1));
 }
 
-unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
+unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
   switch (CC) {
   default:
     llvm_unreachable("Unknown condition code!");
   case RISCVCC::COND_EQ:
-    return RISCV::BEQ;
+    return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
   case RISCVCC::COND_NE:
-    return RISCV::BNE;
+    return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
   case RISCVCC::COND_LT:
     return RISCV::BLT;
   case RISCVCC::COND_GE:
@@ -882,8 +886,9 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
   }
 }
 
-const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
-  return get(RISCVCC::getBrCond(CC));
+const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
+                                             bool Imm) const {
+  return get(RISCVCC::getBrCond(CC, Imm));
 }
 
 RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
@@ -1032,8 +1037,10 @@ unsigned RISCVInstrInfo::insertBranch(
 
   // Either a one or two-way conditional branch.
   auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
-  MachineInstr &CondMI =
-      *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
+  MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
+                              .add(Cond[1])
+                              .add(Cond[2])
+                              .addMBB(TBB);
   if (BytesAdded)
     *BytesAdded += getInstSizeInBytes(CondMI);
 
@@ -1257,6 +1264,8 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
   case RISCV::BGE:
   case RISCV::BLTU:
   case RISCV::BGEU:
+  case RISCV::CV_BEQIMM:
+  case RISCV::CV_BNEIMM:
     return isIntN(13, BrOffset);
   case RISCV::JAL:
   case RISCV::PseudoBR:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 170f813eb10d7d..e069717aaef232 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -45,7 +45,7 @@ enum CondCode {
 };
 
 CondCode getOppositeBranchCondition(CondCode);
-unsigned getBrCond(CondCode CC);
+unsigned getBrCond(CondCode CC, bool Imm = false);
 
 } // end of namespace RISCVCC
 
@@ -65,7 +65,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   explicit RISCVInstrInfo(RISCVSubtarget &STI);
 
   MCInst getNop() const override;
-  const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
+  const MCInstrDesc &getBrCond(RISCVCC::CondCode CC, bool Imm = false) const;
 
   Register isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index 924e91e15c348f..6dae8ca8f7a84e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -704,3 +704,29 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
             (CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
   def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
 }
+
+//===----------------------------------------------------------------------===//
+// Patterns for immediate branching operations 
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in {
+  def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETEQ, bb:$imm12),
+            (CV_BEQIMM GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12)>;
+  def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETNE, bb:$imm12),
+            (CV_BNEIMM GPR:$rs1, simm5:$imm5, simm13_lsb0:$imm12)>;
+
+  let usesCustomInserter = 1 in
+  def Select_GPR_Using_CC_Imm : Pseudo<(outs GPR:$dst),
+                             (ins GPR:$lhs, simm5:$imm5, ixlenimm:$cc,
+                              GPR:$truev, GPR:$falsev), []>;
+
+
+  class Selectbi<CondCode Cond>
+      : Pat<(riscv_selectcc_frag:$cc (i32 GPR:$lhs), simm5:$Constant, Cond,
+                                     (i32 GPR:$truev), GPR:$falsev),
+            (Select_GPR_Using_CC_Imm GPR:$lhs, simm5:$Constant,
+             (IntCCtoRISCVCC $cc), GPR:$truev, GPR:$falsev)>;
+
+  def : Selectbi<SETEQ>;
+  def : Selectbi<SETNE>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
index 61d605fda3f53a..65ff67b424796c 100644
--- a/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
@@ -77,9 +77,11 @@ guaranteesZeroRegInBlock(MachineBasicBlock &MBB,
   assert(Cond.size() == 3 && "Unexpected number of operands");
   assert(TBB != nullptr && "Expected branch target basic block");
   auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
-  if (CC == RISCVCC::COND_EQ && Cond[2].getReg() == RISCV::X0 && TBB == &MBB)
+  if (CC == RISCVCC::COND_EQ && Cond[2].isReg() &&
+      Cond[2].getReg() == RISCV::X0 && TBB == &MBB)
     return true;
-  if (CC == RISCVCC::COND_NE && Cond[2].getReg() == RISCV::X0 && TBB != &MBB)
+  if (CC == RISCVCC::COND_NE && Cond[2].isReg() &&
+      Cond[2].getReg() == RISCV::X0 && TBB != &MBB)
     return true;
   return false;
 }
diff --git a/llvm/test/CodeGen/RISCV/xcvbi.ll b/llvm/test/CodeGen/RISCV/xcvbi.ll
new file mode 100644
index 00000000000000..afd30faa56f90b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xcvbi.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=riscv32 -mattr=+xcvbi -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK_NOPT
+; RUN: llc -O3 -mtriple=riscv32 -mattr=+xcvbi -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK_OPT
+
+define i32 @beqimm(i32 %a) {
+; CHECK_NOPT-LABEL: beqimm:
+; CHECK_NOPT:       # %bb.0:
+; CHECK_NOPT-NEXT:    cv.beqimm a0, 5, .LBB0_2
+; CHECK_NOPT-NEXT:    j .LBB0_1
+; CHECK_NOPT-NEXT:  .LBB0_1: # %f
+; CHECK_NOPT-NEXT:    li a0, 0
+; CHECK_NOPT-NEXT:    ret
+; CHECK_NOPT-NEXT:  .LBB0_2: # %t
+; CHECK_NOPT-NEXT:    li a0, 1
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: beqimm:
+; CHECK_OPT:       # %bb.0:
+; CHECK_OPT-NEXT:    cv.bneimm a0, 5, .LBB0_2
+; CHECK_OPT-NEXT:  # %bb.1: # %t
+; CHECK_OPT-NEXT:    li a0, 1
+; CHECK_OPT-NEXT:    ret
+; CHECK_OPT-NEXT:  .LBB0_2: # %f
+; CHECK_OPT-NEXT:    li a0, 0
+; CHECK_OPT-NEXT:    ret
+  %1 = icmp eq i32 %a, 5
+  br i1 %1, label %t, label %f
+f:
+  ret i32 0
+t:
+  ret i32 1
+}
+
+define i32 @bneimm(i32 %a) {
+; CHECK_NOPT-LABEL: bneimm:
+; CHECK_NOPT:       # %bb.0:
+; CHECK_NOPT-NEXT:    cv.bneimm a0, 5, .LBB1_2
+; CHECK_NOPT-NEXT:    j .LBB1_1
+; CHECK_NOPT-NEXT:  .LBB1_1: # %f
+; CHECK_NOPT-NEXT:    li a0, 0
+; CHECK_NOPT-NEXT:    ret
+; CHECK_NOPT-NEXT:  .LBB1_2: # %t
+; CHECK_NOPT-NEXT:    li a0, 1
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: bneimm:
+; CHECK_OPT:       # %bb.0:
+; CHECK_OPT-NEXT:    cv.beqimm a0, 5, .LBB1_2
+; CHECK_OPT-NEXT:  # %bb.1: # %t
+; CHECK_OPT-NEXT:    li a0, 1
+; CHECK_OPT-NEXT:    ret
+; CHECK_OPT-NEXT:  .LBB1_2: # %f
+; CHECK_OPT-NEXT:    li a0, 0
+; CHECK_OPT-NEXT:    ret
+  %1 = icmp ne i32 %a, 5
+  br i1 %1, label %t, label %f
+f:
+  ret i32 0
+t:
+  ret i32 1
+}
+
+define i32 @select_beqimm_1(i32 %a, i32 %x, i32 %y) {
+; CHECK_NOPT-LABEL: select_beqimm_1:
+; CHECK_NOPT:       # %bb.0: # %entry
+; CHECK_NOPT-NEXT:    addi sp, sp, -16
+; CHECK_NOPT-NEXT:    .cfi_def_cfa_offset 16
+; CHECK_NOPT-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    cv.beqimm a0, -16, .LBB2_2
+; CHECK_NOPT-NEXT:  # %bb.1: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:  .LBB2_2: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    addi sp, sp, 16
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: select_beqimm_1:
+; CHECK_OPT:       # %bb.0: # %entry
+; CHECK_OPT-NEXT:    cv.beqimm a0, -16, .LBB2_2
+; CHECK_OPT-NEXT:  # %bb.1: # %entry
+; CHECK_OPT-NEXT:    mv a2, a1
+; CHECK_OPT-NEXT:  .LBB2_2: # %entry
+; CHECK_OPT-NEXT:    mv a0, a2
+; CHECK_OPT-NEXT:    ret
+entry:
+  %cmp.not = icmp eq i32 %a, -16
+  %cond = select i1 %cmp.not, i32 %y, i32 %x
+  ret i32 %cond
+}
+
+define i32 @select_beqimm_2(i32 %a, i32 %x, i32 %y) {
+; CHECK_NOPT-LABEL: select_beqimm_2:
+; CHECK_NOPT:       # %bb.0: # %entry
+; CHECK_NOPT-NEXT:    addi sp, sp, -16
+; CHECK_NOPT-NEXT:    .cfi_def_cfa_offset 16
+; CHECK_NOPT-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    cv.beqimm a0, 0, .LBB3_2
+; CHECK_NOPT-NEXT:  # %bb.1: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:  .LBB3_2: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    addi sp, sp, 16
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: select_beqimm_2:
+; CHECK_OPT:       # %bb.0: # %entry
+; CHECK_OPT-NEXT:    cv.beqimm a0, 0, .LBB3_2
+; CHECK_OPT-NEXT:  # %bb.1: # %entry
+; CHECK_OPT-NEXT:    mv a2, a1
+; CHECK_OPT-NEXT:  .LBB3_2: # %entry
+; CHECK_OPT-NEXT:    mv a0, a2
+; CHECK_OPT-NEXT:    ret
+entry:
+  %cmp.not = icmp eq i32 %a, 0
+  %cond = select i1 %cmp.not, i32 %y, i32 %x
+  ret i32 %cond
+}
+
+define i32 @select_beqimm_3(i32 %a, i32 %x, i32 %y) {
+; CHECK_NOPT-LABEL: select_beqimm_3:
+; CHECK_NOPT:       # %bb.0: # %entry
+; CHECK_NOPT-NEXT:    addi sp, sp, -16
+; CHECK_NOPT-NEXT:    .cfi_def_cfa_offset 16
+; CHECK_NOPT-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    cv.beqimm a0, 15, .LBB4_2
+; CHECK_NOPT-NEXT:  # %bb.1: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:  .LBB4_2: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    addi sp, sp, 16
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: select_beqimm_3:
+; CHECK_OPT:       # %bb.0: # %entry
+; CHECK_OPT-NEXT:    cv.beqimm a0, 15, .LBB4_2
+; CHECK_OPT-NEXT:  # %bb.1: # %entry
+; CHECK_OPT-NEXT:    mv a2, a1
+; CHECK_OPT-NEXT:  .LBB4_2: # %entry
+; CHECK_OPT-NEXT:    mv a0, a2
+; CHECK_OPT-NEXT:    ret
+entry:
+  %cmp.not = icmp eq i32 %a, 15
+  %cond = select i1 %cmp.not, i32 %y, i32 %x
+  ret i32 %cond
+}
+
+define i32 @select_no_beqimm_1(i32 %a, i32 %x, i32 %y) {
+; CHECK_NOPT-LABEL: select_no_beqimm_1:
+; CHECK_NOPT:       # %bb.0: # %entry
+; CHECK_NOPT-NEXT:    addi sp, sp, -16
+; CHECK_NOPT-NEXT:    .cfi_def_cfa_offset 16
+; CHECK_NOPT-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    li a1, -17
+; CHECK_NOPT-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    beq a0, a1, .LBB5_2
+; CHECK_NOPT-NEXT:  # %bb.1: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:  .LBB5_2: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    addi sp, sp, 16
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: select_no_beqimm_1:
+; CHECK_OPT:       # %bb.0: # %entry
+; CHECK_OPT-NEXT:    li a3, -17
+; CHECK_OPT-NEXT:    beq a0, a3, .LBB5_2
+; CHECK_OPT-NEXT:  # %bb.1: # %entry
+; CHECK_OPT-NEXT:    mv a2, a1
+; CHECK_OPT-NEXT:  .LBB5_2: # %entry
+; CHECK_OPT-NEXT:    mv a0, a2
+; CHECK_OPT-NEXT:    ret
+entry:
+  %cmp.not = icmp eq i32 %a, -17
+  %cond = select i1 %cmp.not, i32 %y, i32 %x
+  ret i32 %cond
+}
+
+define i32 @select_no_beqimm_2(i32 %a, i32 %x, i32 %y) {
+; CHECK_NOPT-LABEL: select_no_beqimm_2:
+; CHECK_NOPT:       # %bb.0: # %entry
+; CHECK_NOPT-NEXT:    addi sp, sp, -16
+; CHECK_NOPT-NEXT:    .cfi_def_cfa_offset 16
+; CHECK_NOPT-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    li a1, 16
+; CHECK_NOPT-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    beq a0, a1, .LBB6_2
+; CHECK_NOPT-NEXT:  # %bb.1: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:  .LBB6_2: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    addi sp, sp, 16
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: select_no_beqimm_2:
+; CHECK_OPT:       # %bb.0: # %entry
+; CHECK_OPT-NEXT:    li a3, 16
+; CHECK_OPT-NEXT:    beq a0, a3, .LBB6_2
+; CHECK_OPT-NEXT:  # %bb.1: # %entry
+; CHECK_OPT-NEXT:    mv a2, a1
+; CHECK_OPT-NEXT:  .LBB6_2: # %entry
+; CHECK_OPT-NEXT:    mv a0, a2
+; CHECK_OPT-NEXT:    ret
+entry:
+  %cmp.not = icmp eq i32 %a, 16
+  %cond = select i1 %cmp.not, i32 %y, i32 %x
+  ret i32 %cond
+}
+
+define i32 @select_bneimm_1(i32 %a, i32 %x, i32 %y) {
+; CHECK_NOPT-LABEL: select_bneimm_1:
+; CHECK_NOPT:       # %bb.0: # %entry
+; CHECK_NOPT-NEXT:    addi sp, sp, -16
+; CHECK_NOPT-NEXT:    .cfi_def_cfa_offset 16
+; CHECK_NOPT-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:    cv.bneimm a0, 0, .LBB7_2
+; CHECK_NOPT-NEXT:  # %bb.1: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; CHECK_NOPT-NEXT:  .LBB7_2: # %entry
+; CHECK_NOPT-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK_NOPT-NEXT:    addi sp, sp, 16
+; CHECK_NOPT-NEXT:    ret
+;
+; CHECK_OPT-LABEL: select_bneimm_1:
+; CHECK_OPT:       # %bb.0: # %entry
+; CHECK_OPT-NEXT:    cv.bneimm a0, 0, .LBB7_2
+; CHECK_OPT-NEXT:  # %bb.1: # %entry
+; CHECK_OPT-NEXT:    mv a2, a1
+; CHECK_OPT-NEXT:  .LBB7_2: # %entry
+; CHECK_OPT-NEXT:    mv a0, a2
+; CHECK_OPT-NEXT:    ret
+entry:
+  %cmp.not = icmp ne i32 %a, 0
+  %cond = select i1 %cmp.not, i32 %y, i32 %x
+  ret i32 %cond
+}
+

From 48b6f4a18255816df51fcab7648c5a7f205dfe14 Mon Sep 17 00:00:00 2001
From: Luke Lau 
Date: Wed, 8 May 2024 11:33:05 +0800
Subject: [PATCH 52/64] [RISCV] Rewrite spill-fpr-scalar.ll test to not use
 vsetvli. NFC (#91428)

The test was relying on the fact that vsetvlis have side effects to prevent
reordering, but #91319 proposes to remove those side effects. This reworks
the test to use volatile loads and stores instead.
---
 llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll | 73 ++++++++-------------
 1 file changed, 28 insertions(+), 45 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
index 48fb21dc5a8a09..6b9b88d90de613 100644
--- a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
+++ b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
@@ -1,75 +1,58 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh -target-abi=lp64 \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
 
-declare half @llvm.riscv.vfmv.f.s.nxv1f16(<vscale x 1 x half>)
-declare float @llvm.riscv.vfmv.f.s.nxv1f32(<vscale x 1 x float>)
-declare double @llvm.riscv.vfmv.f.s.nxv1f64(<vscale x 1 x double>)
-
-declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(<vscale x 1 x half>, half, i64);
-declare <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(<vscale x 1 x float>, float, i64);
-declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(<vscale x 1 x double>, double, i64);
-
-define <vscale x 1 x half> @intrinsic_vfmv.f.s_s_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1f16:
-; CHECK:       # %bb.0: # %entry
+define void @spill_half(ptr) nounwind {
+; CHECK-LABEL: spill_half:
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    flh fa5, 0(a0)
 ; CHECK-NEXT:    fsh fa5, 14(sp) # 2-byte Folded Spill
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    flh fa5, 14(sp) # 2-byte Folded Reload
-; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    fsh fa5, 0(a0)
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
-entry:
-  %a = call half @llvm.riscv.vfmv.f.s.nxv1f16(<vscale x 1 x half> %0)
-  tail call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
-  %b = call <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(<vscale x 1 x half> undef, half %a, i64 %1)
-  ret <vscale x 1 x half> %b
+  %2 = load volatile half, ptr %0
+  call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
+  store volatile half %2, ptr %0
+  ret void
 }
 
-define <vscale x 1 x float> @intrinsic_vfmv.f.s_s_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1f32:
-; CHECK:       # %bb.0: # %entry
+define void @spill_float(ptr) nounwind {
+; CHECK-LABEL: spill_float:
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    flw fa5, 0(a0)
 ; CHECK-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    fsw fa5, 0(a0)
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
-entry:
-  %a = call float @llvm.riscv.vfmv.f.s.nxv1f32(<vscale x 1 x float> %0)
-  tail call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
-  %b = call <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(<vscale x 1 x float> undef, float %a, i64 %1)
-  ret <vscale x 1 x float> %b
+  %2 = load volatile float, ptr %0
+  call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
+  store volatile float %2, ptr %0
+  ret void
 }
 
-define <vscale x 1 x double> @intrinsic_vfmv.f.s_s_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1f64:
-; CHECK:       # %bb.0: # %entry
+define void @spill_double(ptr) nounwind {
+; CHECK-LABEL: spill_double:
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fld fa5, 0(a0)
 ; CHECK-NEXT:    fsd fa5, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    fld fa5, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    fsd fa5, 0(a0)
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
-entry:
-  %a = call double @llvm.riscv.vfmv.f.s.nxv1f64(<vscale x 1 x double> %0)
-  tail call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
-  %b = call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(<vscale x 1 x double> undef, double %a, i64 %1)
-  ret <vscale x 1 x double> %b
+  %2 = load volatile double, ptr %0
+  call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
+  store volatile double %2, ptr %0
+  ret void
 }

From 812c3025ec033ad1f306aff7f8b6e6695a79ee35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= 
Date: Wed, 8 May 2024 05:35:23 +0200
Subject: [PATCH 53/64] [GlobalIsel][AArch64] legalize ptr add (#89218)

The legalizer previously failed on wider pointer vectors such as <4 x p0>:

LLVM ERROR: unable to legalize instruction: %275:_(<4 x p0>) = G_PTR_ADD
%268:_, %274:_(<4 x s64>) (in function: prepare_for_pass)
---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  4 +-
 .../AArch64/GlobalISel/legalize-ptr-add.mir   | 59 ++++++++++++++++++-
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 24389124966813..d4aac94d24f12a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -177,7 +177,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_PTR_ADD)
       .legalFor({{p0, s64}, {v2p0, v2s64}})
-      .clampScalar(1, s64, s64);
+      .clampScalar(1, s64, s64)
+      .clampNumElements(0, v2p0, v2p0)
+      .clampNumElements(1, v2s64, v2s64);
 
   getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
index 1ecd36b55380a6..1d3f7eab79d69d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir
@@ -6,12 +6,65 @@ body:             |
   bb.0.entry:
     ; CHECK-LABEL: name: test_ptr_add_vec_p0
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(<2 x p0>) = G_PTR_ADD [[COPY]], [[COPY1]](<2 x s64>)
-    ; CHECK: $q0 = COPY [[PTR_ADD]](<2 x p0>)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p0>) = G_PTR_ADD [[COPY]], [[COPY1]](<2 x s64>)
+    ; CHECK-NEXT: $q0 = COPY [[PTR_ADD]](<2 x p0>)
     %0:_(<2 x p0>) = COPY $q0
     %1:_(<2 x s64>) = COPY $q1
     %3:_(<2 x p0>) = G_PTR_ADD %0, %1(<2 x s64>)
     $q0 = COPY %3(<2 x p0>)
 
 ...
+---
+name:            test_ptr_add_vec_4xp0
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_ptr_add_vec_4xp0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY2]](p0), [[COPY3]](p0)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64)
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p0>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s64>)
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(<2 x p0>) = G_PTR_ADD [[BUILD_VECTOR1]], [[BUILD_VECTOR3]](<2 x s64>)
+    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: %extract0:_(p0) = G_EXTRACT_VECTOR_ELT [[PTR_ADD]](<2 x p0>), %zero(s64)
+    ; CHECK-NEXT: %extract1:_(p0) = G_EXTRACT_VECTOR_ELT [[PTR_ADD]](<2 x p0>), %one(s64)
+    ; CHECK-NEXT: %extract2:_(p0) = G_EXTRACT_VECTOR_ELT [[PTR_ADD1]](<2 x p0>), %zero(s64)
+    ; CHECK-NEXT: %extract3:_(p0) = G_EXTRACT_VECTOR_ELT [[PTR_ADD1]](<2 x p0>), %one(s64)
+    ; CHECK-NEXT: $x0 = COPY %extract0(p0)
+    ; CHECK-NEXT: $x1 = COPY %extract1(p0)
+    ; CHECK-NEXT: $x2 = COPY %extract2(p0)
+    ; CHECK-NEXT: $x3 = COPY %extract3(p0)
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(p0) = COPY $x2
+    %3:_(p0) = COPY $x3
+    %4:_(s64) = COPY $x4
+    %5:_(s64) = COPY $x5
+    %6:_(s64) = COPY $x6
+    %7:_(s64) = COPY $x7
+    %ptr:_(<4 x p0>) = G_BUILD_VECTOR %0(p0), %1(p0), %2(p0), %3(p0)
+    %add:_(<4 x s64>) = G_BUILD_VECTOR %4(s64), %5(s64), %6(s64), %7(s64)
+    %res:_(<4 x p0>) = G_PTR_ADD %ptr, %add(<4 x s64>)
+    %zero:_(s64) = G_CONSTANT i64 0
+    %one:_(s64) = G_CONSTANT i64 1
+    %two:_(s64) = G_CONSTANT i64 2
+    %three:_(s64) = G_CONSTANT i64 3
+    %extract0:_(p0) = G_EXTRACT_VECTOR_ELT %res(<4 x p0>), %zero(s64)
+    %extract1:_(p0) = G_EXTRACT_VECTOR_ELT %res(<4 x p0>), %one(s64)
+    %extract2:_(p0) = G_EXTRACT_VECTOR_ELT %res(<4 x p0>), %two(s64)
+    %extract3:_(p0) = G_EXTRACT_VECTOR_ELT %res(<4 x p0>), %three(s64)
+    $x0 = COPY %extract0(p0)
+    $x1 = COPY %extract1(p0)
+    $x2 = COPY %extract2(p0)
+    $x3 = COPY %extract3(p0)
+...

From 2c209957819328481554e7c5929d134502b4972a Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Tue, 7 May 2024 20:56:55 -0700
Subject: [PATCH 54/64] [RISCV] Detect duplicate extensions in
 parseNormalizedArchString. (#91416)

This detects the same extension name being added twice. The main concern
is the case where the same extension appears with two different versions,
since we would only preserve one of them.

We could allow the same version to be repeated, but that doesn't seem
useful at the moment.

I've updated addExtension to use map::emplace instead of
map::operator[]. This means we only keep the first version if there are
duplicates. Previously we kept the last version, but that shouldn't matter
now that we don't allow duplicates. parseArchString already doesn't allow
duplicates.
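
A minimal caller-side sketch of the new failure mode (it mirrors the added
unit test; the surrounding error handling is illustrative):

```
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/RISCVISAInfo.h"

void reportDuplicate() {
  auto Res =
      llvm::RISCVISAInfo::parseNormalizedArchString("rv64i2p0_a2p0_a1p0");
  if (!Res) {
    // Now reports: duplicate extension 'a'
    llvm::errs() << llvm::toString(Res.takeError()) << "\n";
  }
}
```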
---
 llvm/include/llvm/TargetParser/RISCVISAInfo.h    | 2 +-
 llvm/lib/TargetParser/RISCVISAInfo.cpp           | 8 +++++---
 llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 8 ++++++++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/TargetParser/RISCVISAInfo.h b/llvm/include/llvm/TargetParser/RISCVISAInfo.h
index 36617a9b625972..12f6b46fb3cee3 100644
--- a/llvm/include/llvm/TargetParser/RISCVISAInfo.h
+++ b/llvm/include/llvm/TargetParser/RISCVISAInfo.h
@@ -87,7 +87,7 @@ class RISCVISAInfo {
 
   RISCVISAUtils::OrderedExtensionMap Exts;
 
-  void addExtension(StringRef ExtName, RISCVISAUtils::ExtensionVersion Version);
+  bool addExtension(StringRef ExtName, RISCVISAUtils::ExtensionVersion Version);
 
   Error checkDependency();
 
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 9c2ac8c3893f16..96590745b2ebcf 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -159,9 +159,9 @@ findDefaultVersion(StringRef ExtName) {
   return std::nullopt;
 }
 
-void RISCVISAInfo::addExtension(StringRef ExtName,
+bool RISCVISAInfo::addExtension(StringRef ExtName,
                                 RISCVISAUtils::ExtensionVersion Version) {
-  Exts[ExtName.str()] = Version;
+  return Exts.emplace(ExtName, Version).second;
 }
 
 static StringRef getExtensionTypeDesc(StringRef Ext) {
@@ -492,7 +492,9 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) {
                                "'" + Twine(ExtName[0]) +
                                    "' must be followed by a letter");
 
-    ISAInfo->addExtension(ExtName, {MajorVersion, MinorVersion});
+    if (!ISAInfo->addExtension(ExtName, {MajorVersion, MinorVersion}))
+      return createStringError(errc::invalid_argument,
+                               "duplicate extension '" + ExtName + "'");
   }
   ISAInfo->updateImpliedLengths();
   return std::move(ISAInfo);
diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
index 83b52d0527c3ad..0e807cfb8e3b85 100644
--- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
@@ -78,6 +78,14 @@ TEST(ParseNormalizedArchString, RejectsBadX) {
   }
 }
 
+TEST(ParseNormalizedArchString, DuplicateExtension) {
+  for (StringRef Input : {"rv64i2p0_a2p0_a1p0"}) {
+    EXPECT_EQ(
+        toString(RISCVISAInfo::parseNormalizedArchString(Input).takeError()),
+        "duplicate extension 'a'");
+  }
+}
+
 TEST(ParseNormalizedArchString, AcceptsValidBaseISAsAndSetsXLen) {
   auto MaybeRV32I = RISCVISAInfo::parseNormalizedArchString("rv32i2p0");
   ASSERT_THAT_EXPECTED(MaybeRV32I, Succeeded());

From 85ef6b7c364f3b57c13c179bf278fe47366287a2 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com>
Date: Wed, 8 May 2024 00:10:08 -0400
Subject: [PATCH 55/64] [DXIL] Add tan intrinsic part 2 (#90277)

This change implements part of #87367's investigation into supporting
IEEE math operations as intrinsics, which was discussed in this RFC:
https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

If you want an overarching view of how this will all connect, see:
https://github.com/llvm/llvm-project/pull/90088

Changes:
- `llvm/include/llvm/IR/Intrinsics.td` - Create the tan intrinsic.
- `llvm/lib/Target/DirectX/DXIL.td` - Map `int_tan` (the tan intrinsic)
to the equivalent DXIL Op.
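
For illustration only, a hedged sketch (not part of this patch) of how a
frontend might emit a call to the llvm.tan intrinsic that this change
lowers; the helper name emitTan is hypothetical:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  static Value *emitTan(IRBuilder<> &B, Value *X) {
    Module *M = B.GetInsertBlock()->getModule();
    // Declares the llvm.tan overload matching X's type on demand.
    Function *Tan = Intrinsic::getDeclaration(M, Intrinsic::tan, X->getType());
    // Later lowered by -dxil-op-lower into @dx.op.unary.*(i32 14, ...).
    return B.CreateCall(Tan, X);
  }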
---
 llvm/lib/Target/DirectX/DXIL.td        |  3 +++
 llvm/test/CodeGen/DirectX/tan.ll       | 20 ++++++++++++++++++++
 llvm/test/CodeGen/DirectX/tan_error.ll | 10 ++++++++++
 3 files changed, 33 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/tan.ll
 create mode 100644 llvm/test/CodeGen/DirectX/tan_error.ll

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index cd388ed3e3191b..24a0c8524230c5 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -266,6 +266,9 @@ def Cos  : DXILOpMapping<12, unary, int_cos,
 def Sin  : DXILOpMapping<13, unary, int_sin,
                          "Returns sine(theta) for theta in radians.",
                          [llvm_halforfloat_ty, LLVMMatchType<0>]>;
+def Tan  : DXILOpMapping<14, unary, int_tan,
+                         "Returns tangent(theta) for theta in radians.",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Exp2 : DXILOpMapping<21, unary, int_exp2,
                          "Returns the base 2 exponential, or 2**x, of the specified value."
                          "exp2(x) = 2**x.",
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
new file mode 100644
index 00000000000000..567ab02d40f918
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/tan.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for tan are generated for float and half.
+
+define noundef float @tan_float(float noundef %a) #0 {
+entry:
+; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}})
+  %elt.tan = call float @llvm.tan.f32(float %a)
+  ret float %elt.tan
+}
+
+define noundef half @tan_half(half noundef %a) #0 {
+entry:
+; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}})
+  %elt.tan = call half @llvm.tan.f16(half %a)
+  ret half %elt.tan
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/tan_error.ll b/llvm/test/CodeGen/DirectX/tan_error.ll
new file mode 100644
index 00000000000000..c870c36f54925d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/tan_error.ll
@@ -0,0 +1,10 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; DXIL operation tan does not support double overload type
+; CHECK: LLVM ERROR: Invalid Overload
+
+define noundef double @tan_double(double noundef %a) #0 {
+entry:
+  %1 = call double @llvm.tan.f64(double %a)
+  ret double %1
+}

From ef84452571b8e8f4a38a173e6adf6a5ecbbde97e Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Tue, 7 May 2024 21:17:50 -0700
Subject: [PATCH 56/64] [DAGCombiner] Be more careful about looking through
 extends and truncates in mergeTruncStores. (#91375)

Previously we recursively looked through extends and truncates on both
SourceValue and WideVal.

SourceValue is the largest source found so far among the stores we are
combining. WideVal is the source for the current store.

Previously we could look through a (zext (trunc X)) pair and incorrectly
believe X to be a good source.

I think we could also look through a zext on one store and a sext on
another store and arbitrarily pick one of the extends as the final
source.

With this patch we only look through one level of extend or truncate,
and we don't look through extends/truncates on both SourceValue and
WideVal at the same time.

This may lose some optimization cases, but keeps everything we had tests
for.

Fixes #90936.
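
For illustration only, a tiny standalone mock (not SelectionDAG) of the
new one-level strip, showing why (zext (trunc X)) no longer reaches X:

  #include <cassert>

  enum Opcode { Other, Truncate, ZeroExtend, SignExtend, AnyExtend };

  struct Node {
    Opcode Op;
    const Node *Operand0 = nullptr; // only meaningful for trunc/ext nodes
  };

  static const Node *stripTruncAndExt(const Node *N) {
    switch (N->Op) {
    case Truncate:
    case ZeroExtend:
    case SignExtend:
    case AnyExtend:
      return N->Operand0; // look through exactly one layer
    default:
      return nullptr;     // not a trunc/ext: nothing to strip
    }
  }

  int main() {
    Node X{Other};
    Node Trunc{Truncate, &X};
    Node ZextOfTrunc{ZeroExtend, &Trunc};
    // The old recursive strip reduced ZextOfTrunc all the way to X; the
    // one-level strip only reaches Trunc, so X is no longer (incorrectly)
    // treated as the shared source value.
    assert(stripTruncAndExt(&ZextOfTrunc) == &Trunc);
    assert(stripTruncAndExt(&Trunc) == &X);
    assert(stripTruncAndExt(&X) == nullptr);
  }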
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21 +++++++++++--------
 llvm/test/CodeGen/AArch64/pr90936.ll          |  8 +++++--
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 05ab6e2e48206f..e835bd950a7be4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8728,15 +8728,16 @@ static std::optional isBigEndian(const ArrayRef ByteOffsets,
   return BigEndian;
 }
 
+// Look through one layer of truncate or extend.
 static SDValue stripTruncAndExt(SDValue Value) {
   switch (Value.getOpcode()) {
   case ISD::TRUNCATE:
   case ISD::ZERO_EXTEND:
   case ISD::SIGN_EXTEND:
   case ISD::ANY_EXTEND:
-    return stripTruncAndExt(Value.getOperand(0));
+    return Value.getOperand(0);
   }
-  return Value;
+  return SDValue();
 }
 
 /// Match a pattern where a wide type scalar value is stored by several narrow
@@ -8849,16 +8850,18 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
     }
 
     // Stores must share the same source value with different offsets.
-    // Truncate and extends should be stripped to get the single source value.
     if (!SourceValue)
       SourceValue = WideVal;
-    else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
-      return SDValue();
-    else if (SourceValue.getValueType() != WideVT) {
-      if (WideVal.getValueType() == WideVT ||
-          WideVal.getScalarValueSizeInBits() >
-              SourceValue.getScalarValueSizeInBits())
+    else if (SourceValue != WideVal) {
+      // Truncate and extends can be stripped to see if the values are related.
+      if (stripTruncAndExt(SourceValue) != WideVal &&
+          stripTruncAndExt(WideVal) != SourceValue)
+        return SDValue();
+
+      if (WideVal.getScalarValueSizeInBits() >
+          SourceValue.getScalarValueSizeInBits())
         SourceValue = WideVal;
+
       // Give up if the source value type is smaller than the store size.
       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
         return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/pr90936.ll b/llvm/test/CodeGen/AArch64/pr90936.ll
index cd816cdbf73512..3ed8468b37f4e5 100644
--- a/llvm/test/CodeGen/AArch64/pr90936.ll
+++ b/llvm/test/CodeGen/AArch64/pr90936.ll
@@ -22,8 +22,12 @@ bb:
 define void @g(i32 %arg, ptr %arg1) {
 ; CHECK-LABEL: g:
 ; CHECK:       // %bb.0: // %bb
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    lsr w8, w0, #8
+; CHECK-NEXT:    lsr w9, w0, #16
+; CHECK-NEXT:    strb w0, [x1]
+; CHECK-NEXT:    strb wzr, [x1, #3]
+; CHECK-NEXT:    strb w8, [x1, #1]
+; CHECK-NEXT:    strb w9, [x1, #2]
 ; CHECK-NEXT:    ret
 bb:
   %i = trunc i32 %arg to i8

From 0d93b01c3b1e2e543acec3f36db639b8b7b0b20d Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Tue, 7 May 2024 21:18:28 -0700
Subject: [PATCH 57/64] [RISCV] Don't crash if parseNormalizedArchString
 encounters a multi-letter extension with an unknown prefix. (#91398)

The sorting code previously asserted if an extension name had multiple
letters but didn't start with 's', 'x', or 'z'.

Replace the assert with an explicit check and sort such extensions after
those with the known multi-letter prefixes.
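
A simplified standalone sketch of the reworked ranking; the single-letter
table is a stand-in, and the real code also folds a per-letter rank into
the 'z'/'s' categories:

  #include <cassert>
  #include <string>

  // Category bits sit above the 6 bits reserved for single-letter ranks.
  enum RankFlags {
    RF_Z_EXTENSION = 1 << 6,
    RF_S_EXTENSION = 2 << 6,
    RF_X_EXTENSION = 3 << 6,
    RF_UNKNOWN_MULTILETTER_EXTENSION = 4 << 6,
  };

  // Hypothetical stand-in for the real single-letter ordering table.
  static unsigned singleLetterExtensionRank(char C) { return C - 'a'; }

  static unsigned getExtensionRank(const std::string &ExtName) {
    switch (ExtName[0]) {
    case 'z': return RF_Z_EXTENSION;
    case 's': return RF_S_EXTENSION;
    case 'x': return RF_X_EXTENSION;
    default:
      if (ExtName.size() == 1)
        return singleLetterExtensionRank(ExtName[0]);
      // Previously this asserted; unknown multi-letter names now sort last.
      return RF_UNKNOWN_MULTILETTER_EXTENSION;
    }
  }

  int main() {
    assert(getExtensionRank("ykk") > getExtensionRank("xfoo"));
    assert(getExtensionRank("i") < getExtensionRank("zicsr"));
  }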
---
 llvm/lib/Support/RISCVISAUtils.cpp               | 11 +++++++----
 llvm/unittests/TargetParser/RISCVISAInfoTest.cpp |  8 ++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAUtils.cpp b/llvm/lib/Support/RISCVISAUtils.cpp
index 46efe93695074f..d6b002e66e7ab2 100644
--- a/llvm/lib/Support/RISCVISAUtils.cpp
+++ b/llvm/lib/Support/RISCVISAUtils.cpp
@@ -24,13 +24,15 @@ using namespace llvm;
 // -Multi-letter extensions starting with 's' in alphabetical order.
 // -(TODO) Multi-letter extensions starting with 'zxm' in alphabetical order.
 // -X extensions in alphabetical order.
+// -Unknown multi-letter extensions in alphabetical order.
 // These flags are used to indicate the category. The first 6 bits store the
 // single letter extension rank for single letter and multi-letter extensions
 // starting with 'z'.
 enum RankFlags {
   RF_Z_EXTENSION = 1 << 6,
-  RF_S_EXTENSION = 1 << 7,
-  RF_X_EXTENSION = 1 << 8,
+  RF_S_EXTENSION = 2 << 6,
+  RF_X_EXTENSION = 3 << 6,
+  RF_UNKNOWN_MULTILETTER_EXTENSION = 4 << 6,
 };
 
 // Get the rank for single-letter extension, lower value meaning higher
@@ -68,8 +70,9 @@ static unsigned getExtensionRank(const std::string &ExtName) {
   case 'x':
     return RF_X_EXTENSION;
   default:
-    assert(ExtName.size() == 1);
-    return singleLetterExtensionRank(ExtName[0]);
+    if (ExtName.size() == 1)
+      return singleLetterExtensionRank(ExtName[0]);
+    return RF_UNKNOWN_MULTILETTER_EXTENSION;
   }
 }
 
diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
index 0e807cfb8e3b85..f9e386a85fea8e 100644
--- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
@@ -150,6 +150,14 @@ TEST(ParseNormalizedArchString, UpdatesFLenMinVLenMaxELen) {
   EXPECT_EQ(Info.getMaxELenFp(), 64U);
 }
 
+TEST(ParseNormalizedArchString, AcceptsUnknownMultiletter) {
+  auto MaybeISAInfo = RISCVISAInfo::parseNormalizedArchString(
+      "rv64i2p0_f2p0_d2p0_zicsr2p0_ykk1p0");
+  ASSERT_THAT_EXPECTED(MaybeISAInfo, Succeeded());
+  RISCVISAInfo &Info = **MaybeISAInfo;
+  EXPECT_EQ(Info.toString(), "rv64i2p0_f2p0_d2p0_zicsr2p0_ykk1p0");
+}
+
 TEST(ParseArchString, RejectsInvalidChars) {
   for (StringRef Input : {"RV32", "rV64", "rv32i2P0", "rv64i2p0_A2p0"}) {
     EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),

From 8296f061aafb844bf3b9b002b7791ade7a1d3006 Mon Sep 17 00:00:00 2001
From: Luke Lau 
Date: Wed, 8 May 2024 12:33:01 +0800
Subject: [PATCH 58/64] [RISCV] Add invariants that registers always have
 definitions. NFC (#90587)

For vector merge operands, we check for NoRegister beforehand, so any
other register should have a definition.

VL operands are scalar and never get replaced with NoRegister, so they
should also always have a definition, even if it's an implicit_def.

All definitions at this stage should also be unique; this will change in
#70549.
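
A hedged distillation of the asserted invariant (not a literal excerpt;
the vsetvli-specific checks around it are omitted, and the helper name is
hypothetical):

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include <cassert>
  using namespace llvm;

  static MachineInstr *getRequiredVRegDef(const MachineRegisterInfo &MRI,
                                          Register Reg) {
    if (!Reg.isVirtual()) // NoRegister or a physical register: no vreg def.
      return nullptr;
    // At this point in the pipeline every virtual register has exactly one
    // definition, so getUniqueVRegDef must succeed.
    MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
    assert(Def && "virtual register without a unique definition");
    return Def;
  }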
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 42 +++++++++++---------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index eaebdc2e54bef4..06456f97f5ebc9 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -194,19 +194,22 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
   if (UseMO.getReg().isPhysical())
     return false;
 
-  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
-    if (UseMI->isImplicitDef())
-      return true;
+  MachineInstr *UseMI = MRI.getUniqueVRegDef(UseMO.getReg());
+  assert(UseMI);
+  if (UseMI->isImplicitDef())
+    return true;
 
-    if (UseMI->isRegSequence()) {
-      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
-        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
-        if (!SourceMI || !SourceMI->isImplicitDef())
-          return false;
-      }
-      return true;
+  if (UseMI->isRegSequence()) {
+    for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
+      MachineInstr *SourceMI =
+          MRI.getUniqueVRegDef(UseMI->getOperand(i).getReg());
+      assert(SourceMI);
+      if (!SourceMI->isImplicitDef())
+        return false;
     }
+    return true;
   }
+
   return false;
 }
 
@@ -886,7 +889,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
     if (AVLReg == RISCV::X0)
       NewInfo.setAVLVLMAX();
     else
-      NewInfo.setAVLRegDef(MRI.getVRegDef(AVLReg), AVLReg);
+      NewInfo.setAVLRegDef(MRI.getUniqueVRegDef(AVLReg), AVLReg);
   }
   NewInfo.setVTYPE(MI.getOperand(2).getImm());
 
@@ -958,7 +961,8 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
       else
         InstrInfo.setAVLImm(Imm);
     } else {
-      InstrInfo.setAVLRegDef(MRI->getVRegDef(VLOp.getReg()), VLOp.getReg());
+      InstrInfo.setAVLRegDef(MRI->getUniqueVRegDef(VLOp.getReg()),
+                             VLOp.getReg());
     }
   } else {
     assert(isScalarExtractInstr(MI));
@@ -1231,7 +1235,7 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
 
   if (RISCV::isFaultFirstLoad(MI)) {
     // Update AVL to vl-output of the fault first load.
-    Info.setAVLRegDef(MRI->getVRegDef(MI.getOperand(1).getReg()),
+    Info.setAVLRegDef(MRI->getUniqueVRegDef(MI.getOperand(1).getReg()),
                       MI.getOperand(1).getReg());
     return;
   }
@@ -1338,8 +1342,9 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
     const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
 
     // We need the PHI input to the be the output of a VSET(I)VLI.
-    MachineInstr *DefMI = MRI->getVRegDef(InReg);
-    if (!DefMI || !isVectorConfigInstr(*DefMI))
+    MachineInstr *DefMI = MRI->getUniqueVRegDef(InReg);
+    assert(DefMI);
+    if (!isVectorConfigInstr(*DefMI))
       return true;
 
     // We found a VSET(I)VLI make sure it matches the output of the
@@ -1399,7 +1404,8 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
         MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
         if (VLOp.isReg()) {
           Register Reg = VLOp.getReg();
-          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);
+          MachineInstr *VLOpDef = MRI->getUniqueVRegDef(Reg);
+          assert(VLOpDef);
 
           // Erase the AVL operand from the instruction.
           VLOp.setReg(RISCV::NoRegister);
@@ -1409,8 +1415,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
           // as an ADDI. However, the ADDI might not have been used in the
           // vsetvli, or a vsetvli might not have been emitted, so it may be
           // dead now.
-          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
-              MRI->use_nodbg_empty(Reg))
+          if (TII->isAddImmediate(*VLOpDef, Reg) && MRI->use_nodbg_empty(Reg))
             VLOpDef->eraseFromParent();
         }
         MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
@@ -1682,6 +1687,7 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
     MachineInstr &MI = *I++;
     if (RISCV::isFaultFirstLoad(MI)) {
       Register VLOutput = MI.getOperand(1).getReg();
+      assert(VLOutput.isVirtual());
       if (!MRI->use_nodbg_empty(VLOutput))
         BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                 VLOutput);

From 3e82442ff7288b4c41bb77888bc2cfea2c34d6ee Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com>
Date: Wed, 8 May 2024 00:57:39 -0400
Subject: [PATCH 59/64] [SPIRV] Add tan intrinsic part 3 (#90278)

This change implements part of #87367's investigation into supporting
IEEE math operations as intrinsics, which was discussed in this RFC:
https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

If you want an overarching view of how this will all connect, see:
https://github.com/llvm/llvm-project/pull/90088

Changes:
- `llvm/docs/GlobalISel/GenericOpcode.rst` - Document the `G_FTAN`
opcode
- `llvm/include/llvm/IR/Intrinsics.td` - Create the tan intrinsic.
- `llvm/include/llvm/Support/TargetOpcodes.def` - Create a `G_FTAN`
Opcode handler
- `llvm/include/llvm/Target/GenericOpcodes.td` - Define the `G_FTAN`
Opcode
- `llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp` Map the tan intrinsic
to `G_FTAN` Opcode
- `llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp` - Map the
`G_FTAN` opcode to the GLSL 4.5 and OpenCL tan instructions.
- `llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp` - Define `G_FTAN` as a
legal SPIR-V target opcode.
---
 llvm/docs/GlobalISel/GenericOpcode.rst        |  4 +-
 llvm/include/llvm/Support/TargetOpcodes.def   |  3 ++
 llvm/include/llvm/Target/GenericOpcodes.td    |  7 +++
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |  2 +
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  2 +
 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp  |  1 +
 .../GlobalISel/legalizer-info-validation.mir  |  3 ++
 .../test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll | 45 +++++++++++++++++++
 8 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll

diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 492d30280f4776..52dc039df7779e 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -592,8 +592,8 @@ G_FLOG, G_FLOG2, G_FLOG10
 
 Calculate the base-e, base-2, or base-10 respectively.
 
-G_FCEIL, G_FCOS, G_FSIN, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+G_FCEIL, G_FCOS, G_FSIN, G_FTAN, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 These correspond to the standard C functions of the same name.
 
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index cb98f96af522f7..559a588c251482 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -781,6 +781,9 @@ HANDLE_TARGET_OPCODE(G_FCOS)
 /// Floating point sine.
 HANDLE_TARGET_OPCODE(G_FSIN)
 
+/// Floating point Tangent.
+HANDLE_TARGET_OPCODE(G_FTAN)
+
 /// Floating point square root.
 HANDLE_TARGET_OPCODE(G_FSQRT)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 8380d2738d164b..c40498e5542154 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -988,6 +988,13 @@ def G_FSIN : GenericInstruction {
   let hasSideEffects = false;
 }
 
+// Floating point tangent of a value.
+def G_FTAN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = false;
+}
+
 // Floating point square root of a value.
 // This returns NaN for negative nonzero values.
 // NOTE: Unlike libm sqrt(), this never sets errno. In all other respects it's
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 77ee5e645288b1..6661127162e524 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1945,6 +1945,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
       return TargetOpcode::G_FSIN;
     case Intrinsic::sqrt:
       return TargetOpcode::G_FSQRT;
+    case Intrinsic::tan:
+      return TargetOpcode::G_FTAN;
     case Intrinsic::trunc:
       return TargetOpcode::G_INTRINSIC_TRUNC;
     case Intrinsic::readcyclecounter:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9994a966c82c31..2051cdc7e01ff8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -467,6 +467,8 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
     return selectExtInst(ResVReg, ResType, I, CL::cos, GL::Cos);
   case TargetOpcode::G_FSIN:
     return selectExtInst(ResVReg, ResType, I, CL::sin, GL::Sin);
+  case TargetOpcode::G_FTAN:
+    return selectExtInst(ResVReg, ResType, I, CL::tan, GL::Tan);
 
   case TargetOpcode::G_FSQRT:
     return selectExtInst(ResVReg, ResType, I, CL::sqrt, GL::Sqrt);
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 4b871bdd5d0758..e7b35555293a3e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -277,6 +277,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
                                G_FCEIL,
                                G_FCOS,
                                G_FSIN,
+                               G_FTAN,
                                G_FSQRT,
                                G_FFLOOR,
                                G_FRINT,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 20133158e4fa9a..d71111b57efe51 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -674,6 +674,9 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll
new file mode 100644
index 00000000000000..7bdce99dbfaa7e
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll
@@ -0,0 +1,45 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @tan_float(float noundef %a) {
+entry:
+; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call float @llvm.tan.f32(float %a)
+  ret float %elt.tan
+}
+
+define noundef half @tan_half(half noundef %a) {
+entry:
+; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call half @llvm.tan.f16(half %a)
+  ret half %elt.tan
+}
+
+define noundef <4 x float> @tan_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call <4 x float> @llvm.tan.v4f32(<4 x float> %a)
+  ret <4 x float> %elt.tan
+}
+
+define noundef <4 x half> @tan_half4(<4 x half> noundef %a) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call <4 x half> @llvm.tan.v4f16(<4 x half> %a)
+  ret <4 x half> %elt.tan
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
+declare <4 x half> @llvm.tan.v4f16(<4 x half>)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)

From 084e2b53d22c11e013b0a495b65d39aa7f934048 Mon Sep 17 00:00:00 2001
From: Christian Ulmann 
Date: Wed, 8 May 2024 07:40:15 +0200
Subject: [PATCH 60/64] [MLIR][Interfaces] Change MemorySlotInterface to use
 OpBuilder (#91341)

This commit changes the `MemorySlotInterface` back to using `OpBuilder`
instead of a rewriter. The rewriter-based interface was originally
introduced in https://reviews.llvm.org/D150432, but it has since become
clear that rewrite patterns are a bad fit for both Mem2Reg and SROA.
Mem2Reg in particular suffers from using a rewriter because it is forced
to create new basic blocks, which invalidates the dominance information
and can be expensive to recompute.
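
For illustration, a hedged sketch of a caller of the builder-based entry
point; promoteRegion is hypothetical and the collection of allocators is
simplified compared to the real pass:

  #include "llvm/ADT/SmallVector.h"
  #include "mlir/IR/Builders.h"
  #include "mlir/Interfaces/DataLayoutInterfaces.h"
  #include "mlir/Interfaces/MemorySlotInterfaces.h"
  #include "mlir/Transforms/Mem2Reg.h"
  using namespace mlir;

  static void promoteRegion(Region &region, const DataLayout &dataLayout) {
    // Collect every allocator that opts into promotion via the interface.
    SmallVector<PromotableAllocationOpInterface> allocators;
    region.walk([&](PromotableAllocationOpInterface allocator) {
      allocators.push_back(allocator);
    });
    // A plain OpBuilder is enough now; no rewriter or pattern driver needed.
    OpBuilder builder(region.getContext());
    (void)tryToPromoteMemorySlots(allocators, builder, dataLayout);
  }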
---
 .../mlir/Interfaces/MemorySlotInterfaces.td   |  63 ++---
 mlir/include/mlir/Transforms/Mem2Reg.h        |   2 +-
 mlir/include/mlir/Transforms/SROA.h           |   2 +-
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 248 +++++++++---------
 .../Dialect/MemRef/IR/MemRefMemorySlot.cpp    |  52 ++--
 mlir/lib/Transforms/Mem2Reg.cpp               |  90 +++----
 mlir/lib/Transforms/SROA.cpp                  |  26 +-
 7 files changed, 216 insertions(+), 267 deletions(-)

diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
index 764fa6d547b2eb..adf182ac7069d2 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
@@ -40,42 +40,40 @@ def PromotableAllocationOpInterface
         Provides the default Value of this memory slot. The provided Value
         will be used as the reaching definition of loads done before any store.
         This Value must outlive the promotion and dominate all the uses of this
-        slot's pointer. The provided rewriter can be used to create the default
+        slot's pointer. The provided builder can be used to create the default
         value on the fly.
 
-        The rewriter is located at the beginning of the block where the slot
-        pointer is defined. All IR mutations must happen through the rewriter.
+        The builder is located at the beginning of the block where the slot
+        pointer is defined.
       }], "::mlir::Value", "getDefaultValue",
       (ins
         "const ::mlir::MemorySlot &":$slot,
-        "::mlir::RewriterBase &":$rewriter)
+        "::mlir::OpBuilder &":$builder)
     >,
     InterfaceMethod<[{
         Hook triggered for every new block argument added to a block.
         This will only be called for slots declared by this operation.
 
-        The rewriter is located at the beginning of the block on call. All IR
-        mutations must happen through the rewriter.
+        The builder is located at the beginning of the block on call. All IR
+        mutations must happen through the builder.
       }],
       "void", "handleBlockArgument",
       (ins
         "const ::mlir::MemorySlot &":$slot,
         "::mlir::BlockArgument":$argument,
-        "::mlir::RewriterBase &":$rewriter
+        "::mlir::OpBuilder &":$builder
       )
     >,
     InterfaceMethod<[{
         Hook triggered once the promotion of a slot is complete. This can
         also clean up the created default value if necessary.
         This will only be called for slots declared by this operation.
-
-        All IR mutations must happen through the rewriter.
       }],
       "void", "handlePromotionComplete",
       (ins
         "const ::mlir::MemorySlot &":$slot, 
         "::mlir::Value":$defaultValue,
-        "::mlir::RewriterBase &":$rewriter)
+        "::mlir::OpBuilder &":$builder)
     >,
   ];
 }
@@ -119,15 +117,14 @@ def PromotableMemOpInterface : OpInterface<"PromotableMemOpInterface"> {
         The returned value must dominate all operations dominated by the storing
         operation.
 
-        If IR must be mutated to extract a concrete value being stored, mutation
-        must happen through the provided rewriter. The rewriter is located
-        immediately after the memory operation on call. No IR deletion is
-        allowed in this method. IR mutations must not introduce new uses of the
-        memory slot. Existing control flow must not be modified.
+        The builder is located immediately after the memory operation on call.
+        No IR deletion is allowed in this method. IR mutations must not
+        introduce new uses of the memory slot. Existing control flow must not
+        be modified.
       }],
       "::mlir::Value", "getStored",
       (ins "const ::mlir::MemorySlot &":$slot,
-           "::mlir::RewriterBase &":$rewriter,
+           "::mlir::OpBuilder &":$builder,
            "::mlir::Value":$reachingDef,
            "const ::mlir::DataLayout &":$dataLayout)
     >,
@@ -166,14 +163,13 @@ def PromotableMemOpInterface : OpInterface<"PromotableMemOpInterface"> {
         have been done at the point of calling this method, but it will be done
         eventually.
 
-        The rewriter is located after the promotable operation on call. All IR
-        mutations must happen through the rewriter.
+        The builder is located after the promotable operation on call.
       }],
       "::mlir::DeletionKind",
       "removeBlockingUses",
       (ins "const ::mlir::MemorySlot &":$slot,
            "const ::llvm::SmallPtrSetImpl &":$blockingUses,
-           "::mlir::RewriterBase &":$rewriter,
+           "::mlir::OpBuilder &":$builder,
            "::mlir::Value":$reachingDefinition,
            "const ::mlir::DataLayout &":$dataLayout)
     >,
@@ -224,13 +220,12 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
         have been done at the point of calling this method, but it will be done
         eventually.
 
-        The rewriter is located after the promotable operation on call. All IR
-        mutations must happen through the rewriter.
+        The builder is located after the promotable operation on call.
       }],
       "::mlir::DeletionKind",
       "removeBlockingUses",
       (ins "const ::llvm::SmallPtrSetImpl &":$blockingUses,
-           "::mlir::RewriterBase &":$rewriter)
+           "::mlir::OpBuilder &":$builder)
     >,
     InterfaceMethod<[{
         This method allows the promoted operation to visit the SSA values used
@@ -254,13 +249,12 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
         scheduled for removal and if `requiresReplacedValues` returned
         true.
 
-        The rewriter is located after the promotable operation on call. All IR
-        mutations must happen through the rewriter. During the transformation,
-        *no operation should be deleted*.
+        The builder is located after the promotable operation on call. During
+        the transformation, *no operation should be deleted*.
       }],
       "void", "visitReplacedValues",
       (ins "::llvm::ArrayRef>":$mutatedDefs,
-           "::mlir::RewriterBase &":$rewriter), [{}], [{ return; }]
+           "::mlir::OpBuilder &":$builder), [{}], [{ return; }]
     >,
   ];
 }
@@ -293,25 +287,23 @@ def DestructurableAllocationOpInterface
         at the end of this call. Only generates subslots for the indices found in
         `usedIndices` since all other subslots are unused.
 
-        The rewriter is located at the beginning of the block where the slot
-        pointer is defined. All IR mutations must happen through the rewriter.
+        The builder is located at the beginning of the block where the slot
+        pointer is defined.
       }],
       "::llvm::DenseMap<::mlir::Attribute, ::mlir::MemorySlot>",
       "destructure",
       (ins "const ::mlir::DestructurableMemorySlot &":$slot,
            "const ::llvm::SmallPtrSetImpl<::mlir::Attribute> &":$usedIndices,
-           "::mlir::RewriterBase &":$rewriter)
+           "::mlir::OpBuilder &":$builder)
     >,
     InterfaceMethod<[{
         Hook triggered once the destructuring of a slot is complete, meaning the
         original slot is no longer being refered to and could be deleted.
         This will only be called for slots declared by this operation.
-
-        All IR mutations must happen through the rewriter.
       }],
       "void", "handleDestructuringComplete",
       (ins "const ::mlir::DestructurableMemorySlot &":$slot,
-           "::mlir::RewriterBase &":$rewriter)
+           "::mlir::OpBuilder &":$builder)
     >,
   ];
 }
@@ -376,15 +368,14 @@ def DestructurableAccessorOpInterface
         Rewires the use of a slot to the generated subslots, without deleting
         any operation. Returns whether the accessor should be deleted.
 
-        All IR mutations must happen through the rewriter. Deletion of
-        operations is not allowed, only the accessor can be scheduled for
-        deletion by returning the appropriate value.
+        Deletion of operations is not allowed, only the accessor can be
+        scheduled for deletion by returning the appropriate value.
       }],
       "::mlir::DeletionKind",
       "rewire",
       (ins "const ::mlir::DestructurableMemorySlot &":$slot,
            "::llvm::DenseMap<::mlir::Attribute, ::mlir::MemorySlot> &":$subslots,
-           "::mlir::RewriterBase &":$rewriter,
+           "::mlir::OpBuilder &":$builder,
            "const ::mlir::DataLayout &":$dataLayout)
     >
   ];
diff --git a/mlir/include/mlir/Transforms/Mem2Reg.h b/mlir/include/mlir/Transforms/Mem2Reg.h
index ed10644e26a519..b4f939d6541428 100644
--- a/mlir/include/mlir/Transforms/Mem2Reg.h
+++ b/mlir/include/mlir/Transforms/Mem2Reg.h
@@ -27,7 +27,7 @@ struct Mem2RegStatistics {
 /// at least one memory slot was promoted.
 LogicalResult
 tryToPromoteMemorySlots(ArrayRef allocators,
-                        RewriterBase &rewriter, const DataLayout &dataLayout,
+                        OpBuilder &builder, const DataLayout &dataLayout,
                         Mem2RegStatistics statistics = {});
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Transforms/SROA.h b/mlir/include/mlir/Transforms/SROA.h
index d09a7989edeab2..fa84fb1eae73a4 100644
--- a/mlir/include/mlir/Transforms/SROA.h
+++ b/mlir/include/mlir/Transforms/SROA.h
@@ -31,7 +31,7 @@ struct SROAStatistics {
 /// failure if no slot was destructured.
 LogicalResult tryToDestructureMemorySlots(
     ArrayRef allocators,
-    RewriterBase &rewriter, const DataLayout &dataLayout,
+    OpBuilder &builder, const DataLayout &dataLayout,
     SROAStatistics statistics = {});
 
 } // namespace mlir
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 6eeb13ebffb51f..70102e1c819203 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -36,26 +36,26 @@ llvm::SmallVector LLVM::AllocaOp::getPromotableSlots() {
 }
 
 Value LLVM::AllocaOp::getDefaultValue(const MemorySlot &slot,
-                                      RewriterBase &rewriter) {
-  return rewriter.create(getLoc(), slot.elemType);
+                                      OpBuilder &builder) {
+  return builder.create(getLoc(), slot.elemType);
 }
 
 void LLVM::AllocaOp::handleBlockArgument(const MemorySlot &slot,
                                          BlockArgument argument,
-                                         RewriterBase &rewriter) {
+                                         OpBuilder &builder) {
   for (Operation *user : getOperation()->getUsers())
     if (auto declareOp = llvm::dyn_cast(user))
-      rewriter.create(declareOp.getLoc(), argument,
-                                        declareOp.getVarInfo(),
-                                        declareOp.getLocationExpr());
+      builder.create(declareOp.getLoc(), argument,
+                                       declareOp.getVarInfo(),
+                                       declareOp.getLocationExpr());
 }
 
 void LLVM::AllocaOp::handlePromotionComplete(const MemorySlot &slot,
                                              Value defaultValue,
-                                             RewriterBase &rewriter) {
+                                             OpBuilder &builder) {
   if (defaultValue && defaultValue.use_empty())
-    rewriter.eraseOp(defaultValue.getDefiningOp());
-  rewriter.eraseOp(*this);
+    defaultValue.getDefiningOp()->erase();
+  this->erase();
 }
 
 SmallVector LLVM::AllocaOp::getDestructurableSlots() {
@@ -78,16 +78,16 @@ SmallVector LLVM::AllocaOp::getDestructurableSlots() {
 DenseMap
 LLVM::AllocaOp::destructure(const DestructurableMemorySlot &slot,
                             const SmallPtrSetImpl &usedIndices,
-                            RewriterBase &rewriter) {
+                            OpBuilder &builder) {
   assert(slot.ptr == getResult());
-  rewriter.setInsertionPointAfter(*this);
+  builder.setInsertionPointAfter(*this);
 
   auto destructurableType = cast(getElemType());
   DenseMap slotMap;
   for (Attribute index : usedIndices) {
     Type elemType = destructurableType.getTypeAtIndex(index);
     assert(elemType && "used index must exist");
-    auto subAlloca = rewriter.create(
+    auto subAlloca = builder.create(
         getLoc(), LLVM::LLVMPointerType::get(getContext()), elemType,
         getArraySize());
     slotMap.try_emplace(index, {subAlloca.getResult(), elemType});
@@ -97,9 +97,9 @@ LLVM::AllocaOp::destructure(const DestructurableMemorySlot &slot,
 }
 
 void LLVM::AllocaOp::handleDestructuringComplete(
-    const DestructurableMemorySlot &slot, RewriterBase &rewriter) {
+    const DestructurableMemorySlot &slot, OpBuilder &builder) {
   assert(slot.ptr == getResult());
-  rewriter.eraseOp(*this);
+  this->erase();
 }
 
 //===----------------------------------------------------------------------===//
@@ -112,7 +112,7 @@ bool LLVM::LoadOp::loadsFrom(const MemorySlot &slot) {
 
 bool LLVM::LoadOp::storesTo(const MemorySlot &slot) { return false; }
 
-Value LLVM::LoadOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value LLVM::LoadOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                               Value reachingDef, const DataLayout &dataLayout) {
   llvm_unreachable("getStored should not be called on LoadOp");
 }
@@ -175,7 +175,7 @@ static bool isBigEndian(const DataLayout &dataLayout) {
 
 /// Converts a value to an integer type of the same size.
 /// Assumes that the type can be converted.
-static Value castToSameSizedInt(RewriterBase &rewriter, Location loc, Value val,
+static Value castToSameSizedInt(OpBuilder &builder, Location loc, Value val,
                                 const DataLayout &dataLayout) {
   Type type = val.getType();
   assert(isSupportedTypeForConversion(type) &&
@@ -185,15 +185,15 @@ static Value castToSameSizedInt(RewriterBase &rewriter, Location loc, Value val,
     return val;
 
   uint64_t typeBitSize = dataLayout.getTypeSizeInBits(type);
-  IntegerType valueSizeInteger = rewriter.getIntegerType(typeBitSize);
+  IntegerType valueSizeInteger = builder.getIntegerType(typeBitSize);
 
   if (isa(type))
-    return rewriter.createOrFold(loc, valueSizeInteger, val);
-  return rewriter.createOrFold(loc, valueSizeInteger, val);
+    return builder.createOrFold(loc, valueSizeInteger, val);
+  return builder.createOrFold(loc, valueSizeInteger, val);
 }
 
 /// Converts a value with an integer type to `targetType`.
-static Value castIntValueToSameSizedType(RewriterBase &rewriter, Location loc,
+static Value castIntValueToSameSizedType(OpBuilder &builder, Location loc,
                                          Value val, Type targetType) {
   assert(isa(val.getType()) &&
          "expected value to have an integer type");
@@ -202,13 +202,13 @@ static Value castIntValueToSameSizedType(RewriterBase &rewriter, Location loc,
   if (val.getType() == targetType)
     return val;
   if (isa(targetType))
-    return rewriter.createOrFold(loc, targetType, val);
-  return rewriter.createOrFold(loc, targetType, val);
+    return builder.createOrFold(loc, targetType, val);
+  return builder.createOrFold(loc, targetType, val);
 }
 
 /// Constructs operations that convert `srcValue` into a new value of type
 /// `targetType`. Assumes the types have the same bitsize.
-static Value castSameSizedTypes(RewriterBase &rewriter, Location loc,
+static Value castSameSizedTypes(OpBuilder &builder, Location loc,
                                 Value srcValue, Type targetType,
                                 const DataLayout &dataLayout) {
   Type srcType = srcValue.getType();
@@ -226,18 +226,18 @@ static Value castSameSizedTypes(RewriterBase &rewriter, Location loc,
   // provenance.
   if (isa(targetType) &&
       isa(srcType))
-    return rewriter.createOrFold(loc, targetType,
-                                                        srcValue);
+    return builder.createOrFold(loc, targetType,
+                                                       srcValue);
 
   // For all other castable types, casting through integers is necessary.
-  Value replacement = castToSameSizedInt(rewriter, loc, srcValue, dataLayout);
-  return castIntValueToSameSizedType(rewriter, loc, replacement, targetType);
+  Value replacement = castToSameSizedInt(builder, loc, srcValue, dataLayout);
+  return castIntValueToSameSizedType(builder, loc, replacement, targetType);
 }
 
 /// Constructs operations that convert `srcValue` into a new value of type
 /// `targetType`. Performs bit-level extraction if the source type is larger
 /// than the target type. Assumes that this conversion is possible.
-static Value createExtractAndCast(RewriterBase &rewriter, Location loc,
+static Value createExtractAndCast(OpBuilder &builder, Location loc,
                                   Value srcValue, Type targetType,
                                   const DataLayout &dataLayout) {
   // Get the types of the source and target values.
@@ -249,31 +249,31 @@ static Value createExtractAndCast(RewriterBase &rewriter, Location loc,
   uint64_t srcTypeSize = dataLayout.getTypeSizeInBits(srcType);
   uint64_t targetTypeSize = dataLayout.getTypeSizeInBits(targetType);
   if (srcTypeSize == targetTypeSize)
-    return castSameSizedTypes(rewriter, loc, srcValue, targetType, dataLayout);
+    return castSameSizedTypes(builder, loc, srcValue, targetType, dataLayout);
 
   // First, cast the value to a same-sized integer type.
-  Value replacement = castToSameSizedInt(rewriter, loc, srcValue, dataLayout);
+  Value replacement = castToSameSizedInt(builder, loc, srcValue, dataLayout);
 
   // Truncate the integer if the size of the target is less than the value.
   if (isBigEndian(dataLayout)) {
     uint64_t shiftAmount = srcTypeSize - targetTypeSize;
-    auto shiftConstant = rewriter.create(
-        loc, rewriter.getIntegerAttr(srcType, shiftAmount));
+    auto shiftConstant = builder.create(
+        loc, builder.getIntegerAttr(srcType, shiftAmount));
     replacement =
-        rewriter.createOrFold(loc, srcValue, shiftConstant);
+        builder.createOrFold(loc, srcValue, shiftConstant);
   }
 
-  replacement = rewriter.create(
-      loc, rewriter.getIntegerType(targetTypeSize), replacement);
+  replacement = builder.create(
+      loc, builder.getIntegerType(targetTypeSize), replacement);
 
   // Now cast the integer to the actual target type if required.
-  return castIntValueToSameSizedType(rewriter, loc, replacement, targetType);
+  return castIntValueToSameSizedType(builder, loc, replacement, targetType);
 }
 
 /// Constructs operations that insert the bits of `srcValue` into the
 /// "beginning" of `reachingDef` (beginning is endianness dependent).
 /// Assumes that this conversion is possible.
-static Value createInsertAndCast(RewriterBase &rewriter, Location loc,
+static Value createInsertAndCast(OpBuilder &builder, Location loc,
                                  Value srcValue, Value reachingDef,
                                  const DataLayout &dataLayout) {
 
@@ -284,27 +284,27 @@ static Value createInsertAndCast(RewriterBase &rewriter, Location loc,
   uint64_t valueTypeSize = dataLayout.getTypeSizeInBits(srcValue.getType());
   uint64_t slotTypeSize = dataLayout.getTypeSizeInBits(reachingDef.getType());
   if (slotTypeSize == valueTypeSize)
-    return castSameSizedTypes(rewriter, loc, srcValue, reachingDef.getType(),
+    return castSameSizedTypes(builder, loc, srcValue, reachingDef.getType(),
                               dataLayout);
 
   // In the case where the store only overwrites parts of the memory,
   // bit fiddling is required to construct the new value.
 
   // First convert both values to integers of the same size.
-  Value defAsInt = castToSameSizedInt(rewriter, loc, reachingDef, dataLayout);
-  Value valueAsInt = castToSameSizedInt(rewriter, loc, srcValue, dataLayout);
+  Value defAsInt = castToSameSizedInt(builder, loc, reachingDef, dataLayout);
+  Value valueAsInt = castToSameSizedInt(builder, loc, srcValue, dataLayout);
   // Extend the value to the size of the reaching definition.
   valueAsInt =
-      rewriter.createOrFold(loc, defAsInt.getType(), valueAsInt);
+      builder.createOrFold(loc, defAsInt.getType(), valueAsInt);
   uint64_t sizeDifference = slotTypeSize - valueTypeSize;
   if (isBigEndian(dataLayout)) {
     // On big endian systems, a store to the base pointer overwrites the most
     // significant bits. To accomodate for this, the stored value needs to be
     // shifted into the according position.
-    Value bigEndianShift = rewriter.create(
-        loc, rewriter.getIntegerAttr(defAsInt.getType(), sizeDifference));
+    Value bigEndianShift = builder.create(
+        loc, builder.getIntegerAttr(defAsInt.getType(), sizeDifference));
     valueAsInt =
-        rewriter.createOrFold(loc, valueAsInt, bigEndianShift);
+        builder.createOrFold(loc, valueAsInt, bigEndianShift);
   }
 
   // Construct the mask that is used to erase the bits that are overwritten by
@@ -322,23 +322,23 @@ static Value createInsertAndCast(RewriterBase &rewriter, Location loc,
   }
 
   // Mask out the affected bits ...
-  Value mask = rewriter.create(
-      loc, rewriter.getIntegerAttr(defAsInt.getType(), maskValue));
-  Value masked = rewriter.createOrFold(loc, defAsInt, mask);
+  Value mask = builder.create(
+      loc, builder.getIntegerAttr(defAsInt.getType(), maskValue));
+  Value masked = builder.createOrFold(loc, defAsInt, mask);
 
   // ... and combine the result with the new value.
-  Value combined = rewriter.createOrFold(loc, masked, valueAsInt);
+  Value combined = builder.createOrFold(loc, masked, valueAsInt);
 
-  return castIntValueToSameSizedType(rewriter, loc, combined,
+  return castIntValueToSameSizedType(builder, loc, combined,
                                      reachingDef.getType());
 }
 
-Value LLVM::StoreOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value LLVM::StoreOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                                Value reachingDef,
                                const DataLayout &dataLayout) {
   assert(reachingDef && reachingDef.getType() == slot.elemType &&
          "expected the reaching definition's type to match the slot's type");
-  return createInsertAndCast(rewriter, getLoc(), getValue(), reachingDef,
+  return createInsertAndCast(builder, getLoc(), getValue(), reachingDef,
                              dataLayout);
 }
 
@@ -360,13 +360,13 @@ bool LLVM::LoadOp::canUsesBeRemoved(
 
 DeletionKind LLVM::LoadOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
   // `canUsesBeRemoved` checked this blocking use must be the loaded slot
   // pointer.
-  Value newResult = createExtractAndCast(rewriter, getLoc(), reachingDefinition,
+  Value newResult = createExtractAndCast(builder, getLoc(), reachingDefinition,
                                          getResult().getType(), dataLayout);
-  rewriter.replaceAllUsesWith(getResult(), newResult);
+  getResult().replaceAllUsesWith(newResult);
   return DeletionKind::Delete;
 }
 
@@ -390,7 +390,7 @@ bool LLVM::StoreOp::canUsesBeRemoved(
 
 DeletionKind LLVM::StoreOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
   return DeletionKind::Delete;
 }
@@ -452,14 +452,13 @@ bool LLVM::LoadOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind LLVM::LoadOp::rewire(const DestructurableMemorySlot &slot,
                                   DenseMap &subslots,
-                                  RewriterBase &rewriter,
+                                  OpBuilder &builder,
                                   const DataLayout &dataLayout) {
   auto index = IntegerAttr::get(IntegerType::get(getContext(), 32), 0);
   auto it = subslots.find(index);
   assert(it != subslots.end());
 
-  rewriter.modifyOpInPlace(
-      *this, [&]() { getAddrMutable().set(it->getSecond().ptr); });
+  getAddrMutable().set(it->getSecond().ptr);
   return DeletionKind::Keep;
 }
 
@@ -491,14 +490,13 @@ bool LLVM::StoreOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind LLVM::StoreOp::rewire(const DestructurableMemorySlot &slot,
                                    DenseMap &subslots,
-                                   RewriterBase &rewriter,
+                                   OpBuilder &builder,
                                    const DataLayout &dataLayout) {
   auto index = IntegerAttr::get(IntegerType::get(getContext(), 32), 0);
   auto it = subslots.find(index);
   assert(it != subslots.end());
 
-  rewriter.modifyOpInPlace(
-      *this, [&]() { getAddrMutable().set(it->getSecond().ptr); });
+  getAddrMutable().set(it->getSecond().ptr);
   return DeletionKind::Keep;
 }
 
@@ -523,7 +521,7 @@ bool LLVM::BitcastOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::BitcastOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -535,7 +533,7 @@ bool LLVM::AddrSpaceCastOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::AddrSpaceCastOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -547,7 +545,7 @@ bool LLVM::LifetimeStartOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::LifetimeStartOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -559,7 +557,7 @@ bool LLVM::LifetimeEndOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::LifetimeEndOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -571,7 +569,7 @@ bool LLVM::InvariantStartOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::InvariantStartOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -583,7 +581,7 @@ bool LLVM::InvariantEndOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::InvariantEndOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -595,7 +593,7 @@ bool LLVM::DbgDeclareOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::DbgDeclareOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -611,28 +609,27 @@ bool LLVM::DbgValueOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::DbgValueOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
-  // Rewriter by default is after '*this', but we need it before '*this'.
-  rewriter.setInsertionPoint(*this);
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
+  // builder by default is after '*this', but we need it before '*this'.
+  builder.setInsertionPoint(*this);
 
   // Rather than dropping the debug value, replace it with undef to preserve the
   // debug local variable info. This allows the debugger to inform the user that
   // the variable has been optimized out.
   auto undef =
-      rewriter.create(getValue().getLoc(), getValue().getType());
-  rewriter.modifyOpInPlace(*this, [&] { getValueMutable().assign(undef); });
+      builder.create(getValue().getLoc(), getValue().getType());
+  getValueMutable().assign(undef);
   return DeletionKind::Keep;
 }
 
 bool LLVM::DbgDeclareOp::requiresReplacedValues() { return true; }
 
 void LLVM::DbgDeclareOp::visitReplacedValues(
-    ArrayRef> definitions,
-    RewriterBase &rewriter) {
+    ArrayRef> definitions, OpBuilder &builder) {
   for (auto [op, value] : definitions) {
-    rewriter.setInsertionPointAfter(op);
-    rewriter.create(getLoc(), value, getVarInfo(),
-                                      getLocationExpr());
+    builder.setInsertionPointAfter(op);
+    builder.create(getLoc(), value, getVarInfo(),
+                                     getLocationExpr());
   }
 }
 
@@ -658,7 +655,7 @@ bool LLVM::GEPOp::canUsesBeRemoved(
 }
 
 DeletionKind LLVM::GEPOp::removeBlockingUses(
-    const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) {
+    const SmallPtrSetImpl &blockingUses, OpBuilder &builder) {
   return DeletionKind::Delete;
 }
 
@@ -855,7 +852,7 @@ bool LLVM::GEPOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind LLVM::GEPOp::rewire(const DestructurableMemorySlot &slot,
                                  DenseMap &subslots,
-                                 RewriterBase &rewriter,
+                                 OpBuilder &builder,
                                  const DataLayout &dataLayout) {
   std::optional accessInfo =
       getSubslotAccessInfo(slot, dataLayout, *this);
@@ -864,11 +861,11 @@ DeletionKind LLVM::GEPOp::rewire(const DestructurableMemorySlot &slot,
       IntegerAttr::get(IntegerType::get(getContext(), 32), accessInfo->index);
   const MemorySlot &newSlot = subslots.at(indexAttr);
 
-  auto byteType = IntegerType::get(rewriter.getContext(), 8);
-  auto newPtr = rewriter.createOrFold(
+  auto byteType = IntegerType::get(builder.getContext(), 8);
+  auto newPtr = builder.createOrFold(
       getLoc(), getResult().getType(), byteType, newSlot.ptr,
       ArrayRef(accessInfo->subslotOffset), getInbounds());
-  rewriter.replaceAllUsesWith(getResult(), newPtr);
+  getResult().replaceAllUsesWith(newPtr);
   return DeletionKind::Delete;
 }
 
@@ -938,7 +935,7 @@ bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
   return getDst() == slot.ptr;
 }
 
-Value LLVM::MemsetOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                                 Value reachingDef,
                                 const DataLayout &dataLayout) {
   // TODO: Support non-integer types.
@@ -953,14 +950,14 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
         // or-ing it with the previous value.
         uint64_t coveredBits = 8;
         Value currentValue =
-            rewriter.create(getLoc(), intType, getVal());
+            builder.create(getLoc(), intType, getVal());
         while (coveredBits < intType.getWidth()) {
           Value shiftBy =
-              rewriter.create(getLoc(), intType, coveredBits);
+              builder.create(getLoc(), intType, coveredBits);
           Value shifted =
-              rewriter.create(getLoc(), currentValue, shiftBy);
+              builder.create(getLoc(), currentValue, shiftBy);
           currentValue =
-              rewriter.create(getLoc(), currentValue, shifted);
+              builder.create(getLoc(), currentValue, shifted);
           coveredBits *= 2;
         }
 
@@ -994,7 +991,7 @@ bool LLVM::MemsetOp::canUsesBeRemoved(
 
 DeletionKind LLVM::MemsetOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
   return DeletionKind::Delete;
 }
@@ -1026,7 +1023,7 @@ bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
                                     DenseMap &subslots,
-                                    RewriterBase &rewriter,
+                                    OpBuilder &builder,
                                     const DataLayout &dataLayout) {
   std::optional> types =
       cast(slot.elemType).getSubelementIndexMap();
@@ -1063,15 +1060,14 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
       uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
 
       Value newMemsetSizeValue =
-          rewriter
+          builder
               .create(
                   getLen().getLoc(),
                   IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
               .getResult();
 
-      rewriter.create(getLoc(), subslots.at(index).ptr,
-                                      getVal(), newMemsetSizeValue,
-                                      getIsVolatile());
+      builder.create(getLoc(), subslots.at(index).ptr, getVal(),
+                                     newMemsetSizeValue, getIsVolatile());
     }
 
     covered += typeSize;
@@ -1096,8 +1092,8 @@ static bool memcpyStoresTo(MemcpyLike op, const MemorySlot &slot) {
 
 template 
 static Value memcpyGetStored(MemcpyLike op, const MemorySlot &slot,
-                             RewriterBase &rewriter) {
-  return rewriter.create(op.getLoc(), slot.elemType, op.getSrc());
+                             OpBuilder &builder) {
+  return builder.create(op.getLoc(), slot.elemType, op.getSrc());
 }
 
 template 
@@ -1122,10 +1118,9 @@ template 
 static DeletionKind
 memcpyRemoveBlockingUses(MemcpyLike op, const MemorySlot &slot,
                          const SmallPtrSetImpl &blockingUses,
-                         RewriterBase &rewriter, Value reachingDefinition) {
+                         OpBuilder &builder, Value reachingDefinition) {
   if (op.loadsFrom(slot))
-    rewriter.create(op.getLoc(), reachingDefinition,
-                                   op.getDst());
+    builder.create(op.getLoc(), reachingDefinition, op.getDst());
   return DeletionKind::Delete;
 }
 
@@ -1168,23 +1163,23 @@ static bool memcpyCanRewire(MemcpyLike op, const DestructurableMemorySlot &slot,
 namespace {
 
 template 
-void createMemcpyLikeToReplace(RewriterBase &rewriter, const DataLayout &layout,
+void createMemcpyLikeToReplace(OpBuilder &builder, const DataLayout &layout,
                                MemcpyLike toReplace, Value dst, Value src,
                                Type toCpy, bool isVolatile) {
-  Value memcpySize = rewriter.create(
+  Value memcpySize = builder.create(
       toReplace.getLoc(), IntegerAttr::get(toReplace.getLen().getType(),
                                            layout.getTypeSize(toCpy)));
-  rewriter.create(toReplace.getLoc(), dst, src, memcpySize,
-                              isVolatile);
+  builder.create(toReplace.getLoc(), dst, src, memcpySize,
+                             isVolatile);
 }
 
 template <>
-void createMemcpyLikeToReplace(RewriterBase &rewriter, const DataLayout &layout,
+void createMemcpyLikeToReplace(OpBuilder &builder, const DataLayout &layout,
                                LLVM::MemcpyInlineOp toReplace, Value dst,
                                Value src, Type toCpy, bool isVolatile) {
   Type lenType = IntegerType::get(toReplace->getContext(),
                                   toReplace.getLen().getBitWidth());
-  rewriter.create(
+  builder.create(
       toReplace.getLoc(), dst, src,
       IntegerAttr::get(lenType, layout.getTypeSize(toCpy)), isVolatile);
 }
@@ -1196,7 +1191,7 @@ void createMemcpyLikeToReplace(RewriterBase &rewriter, const DataLayout &layout,
 template 
 static DeletionKind
 memcpyRewire(MemcpyLike op, const DestructurableMemorySlot &slot,
-             DenseMap &subslots, RewriterBase &rewriter,
+             DenseMap &subslots, OpBuilder &builder,
              const DataLayout &dataLayout) {
   if (subslots.empty())
     return DeletionKind::Delete;
@@ -1226,12 +1221,12 @@ memcpyRewire(MemcpyLike op, const DestructurableMemorySlot &slot,
     SmallVector gepIndices{
         0, static_cast(
                cast(index).getValue().getZExtValue())};
-    Value subslotPtrInOther = rewriter.create(
+    Value subslotPtrInOther = builder.create(
         op.getLoc(), LLVM::LLVMPointerType::get(op.getContext()), slot.elemType,
         isDst ? op.getSrc() : op.getDst(), gepIndices);
 
     // Then create a new memcpy out of this source pointer.
-    createMemcpyLikeToReplace(rewriter, dataLayout, op,
+    createMemcpyLikeToReplace(builder, dataLayout, op,
                               isDst ? subslot.ptr : subslotPtrInOther,
                               isDst ? subslotPtrInOther : subslot.ptr,
                               subslot.elemType, op.getIsVolatile());
@@ -1250,10 +1245,10 @@ bool LLVM::MemcpyOp::storesTo(const MemorySlot &slot) {
   return memcpyStoresTo(*this, slot);
 }
 
-Value LLVM::MemcpyOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value LLVM::MemcpyOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                                 Value reachingDef,
                                 const DataLayout &dataLayout) {
-  return memcpyGetStored(*this, slot, rewriter);
+  return memcpyGetStored(*this, slot, builder);
 }
 
 bool LLVM::MemcpyOp::canUsesBeRemoved(
@@ -1266,9 +1261,9 @@ bool LLVM::MemcpyOp::canUsesBeRemoved(
 
 DeletionKind LLVM::MemcpyOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
-  return memcpyRemoveBlockingUses(*this, slot, blockingUses, rewriter,
+  return memcpyRemoveBlockingUses(*this, slot, blockingUses, builder,
                                   reachingDefinition);
 }
 
@@ -1288,9 +1283,9 @@ bool LLVM::MemcpyOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind LLVM::MemcpyOp::rewire(const DestructurableMemorySlot &slot,
                                     DenseMap &subslots,
-                                    RewriterBase &rewriter,
+                                    OpBuilder &builder,
                                     const DataLayout &dataLayout) {
-  return memcpyRewire(*this, slot, subslots, rewriter, dataLayout);
+  return memcpyRewire(*this, slot, subslots, builder, dataLayout);
 }
 
 bool LLVM::MemcpyInlineOp::loadsFrom(const MemorySlot &slot) {
@@ -1302,9 +1297,9 @@ bool LLVM::MemcpyInlineOp::storesTo(const MemorySlot &slot) {
 }
 
 Value LLVM::MemcpyInlineOp::getStored(const MemorySlot &slot,
-                                      RewriterBase &rewriter, Value reachingDef,
+                                      OpBuilder &builder, Value reachingDef,
                                       const DataLayout &dataLayout) {
-  return memcpyGetStored(*this, slot, rewriter);
+  return memcpyGetStored(*this, slot, builder);
 }
 
 bool LLVM::MemcpyInlineOp::canUsesBeRemoved(
@@ -1317,9 +1312,9 @@ bool LLVM::MemcpyInlineOp::canUsesBeRemoved(
 
 DeletionKind LLVM::MemcpyInlineOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
-  return memcpyRemoveBlockingUses(*this, slot, blockingUses, rewriter,
+  return memcpyRemoveBlockingUses(*this, slot, blockingUses, builder,
                                   reachingDefinition);
 }
 
@@ -1341,9 +1336,8 @@ bool LLVM::MemcpyInlineOp::canRewire(
 DeletionKind
 LLVM::MemcpyInlineOp::rewire(const DestructurableMemorySlot &slot,
                              DenseMap &subslots,
-                             RewriterBase &rewriter,
-                             const DataLayout &dataLayout) {
-  return memcpyRewire(*this, slot, subslots, rewriter, dataLayout);
+                             OpBuilder &builder, const DataLayout &dataLayout) {
+  return memcpyRewire(*this, slot, subslots, builder, dataLayout);
 }
 
 bool LLVM::MemmoveOp::loadsFrom(const MemorySlot &slot) {
@@ -1354,10 +1348,10 @@ bool LLVM::MemmoveOp::storesTo(const MemorySlot &slot) {
   return memcpyStoresTo(*this, slot);
 }
 
-Value LLVM::MemmoveOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value LLVM::MemmoveOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                                  Value reachingDef,
                                  const DataLayout &dataLayout) {
-  return memcpyGetStored(*this, slot, rewriter);
+  return memcpyGetStored(*this, slot, builder);
 }
 
 bool LLVM::MemmoveOp::canUsesBeRemoved(
@@ -1370,9 +1364,9 @@ bool LLVM::MemmoveOp::canUsesBeRemoved(
 
 DeletionKind LLVM::MemmoveOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
-  return memcpyRemoveBlockingUses(*this, slot, blockingUses, rewriter,
+  return memcpyRemoveBlockingUses(*this, slot, blockingUses, builder,
                                   reachingDefinition);
 }
 
@@ -1392,9 +1386,9 @@ bool LLVM::MemmoveOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind LLVM::MemmoveOp::rewire(const DestructurableMemorySlot &slot,
                                      DenseMap &subslots,
-                                     RewriterBase &rewriter,
+                                     OpBuilder &builder,
                                      const DataLayout &dataLayout) {
-  return memcpyRewire(*this, slot, subslots, rewriter, dataLayout);
+  return memcpyRewire(*this, slot, subslots, builder, dataLayout);
 }
 
 //===----------------------------------------------------------------------===//
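
For a dialect op implementing the memory-slot interfaces, the migration above is
mechanical: each hook now receives a plain OpBuilder, positions it explicitly,
mutates the IR directly, and reports through DeletionKind whether the caller
should erase the op. A minimal sketch of that pattern in isolation; the function
and parameter names are hypothetical, and only the OpBuilder and DeletionKind
APIs already used in this patch are assumed:

#include "mlir/IR/Builders.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"

using namespace mlir;

// Hypothetical helper mirroring the removeBlockingUses hooks above: forward
// the reaching definition to all users of a load-like op, then ask the caller
// to erase the op, without going through a rewriter.
static DeletionKind forwardReachingDefinition(Operation *loadLikeOp,
                                              Value reachingDefinition,
                                              OpBuilder &builder) {
  // Position the builder explicitly in case new IR had to be created here;
  // there are no rewriter notifications to emit.
  builder.setInsertionPointAfter(loadLikeOp);
  // Plain IR mutation replaces rewriter.replaceAllUsesWith(...).
  loadLikeOp->getResult(0).replaceAllUsesWith(reachingDefinition);
  return DeletionKind::Delete;
}
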
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp
index 958c5f0c8dbc75..dca07e84ea73c7 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefMemorySlot.cpp
@@ -83,30 +83,30 @@ SmallVector memref::AllocaOp::getPromotableSlots() {
 }
 
 Value memref::AllocaOp::getDefaultValue(const MemorySlot &slot,
-                                        RewriterBase &rewriter) {
+                                        OpBuilder &builder) {
   assert(isSupportedElementType(slot.elemType));
   // TODO: support more types.
   return TypeSwitch(slot.elemType)
       .Case([&](MemRefType t) {
-        return rewriter.create(getLoc(), t);
+        return builder.create(getLoc(), t);
       })
       .Default([&](Type t) {
-        return rewriter.create(getLoc(), t,
-                                                  rewriter.getZeroAttr(t));
+        return builder.create(getLoc(), t,
+                                                 builder.getZeroAttr(t));
       });
 }
 
 void memref::AllocaOp::handlePromotionComplete(const MemorySlot &slot,
                                                Value defaultValue,
-                                               RewriterBase &rewriter) {
+                                               OpBuilder &builder) {
   if (defaultValue.use_empty())
-    rewriter.eraseOp(defaultValue.getDefiningOp());
-  rewriter.eraseOp(*this);
+    defaultValue.getDefiningOp()->erase();
+  this->erase();
 }
 
 void memref::AllocaOp::handleBlockArgument(const MemorySlot &slot,
                                            BlockArgument argument,
-                                           RewriterBase &rewriter) {}
+                                           OpBuilder &builder) {}
 
 SmallVector
 memref::AllocaOp::getDestructurableSlots() {
@@ -127,8 +127,8 @@ memref::AllocaOp::getDestructurableSlots() {
 DenseMap
 memref::AllocaOp::destructure(const DestructurableMemorySlot &slot,
                               const SmallPtrSetImpl &usedIndices,
-                              RewriterBase &rewriter) {
-  rewriter.setInsertionPointAfter(*this);
+                              OpBuilder &builder) {
+  builder.setInsertionPointAfter(*this);
 
   DenseMap slotMap;
 
@@ -136,7 +136,7 @@ memref::AllocaOp::destructure(const DestructurableMemorySlot &slot,
   for (Attribute usedIndex : usedIndices) {
     Type elemType = memrefType.getTypeAtIndex(usedIndex);
     MemRefType elemPtr = MemRefType::get({}, elemType);
-    auto subAlloca = rewriter.create(getLoc(), elemPtr);
+    auto subAlloca = builder.create(getLoc(), elemPtr);
     slotMap.try_emplace(usedIndex,
                                     {subAlloca.getResult(), elemType});
   }
@@ -145,9 +145,9 @@ memref::AllocaOp::destructure(const DestructurableMemorySlot &slot,
 }
 
 void memref::AllocaOp::handleDestructuringComplete(
-    const DestructurableMemorySlot &slot, RewriterBase &rewriter) {
+    const DestructurableMemorySlot &slot, OpBuilder &builder) {
   assert(slot.ptr == getResult());
-  rewriter.eraseOp(*this);
+  this->erase();
 }
 
 //===----------------------------------------------------------------------===//
@@ -160,7 +160,7 @@ bool memref::LoadOp::loadsFrom(const MemorySlot &slot) {
 
 bool memref::LoadOp::storesTo(const MemorySlot &slot) { return false; }
 
-Value memref::LoadOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value memref::LoadOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                                 Value reachingDef,
                                 const DataLayout &dataLayout) {
   llvm_unreachable("getStored should not be called on LoadOp");
@@ -179,11 +179,11 @@ bool memref::LoadOp::canUsesBeRemoved(
 
 DeletionKind memref::LoadOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
   // `canUsesBeRemoved` checked this blocking use must be the loaded slot
   // pointer.
-  rewriter.replaceAllUsesWith(getResult(), reachingDefinition);
+  getResult().replaceAllUsesWith(reachingDefinition);
   return DeletionKind::Delete;
 }
 
@@ -224,15 +224,13 @@ bool memref::LoadOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind memref::LoadOp::rewire(const DestructurableMemorySlot &slot,
                                     DenseMap &subslots,
-                                    RewriterBase &rewriter,
+                                    OpBuilder &builder,
                                     const DataLayout &dataLayout) {
   Attribute index = getAttributeIndexFromIndexOperands(
       getContext(), getIndices(), getMemRefType());
   const MemorySlot &memorySlot = subslots.at(index);
-  rewriter.modifyOpInPlace(*this, [&]() {
-    setMemRef(memorySlot.ptr);
-    getIndicesMutable().clear();
-  });
+  setMemRef(memorySlot.ptr);
+  getIndicesMutable().clear();
   return DeletionKind::Keep;
 }
 
@@ -242,7 +240,7 @@ bool memref::StoreOp::storesTo(const MemorySlot &slot) {
   return getMemRef() == slot.ptr;
 }
 
-Value memref::StoreOp::getStored(const MemorySlot &slot, RewriterBase &rewriter,
+Value memref::StoreOp::getStored(const MemorySlot &slot, OpBuilder &builder,
                                  Value reachingDef,
                                  const DataLayout &dataLayout) {
   return getValue();
@@ -261,7 +259,7 @@ bool memref::StoreOp::canUsesBeRemoved(
 
 DeletionKind memref::StoreOp::removeBlockingUses(
     const MemorySlot &slot, const SmallPtrSetImpl &blockingUses,
-    RewriterBase &rewriter, Value reachingDefinition,
+    OpBuilder &builder, Value reachingDefinition,
     const DataLayout &dataLayout) {
   return DeletionKind::Delete;
 }
@@ -282,15 +280,13 @@ bool memref::StoreOp::canRewire(const DestructurableMemorySlot &slot,
 
 DeletionKind memref::StoreOp::rewire(const DestructurableMemorySlot &slot,
                                      DenseMap &subslots,
-                                     RewriterBase &rewriter,
+                                     OpBuilder &builder,
                                      const DataLayout &dataLayout) {
   Attribute index = getAttributeIndexFromIndexOperands(
       getContext(), getIndices(), getMemRefType());
   const MemorySlot &memorySlot = subslots.at(index);
-  rewriter.modifyOpInPlace(*this, [&]() {
-    setMemRef(memorySlot.ptr);
-    getIndicesMutable().clear();
-  });
+  setMemRef(memorySlot.ptr);
+  getIndicesMutable().clear();
   return DeletionKind::Keep;
 }
 
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 71ba5bc076f0e6..1d7ba4ca4f83ed 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -164,7 +164,7 @@ class MemorySlotPromotionAnalyzer {
 class MemorySlotPromoter {
 public:
   MemorySlotPromoter(MemorySlot slot, PromotableAllocationOpInterface allocator,
-                     RewriterBase &rewriter, DominanceInfo &dominance,
+                     OpBuilder &builder, DominanceInfo &dominance,
                      const DataLayout &dataLayout, MemorySlotPromotionInfo info,
                      const Mem2RegStatistics &statistics);
 
@@ -195,7 +195,7 @@ class MemorySlotPromoter {
 
   MemorySlot slot;
   PromotableAllocationOpInterface allocator;
-  RewriterBase &rewriter;
+  OpBuilder &builder;
   /// Potentially non-initialized default value. Use `getOrCreateDefaultValue`
   /// to initialize it on demand.
   Value defaultValue;
@@ -213,12 +213,10 @@ class MemorySlotPromoter {
 
 MemorySlotPromoter::MemorySlotPromoter(
     MemorySlot slot, PromotableAllocationOpInterface allocator,
-    RewriterBase &rewriter, DominanceInfo &dominance,
-    const DataLayout &dataLayout, MemorySlotPromotionInfo info,
-    const Mem2RegStatistics &statistics)
-    : slot(slot), allocator(allocator), rewriter(rewriter),
-      dominance(dominance), dataLayout(dataLayout), info(std::move(info)),
-      statistics(statistics) {
+    OpBuilder &builder, DominanceInfo &dominance, const DataLayout &dataLayout,
+    MemorySlotPromotionInfo info, const Mem2RegStatistics &statistics)
+    : slot(slot), allocator(allocator), builder(builder), dominance(dominance),
+      dataLayout(dataLayout), info(std::move(info)), statistics(statistics) {
 #ifndef NDEBUG
   auto isResultOrNewBlockArgument = [&]() {
     if (BlockArgument arg = dyn_cast(slot.ptr))
@@ -236,9 +234,9 @@ Value MemorySlotPromoter::getOrCreateDefaultValue() {
   if (defaultValue)
     return defaultValue;
 
-  RewriterBase::InsertionGuard guard(rewriter);
-  rewriter.setInsertionPointToStart(slot.ptr.getParentBlock());
-  return defaultValue = allocator.getDefaultValue(slot, rewriter);
+  OpBuilder::InsertionGuard guard(builder);
+  builder.setInsertionPointToStart(slot.ptr.getParentBlock());
+  return defaultValue = allocator.getDefaultValue(slot, builder);
 }
 
 LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
@@ -437,8 +435,8 @@ Value MemorySlotPromoter::computeReachingDefInBlock(Block *block,
         reachingDefs.insert({memOp, reachingDef});
 
       if (memOp.storesTo(slot)) {
-        rewriter.setInsertionPointAfter(memOp);
-        Value stored = memOp.getStored(slot, rewriter, reachingDef, dataLayout);
+        builder.setInsertionPointAfter(memOp);
+        Value stored = memOp.getStored(slot, builder, reachingDef, dataLayout);
         assert(stored && "a memory operation storing to a slot must provide a "
                          "new definition of the slot");
         reachingDef = stored;
@@ -475,33 +473,10 @@ void MemorySlotPromoter::computeReachingDefInRegion(Region *region,
     Block *block = job.block->getBlock();
 
     if (info.mergePoints.contains(block)) {
-      // If the block is a merge point, we need to add a block argument to hold
-      // the selected reaching definition. This has to be a bit complicated
-      // because of RewriterBase limitations: we need to create a new block with
-      // the extra block argument, move the content of the block to the new
-      // block, and replace the block with the new block in the merge point set.
-      SmallVector argTypes;
-      SmallVector argLocs;
-      for (BlockArgument arg : block->getArguments()) {
-        argTypes.push_back(arg.getType());
-        argLocs.push_back(arg.getLoc());
-      }
-      argTypes.push_back(slot.elemType);
-      argLocs.push_back(slot.ptr.getLoc());
-      Block *newBlock = rewriter.createBlock(block, argTypes, argLocs);
-
-      info.mergePoints.erase(block);
-      info.mergePoints.insert(newBlock);
-
-      rewriter.replaceAllUsesWith(block, newBlock);
-      rewriter.mergeBlocks(block, newBlock,
-                           newBlock->getArguments().drop_back());
-
-      block = newBlock;
-
-      BlockArgument blockArgument = block->getArguments().back();
-      rewriter.setInsertionPointToStart(block);
-      allocator.handleBlockArgument(slot, blockArgument, rewriter);
+      BlockArgument blockArgument =
+          block->addArgument(slot.elemType, slot.ptr.getLoc());
+      builder.setInsertionPointToStart(block);
+      allocator.handleBlockArgument(slot, blockArgument, builder);
       job.reachingDef = blockArgument;
 
       if (statistics.newBlockArgumentAmount)
@@ -514,10 +489,8 @@ void MemorySlotPromoter::computeReachingDefInRegion(Region *region,
     if (auto terminator = dyn_cast(block->getTerminator())) {
       for (BlockOperand &blockOperand : terminator->getBlockOperands()) {
         if (info.mergePoints.contains(blockOperand.get())) {
-          rewriter.modifyOpInPlace(terminator, [&]() {
-            terminator.getSuccessorOperands(blockOperand.getOperandNumber())
-                .append(job.reachingDef);
-          });
+          terminator.getSuccessorOperands(blockOperand.getOperandNumber())
+              .append(job.reachingDef);
         }
       }
     }
@@ -569,9 +542,9 @@ void MemorySlotPromoter::removeBlockingUses() {
       if (!reachingDef)
         reachingDef = getOrCreateDefaultValue();
 
-      rewriter.setInsertionPointAfter(toPromote);
+      builder.setInsertionPointAfter(toPromote);
       if (toPromoteMemOp.removeBlockingUses(
-              slot, info.userToBlockingUses[toPromote], rewriter, reachingDef,
+              slot, info.userToBlockingUses[toPromote], builder, reachingDef,
               dataLayout) == DeletionKind::Delete)
         toErase.push_back(toPromote);
       if (toPromoteMemOp.storesTo(slot))
@@ -581,20 +554,20 @@ void MemorySlotPromoter::removeBlockingUses() {
     }
 
     auto toPromoteBasic = cast(toPromote);
-    rewriter.setInsertionPointAfter(toPromote);
+    builder.setInsertionPointAfter(toPromote);
     if (toPromoteBasic.removeBlockingUses(info.userToBlockingUses[toPromote],
-                                          rewriter) == DeletionKind::Delete)
+                                          builder) == DeletionKind::Delete)
       toErase.push_back(toPromote);
     if (toPromoteBasic.requiresReplacedValues())
       toVisit.push_back(toPromoteBasic);
   }
   for (PromotableOpInterface op : toVisit) {
-    rewriter.setInsertionPointAfter(op);
-    op.visitReplacedValues(replacedValuesList, rewriter);
+    builder.setInsertionPointAfter(op);
+    op.visitReplacedValues(replacedValuesList, builder);
   }
 
   for (Operation *toEraseOp : toErase)
-    rewriter.eraseOp(toEraseOp);
+    toEraseOp->erase();
 
   assert(slot.ptr.use_empty() &&
          "after promotion, the slot pointer should not be used anymore");
@@ -617,8 +590,7 @@ void MemorySlotPromoter::promoteSlot() {
       assert(succOperands.size() == mergePoint->getNumArguments() ||
              succOperands.size() + 1 == mergePoint->getNumArguments());
       if (succOperands.size() + 1 == mergePoint->getNumArguments())
-        rewriter.modifyOpInPlace(
-            user, [&]() { succOperands.append(getOrCreateDefaultValue()); });
+        succOperands.append(getOrCreateDefaultValue());
     }
   }
 
@@ -628,13 +600,12 @@ void MemorySlotPromoter::promoteSlot() {
   if (statistics.promotedAmount)
     (*statistics.promotedAmount)++;
 
-  allocator.handlePromotionComplete(slot, defaultValue, rewriter);
+  allocator.handlePromotionComplete(slot, defaultValue, builder);
 }
 
 LogicalResult mlir::tryToPromoteMemorySlots(
-    ArrayRef allocators,
-    RewriterBase &rewriter, const DataLayout &dataLayout,
-    Mem2RegStatistics statistics) {
+    ArrayRef allocators, OpBuilder &builder,
+    const DataLayout &dataLayout, Mem2RegStatistics statistics) {
   bool promotedAny = false;
 
   for (PromotableAllocationOpInterface allocator : allocators) {
@@ -646,7 +617,7 @@ LogicalResult mlir::tryToPromoteMemorySlots(
       MemorySlotPromotionAnalyzer analyzer(slot, dominance, dataLayout);
       std::optional info = analyzer.computeInfo();
       if (info) {
-        MemorySlotPromoter(slot, allocator, rewriter, dominance, dataLayout,
+        MemorySlotPromoter(slot, allocator, builder, dominance, dataLayout,
                            std::move(*info), statistics)
             .promoteSlot();
         promotedAny = true;
@@ -674,7 +645,6 @@ struct Mem2Reg : impl::Mem2RegBase {
         continue;
 
       OpBuilder builder(®ion.front(), region.front().begin());
-      IRRewriter rewriter(builder);
 
       // Promoting a slot can allow for further promotion of other slots,
       // promotion is tried until no promotion succeeds.
@@ -689,7 +659,7 @@ struct Mem2Reg : impl::Mem2RegBase {
         const DataLayout &dataLayout = dataLayoutAnalysis.getAtOrAbove(scopeOp);
 
         // Attempt promoting until no promotion succeeds.
-        if (failed(tryToPromoteMemorySlots(allocators, rewriter, dataLayout,
+        if (failed(tryToPromoteMemorySlots(allocators, builder, dataLayout,
                                            statistics)))
           break;
 
diff --git a/mlir/lib/Transforms/SROA.cpp b/mlir/lib/Transforms/SROA.cpp
index f24cbb7b1725cc..4e28fa687ffd43 100644
--- a/mlir/lib/Transforms/SROA.cpp
+++ b/mlir/lib/Transforms/SROA.cpp
@@ -134,15 +134,14 @@ computeDestructuringInfo(DestructurableMemorySlot &slot,
 /// subslots as specified by its allocator.
 static void destructureSlot(DestructurableMemorySlot &slot,
                             DestructurableAllocationOpInterface allocator,
-                            RewriterBase &rewriter,
-                            const DataLayout &dataLayout,
+                            OpBuilder &builder, const DataLayout &dataLayout,
                             MemorySlotDestructuringInfo &info,
                             const SROAStatistics &statistics) {
-  RewriterBase::InsertionGuard guard(rewriter);
+  OpBuilder::InsertionGuard guard(builder);
 
-  rewriter.setInsertionPointToStart(slot.ptr.getParentBlock());
+  builder.setInsertionPointToStart(slot.ptr.getParentBlock());
   DenseMap subslots =
-      allocator.destructure(slot, info.usedIndices, rewriter);
+      allocator.destructure(slot, info.usedIndices, builder);
 
   if (statistics.slotsWithMemoryBenefit &&
       slot.elementPtrs.size() != info.usedIndices.size())
@@ -160,9 +159,9 @@ static void destructureSlot(DestructurableMemorySlot &slot,
 
   llvm::SmallVector toErase;
   for (Operation *toRewire : llvm::reverse(usersToRewire)) {
-    rewriter.setInsertionPointAfter(toRewire);
+    builder.setInsertionPointAfter(toRewire);
     if (auto accessor = dyn_cast(toRewire)) {
-      if (accessor.rewire(slot, subslots, rewriter, dataLayout) ==
+      if (accessor.rewire(slot, subslots, builder, dataLayout) ==
           DeletionKind::Delete)
         toErase.push_back(accessor);
       continue;
@@ -170,12 +169,12 @@ static void destructureSlot(DestructurableMemorySlot &slot,
 
     auto promotable = cast(toRewire);
     if (promotable.removeBlockingUses(info.userToBlockingUses[promotable],
-                                      rewriter) == DeletionKind::Delete)
+                                      builder) == DeletionKind::Delete)
       toErase.push_back(promotable);
   }
 
   for (Operation *toEraseOp : toErase)
-    rewriter.eraseOp(toEraseOp);
+    toEraseOp->erase();
 
   assert(slot.ptr.use_empty() && "after destructuring, the original slot "
                                  "pointer should no longer be used");
@@ -186,12 +185,12 @@ static void destructureSlot(DestructurableMemorySlot &slot,
   if (statistics.destructuredAmount)
     (*statistics.destructuredAmount)++;
 
-  allocator.handleDestructuringComplete(slot, rewriter);
+  allocator.handleDestructuringComplete(slot, builder);
 }
 
 LogicalResult mlir::tryToDestructureMemorySlots(
     ArrayRef allocators,
-    RewriterBase &rewriter, const DataLayout &dataLayout,
+    OpBuilder &builder, const DataLayout &dataLayout,
     SROAStatistics statistics) {
   bool destructuredAny = false;
 
@@ -202,7 +201,7 @@ LogicalResult mlir::tryToDestructureMemorySlots(
       if (!info)
         continue;
 
-      destructureSlot(slot, allocator, rewriter, dataLayout, *info, statistics);
+      destructureSlot(slot, allocator, builder, dataLayout, *info, statistics);
       destructuredAny = true;
     }
   }
@@ -230,7 +229,6 @@ struct SROA : public impl::SROABase {
         continue;
 
       OpBuilder builder(®ion.front(), region.front().begin());
-      IRRewriter rewriter(builder);
 
       // Destructuring a slot can allow for further destructuring of other
       // slots, destructuring is tried until no destructuring succeeds.
@@ -243,7 +241,7 @@ struct SROA : public impl::SROABase {
           allocators.emplace_back(allocator);
         });
 
-        if (failed(tryToDestructureMemorySlots(allocators, rewriter, dataLayout,
+        if (failed(tryToDestructureMemorySlots(allocators, builder, dataLayout,
                                                statistics)))
           break;
 

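On the driver side, the passes now hand the interfaces a plain OpBuilder
anchored at the start of the region instead of wrapping it in an IRRewriter.
A minimal sketch of such a caller, assuming only the tryToPromoteMemorySlots
signature introduced in this patch (the helper name is hypothetical):

#include "mlir/IR/Builders.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
#include "mlir/Transforms/Mem2Reg.h"

using namespace mlir;

// Hypothetical driver: given allocators collected elsewhere, promote them with
// a builder positioned at the start of the region, as the updated pass does.
static void promoteInRegion(Region &region, const DataLayout &dataLayout,
                            ArrayRef<PromotableAllocationOpInterface> allocators) {
  if (region.empty() || allocators.empty())
    return;
  OpBuilder builder(&region.front(), region.front().begin());
  // Statistics counters are optional; a default-constructed set is ignored.
  (void)tryToPromoteMemorySlots(allocators, builder, dataLayout,
                                Mem2RegStatistics());
}
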
From a99ce615f19fec6fbb835490b89f53cba3cf9eff Mon Sep 17 00:00:00 2001
From: jyu2-git 
Date: Tue, 7 May 2024 23:11:07 -0700
Subject: [PATCH 61/64] Revert "Revert "[OpenMP][TR12] change property of
 map-type modifier."… (#91141)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

… (#90885)"

This reverts commit eea81aa29848361eb5b24f24d2af643fdeb9adfd.

Also change isMapType as @vitalybuka suggested. Hopefully this fixes the
sanitizer build problem.
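
In OpenMP 6.0 the map type is parsed as part of the modifier list, so it may
appear in any position but at most once, and an empty
modifier-specification-list before the colon is diagnosed. A small illustration
of the resulting behavior (not part of the patch; the exact diagnostics are the
ones added below):

// Hypothetical usage, compiled with -fopenmp -fopenmp-version=60.
void use(int x) {
#pragma omp target map(from, always : x)          // map type before a modifier: accepted
  {}
#pragma omp target map(always, close, tofrom : x) // traditional order still accepted
  {}
  // Newly diagnosed in 6.0:
  //   map(tofrom from : x)  ->  error: map type is already specified
  //   map(: x)              ->  error: empty modifier-specification-list is not allowed
}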
---
 .../clang/Basic/DiagnosticParseKinds.td       |   5 +
 clang/lib/Parse/ParseOpenMP.cpp               |  51 +++++++--
 clang/test/OpenMP/target_ast_print.cpp        |  58 ++++++++++
 clang/test/OpenMP/target_map_messages.cpp     | 105 ++++++++++--------
 4 files changed, 165 insertions(+), 54 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index fdffb35ea0d955..44bc4e0e130de8 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1438,6 +1438,9 @@ def err_omp_decl_in_declare_simd_variant : Error<
 def err_omp_sink_and_source_iteration_not_allowd: Error<" '%0 %select{sink:|source:}1' must be with '%select{omp_cur_iteration - 1|omp_cur_iteration}1'">;
 def err_omp_unknown_map_type : Error<
   "incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'">;
+def err_omp_more_one_map_type : Error<"map type is already specified">;
+def note_previous_map_type_specified_here
+    : Note<"map type '%0' is previous specified here">;
 def err_omp_unknown_map_type_modifier : Error<
   "incorrect map type modifier, expected one of: 'always', 'close', 'mapper'"
   "%select{|, 'present'|, 'present', 'iterator'}0%select{|, 'ompx_hold'}1">;
@@ -1445,6 +1448,8 @@ def err_omp_map_type_missing : Error<
   "missing map type">;
 def err_omp_map_type_modifier_missing : Error<
   "missing map type modifier">;
+def err_omp_map_modifier_specification_list : Error<
+  "empty modifier-specification-list is not allowed">;
 def err_omp_declare_simd_inbranch_notinbranch : Error<
   "unexpected '%0' clause, '%1' is specified already">;
 def err_omp_expected_clause_argument
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 18ba1185ee8de7..5265d8f1922c31 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -4228,13 +4228,20 @@ bool Parser::parseMapperModifier(SemaOpenMP::OpenMPVarListDataTy &Data) {
   return T.consumeClose();
 }
 
+static OpenMPMapClauseKind isMapType(Parser &P);
+
 /// Parse map-type-modifiers in map clause.
-/// map([ [map-type-modifier[,] [map-type-modifier[,] ...] map-type : ] list)
+/// map([ [map-type-modifier[,] [map-type-modifier[,] ...] [map-type] : ] list)
 /// where, map-type-modifier ::= always | close | mapper(mapper-identifier) |
 /// present
+/// where, map-type ::= alloc | delete | from | release | to | tofrom
 bool Parser::parseMapTypeModifiers(SemaOpenMP::OpenMPVarListDataTy &Data) {
+  bool HasMapType = false;
+  SourceLocation PreMapLoc = Tok.getLocation();
+  StringRef PreMapName = "";
   while (getCurToken().isNot(tok::colon)) {
     OpenMPMapModifierKind TypeModifier = isMapModifier(*this);
+    OpenMPMapClauseKind MapKind = isMapType(*this);
     if (TypeModifier == OMPC_MAP_MODIFIER_always ||
         TypeModifier == OMPC_MAP_MODIFIER_close ||
         TypeModifier == OMPC_MAP_MODIFIER_present ||
@@ -4257,6 +4264,19 @@ bool Parser::parseMapTypeModifiers(SemaOpenMP::OpenMPVarListDataTy &Data) {
         Diag(Data.MapTypeModifiersLoc.back(), diag::err_omp_missing_comma)
             << "map type modifier";
 
+    } else if (getLangOpts().OpenMP >= 60 && MapKind != OMPC_MAP_unknown) {
+      if (!HasMapType) {
+        HasMapType = true;
+        Data.ExtraModifier = MapKind;
+        MapKind = OMPC_MAP_unknown;
+        PreMapLoc = Tok.getLocation();
+        PreMapName = Tok.getIdentifierInfo()->getName();
+      } else {
+        Diag(Tok, diag::err_omp_more_one_map_type);
+        Diag(PreMapLoc, diag::note_previous_map_type_specified_here)
+            << PreMapName;
+      }
+      ConsumeToken();
     } else {
       // For the case of unknown map-type-modifier or a map-type.
       // Map-type is followed by a colon; the function returns when it
@@ -4267,8 +4287,14 @@ bool Parser::parseMapTypeModifiers(SemaOpenMP::OpenMPVarListDataTy &Data) {
         continue;
       }
       // Potential map-type token as it is followed by a colon.
-      if (PP.LookAhead(0).is(tok::colon))
-        return false;
+      if (PP.LookAhead(0).is(tok::colon)) {
+        if (getLangOpts().OpenMP >= 60) {
+          break;
+        } else {
+          return false;
+        }
+      }
+
       Diag(Tok, diag::err_omp_unknown_map_type_modifier)
           << (getLangOpts().OpenMP >= 51 ? (getLangOpts().OpenMP >= 52 ? 2 : 1)
                                          : 0)
@@ -4278,6 +4304,14 @@ bool Parser::parseMapTypeModifiers(SemaOpenMP::OpenMPVarListDataTy &Data) {
     if (getCurToken().is(tok::comma))
       ConsumeToken();
   }
+  if (getLangOpts().OpenMP >= 60 && !HasMapType) {
+    if (!Tok.is(tok::colon)) {
+      Diag(Tok, diag::err_omp_unknown_map_type);
+      ConsumeToken();
+    } else {
+      Data.ExtraModifier = OMPC_MAP_unknown;
+    }
+  }
   return false;
 }
 
@@ -4289,13 +4323,12 @@ static OpenMPMapClauseKind isMapType(Parser &P) {
   if (!Tok.isOneOf(tok::identifier, tok::kw_delete))
     return OMPC_MAP_unknown;
   Preprocessor &PP = P.getPreprocessor();
-  OpenMPMapClauseKind MapType =
-      static_cast(getOpenMPSimpleClauseType(
-          OMPC_map, PP.getSpelling(Tok), P.getLangOpts()));
+  unsigned MapType =
+      getOpenMPSimpleClauseType(OMPC_map, PP.getSpelling(Tok), P.getLangOpts());
   if (MapType == OMPC_MAP_to || MapType == OMPC_MAP_from ||
       MapType == OMPC_MAP_tofrom || MapType == OMPC_MAP_alloc ||
       MapType == OMPC_MAP_delete || MapType == OMPC_MAP_release)
-    return MapType;
+    return static_cast(MapType);
   return OMPC_MAP_unknown;
 }
 
@@ -4679,8 +4712,10 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
     // Only parse map-type-modifier[s] and map-type if a colon is present in
     // the map clause.
     if (ColonPresent) {
+      if (getLangOpts().OpenMP >= 60 && getCurToken().is(tok::colon))
+        Diag(Tok, diag::err_omp_map_modifier_specification_list);
       IsInvalidMapperModifier = parseMapTypeModifiers(Data);
-      if (!IsInvalidMapperModifier)
+      if (getLangOpts().OpenMP < 60 && !IsInvalidMapperModifier)
         parseMapType(*this, Data);
       else
         SkipUntil(tok::colon, tok::annot_pragma_openmp_end, StopBeforeMatch);
diff --git a/clang/test/OpenMP/target_ast_print.cpp b/clang/test/OpenMP/target_ast_print.cpp
index f4c10fe3a18194..ac5ed285d97e68 100644
--- a/clang/test/OpenMP/target_ast_print.cpp
+++ b/clang/test/OpenMP/target_ast_print.cpp
@@ -1201,6 +1201,64 @@ foo();
 }
 #endif // OMP52
 
+#ifdef OMP60
+
+///==========================================================================///
+// RUN: %clang_cc1 -DOMP60 -verify -Wno-vla -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix OMP60
+// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -include-pch %t -fsyntax-only -verify -Wno-vla %s -ast-print | FileCheck %s --check-prefix OMP60
+
+// RUN: %clang_cc1 -DOMP60 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix OMP60
+// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -include-pch %t -fsyntax-only -verify -Wno-vla %s -ast-print | FileCheck %s --check-prefix OMP60
+
+void foo() {}
+template 
+T tmain(T argc, T *argv) {
+  T i;
+#pragma omp target map(from always: i)
+  foo();
+#pragma omp target map(from, close: i)
+  foo();
+#pragma omp target map(always,close: i)
+  foo();
+  return 0;
+}
+//OMP60: template  T tmain(T argc, T *argv) {
+//OMP60-NEXT: T i;
+//OMP60-NEXT: #pragma omp target map(always,from: i)
+//OMP60-NEXT:     foo();
+//OMP60-NEXT: #pragma omp target map(close,from: i)
+//OMP60-NEXT:     foo();
+//OMP60-NEXT: #pragma omp target map(always,close,tofrom: i)
+//OMP60-NEXT:     foo();
+//OMP60-NEXT: return 0;
+//OMP60-NEXT:}
+//OMP60:  template<> int tmain(int argc, int *argv) {
+//OMP60-NEXT:  int i;
+//OMP60-NEXT:  #pragma omp target map(always,from: i)
+//OMP60-NEXT:      foo();
+//OMP60-NEXT:  #pragma omp target map(close,from: i)
+//OMP60-NEXT:      foo();
+//OMP60-NEXT:  #pragma omp target map(always,close,tofrom: i)
+//OMP60-NEXT:      foo();
+//OMP60-NEXT:  return 0;
+//OMP60-NEXT:}
+//OMP60:  template<> char tmain(char argc, char *argv) {
+//OMP60-NEXT:  char i;
+//OMP60-NEXT:  #pragma omp target map(always,from: i)
+//OMP60-NEXT:      foo();
+//OMP60-NEXT:  #pragma omp target map(close,from: i)
+//OMP60-NEXT:      foo();
+//OMP60-NEXT:  #pragma omp target map(always,close,tofrom: i)
+//OMP60-NEXT:      foo();
+//OMP60-NEXT:  return 0;
+//OMP60-NEXT:}
+int main (int argc, char **argv) {
+  return tmain(argc, &argc) + tmain(argv[0][0], argv[0]);
+}
+#endif // OMP60
+
 #ifdef OMPX
 
 // RUN: %clang_cc1 -DOMPX -verify -Wno-vla -fopenmp -fopenmp-extensions -ast-print %s | FileCheck %s --check-prefix=OMPX
diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp
index a6776ee12c0ee2..3bd432b47e637f 100644
--- a/clang/test/OpenMP/target_map_messages.cpp
+++ b/clang/test/OpenMP/target_map_messages.cpp
@@ -1,34 +1,35 @@
 // -fopenmp, -fno-openmp-extensions
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,ge51,omp,ge51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=51 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,ge51,ge52,omp,ge52-omp,omp52 -fopenmp -fno-openmp-extensions -fopenmp-version=52 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,omp,ge51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=51 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,ge51,ge52,lt60,omp,ge52-omp,omp52 -fopenmp -fno-openmp-extensions -fopenmp-version=52 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,ge52,ge60,omp,ge60-omp,omp60 -fopenmp -fno-openmp-extensions -fopenmp-version=60 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
 // RUN: %clang_cc1 -DCCODE -verify -fopenmp -fno-openmp-extensions -ferror-limit 300 -x c %s -Wno-openmp -Wuninitialized -Wno-vla
 
 // -fopenmp-simd, -fno-openmp-extensions
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,ge51,omp,ge51-omp -fopenmp-simd -fno-openmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,omp,ge51-omp -fopenmp-simd -fno-openmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
 // RUN: %clang_cc1 -DCCODE -verify -fopenmp-simd -fno-openmp-extensions -ferror-limit 300 -x c %s -Wno-openmp-mapping -Wuninitialized -Wno-vla
 
 // -fopenmp -fopenmp-extensions
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,ge51,ompx,ge51-ompx -fopenmp -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,ompx,ge51-ompx -fopenmp -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
 // RUN: %clang_cc1 -DCCODE -verify -fopenmp -fopenmp-extensions -ferror-limit 300 -x c %s -Wno-openmp -Wuninitialized -Wno-vla
 
 // -fopenmp-simd -fopenmp-extensions
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,lt50,lt51,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
-// RUN: %clang_cc1 -verify=expected,ge50,ge51,ompx,ge51-ompx -fopenmp-simd -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
+// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,ompx,ge51-ompx -fopenmp-simd -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla
 // RUN: %clang_cc1 -DCCODE -verify -fopenmp-simd -fopenmp-extensions -ferror-limit 300 -x c %s -Wno-openmp-mapping -Wuninitialized -Wno-vla
 
 // Check
@@ -113,7 +114,7 @@ struct SA {
     #pragma omp target map(b[true:true])
     {}
 
-    #pragma omp target map(: c,f) // expected-error {{missing map type}}
+    #pragma omp target map(: c,f) // lt60-error {{missing map type}} // ge60-error {{empty modifier-specification-list is not allowed}}
     {}
     #pragma omp target map(always, tofrom: c,f)
     {}
@@ -159,28 +160,28 @@ struct SA {
     // expected-error@+1 {{use of undeclared identifier 'present'}}
     #pragma omp target map(present)
     {}
-    // ge52-omp-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge52-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(ompx_hold, tofrom: c,f)
     {}
-    // ge52-omp-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge52-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(ompx_hold, tofrom: c[1:2],f)
     {}
-    // ge52-omp-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge52-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(ompx_hold, tofrom: c,f[1:2])
     {}
-    // ge52-omp-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge52-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
     // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(ompx_hold, tofrom: c[:],f)
     {}
-    // ge52-omp-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge52-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
     // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
@@ -193,19 +194,19 @@ struct SA {
     {}
     #pragma omp target map(always, close, always, close, tofrom: a)   // expected-error 2 {{same map type modifier has been specified more than once}}
     {}
+    // ge60-error@+3 {{same map type modifier has been specified more than once}}
     // ge51-error@+2 {{same map type modifier has been specified more than once}}
     // lt51-error@+1 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(present, present, tofrom: a)
     {}
-    // ge52-omp-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
-    // ge52-omp-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge52-error@+4 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // ompx-error@+3 {{same map type modifier has been specified more than once}}
     // ge51-omp-error@+2 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-omp-error@+1 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(ompx_hold, ompx_hold, tofrom: a)
     {}
-    // ge52-omp-error@+9 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
-    // ge52-omp-error@+8 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
+    // ge60-error@+9 {{same map type modifier has been specified more than once}}
+    // ge52-error@+8 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // expected-error@+7 2 {{same map type modifier has been specified more than once}}
     // ge51-error@+6 {{same map type modifier has been specified more than once}}
     // lt51-ompx-error@+5 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}}
@@ -219,34 +220,45 @@ struct SA {
     {}
     #pragma omp target map( , , tofrom: a)   // expected-error {{missing map type modifier}} expected-error {{missing map type modifier}}
     {}
-    #pragma omp target map( , , : a)   // expected-error {{missing map type modifier}} expected-error {{missing map type modifier}} expected-error {{missing map type}}
+    #pragma omp target map( , , : a)   // expected-error {{missing map type modifier}} expected-error {{missing map type modifier}} lt60-error {{missing map type}}
     {}
+    // ge60-error@+4 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
     // ge51-error@+3 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-error@+2 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     // expected-error@+1 {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}}
     #pragma omp target map( d, f, bf: a)
     {}
+    // ge60-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator}}
     // expected-error@+4 {{missing map type modifier}}
     // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
-    // expected-error@+1 {{missing map type}}
+    // lt60-error@+1 {{missing map type}}
     #pragma omp target map( , f, : a)
     {}
-    #pragma omp target map(always close: a)   // expected-error {{missing map type}} omp52-error{{missing ',' after map type modifier}}
+    #pragma omp target map(always close: a)   // lt60-error {{missing map type}} ge52-error{{missing ',' after map type modifier}}
     {}
-    #pragma omp target map(always close bf: a)   // omp52-error 2 {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}} 
+    #pragma omp target map(always close bf: a)   // ge52-error 2 {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}}
     {}
-    // omp52-error@+4 {{missing ',' after map type modifier}}
+    // ge52-error@+4 {{missing ',' after map type modifier}}
     // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
-    // expected-error@+1 {{missing map type}}
+    // lt60-error@+1 {{missing map type}}
     #pragma omp target map(always tofrom close: a)
     {}
+    // ge60-note@+4 {{map type 'tofrom' is previous specified here}}
+    // ge60-error@+3 {{map type is already specified}}
     // ge51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
     // lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(tofrom from: a)
     {}
-    #pragma omp target map(close bf: a)   // omp52-error {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}}
+    // ge60-note@+5 {{map type 'to' is previous specified here}}
+    // ge60-error@+4 {{map type is already specified}}
+    // ge52-error@+3 {{missing ',' after map type modifier}}
+    // ge51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
+    // lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
+    #pragma omp target map(to always from: a)
+    {}
+    #pragma omp target map(close bf: a)   // ge52-error {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}}
     {}
     #pragma omp target map(([b[I]][bf])f)  // lt50-error {{expected ',' or ']' in lambda capture list}} lt50-error {{expected ')'}} lt50-note {{to match this '('}}
     {}
@@ -266,6 +278,7 @@ struct SA {
     // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
     #pragma omp target map(iterator(it=0:10, it=0:20), tofrom:a)
     {}
+    // ge60-error@+7 {{expected '(' after 'iterator'}}
     // ge51-ompx-error@+6 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'ompx_hold'}}
     // lt51-ompx-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'ompx_hold'}}
     // lt51-error@+4 {{expected '(' after 'iterator'}}
@@ -694,20 +707,20 @@ T tmain(T argc) {
   foo();
 
 #pragma omp target data map(always, tofrom: x)
-#pragma omp target data map(always: x) // expected-error {{missing map type}}
+#pragma omp target data map(always: x) // lt60-error {{missing map type}}
 // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
 // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
-// expected-error@+1 {{missing map type}}
+// lt60-error@+1 {{missing map type}}
 #pragma omp target data map(tofrom, always: x)
 #pragma omp target data map(always, tofrom: always, tofrom, x)
 #pragma omp target map(tofrom j) // expected-error {{expected ',' or ')' in 'map' clause}}
   foo();
 
 #pragma omp target data map(close, tofrom: x)
-#pragma omp target data map(close: x) // expected-error {{missing map type}}
+#pragma omp target data map(close: x) // lt60-error {{missing map type}}
 // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
 // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
-// expected-error@+1 {{missing map type}}
+// lt60-error@+1 {{missing map type}}
 #pragma omp target data map(tofrom, close: x)
 #pragma omp target data map(close, tofrom: close, tofrom, x)
   foo();
@@ -829,19 +842,19 @@ int main(int argc, char **argv) {
   foo();
 
 #pragma omp target data map(always, tofrom: x)
-#pragma omp target data map(always: x) // expected-error {{missing map type}}
+#pragma omp target data map(always: x) // lt60-error {{missing map type}}
 // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
 // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
-// expected-error@+1 {{missing map type}}
+// lt60-error@+1 {{missing map type}}
 #pragma omp target data map(tofrom, always: x)
 #pragma omp target data map(always, tofrom: always, tofrom, x)
 #pragma omp target map(tofrom j) // expected-error {{expected ',' or ')' in 'map' clause}}
   foo();
 #pragma omp target data map(close, tofrom: x)
-#pragma omp target data map(close: x) // expected-error {{missing map type}}
+#pragma omp target data map(close: x) // lt60-error {{missing map type}}
 // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
 // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
-// expected-error@+1 {{missing map type}}
+// lt60-error@+1 {{missing map type}}
 #pragma omp target data map(tofrom, close: x)
   foo();
 // lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}

From 8755d24cb34b902557469445e1983850e0ce7cc7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 7 May 2024 23:25:33 -0700
Subject: [PATCH 62/64] [compiler-rt][ctx_profile] Fix signed-ness warnings in
 test

Follow-up to PR #89838. Some build bots treat the signed/unsigned
comparison warnings in CtxInstrProfilingTest as errors.

Example: https://lab.llvm.org/buildbot/#/builders/37/builds/34610
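
As a hedged illustration (not part of this change): EXPECT_EQ compares
its two arguments through a templated helper, so pairing an unsigned
accessor with a plain signed literal turns into a signed/unsigned
comparison inside that helper. The stand-in below uses assumed names
and types purely to show why the U suffixes silence the warning:

    #include <cstdint>

    // Simplified, hypothetical stand-in for EXPECT_EQ's comparison helper.
    template <typename T1, typename T2>
    bool cmpEq(const T1 &Expected, const T2 &Actual) {
      // -Wsign-compare fires here when T1 and T2 differ in signedness.
      return Expected == Actual;
    }

    std::uint64_t guid() { return 1; } // assumed unsigned, like the accessors below

    bool check() {
      // cmpEq(guid(), 1);      // uint64_t vs. int: warning, error under -Werror
      return cmpEq(guid(), 1U); // uint64_t vs. unsigned: clean
    }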
---
 .../lib/ctx_profile/tests/CtxInstrProfilingTest.cpp  | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index f6ebe6ab2e50c5..1e96aea19ce471 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -178,19 +178,19 @@ TEST_F(ContextTest, Dump) {
 
     bool write(const ContextNode &Node) {
       EXPECT_FALSE(Root->Taken.TryLock());
-      EXPECT_EQ(Node.guid(), 1);
+      EXPECT_EQ(Node.guid(), 1U);
       EXPECT_EQ(Node.counters()[0], Entries);
-      EXPECT_EQ(Node.counters_size(), 10);
-      EXPECT_EQ(Node.callsites_size(), 4);
+      EXPECT_EQ(Node.counters_size(), 10U);
+      EXPECT_EQ(Node.callsites_size(), 4U);
       EXPECT_EQ(Node.subContexts()[0], nullptr);
       EXPECT_EQ(Node.subContexts()[1], nullptr);
       EXPECT_NE(Node.subContexts()[2], nullptr);
       EXPECT_EQ(Node.subContexts()[3], nullptr);
       const auto &SN = *Node.subContexts()[2];
-      EXPECT_EQ(SN.guid(), 2);
+      EXPECT_EQ(SN.guid(), 2U);
       EXPECT_EQ(SN.counters()[0], Entries);
-      EXPECT_EQ(SN.counters_size(), 3);
-      EXPECT_EQ(SN.callsites_size(), 1);
+      EXPECT_EQ(SN.counters_size(), 3U);
+      EXPECT_EQ(SN.callsites_size(), 1U);
       EXPECT_EQ(SN.subContexts()[0], nullptr);
       State = true;
       return true;

From 23ae482bd01d7c966f871ddd620e9a26d6d66299 Mon Sep 17 00:00:00 2001
From: martinboehme 
Date: Wed, 8 May 2024 08:36:53 +0200
Subject: [PATCH 63/64] [clang][dataflow] Allow `DataflowAnalysisContext` to
 use a non-owned `Solver`. (#91316)

For some callers (see the change in DataflowAnalysis.h), it is more
convenient to keep owning the `Solver` and pass it by reference than to
hand over a `std::unique_ptr`.
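
A minimal sketch of the two construction modes this enables, based on
the constructors added below (illustrative only, not code from the
patch):

    #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
    #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
    #include <memory>

    using namespace clang::dataflow;

    void ownedSolver() {
      // Existing pattern: the context takes ownership of the solver.
      DataflowAnalysisContext Ctx(std::make_unique<WatchedLiteralsSolver>());
    }

    void borrowedSolver() {
      // New pattern: the caller keeps the solver alive at least as long as
      // the context and passes it by reference, as diagnoseFunction() now
      // does with its locally owned WatchedLiteralsSolver.
      WatchedLiteralsSolver Solver;
      DataflowAnalysisContext Ctx(Solver);
    }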
---
 .../Analysis/FlowSensitive/DataflowAnalysis.h |  5 ++--
 .../FlowSensitive/DataflowAnalysisContext.h   | 24 +++++++++++++++++--
 .../FlowSensitive/DataflowAnalysisContext.cpp | 10 ++++----
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index 67eccdd030dcdd..763af244547647 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -283,9 +283,8 @@ llvm::Expected> diagnoseFunction(
   if (!Context)
     return Context.takeError();
 
-  auto OwnedSolver = std::make_unique(MaxSATIterations);
-  const WatchedLiteralsSolver *Solver = OwnedSolver.get();
-  DataflowAnalysisContext AnalysisContext(std::move(OwnedSolver));
+  auto Solver = std::make_unique(MaxSATIterations);
+  DataflowAnalysisContext AnalysisContext(*Solver);
   Environment Env(AnalysisContext, FuncDecl);
   AnalysisT Analysis = createAnalysis(ASTCtx, Env);
   llvm::SmallVector Diagnostics;
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index aa2c366cb164a9..5be4a1145f40d7 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -67,7 +67,19 @@ class DataflowAnalysisContext {
   DataflowAnalysisContext(std::unique_ptr S,
                           Options Opts = Options{
                               /*ContextSensitiveOpts=*/std::nullopt,
-                              /*Logger=*/nullptr});
+                              /*Logger=*/nullptr})
+      : DataflowAnalysisContext(*S, std::move(S), Opts) {}
+
+  /// Constructs a dataflow analysis context.
+  ///
+  /// Requirements:
+  ///
+  ///  `S` must outlive the `DataflowAnalysisContext`.
+  DataflowAnalysisContext(Solver &S, Options Opts = Options{
+                                         /*ContextSensitiveOpts=*/std::nullopt,
+                                         /*Logger=*/nullptr})
+      : DataflowAnalysisContext(S, nullptr, Opts) {}
+
   ~DataflowAnalysisContext();
 
   /// Sets a callback that returns the names and types of the synthetic fields
@@ -209,6 +221,13 @@ class DataflowAnalysisContext {
     using DenseMapInfo::isEqual;
   };
 
+  /// `S` is the solver to use. `OwnedSolver` may be:
+  /// *  Null (in which case `S` is non-owned and must outlive this object), or
+  /// *  Non-null (in which case it must refer to `S`, and the
+  ///    `DataflowAnalysisContext` will take ownership of `OwnedSolver`).
+  DataflowAnalysisContext(Solver &S, std::unique_ptr &&OwnedSolver,
+                          Options Opts);
+
   // Extends the set of modeled field declarations.
   void addModeledFields(const FieldSet &Fields);
 
@@ -232,7 +251,8 @@ class DataflowAnalysisContext {
            Solver::Result::Status::Unsatisfiable;
   }
 
-  std::unique_ptr S;
+  Solver &S;
+  std::unique_ptr OwnedSolver;
   std::unique_ptr A;
 
   // Maps from program declarations and statements to storage locations that are
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index e94fd39c45dc15..4b86daa56d7b52 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -170,7 +170,7 @@ DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
 
 Solver::Result DataflowAnalysisContext::querySolver(
     llvm::SetVector Constraints) {
-  return S->solve(Constraints.getArrayRef());
+  return S.solve(Constraints.getArrayRef());
 }
 
 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
@@ -338,10 +338,10 @@ static std::unique_ptr makeLoggerFromCommandLine() {
   return Logger::html(std::move(StreamFactory));
 }
 
-DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr S,
-                                                 Options Opts)
-    : S(std::move(S)), A(std::make_unique()), Opts(Opts) {
-  assert(this->S != nullptr);
+DataflowAnalysisContext::DataflowAnalysisContext(
+    Solver &S, std::unique_ptr &&OwnedSolver, Options Opts)
+    : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique()),
+      Opts(Opts) {
   // If the -dataflow-log command-line flag was set, synthesize a logger.
   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
   // based tools.

From e44600f3ab58b0e93a2a80f18e17181c2bc007a4 Mon Sep 17 00:00:00 2001
From: Freddy Ye 
Date: Wed, 8 May 2024 15:07:18 +0800
Subject: [PATCH 64/64] [X86][CFE] Support EGPR in GCCRegNames. (#91323)

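As a hedged usage note (not part of the patch): with the names below
added to GCCRegNames and AddlRegNames, the APX extended GPRs can be
named in GCC inline-asm register variables, mirroring the new test.
The 32-bit alias on the second line is an assumption based on the
AddlRegNames entries:

    void use_egpr(void) {
        register int whole asm("r16");  // base name from GCCRegNames
        register int lower asm("r16d"); // alias registered via AddlRegNames
        asm volatile("" :: "r" (whole), "r" (lower));
    }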
---
 clang/lib/Basic/Targets/X86.cpp              |  19 ++-
 clang/test/CodeGen/X86/inline-asm-gcc-regs.c | 121 +++++++++++++++++++
 2 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGen/X86/inline-asm-gcc-regs.c

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index bf1767c87fe1ce..67e2126cf766b1 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -64,6 +64,8 @@ static const char *const GCCRegNames[] = {
     "dr0",   "dr1",   "dr2",   "dr3",   "dr6",     "dr7",
     "bnd0",  "bnd1",  "bnd2",  "bnd3",
     "tmm0",  "tmm1",  "tmm2",  "tmm3",  "tmm4",    "tmm5",  "tmm6",  "tmm7",
+    "r16",   "r17",   "r18",   "r19",   "r20",     "r21",   "r22",   "r23",
+    "r24",   "r25",   "r26",   "r27",   "r28",     "r29",   "r30",   "r31",
 };
 
 const TargetInfo::AddlRegName AddlRegNames[] = {
@@ -83,8 +85,23 @@ const TargetInfo::AddlRegName AddlRegNames[] = {
     {{"r13d", "r13w", "r13b"}, 43},
     {{"r14d", "r14w", "r14b"}, 44},
     {{"r15d", "r15w", "r15b"}, 45},
+    {{"r16d", "r16w", "r16b"}, 165},
+    {{"r17d", "r17w", "r17b"}, 166},
+    {{"r18d", "r18w", "r18b"}, 167},
+    {{"r19d", "r19w", "r19b"}, 168},
+    {{"r20d", "r20w", "r20b"}, 169},
+    {{"r21d", "r21w", "r21b"}, 170},
+    {{"r22d", "r22w", "r22b"}, 171},
+    {{"r23d", "r23w", "r23b"}, 172},
+    {{"r24d", "r24w", "r24b"}, 173},
+    {{"r25d", "r25w", "r25b"}, 174},
+    {{"r26d", "r26w", "r26b"}, 175},
+    {{"r27d", "r27w", "r27b"}, 176},
+    {{"r28d", "r28w", "r28b"}, 177},
+    {{"r29d", "r29w", "r29b"}, 178},
+    {{"r30d", "r30w", "r30b"}, 179},
+    {{"r31d", "r31w", "r31b"}, 180},
 };
-
 } // namespace targets
 } // namespace clang
 
diff --git a/clang/test/CodeGen/X86/inline-asm-gcc-regs.c b/clang/test/CodeGen/X86/inline-asm-gcc-regs.c
new file mode 100644
index 00000000000000..17adbdc20a406c
--- /dev/null
+++ b/clang/test/CodeGen/X86/inline-asm-gcc-regs.c
@@ -0,0 +1,121 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -O2 %s -o - | FileCheck %s
+
+// CHECK-LABEL: @test_r15
+// CHECK: call void asm sideeffect "", "{r15},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r15() {
+    register int a asm ("r15");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r16
+// CHECK: call void asm sideeffect "", "{r16},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r16() {
+    register int a asm ("r16");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r17
+// CHECK: call void asm sideeffect "", "{r17},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r17() {
+    register int a asm ("r17");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r18
+// CHECK: call void asm sideeffect "", "{r18},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r18() {
+    register int a asm ("r18");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r19
+// CHECK: call void asm sideeffect "", "{r19},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r19() {
+    register int a asm ("r19");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r20
+// CHECK: call void asm sideeffect "", "{r20},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r20() {
+    register int a asm ("r20");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r21
+// CHECK: call void asm sideeffect "", "{r21},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r21() {
+    register int a asm ("r21");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r22
+// CHECK: call void asm sideeffect "", "{r22},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r22() {
+    register int a asm ("r22");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r23
+// CHECK: call void asm sideeffect "", "{r23},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r23() {
+    register int a asm ("r23");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r24
+// CHECK: call void asm sideeffect "", "{r24},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r24() {
+    register int a asm ("r24");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r25
+// CHECK: call void asm sideeffect "", "{r25},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r25() {
+    register int a asm ("r25");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r26
+// CHECK: call void asm sideeffect "", "{r26},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r26() {
+    register int a asm ("r26");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r27
+// CHECK: call void asm sideeffect "", "{r27},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r27() {
+    register int a asm ("r27");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r28
+// CHECK: call void asm sideeffect "", "{r28},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r28() {
+    register int a asm ("r28");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r29
+// CHECK: call void asm sideeffect "", "{r29},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r29() {
+    register int a asm ("r29");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r30
+// CHECK: call void asm sideeffect "", "{r30},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r30() {
+    register int a asm ("r30");
+    asm ("" :: "r" (a));
+}
+
+// CHECK-LABEL: @test_r31
+// CHECK: call void asm sideeffect "", "{r31},~{dirflag},~{fpsr},~{flags}"(i32 undef)
+void test_r31() {
+    register int a asm ("r31");
+    asm ("" :: "r" (a));
+}
+